table_importer 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (52) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +18 -0
  3. data/.travis.yml +7 -0
  4. data/Gemfile +4 -0
  5. data/LICENSE +21 -0
  6. data/LICENSE.txt +22 -0
  7. data/README.md +9 -0
  8. data/Rakefile +2 -0
  9. data/lib/table_importer/copy_and_paste.rb +118 -0
  10. data/lib/table_importer/csv.rb +146 -0
  11. data/lib/table_importer/excel.rb +92 -0
  12. data/lib/table_importer/exceptions.rb +29 -0
  13. data/lib/table_importer/source.rb +149 -0
  14. data/lib/table_importer/version.rb +3 -0
  15. data/lib/table_importer.rb +3 -0
  16. data/spec/copy_and_paste_spec.rb +134 -0
  17. data/spec/csv_spec.rb +135 -0
  18. data/spec/excel_spec.rb +139 -0
  19. data/spec/files/csv/10-1359651839-google_(1).csv +0 -0
  20. data/spec/files/csv/11-1359651879-contacts_(1) (1).csv +158 -0
  21. data/spec/files/csv/11-1359651879-contacts_(1).csv +158 -0
  22. data/spec/files/csv/6-1359649307-contacts (1).csv +157 -0
  23. data/spec/files/csv/6-1359649307-contacts (2).csv +158 -0
  24. data/spec/files/csv/6-1359649307-contacts (3).csv +158 -0
  25. data/spec/files/csv/6-1359649307-contacts.csv +158 -0
  26. data/spec/files/csv/7-1359650836-6-1359649307-contacts.csv +158 -0
  27. data/spec/files/csv/8-1359651745-contacts.csv +158 -0
  28. data/spec/files/csv/9-1359651826-google_(1).csv +0 -0
  29. data/spec/files/csv/bad_headers_2.csv +45 -0
  30. data/spec/files/csv/csv_headers.csv +55 -0
  31. data/spec/files/csv/csv_no_headers.csv +5 -0
  32. data/spec/files/csv/edge_cases.csv +16 -0
  33. data/spec/files/csv/hong_kong.csv +1150 -0
  34. data/spec/files/csv/hong_kong_no_headers.csv +9 -0
  35. data/spec/files/csv/hong_kong_small.csv +10 -0
  36. data/spec/files/csv/mexico2013_pressdoc.csv +3248 -0
  37. data/spec/files/csv/no_content.csv +22 -0
  38. data/spec/files/csv/semicolon.csv +214 -0
  39. data/spec/files/csv/with_headers.csv +10 -0
  40. data/spec/files/csv/with_headers_large.csv +10760 -0
  41. data/spec/files/csv/without_headers.csv +9 -0
  42. data/spec/files/excel/edge_cases.xls +0 -0
  43. data/spec/files/excel/no_content.xls +0 -0
  44. data/spec/files/excel/no_content.xlsx +0 -0
  45. data/spec/files/excel/with_headers.xls +0 -0
  46. data/spec/files/excel/with_headers_large.xls +0 -0
  47. data/spec/files/excel/with_headers_large.xlsx +0 -0
  48. data/spec/files/excel/without_headers.xls +0 -0
  49. data/spec/spec_helper.rb +20 -0
  50. data/table_importer.gemspec +32 -0
  51. data/tasks/rspec.rake +4 -0
  52. metadata +254 -0
@@ -0,0 +1,29 @@
1
# Error classes raised while importing tabular data. Every error descends
# from ImportError, so callers can rescue the whole family at once.
module Exceptions
  # Common ancestor of all import errors.
  class ImportError < StandardError; end

  # Default message describes an uploaded file without readable content.
  class EmptyFileImportError < ImportError
    def initialize(message = "The file you uploaded has no valid content to import or the content cannot be read. If there is content in your file please try copying and pasting it in instead.")
      super
    end
  end

  # Default message describes pasted data without readable content.
  class EmptyStringImportError < ImportError
    def initialize(message = "The data you pasted in has no valid content to import or it cannot be read.")
      super
    end
  end

  # Default message describes a file whose type does not match the declared one.
  class IncorrectFileError < ImportError
    def initialize(message = "Sorry, you didn't upload the type of file you said you did.")
      super
    end
  end

  # Default message describes a file that could not be processed, possibly
  # because the "has headers" choice was wrong.
  class HeaderMismatchError < ImportError
    def initialize(message = "Sorry, we couldn't process your file. Did you correctly check whether your file has headers?")
      super
    end
  end
end
@@ -0,0 +1,149 @@
1
+ module TableImporter
2
+ # require classes
3
+ # require 'import_sources/excel_source'
4
+ # require 'import_sources/string_source'
5
+ class Source
6
+
7
# Maps a symbolic separator name to the literal string it stands for.
# get_column_separator / get_record_separator translate a literal back to
# its name with SEPARATORS.key. Insertion order matters: get_sep_count walks
# the values in this order and sort_separators lets the last entry win ties.
# Frozen so the shared lookup table cannot be mutated by accident.
SEPARATORS = {comma: ",", space: " ", tab: "\t", newline_mac: "\n", semicolon: ";", pipe: "|", newline_windows: "\r\n", old_newline_mac: "\r"}.freeze
8
+ require 'table_importer/csv'
9
+ require 'table_importer/copy_and_paste'
10
+ require 'table_importer/excel'
11
+
12
# Wraps the concrete importer selected by data[:type] and stores it in
# @source; the whole data hash is handed to that importer's constructor.
#
# Raises Exceptions::IncorrectFileError when the type is none of
# 'copy_and_paste', 'csv', 'xls' or 'xlsx'.
def initialize(data)
  importer_class =
    case data[:type]
    when 'copy_and_paste' then CopyAndPaste
    when 'csv' then CSV
    when 'xls', 'xlsx' then Excel
    else raise Exceptions::IncorrectFileError.new
    end
  @source = importer_class.new(data)
end
25
+
26
# Forwards to the wrapped importer and returns whatever it reports as its type.
def get_type
  importer = @source
  importer.get_type
end
29
+
30
# Asks the wrapped importer for its column separator and returns the
# matching SEPARATORS name, or nil when the literal is not in the table.
def get_column_separator(first_line = "")
  literal = @source.get_column_separator(first_line)
  SEPARATORS.key(literal)
end
33
+
34
# Asks the wrapped importer for its record separator and returns the
# matching SEPARATORS name, or nil when the literal is not in the table.
def get_record_separator(first_line = "")
  literal = @source.get_record_separator(first_line)
  SEPARATORS.key(literal)
end
37
+
38
# Forwards to the wrapped importer and returns its headers unchanged.
def get_headers
  importer = @source
  importer.get_headers
end
41
+
42
# Forwards both arguments to the wrapped importer's get_lines.
# Defaults are start_point = 0 and number = -1.
def get_lines(start_point = 0, number = -1)
  importer = @source
  importer.get_lines(start_point, number)
end
45
+
46
# Forwards to the wrapped importer and returns its preview lines unchanged.
def get_preview_lines
  importer = @source
  importer.get_preview_lines
end
49
+
50
# Forwards chunk_size (default 50) to the wrapped importer's get_chunks.
def get_chunks(chunk_size = 50)
  importer = @source
  importer.get_chunks(chunk_size)
end
53
+
54
# Returns placeholder header symbols :column_1 .. :column_<number>.
#
# The list is cached in @default_headers and reused while the requested
# size stays the same. The previous version returned the cached list no
# matter what +number+ was passed, so a later call asking for a different
# size silently got the stale list; the list is now rebuilt in that case.
def default_headers(number = 100)
  return @default_headers if @default_headers && @default_headers.size == number

  @default_headers = 1.upto(number).map { |n| "column_#{n}".to_sym }
end
60
+
61
# Counts how often each SEPARATORS literal occurs in first_line.
# Returns one single-pair hash ({ literal => count }) per separator,
# in the order the separators are declared.
def get_sep_count(first_line)
  SEPARATORS.each_value.map do |separator|
    occurrences = first_line.scan(separator).count
    { separator => occurrences }
  end
end
66
+
67
# Picks the separator with the highest count from a list of single-pair
# hashes ({ literal => count }). Later entries win ties (comparison is >=),
# and "" is returned when the list is empty.
def sort_separators(separators)
  winner = separators.reduce(["", 0]) do |(best_key, best_count), entry|
    candidate_key, candidate_count = entry.first
    candidate_count >= best_count ? [candidate_key, candidate_count] : [best_key, best_count]
  end
  winner.first
end
78
+
79
# Splits every chunk into importable lines and per-line error records.
#
# chunks               - collection of chunks; each chunk is a list of line
#                        hashes (header => value).
# compulsory_headers   - hash of required headers, passed on to
#                        check_compulsory_headers?.
# delete_empty_columns - when true, headers that never hold alphanumeric
#                        content in an accepted line are handed to
#                        remove_empty_columns.
#
# Returns an array of { :lines => [...], :errors => [...] } hashes; chunks
# that produce neither lines nor errors are dropped.
#
# Unlike the previous version this no longer raises NoMethodError when
# +chunks+ is empty or its first chunk has no lines: the header list is
# taken from the first chunk that has a line, and [] is returned when no
# chunk has one.
def clean_chunks(chunks, compulsory_headers = {}, delete_empty_columns = false)
  populated_chunk = chunks.find { |chunk| chunk.first }
  return [] unless populated_chunk

  # Start by assuming every column is empty; columns are struck off below
  # as soon as an accepted line proves otherwise.
  empty_headers = populated_chunk.first.keys
  result = []
  chunks.each do |chunk|
    new_chunk = { :lines => [], :errors => [] }
    chunk.each do |line|
      line_empty = line_empty?(line)
      no_compulsory_headers, missing_header = check_compulsory_headers?(line, compulsory_headers)
      if line_empty || no_compulsory_headers
        new_chunk[:errors] << format_error(line, line_empty, no_compulsory_headers, compulsory_headers, missing_header)
      else
        if delete_empty_columns
          line.each do |key, value|
            if value.present? && value.to_s.gsub(/[^A-Za-z0-9]/, '').present?
              empty_headers.delete(key)
            end
          end
        end
        new_chunk[:lines] << line
      end
    end
    result << new_chunk unless new_chunk[:lines].empty? && new_chunk[:errors].empty?
  end
  remove_empty_columns(result, empty_headers) if delete_empty_columns
  result
end
107
+
108
+ private
109
# True when every value in the line hash is judged garbage by
# line_item_is_garbage? (and therefore also for a line with no entries).
def line_empty?(line)
  line.none? { |_key, value| !line_item_is_garbage?(value) }
end
112
+
113
# Checks the line against the compulsory headers; only :email is handled.
#
# Returns false when nothing is missing, otherwise [true, "email"].
# Side effect: when the line has an :email key, its value is replaced by
# the result of clean_email before it is validated.
def check_compulsory_headers?(line, compulsory_headers)
  return false unless compulsory_headers.key?(:email)

  if line.key?(:email)
    cleaned = clean_email(line[:email])
    line[:email] = cleaned
    return true, "email" if cleaned.nil? || !cleaned.to_s.match(/@\S/)
  end

  # Without a usable :email column, any value that looks like an address
  # ("@" followed by a non-space character) satisfies the requirement.
  return false if line.values.any? { |value| /@\S/ =~ value.to_s }

  # here perform other checks for other compulsory headers we might have.
  return true, "email"
end
124
+
125
# Strips leading and trailing runs of non-alphanumeric (ASCII) characters
# from an e-mail value, e.g. the angle brackets in "<nick@pr.co>".
#
# The previous version used gsub with a pattern anchored at \A, which can
# match only once, so only a single junk character was removed per side
# ("<<a@b.co>>" became "<a@b.co>"). The whole run is now removed.
#
# Returns nil when email is nil or false; otherwise the cleaned String.
def clean_email(email)
  return unless email

  email.to_s.sub(/\A[^A-Za-z0-9]+/, '').sub(/[^A-Za-z0-9]+\z/, '')
end
130
+
131
# A value is garbage when it is not present. Relies on the #present? /
# #blank? extensions, which are not defined in this file (presumably
# ActiveSupport; verify against the gemspec).
def line_item_is_garbage?(item_value)
  !item_value.present?
end
134
+
135
# Builds the error record stored for a line that was not imported.
#
# line                  - the rejected line hash.
# line_empty            - true when the line had no content.
# no_compulsory_headers - result flag from check_compulsory_headers?.
# compulsory_headers    - unused here; kept so existing callers keep working.
# missing_header        - name of the missing header, used in the message.
#
# Returns { :level => :error, :message => String, :data => Hash }.
#
# Fix: the "required headers" reason used to begin with a space, which
# produced a double space ("because  it did not contain ...").
def format_error(line, line_empty, no_compulsory_headers, compulsory_headers, missing_header)
  reason =
    if line_empty
      "it did not have any content"
    else
      "it did not contain this/these required headers: #{missing_header}"
    end

  {
    :level => :error,
    :message => "The following line was not imported because #{reason}.",
    :data => {:line => line, :line_empty => line_empty, :headers => no_compulsory_headers}
  }
end
139
+
140
# Marks the given headers as empty by writing "empty_column" under each of
# them in the FIRST line of every chunk, then returns the chunks.
#
# Chunks without any accepted line (only errors) are skipped; the previous
# version raised NoMethodError on nil for them.
def remove_empty_columns(chunks, headers)
  chunks.each do |chunk|
    first_line = chunk[:lines].first
    next if first_line.nil?

    headers.each { |header| first_line[header] = "empty_column" }
  end
  chunks
end
148
+ end
149
+ end
@@ -0,0 +1,3 @@
1
module TableImporter
  # Version string of the gem.
  VERSION = '0.0.1'
end
@@ -0,0 +1,3 @@
1
+ require "table_importer/version"
2
+ require 'table_importer/source'
3
+
@@ -0,0 +1,134 @@
1
+ # encoding: UTF-8
2
+ require 'spec_helper'
3
+
4
+ describe TableImporter::Source do
5
+
6
context 'when source is a string it' do

  # Pasted data with no header mapping and blank separators.
  # A fresh hash is built on every call, as the inline literals were.
  def plain_options
    {:content => "nick@pr.co, dennis@pr.co, lorenzo@pr.co", :headers_present => false, :headers => nil, :user_headers => nil, :type => "copy_and_paste", :col_sep => "", :rec_sep => "", :compulsory_headers => {:email => true}}
  end

  # Header mapping in which only "email" has a value ("0").
  def email_first_headers
    {"first_name"=>"", "last_name"=>"", "salutation"=>"", "tag_list"=>"", "email"=>"0", "organization"=>"", "url"=>"", "phone"=>"", "job_title"=>"", "second_url"=>"", "notes"=>"", "twitter_username"=>"", "skype_username"=>"", "pinterest_username"=>"", "instagram_username"=>"", "facebook_username"=>"", "last_name_prefix"=>"", "second_email"=>"", "phone_mobile"=>"", "street"=>"", "street_number"=>"", "zipcode"=>"", "city"=>"", "country"=>""}
  end

  # Same pasted data with the header mapping and explicit separators.
  def mapped_options
    plain_options.merge(:headers => email_first_headers, :col_sep => :space, :rec_sep => :comma)
  end

  before(:each) do
    @source = TableImporter::Source.new(plain_options)
  end

  it "creates a source object" do
    TableImporter::Source.new(plain_options)
  end

  it "gets the correct copy and paste chunks" do
    mapped = TableImporter::Source.new(mapped_options)
    mapped.get_chunks.first[:lines].first[:email].should eql("nick@pr.co")
  end

  it "has the correct number of lines" do
    mapped = TableImporter::Source.new(mapped_options)
    mapped.get_chunks(1).count.should eql(3)
  end

  it "has the correct number of chunks" do
    mapped = TableImporter::Source.new(mapped_options)
    mapped.get_chunks(2).count.should eql(2)
  end

  it "does not have extra spaces in the final chunk" do
    mapped = TableImporter::Source.new(mapped_options)
    final_chunk = mapped.get_chunks(2).last
    (final_chunk[:lines].count + final_chunk[:errors].count).should eql(1)
  end

  it "gets the correct preview lines" do
    @source.get_preview_lines.count.should eql(3)
  end

  it "can get the correct record separator" do
    @source.get_record_separator.should eql(:old_newline_mac)
  end

  it "can get the correct column separator" do
    @source.get_column_separator.should eql(:old_newline_mac)
  end

  it "has the correct type" do
    @source.get_type.should eql("copy_and_paste")
  end

  after(:each) do
    @source = nil
  end
end
57
+
58
context 'when source is a different string' do

  # Header mapping with "first_name" set to "0" and "email" set to "1".
  def name_and_email_headers
    {"first_name"=>"0", "last_name"=>"", "salutation"=>"", "tag_list"=>"", "email"=>"1", "organization"=>"", "url"=>"", "phone"=>"", "job_title"=>"", "second_url"=>"", "notes"=>"", "twitter_username"=>"", "skype_username"=>"", "pinterest_username"=>"", "instagram_username"=>"", "facebook_username"=>"", "last_name_prefix"=>"", "second_email"=>"", "phone_mobile"=>"", "street"=>"", "street_number"=>"", "zipcode"=>"", "city"=>"", "country"=>""}
  end

  before(:each) do
    @source = TableImporter::Source.new({
      :content => "Nick Dowse <nick@pr.co>, Dennis van der Vliet <dennis@pr.co>, Jeroen Bos <jeroen@pr.co>",
      :headers_present => false,
      :headers => name_and_email_headers,
      :user_headers => nil,
      :type => "copy_and_paste",
      :col_sep => "",
      :rec_sep => "",
      :compulsory_headers => {:email => true}
    })
  end

  it "gets the correct chunks" do
    first_line = @source.get_chunks.first[:lines].first
    first_line[:email].should eql("nick@pr.co")
  end

  it "has the correct column_separator" do
    @source.get_column_separator.should eql(nil)
  end

  it "has the correct record_separator" do
    @source.get_record_separator.should eql(nil)
  end

  it "has the correct number of lines" do
    @source.get_chunks(1).count.should eql(3)
  end

  it "has the correct number of chunks" do
    @source.get_chunks(2).count.should eql(2)
  end

  it "does not have extra spaces in the final chunk" do
    final_chunk = @source.get_chunks(2).last
    (final_chunk[:lines].count + final_chunk[:errors].count).should eql(1)
  end

  after(:each) do
    @source = nil
  end
end
93
+
94
# Exercises the copy-and-paste importer with deliberately messy input:
# curly quotes, unbalanced straight quotes, a blank record, an address
# without a domain and non-ASCII characters.
context 'when source is a bad string' do

  before(:each) do
    # NOTE(review): :content is a multi-line string literal, so every
    # character between the quotes (including any leading whitespace on the
    # continuation lines) is part of the test data. The continuation lines
    # are reproduced here exactly as displayed; confirm against the
    # repository before reformatting.
    @source = TableImporter::Source.new({
      :content => "Dennis,denni@pr.co,Amsterdam
Nick@test.com,”
“, Amsterdam
jeroen@, \"jeroe
adine, \"

lorenzo,\"lorenzo@pr.co\"
HÐ, “nick¯â@test”, ¾,€",
      :headers_present => false, :headers => nil, :user_headers => nil, :type => "copy_and_paste", :col_sep => :comma, :rec_sep => :newline_mac, :compulsory_headers => {:email => true}})
  end

  # The literal above spans eight newline-separated records, one of them blank.
  it "has the correct number of lines" do
    @source.get_lines.count.should eql(8)
  end

  it "has the correct number of chunks" do
    @source.get_chunks(4).count.should eql(2)
  end

  # Counts imported lines and error records together for the last chunk.
  it "does not have extra spaces in the final chunk" do
    last_chunk = @source.get_chunks(3).last
    (last_chunk[:lines].count + last_chunk[:errors].count).should eql(2)
  end

  after(:each) do
    @source = nil
  end
end
126
+
127
context 'when string is empty' do
  # NOTE(review): an Exceptions::EmptyStringImportError class also exists for
  # pasted data; this example expects the file variant. Confirm which one
  # the copy-and-paste importer is meant to raise.
  it 'raises an error when creating a source object' do
    empty_paste = {:content => "", :headers_present => false, :headers => nil, :user_headers => nil, :type => "copy_and_paste", :col_sep => :comma, :rec_sep => :newline_mac, :compulsory_headers => {:email => true}}

    expect { TableImporter::Source.new(empty_paste) }.to raise_error(Exceptions::EmptyFileImportError)
  end
end
134
+ end
data/spec/csv_spec.rb ADDED
@@ -0,0 +1,135 @@
1
+ # encoding: UTF-8
2
+ require 'spec_helper'
3
+ require 'smarter_csv'
4
+
5
+ describe TableImporter::Source do
6
+
7
context 'when source is a csv file with headers' do
  # Options for the with_headers.csv fixture. Every call opens the fixture
  # again and builds a fresh hash, as the inline literals did.
  def with_headers_options
    {:content => File.open([Dir.pwd, "/spec/files/csv/with_headers.csv"].join), :headers_present => true, :headers => nil, :user_headers => nil, :type => "csv", :column_separator => "", :record_separator => "", :compulsory_headers => {:email => true}}
  end

  before(:each) do
    @source = TableImporter::Source.new(with_headers_options)
  end

  it "creates a source object" do
    TableImporter::Source.new(with_headers_options)
  end

  it "has the correct headers" do
    expected_headers = ["country", "medium", "salutation", "first_name|", "last_name", "email", "phone_number", "tags|"]
    @source.get_headers.should eql(expected_headers)
  end

  it "has the correct number of chunks" do
    @source.get_chunks(4).count.should eql(3)
  end

  it "does not have extra spaces in the final chunk" do
    final_chunk = @source.get_chunks(4).last
    (final_chunk[:lines].count + final_chunk[:errors].count).should eql(1)
  end

  it "can get the correct record separator" do
    @source.get_record_separator.should eql(:newline_mac)
  end

  it "can get the correct column separator" do
    @source.get_column_separator.should eql(:semicolon)
  end

  it "has the correct type" do
    @source.get_type.should eql("csv")
  end

  after(:each) do
    @source = nil
  end
end
45
+
46
context 'when source is a csv file without headers it' do
  # Options for the without_headers.csv fixture with no header mapping and
  # blank separators. Opens the fixture anew on every call.
  def without_headers_options
    {:content => File.open([Dir.pwd, "/spec/files/csv/without_headers.csv"].join), :headers_present => false, :headers => nil, :user_headers => nil, :type => "csv", :column_separator => "", :record_separator => "", :compulsory_headers => {:email => true}}
  end

  # Header mapping in which only "email" has a value ("5").
  def email_sixth_headers
    {"first_name"=>"", "last_name"=>"", "salutation"=>"", "tag_list"=>"", "email"=>"5", "organization"=>"", "url"=>"", "phone"=>"", "job_title"=>"", "second_url"=>"", "notes"=>"", "twitter_username"=>"", "skype_username"=>"", "pinterest_username"=>"", "instagram_username"=>"", "facebook_username"=>"", "last_name_prefix"=>"", "second_email"=>"", "phone_mobile"=>"", "street"=>"", "street_number"=>"", "zipcode"=>"", "city"=>"", "country"=>""}
  end

  # Same fixture with the header mapping and explicit separators.
  def mapped_options
    without_headers_options.merge(:headers => email_sixth_headers, :column_separator => :semicolon, :record_separator => :newline_mac)
  end

  before(:each) do
    @source_headers = "false"
    @source = TableImporter::Source.new(without_headers_options)
  end

  it "creates a source object" do
    TableImporter::Source.new(without_headers_options)
  end

  it "has the correct number of chunks" do
    mapped = TableImporter::Source.new(mapped_options)
    mapped.get_chunks(4).count.should eql(3)
  end

  it "does not have extra spaces in the final chunk" do
    mapped = TableImporter::Source.new(mapped_options)
    mapped.get_chunks(4).last[:lines].count.should eql(1)
  end

  after(:each) do
    @source = nil
  end
end
70
+
71
context 'when source is a large csv file with headers' do

  # Every example in this group is skipped.
  before { skip }

  # Options for the with_headers_large.csv fixture; opens it anew per call.
  def large_csv_options
    {:content => File.open([Dir.pwd, '/spec/files/csv/with_headers_large.csv'].join), :headers_present => true, :headers => nil, :user_headers => nil, :type => "csv", :column_separator => "", :record_separator => "", :compulsory_headers => {:email => true}}
  end

  before(:all) do
    @source_headers = 'true'
    @source = TableImporter::Source.new(large_csv_options)
  end

  it "creates a source object" do
    TableImporter::Source.new(large_csv_options)
  end

  it "has the correct number of chunks" do
    @source.get_chunks(4).count.should eql(2690)
  end

  it "does not have extra spaces in the final chunk" do
    final_chunk = @source.get_chunks(4).last
    (final_chunk[:errors].count + final_chunk[:lines].count).should eql(3)
  end

  after(:all) do
    @source = nil
  end
end
97
+
98
context 'when source is an edge-case csv file without headers' do
  # Options for the edge_cases.csv fixture with no header mapping.
  # Opens the fixture anew on every call.
  def edge_case_options
    {:content => File.open([Dir.pwd, "/spec/files/csv/edge_cases.csv"].join), :headers_present => false, :headers => nil, :user_headers => nil, :type => "csv", :column_separator => "", :record_separator => "", :compulsory_headers => {:email => true}}
  end

  # Header mapping in which only "email" has a value ("1").
  def email_second_headers
    {"first_name"=>"", "last_name"=>"", "salutation"=>"", "tag_list"=>"", "email"=>"1", "organization"=>"", "url"=>"", "phone"=>"", "job_title"=>"", "second_url"=>"", "notes"=>"", "twitter_username"=>"", "skype_username"=>"", "pinterest_username"=>"", "instagram_username"=>"", "facebook_username"=>"", "last_name_prefix"=>"", "second_email"=>"", "phone_mobile"=>"", "street"=>"", "street_number"=>"", "zipcode"=>"", "city"=>"", "country"=>""}
  end

  before(:each) do
    @source_headers = "false"
    @source = TableImporter::Source.new(edge_case_options)
  end

  it "creates a source object" do
    TableImporter::Source.new(edge_case_options)
  end

  it "has the correct number of chunks" do
    mapped = TableImporter::Source.new(edge_case_options.merge(:headers => email_second_headers))
    mapped.get_chunks(4).count.should eql(3)
  end

  after(:each) do
    @source = nil
  end
end
117
+
118
context 'when source is a badly encoded file' do
  it 'can still get the correct chunks' do
    fixture = File.open([Dir.pwd, "/spec/files/csv/mexico2013_pressdoc.csv"].join)
    options = {:content => fixture, :headers_present => true, :headers => nil, :user_headers => nil, :type => "csv", :column_separator => "", :record_separator => "", :compulsory_headers => {:email => true}}

    first_chunk = TableImporter::Source.new(options).get_chunks.first
    first_chunk[:lines].count.should eql(49)
  end
end
124
+
125
context 'when source is an empty csv file' do

  # The previous begin/rescue form passed whether or not an error was
  # raised, so the example asserted nothing. It now fails unless
  # constructing the source raises Exceptions::EmptyFileImportError, using
  # the same expect/raise_error form as the empty-string example in the
  # copy-and-paste spec.
  it 'raises an error when creating a source object' do
    expect {
      TableImporter::Source.new({:content => File.open([Dir.pwd, "/spec/files/csv/no_content.csv"].join), :headers_present => true, :headers => nil, :user_headers => nil, :type => "csv", :column_separator => "", :record_separator => "", :compulsory_headers => {:email => true}})
    }.to raise_error(Exceptions::EmptyFileImportError)
  end
end
135
+ end