table_importer 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +18 -0
- data/.travis.yml +7 -0
- data/Gemfile +4 -0
- data/LICENSE +21 -0
- data/LICENSE.txt +22 -0
- data/README.md +9 -0
- data/Rakefile +2 -0
- data/lib/table_importer/copy_and_paste.rb +118 -0
- data/lib/table_importer/csv.rb +146 -0
- data/lib/table_importer/excel.rb +92 -0
- data/lib/table_importer/exceptions.rb +29 -0
- data/lib/table_importer/source.rb +149 -0
- data/lib/table_importer/version.rb +3 -0
- data/lib/table_importer.rb +3 -0
- data/spec/copy_and_paste_spec.rb +134 -0
- data/spec/csv_spec.rb +135 -0
- data/spec/excel_spec.rb +139 -0
- data/spec/files/csv/10-1359651839-google_(1).csv +0 -0
- data/spec/files/csv/11-1359651879-contacts_(1) (1).csv +158 -0
- data/spec/files/csv/11-1359651879-contacts_(1).csv +158 -0
- data/spec/files/csv/6-1359649307-contacts (1).csv +157 -0
- data/spec/files/csv/6-1359649307-contacts (2).csv +158 -0
- data/spec/files/csv/6-1359649307-contacts (3).csv +158 -0
- data/spec/files/csv/6-1359649307-contacts.csv +158 -0
- data/spec/files/csv/7-1359650836-6-1359649307-contacts.csv +158 -0
- data/spec/files/csv/8-1359651745-contacts.csv +158 -0
- data/spec/files/csv/9-1359651826-google_(1).csv +0 -0
- data/spec/files/csv/bad_headers_2.csv +45 -0
- data/spec/files/csv/csv_headers.csv +55 -0
- data/spec/files/csv/csv_no_headers.csv +5 -0
- data/spec/files/csv/edge_cases.csv +16 -0
- data/spec/files/csv/hong_kong.csv +1150 -0
- data/spec/files/csv/hong_kong_no_headers.csv +9 -0
- data/spec/files/csv/hong_kong_small.csv +10 -0
- data/spec/files/csv/mexico2013_pressdoc.csv +3248 -0
- data/spec/files/csv/no_content.csv +22 -0
- data/spec/files/csv/semicolon.csv +214 -0
- data/spec/files/csv/with_headers.csv +10 -0
- data/spec/files/csv/with_headers_large.csv +10760 -0
- data/spec/files/csv/without_headers.csv +9 -0
- data/spec/files/excel/edge_cases.xls +0 -0
- data/spec/files/excel/no_content.xls +0 -0
- data/spec/files/excel/no_content.xlsx +0 -0
- data/spec/files/excel/with_headers.xls +0 -0
- data/spec/files/excel/with_headers_large.xls +0 -0
- data/spec/files/excel/with_headers_large.xlsx +0 -0
- data/spec/files/excel/without_headers.xls +0 -0
- data/spec/spec_helper.rb +20 -0
- data/table_importer.gemspec +32 -0
- data/tasks/rspec.rake +4 -0
- metadata +254 -0
@@ -0,0 +1,29 @@
|
|
1
|
+
# Error classes raised by the importer. Every class accepts an optional
# custom message and otherwise falls back to a user-facing default.
module Exceptions
  # Common ancestor so callers can rescue every import failure at once.
  class ImportError < StandardError; end

  # Default message tells the user the uploaded file has no readable content.
  class EmptyFileImportError < ImportError
    def initialize(message = "The file you uploaded has no valid content to import or the content cannot be read. If there is content in your file please try copying and pasting it in instead.")
      super # bare super forwards `message` unchanged
    end
  end

  # Default message tells the user the pasted data has no readable content.
  class EmptyStringImportError < ImportError
    def initialize(message = "The data you pasted in has no valid content to import or it cannot be read.")
      super
    end
  end

  # Default message tells the user the upload does not match the declared type.
  class IncorrectFileError < ImportError
    def initialize(message = "Sorry, you didn't upload the type of file you said you did.")
      super
    end
  end

  # Default message asks the user to double-check the "has headers" choice.
  class HeaderMismatchError < ImportError
    def initialize(message = "Sorry, we couldn't process your file. Did you correctly check whether your file has headers?")
      super
    end
  end
end
|
@@ -0,0 +1,149 @@
|
|
1
|
+
module TableImporter
|
2
|
+
# require classes
|
3
|
+
# require 'import_sources/excel_source'
|
4
|
+
# require 'import_sources/string_source'
|
5
|
+
class Source
|
6
|
+
|
7
|
+
SEPARATORS = {comma: ",", space: " ", tab: "\t", newline_mac: "\n", semicolon: ";", pipe: "|", newline_windows: "\r\n", old_newline_mac: "\r"}
|
8
|
+
require 'table_importer/csv'
|
9
|
+
require 'table_importer/copy_and_paste'
|
10
|
+
require 'table_importer/excel'
|
11
|
+
|
12
|
+
# Builds the concrete importer selected by data[:type] and stores it in
# @source; every public method of this class forwards to that object.
#
# @param data [Hash] import options; :type selects the importer and the
#   whole hash is handed to the importer's constructor
# @raise [Exceptions::IncorrectFileError] when :type is not one of
#   'copy_and_paste', 'csv', 'xls' or 'xlsx'
def initialize(data)
  importer_class =
    case data[:type]
    when 'copy_and_paste' then CopyAndPaste
    when 'csv'            then CSV
    when 'xls', 'xlsx'    then Excel
    else raise Exceptions::IncorrectFileError.new
    end
  @source = importer_class.new(data)
end
|
25
|
+
|
26
|
+
# Returns whatever the wrapped importer reports as its type.
def get_type
  importer = @source
  importer.get_type
end
|
29
|
+
|
30
|
+
# Asks the wrapped importer for its column separator string and translates
# it to the matching SEPARATORS key (nil when the string is not listed).
#
# @param first_line [String] forwarded unchanged to the importer
# @return [Symbol, nil]
def get_column_separator(first_line = "")
  raw_separator = @source.get_column_separator(first_line)
  SEPARATORS.key(raw_separator)
end
|
33
|
+
|
34
|
+
# Asks the wrapped importer for its record separator string and translates
# it to the matching SEPARATORS key (nil when the string is not listed).
#
# @param first_line [String] forwarded unchanged to the importer
# @return [Symbol, nil]
def get_record_separator(first_line = "")
  raw_separator = @source.get_record_separator(first_line)
  SEPARATORS.key(raw_separator)
end
|
37
|
+
|
38
|
+
# Returns the headers exactly as the wrapped importer reports them.
def get_headers
  importer = @source
  importer.get_headers
end
|
41
|
+
|
42
|
+
# Forwards a line request to the wrapped importer.
#
# @param start_point [Integer] passed through as the first argument
# @param number [Integer] passed through as the second argument (default -1)
def get_lines(start_point = 0, number = -1)
  importer = @source
  importer.get_lines(start_point, number)
end
|
45
|
+
|
46
|
+
# Returns the preview lines produced by the wrapped importer.
def get_preview_lines
  importer = @source
  importer.get_preview_lines
end
|
49
|
+
|
50
|
+
# Forwards a chunk request to the wrapped importer.
#
# @param chunk_size [Integer] passed through unchanged (default 50)
def get_chunks(chunk_size = 50)
  importer = @source
  importer.get_chunks(chunk_size)
end
|
53
|
+
|
54
|
+
# Generates placeholder header symbols :column_1 .. :column_<number>.
#
# The list is cached in @default_headers. The cache is reused only when it
# already holds exactly `number` entries; previously the first result was
# returned for every later call regardless of the requested size.
#
# @param number [Integer] how many placeholder headers to produce
# @return [Array<Symbol>]
def default_headers(number = 100)
  return @default_headers if @default_headers && @default_headers.size == number

  @default_headers = 1.upto(number).map { |n| :"column_#{n}" }
end
|
60
|
+
|
61
|
+
# Counts how often each known separator string occurs in first_line.
#
# @param first_line [String] text to scan
# @return [Array<Hash>] one single-pair hash {separator => occurrences}
#   per SEPARATORS value, in SEPARATORS order
def get_sep_count(first_line)
  SEPARATORS.each_value.map do |separator|
    occurrences = first_line.scan(separator).size
    { separator => occurrences }
  end
end
|
66
|
+
|
67
|
+
# Picks the separator with the highest count from a list of single-pair
# hashes ({separator => count}), as produced by get_sep_count.
#
# Ties go to the entry that appears later in the list, and an empty list
# yields "" because the search starts from ("", 0).
#
# @param separators [Array<Hash>]
# @return [String]
def sort_separators(separators)
  winner, _winning_count = separators.inject(["", 0]) do |(best_sep, best_count), candidate|
    sep, count = candidate.first
    # >= (not >) so that a later entry replaces an earlier one on a tie
    count >= best_count ? [sep, count] : [best_sep, best_count]
  end
  winner
end
|
78
|
+
|
79
|
+
# Splits every chunk into importable lines and error records.
#
# A line becomes an error record when it is empty or fails the compulsory
# header check; otherwise it is kept. Chunks that end up with neither lines
# nor errors are dropped. With delete_empty_columns enabled, headers that
# never carry an alphanumeric value in any kept line are handed to
# remove_empty_columns together with the result.
#
# @param chunks [Array<Array<Hash>>] chunks of lines keyed by header
# @param compulsory_headers [Hash] forwarded to the compulsory header check
# @param delete_empty_columns [Boolean] whether to flag unused columns
# @return [Array<Hash>] hashes of the form {:lines => [...], :errors => [...]}
def clean_chunks(chunks, compulsory_headers = {}, delete_empty_columns = false)
  # Start from every header of the very first line; headers are removed
  # from this list as soon as a kept line shows real content for them.
  unused_headers = chunks.first.first.keys

  cleaned = chunks.each_with_object([]) do |chunk, collected|
    kept_lines = []
    line_errors = []

    chunk.each do |line|
      empty = line_empty?(line)
      headers_missing, missing_header = check_compulsory_headers?(line, compulsory_headers)

      if empty || headers_missing
        line_errors << format_error(line, empty, headers_missing, compulsory_headers, missing_header)
        next
      end

      if delete_empty_columns
        line.each do |key, value|
          has_content = value.present? && value.to_s.gsub(/[^A-Za-z0-9]/, '').present?
          unused_headers.delete(key) if has_content
        end
      end
      kept_lines << line
    end

    next if kept_lines.empty? && line_errors.empty?
    collected << { :lines => kept_lines, :errors => line_errors }
  end

  remove_empty_columns(cleaned, unused_headers) if delete_empty_columns
  cleaned
end
|
107
|
+
|
108
|
+
private
|
109
|
+
# True when no value of the line holds usable content, i.e. every value is
# classified as garbage by line_item_is_garbage?.
def line_empty?(line)
  !line.any? { |_header, cell| !line_item_is_garbage?(cell) }
end
|
112
|
+
|
113
|
+
# Checks a line against the compulsory headers (currently only :email).
#
# When :email is compulsory and the line has an :email key, that value is
# cleaned in place via clean_email and must contain "@" followed by a
# non-space character. Independently, at least one value anywhere in the
# line must look like that.
#
# @param line [Hash] the line; line[:email] may be overwritten
# @param compulsory_headers [Hash] only the presence of :email is inspected
# @return [Array(true, String), false] [true, "email"] on failure,
#   plain false when the line passes
def check_compulsory_headers?(line, compulsory_headers)
  email_required = compulsory_headers.key?(:email)
  if email_required
    if line.key?(:email)
      cleaned = clean_email(line[:email])
      line[:email] = cleaned
      return [true, "email"] unless cleaned.to_s =~ /@\S/
    end
    return [true, "email"] if line.values.none? { |cell| cell.to_s =~ /@\S/ }
  end
  # here perform other checks for other compulsory headers we might have.
  false
end
|
124
|
+
|
125
|
+
# Strips at most one non-alphanumeric character from the start and at most
# one from the end of the given value (after converting it to a string).
#
# @param email [Object, nil]
# @return [String, nil] nil when email is nil or false
def clean_email(email)
  return unless email

  without_leading = email.to_s.sub(/\A[^A-Za-z0-9]/, '')
  without_leading.sub(/[^A-Za-z0-9]\z/, '')
end
|
130
|
+
|
131
|
+
# A cell counts as garbage when it is not present, i.e. when blank?
# would be true for it.
def line_item_is_garbage?(item_value)
  !item_value.present?
end
|
134
|
+
|
135
|
+
# Builds the error record stored for a line that was not imported.
#
# The missing-header reason used to start with a stray space, which put a
# double space into the final message ("because  it did not ..."); that
# space is removed here.
#
# @param line [Hash] the rejected line
# @param line_empty [Boolean] true selects the "no content" wording
# @param no_compulsory_headers [Boolean] stored under :data => :headers
# @param compulsory_headers [Hash] accepted for interface compatibility; not used
# @param missing_header [String, nil] header name interpolated into the message
# @return [Hash] {:level => :error, :message => String, :data => Hash}
def format_error(line, line_empty, no_compulsory_headers, compulsory_headers, missing_header)
  reason =
    if line_empty
      "it did not have any content"
    else
      "it did not contain this/these required headers: #{missing_header}"
    end

  {
    :level => :error,
    :message => "The following line was not imported because #{reason}.",
    :data => { :line => line, :line_empty => line_empty, :headers => no_compulsory_headers }
  }
end
|
139
|
+
|
140
|
+
# Marks unused columns by writing "empty_column" under each given header
# into the first line of every chunk.
#
# Chunks without any lines (only errors) are skipped; indexing their
# missing first line used to raise NoMethodError on nil.
#
# @param chunks [Array<Hash>] hashes with a :lines array; modified in place
# @param headers [Array] header keys to mark
# @return [Array<Hash>] the same chunks array
def remove_empty_columns(chunks, headers)
  chunks.each do |chunk|
    first_line = chunk[:lines].first
    next if first_line.nil?

    headers.each { |header| first_line[header] = "empty_column" }
  end
  chunks
end
|
148
|
+
end
|
149
|
+
end
|
@@ -0,0 +1,134 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
require 'spec_helper'
|
3
|
+
|
4
|
+
describe TableImporter::Source do
|
5
|
+
|
6
|
+
context 'when source is a string it' do

  # Fresh option hash for three pasted addresses without any header mapping
  # or explicit separators. Built anew on every call so examples never
  # share option objects.
  def pasted_options
    {:content => "nick@pr.co, dennis@pr.co, lorenzo@pr.co", :headers_present => false, :headers => nil, :user_headers => nil, :type => "copy_and_paste", :col_sep => "", :rec_sep => "", :compulsory_headers => {:email => true}}
  end

  # Source for the same content with the e-mail mapped to column 0,
  # space as column separator and comma as record separator.
  def mapped_source
    header_mapping = {"first_name"=>"", "last_name"=>"", "salutation"=>"", "tag_list"=>"", "email"=>"0", "organization"=>"", "url"=>"", "phone"=>"", "job_title"=>"", "second_url"=>"", "notes"=>"", "twitter_username"=>"", "skype_username"=>"", "pinterest_username"=>"", "instagram_username"=>"", "facebook_username"=>"", "last_name_prefix"=>"", "second_email"=>"", "phone_mobile"=>"", "street"=>"", "street_number"=>"", "zipcode"=>"", "city"=>"", "country"=>""}
    TableImporter::Source.new(pasted_options.merge(:headers => header_mapping, :col_sep => :space, :rec_sep => :comma))
  end

  before(:each) do
    @source = TableImporter::Source.new(pasted_options)
  end

  it "creates a source object" do
    TableImporter::Source.new(pasted_options)
  end

  it "gets the correct copy and paste chunks" do
    first_line = mapped_source.get_chunks.first[:lines].first
    first_line[:email].should eql("nick@pr.co")
  end

  it "has the correct number of lines" do
    mapped_source.get_chunks(1).count.should eql(3)
  end

  it "has the correct number of chunks" do
    mapped_source.get_chunks(2).count.should eql(2)
  end

  it "does not have extra spaces in the final chunk" do
    final_chunk = mapped_source.get_chunks(2).last
    entries = final_chunk[:lines].count + final_chunk[:errors].count
    entries.should eql(1)
  end

  it "gets the correct preview lines" do
    @source.get_preview_lines.count.should eql(3)
  end

  it "can get the correct record separator" do
    @source.get_record_separator.should eql(:old_newline_mac)
  end

  it "can get the correct column separator" do
    @source.get_column_separator.should eql(:old_newline_mac)
  end

  it "has the correct type" do
    @source.get_type.should eql("copy_and_paste")
  end

  after(:each) do
    @source = nil
  end
end
|
57
|
+
|
58
|
+
context 'when source is a different string' do

  before(:each) do
    # "Name <address>" entries; first name mapped to column 0, e-mail to column 1.
    header_mapping = {"first_name"=>"0", "last_name"=>"", "salutation"=>"", "tag_list"=>"", "email"=>"1", "organization"=>"", "url"=>"", "phone"=>"", "job_title"=>"", "second_url"=>"", "notes"=>"", "twitter_username"=>"", "skype_username"=>"", "pinterest_username"=>"", "instagram_username"=>"", "facebook_username"=>"", "last_name_prefix"=>"", "second_email"=>"", "phone_mobile"=>"", "street"=>"", "street_number"=>"", "zipcode"=>"", "city"=>"", "country"=>""}
    pasted_text = "Nick Dowse <nick@pr.co>, Dennis van der Vliet <dennis@pr.co>, Jeroen Bos <jeroen@pr.co>"
    @source = TableImporter::Source.new({:content => pasted_text, :headers_present => false, :headers => header_mapping, :user_headers => nil, :type => "copy_and_paste", :col_sep => "", :rec_sep => "", :compulsory_headers => {:email => true}})
  end

  it "gets the correct chunks" do
    first_line = @source.get_chunks.first[:lines].first
    first_line[:email].should eql("nick@pr.co")
  end

  it "has the correct column_separator" do
    @source.get_column_separator.should eql(nil)
  end

  it "has the correct record_separator" do
    @source.get_record_separator.should eql(nil)
  end

  it "has the correct number of lines" do
    @source.get_chunks(1).count.should eql(3)
  end

  it "has the correct number of chunks" do
    @source.get_chunks(2).count.should eql(2)
  end

  it "does not have extra spaces in the final chunk" do
    final_chunk = @source.get_chunks(2).last
    entries = final_chunk[:lines].count + final_chunk[:errors].count
    entries.should eql(1)
  end

  after(:each) do
    @source = nil
  end
end
|
93
|
+
|
94
|
+
context 'when source is a bad string' do
|
95
|
+
|
96
|
+
before(:each) do
|
97
|
+
@source = TableImporter::Source.new({
|
98
|
+
:content => "Dennis,denni@pr.co,Amsterdam
|
99
|
+
Nick@test.com,”
|
100
|
+
“, Amsterdam
|
101
|
+
jeroen@, \"jeroe
|
102
|
+
adine, \"
|
103
|
+
|
104
|
+
lorenzo,\"lorenzo@pr.co\"
|
105
|
+
HÐ, “nick¯â@test”, ¾,€",
|
106
|
+
:headers_present => false, :headers => nil, :user_headers => nil, :type => "copy_and_paste", :col_sep => :comma, :rec_sep => :newline_mac, :compulsory_headers => {:email => true}})
|
107
|
+
end
|
108
|
+
|
109
|
+
it "has the correct number of lines" do
|
110
|
+
@source.get_lines.count.should eql(8)
|
111
|
+
end
|
112
|
+
|
113
|
+
it "has the correct number of chunks" do
|
114
|
+
@source.get_chunks(4).count.should eql(2)
|
115
|
+
end
|
116
|
+
|
117
|
+
it "does not have extra spaces in the final chunk" do
|
118
|
+
last_chunk = @source.get_chunks(3).last
|
119
|
+
(last_chunk[:lines].count + last_chunk[:errors].count).should eql(2)
|
120
|
+
end
|
121
|
+
|
122
|
+
after(:each) do
|
123
|
+
@source = nil
|
124
|
+
end
|
125
|
+
end
|
126
|
+
|
127
|
+
context 'when string is empty' do
  it 'raises an error when creating a source object' do
    # Empty pasted content with explicit comma / newline separators.
    empty_options = {:content => "", :headers_present => false, :headers => nil, :user_headers => nil, :type => "copy_and_paste", :col_sep => :comma, :rec_sep => :newline_mac, :compulsory_headers => {:email => true}}

    expect { TableImporter::Source.new(empty_options) }.to raise_error(Exceptions::EmptyFileImportError)
  end
end
|
134
|
+
end
|
data/spec/csv_spec.rb
ADDED
@@ -0,0 +1,135 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
require 'spec_helper'
|
3
|
+
require 'smarter_csv'
|
4
|
+
|
5
|
+
describe TableImporter::Source do
|
6
|
+
|
7
|
+
context 'when source is a csv file with headers' do

  # Fresh options (including a newly opened file handle) for with_headers.csv.
  def headered_csv_options
    {:content => File.open([Dir.pwd, "/spec/files/csv/with_headers.csv"].join), :headers_present => true, :headers => nil, :user_headers => nil, :type => "csv", :column_separator => "", :record_separator => "", :compulsory_headers => {:email => true}}
  end

  before(:each) do
    @source = TableImporter::Source.new(headered_csv_options)
  end

  it "creates a source object" do
    TableImporter::Source.new(headered_csv_options)
  end

  it "has the correct headers" do
    expected_headers = ["country", "medium", "salutation", "first_name|", "last_name", "email", "phone_number", "tags|"]
    @source.get_headers.should eql(expected_headers)
  end

  it "has the correct number of chunks" do
    @source.get_chunks(4).count.should eql(3)
  end

  it "does not have extra spaces in the final chunk" do
    final_chunk = @source.get_chunks(4).last
    entries = final_chunk[:lines].count + final_chunk[:errors].count
    entries.should eql(1)
  end

  it "can get the correct record separator" do
    @source.get_record_separator.should eql(:newline_mac)
  end

  it "can get the correct column separator" do
    @source.get_column_separator.should eql(:semicolon)
  end

  it "has the correct type" do
    @source.get_type.should eql("csv")
  end

  after(:each) do
    @source = nil
  end
end
|
45
|
+
|
46
|
+
context 'when source is a csv file without headers it' do

  # Fresh options (including a newly opened file handle) for
  # without_headers.csv; `overrides` replaces individual entries in place.
  def headerless_csv_options(overrides = {})
    {:content => File.open([Dir.pwd, "/spec/files/csv/without_headers.csv"].join), :headers_present => false, :headers => nil, :user_headers => nil, :type => "csv", :column_separator => "", :record_separator => "", :compulsory_headers => {:email => true}}.merge(overrides)
  end

  # Source with the e-mail mapped to column 5 and explicit separators.
  def mapped_source
    header_mapping = {"first_name"=>"", "last_name"=>"", "salutation"=>"", "tag_list"=>"", "email"=>"5", "organization"=>"", "url"=>"", "phone"=>"", "job_title"=>"", "second_url"=>"", "notes"=>"", "twitter_username"=>"", "skype_username"=>"", "pinterest_username"=>"", "instagram_username"=>"", "facebook_username"=>"", "last_name_prefix"=>"", "second_email"=>"", "phone_mobile"=>"", "street"=>"", "street_number"=>"", "zipcode"=>"", "city"=>"", "country"=>""}
    TableImporter::Source.new(headerless_csv_options(:headers => header_mapping, :column_separator => :semicolon, :record_separator => :newline_mac))
  end

  before(:each) do
    @source_headers = "false"
    @source = TableImporter::Source.new(headerless_csv_options)
  end

  it "creates a source object" do
    TableImporter::Source.new(headerless_csv_options)
  end

  it "has the correct number of chunks" do
    mapped_source.get_chunks(4).count.should eql(3)
  end

  it "does not have extra spaces in the final chunk" do
    final_chunk = mapped_source.get_chunks(4).last
    final_chunk[:lines].count.should eql(1)
  end

  after(:each) do
    @source = nil
  end
end
|
70
|
+
|
71
|
+
context 'when source is a large csv file with headers' do

  # Every example in this group is skipped.
  before { skip }

  before(:all) do
    @source_headers = 'true'
    large_csv = File.open([Dir.pwd, '/spec/files/csv/with_headers_large.csv'].join)
    @source = TableImporter::Source.new({:content => large_csv, :headers_present => true, :headers => nil, :user_headers => nil, :type => "csv", :column_separator => "", :record_separator => "", :compulsory_headers => {:email => true}})
  end

  it "creates a source object" do
    large_csv = File.open([Dir.pwd, '/spec/files/csv/with_headers_large.csv'].join)
    TableImporter::Source.new({:content => large_csv, :headers_present => true, :headers => nil, :user_headers => nil, :type => "csv", :column_separator => "", :record_separator => "", :compulsory_headers => {:email => true}})
  end

  it "has the correct number of chunks" do
    @source.get_chunks(4).count.should eql(2690)
  end

  it "does not have extra spaces in the final chunk" do
    final_chunk = @source.get_chunks(4).last
    entries = final_chunk[:errors].count + final_chunk[:lines].count
    entries.should eql(3)
  end

  after(:all) do
    @source = nil
  end
end
|
97
|
+
|
98
|
+
context 'when source is an edge-case csv file without headers' do

  # Fresh options (including a newly opened file handle) for edge_cases.csv;
  # `overrides` replaces individual entries in place.
  def edge_case_options(overrides = {})
    {:content => File.open([Dir.pwd, "/spec/files/csv/edge_cases.csv"].join), :headers_present => false, :headers => nil, :user_headers => nil, :type => "csv", :column_separator => "", :record_separator => "", :compulsory_headers => {:email => true}}.merge(overrides)
  end

  before(:each) do
    @source_headers = "false"
    @source = TableImporter::Source.new(edge_case_options)
  end

  it "creates a source object" do
    TableImporter::Source.new(edge_case_options)
  end

  it "has the correct number of chunks" do
    # E-mail mapped to column 1; separators left blank.
    header_mapping = {"first_name"=>"", "last_name"=>"", "salutation"=>"", "tag_list"=>"", "email"=>"1", "organization"=>"", "url"=>"", "phone"=>"", "job_title"=>"", "second_url"=>"", "notes"=>"", "twitter_username"=>"", "skype_username"=>"", "pinterest_username"=>"", "instagram_username"=>"", "facebook_username"=>"", "last_name_prefix"=>"", "second_email"=>"", "phone_mobile"=>"", "street"=>"", "street_number"=>"", "zipcode"=>"", "city"=>"", "country"=>""}
    mapped_source = TableImporter::Source.new(edge_case_options(:headers => header_mapping))
    mapped_source.get_chunks(4).count.should eql(3)
  end

  after(:each) do
    @source = nil
  end
end
|
117
|
+
|
118
|
+
context 'when source is a badly encoded file' do
  it 'can still get the correct chunks' do
    csv_file = File.open([Dir.pwd, "/spec/files/csv/mexico2013_pressdoc.csv"].join)
    source = TableImporter::Source.new({:content => csv_file, :headers_present => true, :headers => nil, :user_headers => nil, :type => "csv", :column_separator => "", :record_separator => "", :compulsory_headers => {:email => true}})

    first_chunk = source.get_chunks.first
    first_chunk[:lines].count.should eql(49)
  end
end
|
124
|
+
|
125
|
+
context 'when source is an empty csv file' do

  it 'raises an error when creating a source object' do
    # The previous begin/rescue version swallowed the error and asserted
    # nothing, so it passed whether or not the error was raised. The block
    # form of File.open also closes the fixture handle afterwards.
    File.open([Dir.pwd, "/spec/files/csv/no_content.csv"].join) do |empty_csv|
      expect {
        TableImporter::Source.new({:content => empty_csv, :headers_present => true, :headers => nil, :user_headers => nil, :type => "csv", :column_separator => "", :record_separator => "", :compulsory_headers => {:email => true}})
      }.to raise_error(Exceptions::EmptyFileImportError)
    end
  end
end
|
135
|
+
end
|