table_importer 0.0.7 → 0.0.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/table_importer/copy_and_paste.rb +1 -1
- data/lib/table_importer/csv.rb +16 -6
- data/lib/table_importer/excel.rb +4 -3
- data/lib/table_importer/source.rb +0 -1
- data/lib/table_importer/version.rb +1 -1
- data/lib/table_importer.rb +2 -1
- data/spec/files/csv/empty_lines_at_start.csv +26 -0
- data/spec/files/csv/with_headers.csv +7 -7
- data/spec/files/excel/empty_lines.xlsx +0 -0
- data/spec/files/excel/empty_lines_at_start.xlsx +0 -0
- data/spec/sources/csv_spec.rb +16 -5
- data/spec/sources/excel_spec.rb +30 -0
- metadata +8 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 43d3bdb6bc28dc89cfd18f3cf85fa28e126817e6
|
4
|
+
data.tar.gz: 2f0b9aa934e7955a4bc2c0277e4603c191569588
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7ab65d630b278af498ba4d2bc2950fe792d1ab41d4c69ee19cf939b48114410032bae34cfbffec98aa101c2900cdfe03d85b685bb824a44dc83a9160d1173c33
|
7
|
+
data.tar.gz: 0923656a555afc4ba6dd22e07422d2e53f3f93f5635b55af6ae385a69524e04cc2f034fdf804d547a2fcbe22857ca2f81ebc65b81b945f01a03684a1589aafde
|
data/lib/table_importer/copy_and_paste.rb
CHANGED
@@ -59,7 +59,7 @@ module TableImporter
|
|
59
59
|
|
60
60
|
def get_preview_lines(start_point = @headers_present ? 1 : 0, end_point = 10)
|
61
61
|
begin
|
62
|
-
lines = clean_chunks([get_lines(start_point, end_point)],
|
62
|
+
lines = clean_chunks([get_lines(start_point, end_point)], {}, @delete_empty_columns)[0][:lines]
|
63
63
|
if lines.first.nil?
|
64
64
|
get_preview_lines(start_point+10, end_point+10)
|
65
65
|
else
|
data/lib/table_importer/csv.rb
CHANGED
@@ -33,7 +33,7 @@ module TableImporter
|
|
33
33
|
|
34
34
|
def get_first_line
|
35
35
|
begin
|
36
|
-
SmarterCSV.process(@file.path, default_options({:col_sep => @column_separator.present? ? @column_separator : "\n", :row_sep => @record_separator != nil ? @record_separator : "\n", :chunk_size =>
|
36
|
+
SmarterCSV.process(@file.path, default_options({:col_sep => @column_separator.present? ? @column_separator : "\n", :row_sep => @record_separator != nil ? @record_separator : "\n", :chunk_size => 2})) do |chunk|
|
37
37
|
if @headers_present
|
38
38
|
return chunk.first.keys[0].to_s
|
39
39
|
else
|
@@ -80,10 +80,13 @@ module TableImporter
|
|
80
80
|
@record_separator = sort_separators(separators)
|
81
81
|
end
|
82
82
|
|
83
|
-
def get_preview_lines
|
83
|
+
def get_preview_lines(start = 0, finish = 7, chunk_size = 8)
|
84
84
|
begin
|
85
|
-
SmarterCSV.process(@file.path, default_options({:row_sep => @record_separator != nil ? @record_separator : "\n", :chunk_size =>
|
86
|
-
|
85
|
+
SmarterCSV.process(@file.path, default_options({:row_sep => @record_separator != nil ? @record_separator : "\n", :chunk_size => chunk_size})) do |chunk|
|
86
|
+
cleaned_chunk = clean_chunks([chunk], @compulsory_headers, @delete_empty_columns)[0].symbolize_keys[:lines]
|
87
|
+
return cleaned_chunk[start..finish] if cleaned_chunk.first.present?
|
88
|
+
@headers_present = false
|
89
|
+
get_preview_lines(start+8, finish+8, chunk_size+8)
|
87
90
|
end
|
88
91
|
rescue SmarterCSV::HeaderSizeMismatch
|
89
92
|
raise TableImporter::HeaderMismatchError.new
|
@@ -143,11 +146,18 @@ module TableImporter
|
|
143
146
|
def clean_file(file)
|
144
147
|
contents = file.read
|
145
148
|
import = Tempfile.new(["import", ".xls"], :encoding => "UTF-8")
|
146
|
-
|
149
|
+
utf8_content = contents.force_encoding('UTF-8').encode('UTF-16', :invalid => :replace, :replace => '?').encode('UTF-8').gsub!(/\r\n|\r/, "\n").squeeze("\n")
|
150
|
+
clean_contents = utf8_content[0] == "\n" ? utf8_content[1..-1] : utf8_content
|
151
|
+
import.write(clean_contents)
|
147
152
|
import.close
|
153
|
+
reset_separators
|
154
|
+
return import
|
155
|
+
end
|
156
|
+
|
157
|
+
def reset_separators
|
148
158
|
SEPARATORS.except!(:newline_windows, :old_newline_mac)
|
149
159
|
@record_separator = "\n"
|
150
|
-
|
160
|
+
@column_separator = ""
|
151
161
|
end
|
152
162
|
end
|
153
163
|
end
|
data/lib/table_importer/excel.rb
CHANGED
@@ -15,7 +15,7 @@ module TableImporter
|
|
15
15
|
if !data[:headers].nil?
|
16
16
|
@headers = data[:headers]
|
17
17
|
else
|
18
|
-
@headers = @headers_present ? @file.row(1).map { |header| header.
|
18
|
+
@headers = @headers_present ? @file.row(1).map.with_index { |header, index| header.present? ? header.to_sym : "column_#{index}"} : default_headers
|
19
19
|
end
|
20
20
|
rescue NoMethodError
|
21
21
|
raise TableImporter::HeaderMismatchError.new
|
@@ -44,11 +44,12 @@ module TableImporter
|
|
44
44
|
|
45
45
|
def get_preview_lines(start_point = 0, end_point = 10)
|
46
46
|
begin
|
47
|
-
|
47
|
+
lines = clean_chunks([get_lines(start_point, end_point)], @compulsory_headers)[0][:lines]
|
48
|
+
if lines.first.nil?
|
48
49
|
get_preview_lines(start_point+10, end_point+10)
|
49
50
|
else
|
50
51
|
@headers = @mapping.present? ? convert_headers : @headers
|
51
|
-
|
52
|
+
lines[0..8]
|
52
53
|
end
|
53
54
|
rescue SystemStackError
|
54
55
|
raise TableImporter::EmptyFileImportError.new
|
data/lib/table_importer.rb
CHANGED
data/spec/files/csv/empty_lines_at_start.csv
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
|
2
|
+
|
3
|
+
|
4
|
+
|
5
|
+
|
6
|
+
|
7
|
+
|
8
|
+
|
9
|
+
|
10
|
+
|
11
|
+
|
12
|
+
|
13
|
+
|
14
|
+
|
15
|
+
|
16
|
+
|
17
|
+
Country;Medium;Salutation;First name;Last name;Email;Phone number;Tags
|
18
|
+
Hong Kong;Men's Uno (Hong Kong Edition);Ms;Noelle;Chan;info@chicgroups.com;-28544930;Ressort_Business
|
19
|
+
Hong Kong;Hong Kong Commercial Daily;Mr;Ching Wai
|
20
|
+
Hong Kong;South China Morning Post;Mr;John;Cremer;john.cremer@scmp.com;-25651370;Ressort_Business
|
21
|
+
Hong Kong;Hong Kong Economic Journal;Mr;Toby;Yiu;tcsyiu@hkej.com;-28566702;Ressort_Business
|
22
|
+
Hong Kong;Hong KongTrader;Ms;Regina;Deluna;regina.deluna@hktdc.org;-25843524;Ressort_Business
|
23
|
+
Hong Kong;GS1 Hong Kong;Ms;Stella;Cheang;info@gs1hk.org;-28611967;Ressort_Business
|
24
|
+
Hong Kong;Playtimes;Ms;Jo;Allun;jo.allum@ppp.com.hk;-22018864;gfgsdfgs;Ressort_Business
|
25
|
+
Hong Kong;China Tourism;Ms;Mickey;Ching;edit-c@hkctp.com.hk;-25617149;Ressort_Business
|
26
|
+
Hong Kong;Metro Daily (Hong Kong);Mr;Jeff;Lee;news@metrohk.com.hk;-31960748;Ressort_Business
|
data/spec/files/csv/with_headers.csv
CHANGED
@@ -1,10 +1,10 @@
|
|
1
|
-
Country;Medium;Salutation;First name
|
2
|
-
Hong Kong;Men's Uno (Hong Kong Edition);Ms;Noelle;Chan
|
3
|
-
Hong Kong;Hong Kong Commercial Daily;Mr;Ching Wai
|
4
|
-
Hong Kong;South China Morning Post;Mr;John;Cremer
|
5
|
-
Hong Kong;Hong Kong Economic
|
6
|
-
Hong Kong;Hong
|
1
|
+
Country;Medium;Salutation;First name;Last name;Email;Phone number;Tags
|
2
|
+
Hong Kong;Men's Uno (Hong Kong Edition);Ms;Noelle;Chan;info@chicgroups.com;-28544930;Ressort_Business
|
3
|
+
Hong Kong;Hong Kong Commercial Daily;Mr;Ching Wai
|
4
|
+
Hong Kong;South China Morning Post;Mr;John;Cremer;john.cremer@scmp.com;-25651370;Ressort_Business
|
5
|
+
Hong Kong;Hong Kong Economic Journal;Mr;Toby;Yiu;tcsyiu@hkej.com;-28566702;Ressort_Business
|
6
|
+
Hong Kong;Hong KongTrader;Ms;Regina;Deluna;regina.deluna@hktdc.org;-25843524;Ressort_Business
|
7
7
|
Hong Kong;GS1 Hong Kong;Ms;Stella;Cheang;info@gs1hk.org;-28611967;Ressort_Business
|
8
|
-
Hong Kong;Playtimes;Ms;Jo;Allun;jo.allum@ppp.com.hk;-22018864;gfgsdfgs
|
8
|
+
Hong Kong;Playtimes;Ms;Jo;Allun;jo.allum@ppp.com.hk;-22018864;gfgsdfgs;Ressort_Business
|
9
9
|
Hong Kong;China Tourism;Ms;Mickey;Ching;edit-c@hkctp.com.hk;-25617149;Ressort_Business
|
10
10
|
Hong Kong;Metro Daily (Hong Kong);Mr;Jeff;Lee;news@metrohk.com.hk;-31960748;Ressort_Business
|
data/spec/files/excel/empty_lines.xlsx
ADDED
Binary file
|
data/spec/files/excel/empty_lines_at_start.xlsx
ADDED
Binary file
|
data/spec/sources/csv_spec.rb
CHANGED
@@ -9,12 +9,8 @@ describe TableImporter::Source do
|
|
9
9
|
@source = TableImporter::Source.new({:content => File.open([Dir.pwd, "/spec/files/csv/with_headers.csv"].join), :headers_present => true, :headers => nil, :user_headers => nil, :type => "csv", :column_separator => "", :record_separator => "", :compulsory_headers => {:email => true}})
|
10
10
|
end
|
11
11
|
|
12
|
-
it "creates a source object" do
|
13
|
-
TableImporter::Source.new({:content => File.open([Dir.pwd, "/spec/files/csv/with_headers.csv"].join), :headers_present => true, :headers => nil, :user_headers => nil, :type => "csv", :column_separator => "", :record_separator => "", :compulsory_headers => {:email => true}})
|
14
|
-
end
|
15
|
-
|
16
12
|
it "has the correct headers" do
|
17
|
-
@source.get_headers.should eql(["country", "medium", "salutation", "first_name
|
13
|
+
@source.get_headers.should eql(["country", "medium", "salutation", "first_name", "last_name", "email", "phone_number", "tags"])
|
18
14
|
end
|
19
15
|
|
20
16
|
it "has the correct number of chunks" do
|
@@ -105,4 +101,19 @@ describe TableImporter::Source do
|
|
105
101
|
end
|
106
102
|
end
|
107
103
|
end
|
104
|
+
|
105
|
+
context 'when source has empty lines at start' do
|
106
|
+
|
107
|
+
before(:each) do
|
108
|
+
@source = TableImporter::Source.new({:content => File.open([Dir.pwd, "/spec/files/csv/empty_lines_at_start.csv"].join), :headers_present => true, :headers => nil, :user_headers => nil, :type => "csv", :column_separator => "", :record_separator => "", :compulsory_headers => {:email => true}})
|
109
|
+
end
|
110
|
+
|
111
|
+
it "Gets the preview lines without error" do
|
112
|
+
@source.get_preview_lines.count.should eql(7)
|
113
|
+
end
|
114
|
+
|
115
|
+
after(:each) do
|
116
|
+
@source = nil
|
117
|
+
end
|
118
|
+
end
|
108
119
|
end
|
data/spec/sources/excel_spec.rb
CHANGED
@@ -112,6 +112,36 @@ describe TableImporter::Source do
|
|
112
112
|
end
|
113
113
|
end
|
114
114
|
|
115
|
+
context 'when source has empty lines' do
|
116
|
+
|
117
|
+
before(:each) do
|
118
|
+
@source = TableImporter::Source.new({:content => File.open([Dir.pwd, "/spec/files/excel/empty_lines.xlsx"].join), :headers_present => false, :headers => nil, :user_headers => nil, :type => "xls", :column_separator => "", :record_separator => "", :compulsory_headers => {:email => true}})
|
119
|
+
end
|
120
|
+
|
121
|
+
it "does not throw an error" do
|
122
|
+
expect {@source.get_preview_lines}.to_not raise_error
|
123
|
+
end
|
124
|
+
|
125
|
+
after(:each) do
|
126
|
+
@source = nil
|
127
|
+
end
|
128
|
+
end
|
129
|
+
|
130
|
+
context 'when source has 20 empty lines at the beginning' do
|
131
|
+
|
132
|
+
before(:each) do
|
133
|
+
@source = TableImporter::Source.new({:content => File.open([Dir.pwd, "/spec/files/excel/empty_lines_at_start.xlsx"].join), :headers_present => true, :headers => nil, :user_headers => nil, :type => "xls", :column_separator => "", :record_separator => "", :compulsory_headers => {:email => true}})
|
134
|
+
end
|
135
|
+
|
136
|
+
it "does not throw an error" do
|
137
|
+
@source.get_preview_lines.count.should eql(6)
|
138
|
+
end
|
139
|
+
|
140
|
+
after(:each) do
|
141
|
+
@source = nil
|
142
|
+
end
|
143
|
+
end
|
144
|
+
|
115
145
|
context 'when source is an empty xls file' do
|
116
146
|
|
117
147
|
it 'raises an error when creating a source object' do
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: table_importer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.7
|
4
|
+
version: 0.0.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Nick Dowse
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-09-
|
11
|
+
date: 2014-09-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: spreadsheet
|
@@ -161,11 +161,14 @@ files:
|
|
161
161
|
- lib/table_importer/source.rb
|
162
162
|
- lib/table_importer/version.rb
|
163
163
|
- spec/files/csv/edge_cases.csv
|
164
|
+
- spec/files/csv/empty_lines_at_start.csv
|
164
165
|
- spec/files/csv/mexico2013_pressdoc.csv
|
165
166
|
- spec/files/csv/no_content.csv
|
166
167
|
- spec/files/csv/with_headers.csv
|
167
168
|
- spec/files/csv/without_headers.csv
|
168
169
|
- spec/files/excel/edge_cases.xls
|
170
|
+
- spec/files/excel/empty_lines.xlsx
|
171
|
+
- spec/files/excel/empty_lines_at_start.xlsx
|
169
172
|
- spec/files/excel/no_content.xlsx
|
170
173
|
- spec/files/excel/with_headers.xls
|
171
174
|
- spec/files/excel/without_headers.xls
|
@@ -202,11 +205,14 @@ summary: Given a file (or a string) containing a container, along with options,
|
|
202
205
|
will return a hash of those values. Great for importing poorly formatted CSV files.
|
203
206
|
test_files:
|
204
207
|
- spec/files/csv/edge_cases.csv
|
208
|
+
- spec/files/csv/empty_lines_at_start.csv
|
205
209
|
- spec/files/csv/mexico2013_pressdoc.csv
|
206
210
|
- spec/files/csv/no_content.csv
|
207
211
|
- spec/files/csv/with_headers.csv
|
208
212
|
- spec/files/csv/without_headers.csv
|
209
213
|
- spec/files/excel/edge_cases.xls
|
214
|
+
- spec/files/excel/empty_lines.xlsx
|
215
|
+
- spec/files/excel/empty_lines_at_start.xlsx
|
210
216
|
- spec/files/excel/no_content.xlsx
|
211
217
|
- spec/files/excel/with_headers.xls
|
212
218
|
- spec/files/excel/without_headers.xls
|