table_importer 0.0.7 → 0.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/table_importer/copy_and_paste.rb +1 -1
- data/lib/table_importer/csv.rb +16 -6
- data/lib/table_importer/excel.rb +4 -3
- data/lib/table_importer/source.rb +0 -1
- data/lib/table_importer/version.rb +1 -1
- data/lib/table_importer.rb +2 -1
- data/spec/files/csv/empty_lines_at_start.csv +26 -0
- data/spec/files/csv/with_headers.csv +7 -7
- data/spec/files/excel/empty_lines.xlsx +0 -0
- data/spec/files/excel/empty_lines_at_start.xlsx +0 -0
- data/spec/sources/csv_spec.rb +16 -5
- data/spec/sources/excel_spec.rb +30 -0
- metadata +8 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 43d3bdb6bc28dc89cfd18f3cf85fa28e126817e6
|
4
|
+
data.tar.gz: 2f0b9aa934e7955a4bc2c0277e4603c191569588
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7ab65d630b278af498ba4d2bc2950fe792d1ab41d4c69ee19cf939b48114410032bae34cfbffec98aa101c2900cdfe03d85b685bb824a44dc83a9160d1173c33
|
7
|
+
data.tar.gz: 0923656a555afc4ba6dd22e07422d2e53f3f93f5635b55af6ae385a69524e04cc2f034fdf804d547a2fcbe22857ca2f81ebc65b81b945f01a03684a1589aafde
|
data/lib/table_importer/copy_and_paste.rb
CHANGED
@@ -59,7 +59,7 @@ module TableImporter
|
|
59
59
|
|
60
60
|
def get_preview_lines(start_point = @headers_present ? 1 : 0, end_point = 10)
|
61
61
|
begin
|
62
|
-
lines = clean_chunks([get_lines(start_point, end_point)],
|
62
|
+
lines = clean_chunks([get_lines(start_point, end_point)], {}, @delete_empty_columns)[0][:lines]
|
63
63
|
if lines.first.nil?
|
64
64
|
get_preview_lines(start_point+10, end_point+10)
|
65
65
|
else
|
data/lib/table_importer/csv.rb
CHANGED
@@ -33,7 +33,7 @@ module TableImporter
|
|
33
33
|
|
34
34
|
def get_first_line
|
35
35
|
begin
|
36
|
-
SmarterCSV.process(@file.path, default_options({:col_sep => @column_separator.present? ? @column_separator : "\n", :row_sep => @record_separator != nil ? @record_separator : "\n", :chunk_size =>
|
36
|
+
SmarterCSV.process(@file.path, default_options({:col_sep => @column_separator.present? ? @column_separator : "\n", :row_sep => @record_separator != nil ? @record_separator : "\n", :chunk_size => 2})) do |chunk|
|
37
37
|
if @headers_present
|
38
38
|
return chunk.first.keys[0].to_s
|
39
39
|
else
|
@@ -80,10 +80,13 @@ module TableImporter
|
|
80
80
|
@record_separator = sort_separators(separators)
|
81
81
|
end
|
82
82
|
|
83
|
-
def get_preview_lines
|
83
|
+
def get_preview_lines(start = 0, finish = 7, chunk_size = 8)
|
84
84
|
begin
|
85
|
-
SmarterCSV.process(@file.path, default_options({:row_sep => @record_separator != nil ? @record_separator : "\n", :chunk_size =>
|
86
|
-
|
85
|
+
SmarterCSV.process(@file.path, default_options({:row_sep => @record_separator != nil ? @record_separator : "\n", :chunk_size => chunk_size})) do |chunk|
|
86
|
+
cleaned_chunk = clean_chunks([chunk], @compulsory_headers, @delete_empty_columns)[0].symbolize_keys[:lines]
|
87
|
+
return cleaned_chunk[start..finish] if cleaned_chunk.first.present?
|
88
|
+
@headers_present = false
|
89
|
+
get_preview_lines(start+8, finish+8, chunk_size+8)
|
87
90
|
end
|
88
91
|
rescue SmarterCSV::HeaderSizeMismatch
|
89
92
|
raise TableImporter::HeaderMismatchError.new
|
@@ -143,11 +146,18 @@ module TableImporter
|
|
143
146
|
def clean_file(file)
|
144
147
|
contents = file.read
|
145
148
|
import = Tempfile.new(["import", ".xls"], :encoding => "UTF-8")
|
146
|
-
|
149
|
+
utf8_content = contents.force_encoding('UTF-8').encode('UTF-16', :invalid => :replace, :replace => '?').encode('UTF-8').gsub!(/\r\n|\r/, "\n").squeeze("\n")
|
150
|
+
clean_contents = utf8_content[0] == "\n" ? utf8_content[1..-1] : utf8_content
|
151
|
+
import.write(clean_contents)
|
147
152
|
import.close
|
153
|
+
reset_separators
|
154
|
+
return import
|
155
|
+
end
|
156
|
+
|
157
|
+
def reset_separators
|
148
158
|
SEPARATORS.except!(:newline_windows, :old_newline_mac)
|
149
159
|
@record_separator = "\n"
|
150
|
-
|
160
|
+
@column_separator = ""
|
151
161
|
end
|
152
162
|
end
|
153
163
|
end
|
data/lib/table_importer/excel.rb
CHANGED
@@ -15,7 +15,7 @@ module TableImporter
|
|
15
15
|
if !data[:headers].nil?
|
16
16
|
@headers = data[:headers]
|
17
17
|
else
|
18
|
-
@headers = @headers_present ? @file.row(1).map { |header| header.
|
18
|
+
@headers = @headers_present ? @file.row(1).map.with_index { |header, index| header.present? ? header.to_sym : "column_#{index}"} : default_headers
|
19
19
|
end
|
20
20
|
rescue NoMethodError
|
21
21
|
raise TableImporter::HeaderMismatchError.new
|
@@ -44,11 +44,12 @@ module TableImporter
|
|
44
44
|
|
45
45
|
def get_preview_lines(start_point = 0, end_point = 10)
|
46
46
|
begin
|
47
|
-
|
47
|
+
lines = clean_chunks([get_lines(start_point, end_point)], @compulsory_headers)[0][:lines]
|
48
|
+
if lines.first.nil?
|
48
49
|
get_preview_lines(start_point+10, end_point+10)
|
49
50
|
else
|
50
51
|
@headers = @mapping.present? ? convert_headers : @headers
|
51
|
-
|
52
|
+
lines[0..8]
|
52
53
|
end
|
53
54
|
rescue SystemStackError
|
54
55
|
raise TableImporter::EmptyFileImportError.new
|
data/lib/table_importer.rb
CHANGED
data/spec/files/csv/empty_lines_at_start.csv
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
|
2
|
+
|
3
|
+
|
4
|
+
|
5
|
+
|
6
|
+
|
7
|
+
|
8
|
+
|
9
|
+
|
10
|
+
|
11
|
+
|
12
|
+
|
13
|
+
|
14
|
+
|
15
|
+
|
16
|
+
|
17
|
+
Country;Medium;Salutation;First name;Last name;Email;Phone number;Tags
|
18
|
+
Hong Kong;Men's Uno (Hong Kong Edition);Ms;Noelle;Chan;info@chicgroups.com;-28544930;Ressort_Business
|
19
|
+
Hong Kong;Hong Kong Commercial Daily;Mr;Ching Wai
|
20
|
+
Hong Kong;South China Morning Post;Mr;John;Cremer;john.cremer@scmp.com;-25651370;Ressort_Business
|
21
|
+
Hong Kong;Hong Kong Economic Journal;Mr;Toby;Yiu;tcsyiu@hkej.com;-28566702;Ressort_Business
|
22
|
+
Hong Kong;Hong KongTrader;Ms;Regina;Deluna;regina.deluna@hktdc.org;-25843524;Ressort_Business
|
23
|
+
Hong Kong;GS1 Hong Kong;Ms;Stella;Cheang;info@gs1hk.org;-28611967;Ressort_Business
|
24
|
+
Hong Kong;Playtimes;Ms;Jo;Allun;jo.allum@ppp.com.hk;-22018864;gfgsdfgs;Ressort_Business
|
25
|
+
Hong Kong;China Tourism;Ms;Mickey;Ching;edit-c@hkctp.com.hk;-25617149;Ressort_Business
|
26
|
+
Hong Kong;Metro Daily (Hong Kong);Mr;Jeff;Lee;news@metrohk.com.hk;-31960748;Ressort_Business
|
data/spec/files/csv/with_headers.csv
CHANGED
@@ -1,10 +1,10 @@
|
|
1
|
-
Country;Medium;Salutation;First name
|
2
|
-
Hong Kong;Men's Uno (Hong Kong Edition);Ms;Noelle;Chan
|
3
|
-
Hong Kong;Hong Kong Commercial Daily;Mr;Ching Wai
|
4
|
-
Hong Kong;South China Morning Post;Mr;John;Cremer
|
5
|
-
Hong Kong;Hong Kong Economic
|
6
|
-
Hong Kong;Hong
|
1
|
+
Country;Medium;Salutation;First name;Last name;Email;Phone number;Tags
|
2
|
+
Hong Kong;Men's Uno (Hong Kong Edition);Ms;Noelle;Chan;info@chicgroups.com;-28544930;Ressort_Business
|
3
|
+
Hong Kong;Hong Kong Commercial Daily;Mr;Ching Wai
|
4
|
+
Hong Kong;South China Morning Post;Mr;John;Cremer;john.cremer@scmp.com;-25651370;Ressort_Business
|
5
|
+
Hong Kong;Hong Kong Economic Journal;Mr;Toby;Yiu;tcsyiu@hkej.com;-28566702;Ressort_Business
|
6
|
+
Hong Kong;Hong KongTrader;Ms;Regina;Deluna;regina.deluna@hktdc.org;-25843524;Ressort_Business
|
7
7
|
Hong Kong;GS1 Hong Kong;Ms;Stella;Cheang;info@gs1hk.org;-28611967;Ressort_Business
|
8
|
-
Hong Kong;Playtimes;Ms;Jo;Allun;jo.allum@ppp.com.hk;-22018864;gfgsdfgs
|
8
|
+
Hong Kong;Playtimes;Ms;Jo;Allun;jo.allum@ppp.com.hk;-22018864;gfgsdfgs;Ressort_Business
|
9
9
|
Hong Kong;China Tourism;Ms;Mickey;Ching;edit-c@hkctp.com.hk;-25617149;Ressort_Business
|
10
10
|
Hong Kong;Metro Daily (Hong Kong);Mr;Jeff;Lee;news@metrohk.com.hk;-31960748;Ressort_Business
|
Binary file
|
Binary file
|
data/spec/sources/csv_spec.rb
CHANGED
@@ -9,12 +9,8 @@ describe TableImporter::Source do
|
|
9
9
|
@source = TableImporter::Source.new({:content => File.open([Dir.pwd, "/spec/files/csv/with_headers.csv"].join), :headers_present => true, :headers => nil, :user_headers => nil, :type => "csv", :column_separator => "", :record_separator => "", :compulsory_headers => {:email => true}})
|
10
10
|
end
|
11
11
|
|
12
|
-
it "creates a source object" do
|
13
|
-
TableImporter::Source.new({:content => File.open([Dir.pwd, "/spec/files/csv/with_headers.csv"].join), :headers_present => true, :headers => nil, :user_headers => nil, :type => "csv", :column_separator => "", :record_separator => "", :compulsory_headers => {:email => true}})
|
14
|
-
end
|
15
|
-
|
16
12
|
it "has the correct headers" do
|
17
|
-
@source.get_headers.should eql(["country", "medium", "salutation", "first_name
|
13
|
+
@source.get_headers.should eql(["country", "medium", "salutation", "first_name", "last_name", "email", "phone_number", "tags"])
|
18
14
|
end
|
19
15
|
|
20
16
|
it "has the correct number of chunks" do
|
@@ -105,4 +101,19 @@ describe TableImporter::Source do
|
|
105
101
|
end
|
106
102
|
end
|
107
103
|
end
|
104
|
+
|
105
|
+
context 'when source has empty lines at start' do
|
106
|
+
|
107
|
+
before(:each) do
|
108
|
+
@source = TableImporter::Source.new({:content => File.open([Dir.pwd, "/spec/files/csv/empty_lines_at_start.csv"].join), :headers_present => true, :headers => nil, :user_headers => nil, :type => "csv", :column_separator => "", :record_separator => "", :compulsory_headers => {:email => true}})
|
109
|
+
end
|
110
|
+
|
111
|
+
it "Gets the preview lines without error" do
|
112
|
+
@source.get_preview_lines.count.should eql(7)
|
113
|
+
end
|
114
|
+
|
115
|
+
after(:each) do
|
116
|
+
@source = nil
|
117
|
+
end
|
118
|
+
end
|
108
119
|
end
|
data/spec/sources/excel_spec.rb
CHANGED
@@ -112,6 +112,36 @@ describe TableImporter::Source do
|
|
112
112
|
end
|
113
113
|
end
|
114
114
|
|
115
|
+
context 'when source has empty lines' do
|
116
|
+
|
117
|
+
before(:each) do
|
118
|
+
@source = TableImporter::Source.new({:content => File.open([Dir.pwd, "/spec/files/excel/empty_lines.xlsx"].join), :headers_present => false, :headers => nil, :user_headers => nil, :type => "xls", :column_separator => "", :record_separator => "", :compulsory_headers => {:email => true}})
|
119
|
+
end
|
120
|
+
|
121
|
+
it "does not throw an error" do
|
122
|
+
expect {@source.get_preview_lines}.to_not raise_error
|
123
|
+
end
|
124
|
+
|
125
|
+
after(:each) do
|
126
|
+
@source = nil
|
127
|
+
end
|
128
|
+
end
|
129
|
+
|
130
|
+
context 'when source has 20 empty lines at the beginning' do
|
131
|
+
|
132
|
+
before(:each) do
|
133
|
+
@source = TableImporter::Source.new({:content => File.open([Dir.pwd, "/spec/files/excel/empty_lines_at_start.xlsx"].join), :headers_present => true, :headers => nil, :user_headers => nil, :type => "xls", :column_separator => "", :record_separator => "", :compulsory_headers => {:email => true}})
|
134
|
+
end
|
135
|
+
|
136
|
+
it "does not throw an error" do
|
137
|
+
@source.get_preview_lines.count.should eql(6)
|
138
|
+
end
|
139
|
+
|
140
|
+
after(:each) do
|
141
|
+
@source = nil
|
142
|
+
end
|
143
|
+
end
|
144
|
+
|
115
145
|
context 'when source is an empty xls file' do
|
116
146
|
|
117
147
|
it 'raises an error when creating a source object' do
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: table_importer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.7
|
4
|
+
version: 0.0.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Nick Dowse
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-09-
|
11
|
+
date: 2014-09-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: spreadsheet
|
@@ -161,11 +161,14 @@ files:
|
|
161
161
|
- lib/table_importer/source.rb
|
162
162
|
- lib/table_importer/version.rb
|
163
163
|
- spec/files/csv/edge_cases.csv
|
164
|
+
- spec/files/csv/empty_lines_at_start.csv
|
164
165
|
- spec/files/csv/mexico2013_pressdoc.csv
|
165
166
|
- spec/files/csv/no_content.csv
|
166
167
|
- spec/files/csv/with_headers.csv
|
167
168
|
- spec/files/csv/without_headers.csv
|
168
169
|
- spec/files/excel/edge_cases.xls
|
170
|
+
- spec/files/excel/empty_lines.xlsx
|
171
|
+
- spec/files/excel/empty_lines_at_start.xlsx
|
169
172
|
- spec/files/excel/no_content.xlsx
|
170
173
|
- spec/files/excel/with_headers.xls
|
171
174
|
- spec/files/excel/without_headers.xls
|
@@ -202,11 +205,14 @@ summary: Given a file (or a string) containing a container, along with options,
|
|
202
205
|
will return a hash of those values. Great for importing poorly formatted CSV files.
|
203
206
|
test_files:
|
204
207
|
- spec/files/csv/edge_cases.csv
|
208
|
+
- spec/files/csv/empty_lines_at_start.csv
|
205
209
|
- spec/files/csv/mexico2013_pressdoc.csv
|
206
210
|
- spec/files/csv/no_content.csv
|
207
211
|
- spec/files/csv/with_headers.csv
|
208
212
|
- spec/files/csv/without_headers.csv
|
209
213
|
- spec/files/excel/edge_cases.xls
|
214
|
+
- spec/files/excel/empty_lines.xlsx
|
215
|
+
- spec/files/excel/empty_lines_at_start.xlsx
|
210
216
|
- spec/files/excel/no_content.xlsx
|
211
217
|
- spec/files/excel/with_headers.xls
|
212
218
|
- spec/files/excel/without_headers.xls
|