table_importer 0.2.3 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +8 -3
- data/lib/table_importer/copy_and_paste.rb +3 -0
- data/lib/table_importer/csv.rb +15 -3
- data/lib/table_importer/excel.rb +4 -0
- data/lib/table_importer/google.rb +10 -3
- data/lib/table_importer/source.rb +21 -8
- data/lib/table_importer/version.rb +1 -1
- data/spec/files/csv/null_values.csv +10 -0
- data/spec/files/excel/null_values.xls +0 -0
- data/spec/sources/csv_spec.rb +13 -0
- data/spec/sources/excel_spec.rb +13 -0
- data/spec/sources/google_spec.rb +1 -2
- metadata +6 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 84f4728211cdfa3a07bfec96d6e3dbf35554ec98
|
4
|
+
data.tar.gz: 08fb3b8daf2428a44348b798d335d741ef210dfd
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f4de9554e43a187a8e09820ff4ee9b6dc3b3c3203648a9866aef834de8e23e951d7c1ecf6d7cdb12468b31f5ab58886820d554ad5342e557df22208118311e02
|
7
|
+
data.tar.gz: 67fd83970aa94616152593e6160bb2311da1174526c1f23b6717c7e3eb42d77b01bcb1c19a254f8d9d1008761daa865f71723d4ae799d43fcabd388494fc82bf
|
data/README.md
CHANGED
@@ -64,13 +64,18 @@ The options you pass in are:
|
|
64
64
|
:compulsory_headers => {
|
65
65
|
:email => true, false # Does each record require an email address to be valid?
|
66
66
|
}
|
67
|
+
|
68
|
+
# Whether string placeholders for nil (i.e. strings equal to "NULL", "null", "nil", or "undefined") should be replaced with actual nil values.
|
69
|
+
:remove_nil_values => true
|
70
|
+
:remove_nil_values => false
|
67
71
|
|
68
72
|
```
|
69
73
|
|
70
74
|
There are a few ways to interact with the table importer:
|
71
75
|
|
72
76
|
```
|
73
|
-
|
77
|
+
options = { type: "csv", }
|
78
|
+
importer = TableImporter::Source.new(options)
|
74
79
|
|
75
80
|
# get the type
|
76
81
|
puts importer.get_type
|
@@ -88,7 +93,7 @@ There are a few ways to interact with the table importer:
|
|
88
93
|
puts importer.get_headers
|
89
94
|
=> "column_1, column_2, column_3"
|
90
95
|
|
91
|
-
# Get the first 8 lines (useful for providing a matching option for the user to map their own headers, like
|
96
|
+
# Get the first 8 lines (useful for providing a matching option for the user to map their own headers, like Mailchimp's contact import).
|
92
97
|
puts importer.get_preview_lines
|
93
98
|
=> [{:column_1 => "r1c1", :column_2 => "r1c2", :column_3 => "r1c3"}, {:column_1 => "r2c1", :column_2 => "r2c2", :column_3 => "r2c3"} etc]
|
94
99
|
|
@@ -99,7 +104,7 @@ There are a few ways to interact with the table importer:
|
|
99
104
|
puts importer.get_chunks(25)
|
100
105
|
=> All input chunked into 25 line blocks.
|
101
106
|
|
102
|
-
# The format for the returned chunks is not a simple array of hashes, like get_preview_lines
|
107
|
+
# The format for the returned chunks is not a simple array of hashes, like get_preview_lines, as it also includes per-row errors
|
103
108
|
puts importer.get_chunks(2)
|
104
109
|
=> [{:lines => [{:column_1 => "r1c1", :column_2 => "r1c2", :column_3 => "r1c3"}, {:column_1 => "r2c1", :column_2 => "r2c2", :column_3 => "r2c3"}], :errors => []}, {:lines => [{:column_1 => "r3c1", :column_2 => "r3c2", :column_3 => "r3c3"}, {:column_1 => "r4c1", :column_2 => "r4c2", :column_3 => "r4c3"}], :errors => []}]
|
105
110
|
|
data/lib/table_importer/copy_and_paste.rb
CHANGED
@@ -2,6 +2,8 @@ module TableImporter
|
|
2
2
|
|
3
3
|
class CopyAndPaste < Source
|
4
4
|
|
5
|
+
attr_accessor :remove_nil_values
|
6
|
+
|
5
7
|
def initialize(data)
|
6
8
|
@data = assign_data(data[:content])
|
7
9
|
@column_separator, @record_separator = assign_separators(data[:column_separator], data[:record_separator])
|
@@ -9,6 +11,7 @@ module TableImporter
|
|
9
11
|
@mapping = data[:user_headers]
|
10
12
|
@compulsory_headers = data[:compulsory_headers]
|
11
13
|
@delete_empty_columns = @data.length < 50000
|
14
|
+
@remove_nil_values = data[:remove_nil_values] == true
|
12
15
|
end
|
13
16
|
|
14
17
|
def assign_data(content)
|
data/lib/table_importer/csv.rb
CHANGED
@@ -5,12 +5,16 @@ module TableImporter
|
|
5
5
|
|
6
6
|
class CSV < Source
|
7
7
|
|
8
|
+
attr_accessor :remove_nil_values
|
9
|
+
|
8
10
|
def initialize(data)
|
9
11
|
@headers_present = data[:headers_present] # user has indicated headers are provided
|
10
12
|
@column_separator, @record_separator = initialize_separators(data[:column_separator], data[:record_separator])
|
11
13
|
@compulsory_headers = data[:compulsory_headers]
|
12
14
|
@file = data[:content]
|
13
15
|
@delete_empty_columns = File.size(@file) < 100000
|
16
|
+
@remove_nil_values = data[:remove_nil_values] == true
|
17
|
+
|
14
18
|
begin
|
15
19
|
first_line = get_first_line
|
16
20
|
if first_line == 0
|
@@ -145,9 +149,17 @@ module TableImporter
|
|
145
149
|
end
|
146
150
|
|
147
151
|
def default_options(options = {})
|
148
|
-
{
|
149
|
-
:
|
150
|
-
:
|
152
|
+
{
|
153
|
+
:col_sep => @column_separator,
|
154
|
+
:row_sep => @record_separator,
|
155
|
+
:force_simple_split => true,
|
156
|
+
:strip_chars_from_headers => /[\-"]/,
|
157
|
+
:remove_empty_values => false,
|
158
|
+
:verbose => false,
|
159
|
+
:headers_in_file => @headers_present,
|
160
|
+
:convert_values_to_numeric => false,
|
161
|
+
:user_provided_headers => @headers_present ? (@headers == nil || @headers == {} ? nil : @headers) : default_headers(100)
|
162
|
+
}.merge(options)
|
151
163
|
end
|
152
164
|
|
153
165
|
def clean_file(file)
|
data/lib/table_importer/excel.rb
CHANGED
@@ -2,6 +2,8 @@ module TableImporter
|
|
2
2
|
|
3
3
|
class Excel < RooSpreadsheetSource
|
4
4
|
|
5
|
+
attr_accessor :remove_nil_values
|
6
|
+
|
5
7
|
def initialize(data)
|
6
8
|
begin
|
7
9
|
@type = File.extname(data[:content]) == ".xls" ? "xls" : "xlsx"
|
@@ -10,6 +12,8 @@ module TableImporter
|
|
10
12
|
@compulsory_headers = data[:compulsory_headers]
|
11
13
|
@delete_empty_columns = (File.size(data[:content].path) < 100000)
|
12
14
|
@mapping = data[:user_headers]
|
15
|
+
@remove_nil_values = data[:remove_nil_values] == true
|
16
|
+
|
13
17
|
raise TableImporter::EmptyFileImportError.new if !@file.first_row
|
14
18
|
@headers = @headers_present ? @file.row(1).map.with_index { |header, index| header.present? ? header.to_sym : "column_#{index}"} : default_headers
|
15
19
|
rescue NoMethodError
|
data/lib/table_importer/google.rb
CHANGED
@@ -2,14 +2,21 @@ module TableImporter
|
|
2
2
|
|
3
3
|
class Google < RooSpreadsheetSource
|
4
4
|
|
5
|
+
attr_accessor :remove_nil_values
|
6
|
+
|
5
7
|
def initialize(data)
|
6
8
|
begin
|
7
|
-
@headers_present = data[:headers_present]
|
8
9
|
@file = get_file(data[:content].split(", ")[1], data[:content].split(", ")[0])
|
10
|
+
|
11
|
+
@headers_present = data[:headers_present]
|
9
12
|
@compulsory_headers = data[:compulsory_headers]
|
13
|
+
@mapping = data[:user_headers] if data[:user_headers].present?
|
14
|
+
@remove_nil_values = data[:remove_nil_values] == true
|
15
|
+
|
10
16
|
@delete_empty_columns = false
|
11
|
-
|
17
|
+
|
12
18
|
raise TableImporter::EmptyFileImportError.new if !@file.first_row
|
19
|
+
|
13
20
|
@headers = @headers_present ? @file.row(1).map.with_index { |header, index| header.present? ? header.to_sym : "column_#{index}"} : default_headers
|
14
21
|
rescue NoMethodError
|
15
22
|
raise TableImporter::HeaderMismatchError.new
|
@@ -18,7 +25,7 @@ module TableImporter
|
|
18
25
|
|
19
26
|
def get_file(file_key, access_token)
|
20
27
|
begin
|
21
|
-
Roo::Google.new(file_key, {:access_token => access_token})
|
28
|
+
Roo::Google.new(file_key, { :access_token => access_token })
|
22
29
|
rescue TypeError
|
23
30
|
raise TableImporter::IncorrectFileError.new
|
24
31
|
end
|
data/lib/table_importer/source.rb
CHANGED
@@ -1,10 +1,15 @@
|
|
1
1
|
require 'roo'
|
2
|
+
require 'roo-xls'
|
3
|
+
require 'roo-google'
|
2
4
|
|
3
5
|
module TableImporter
|
4
6
|
|
5
7
|
class Source
|
6
8
|
|
7
|
-
|
9
|
+
|
10
|
+
NIL_VALUES = %w( NULL null nil undefined )
|
11
|
+
|
12
|
+
SEPARATORS = { comma: ",", space: " ", tab: "\t", newline_mac: "\n", semicolon: ";", newline_windows: "\r\n", old_newline_mac: "\r" }
|
8
13
|
|
9
14
|
def initialize (data)
|
10
15
|
case data[:type]
|
@@ -108,26 +113,34 @@ module TableImporter
|
|
108
113
|
def check_empty_headers(line, empty_headers)
|
109
114
|
line.each do |key, value|
|
110
115
|
if value.present? && value.to_s.gsub(/[^A-Za-z0-9]/, '').present?
|
111
|
-
empty_headers.delete(
|
116
|
+
empty_headers.delete(clean_key(key).to_sym)
|
112
117
|
end
|
113
118
|
end
|
114
119
|
end
|
115
120
|
|
116
121
|
def line_empty?(line)
|
117
122
|
line = clean_line(line)
|
118
|
-
return line, line.all?{ |
|
123
|
+
return line, line.all?{ |_, item_value| line_item_is_garbage?(item_value) } && line.all?{ |item_key, _| line_item_is_garbage?(item_key) }
|
119
124
|
end
|
120
125
|
|
121
126
|
def clean_line(line)
|
122
127
|
map = {}
|
123
|
-
line.each_pair do |key,value|
|
124
|
-
map[
|
128
|
+
line.each_pair do |key, value|
|
129
|
+
map[clean_key(key).to_sym] = clean_value(value)
|
130
|
+
end
|
131
|
+
return map
|
132
|
+
end
|
133
|
+
|
134
|
+
def clean_value(item)
|
135
|
+
if remove_nil_values == true && NIL_VALUES.include?(item.to_s)
|
136
|
+
return nil
|
125
137
|
end
|
126
|
-
|
138
|
+
|
139
|
+
return item.to_s.delete("\u0000").to_s.delete("\x00")
|
127
140
|
end
|
128
141
|
|
129
|
-
def
|
130
|
-
item.to_s.delete("\u0000").to_s.delete("\x00")
|
142
|
+
def clean_key(item)
|
143
|
+
return item.to_s.delete("\u0000").to_s.delete("\x00")
|
131
144
|
end
|
132
145
|
|
133
146
|
def check_compulsory_headers?(line, compulsory_headers)
|
@@ -0,0 +1,10 @@
|
|
1
|
+
Country;Medium;Salutation;First name;Last name;Email;Phone number;Tags
|
2
|
+
Hong Kong;Men's Uno (Hong Kong Edition);Ms;Noelle;NULL;info@chicgroups.com;-28544930;Ressort_Business
|
3
|
+
Hong Kong;Hong Kong Commercial Daily;Mr;Ching Wai
|
4
|
+
Hong Kong;South China Morning Post;Mr;John;Cremer;john.cremer@scmp.com;-25651370;Ressort_Business
|
5
|
+
Hong Kong;Hong Kong Economic Journal;Mr;Toby;Yiu;tcsyiu@hkej.com;-28566702;Ressort_Business
|
6
|
+
Hong Kong;Hong KongTrader;Ms;Regina;Deluna;regina.deluna@hktdc.org;-25843524;Ressort_Business
|
7
|
+
Hong Kong;GS1 Hong Kong;Ms;Stella;Cheang;info@gs1hk.org;-28611967;Ressort_Business
|
8
|
+
Hong Kong;Playtimes;Ms;Jo;Allun;jo.allum@ppp.com.hk;-22018864;gfgsdfgs;Ressort_Business
|
9
|
+
Hong Kong;China Tourism;Ms;Mickey;Ching;edit-c@hkctp.com.hk;-25617149;Ressort_Business
|
10
|
+
Hong Kong;Metro Daily (Hong Kong);Mr;Jeff;Lee;news@metrohk.com.hk;-31960748;Ressort_Business
|
Binary file
|
data/spec/sources/csv_spec.rb
CHANGED
@@ -131,4 +131,17 @@ describe TableImporter::Source do
|
|
131
131
|
@source = nil
|
132
132
|
end
|
133
133
|
end
|
134
|
+
|
135
|
+
context 'when source has NULL values in it' do
|
136
|
+
|
137
|
+
it "Skips the null values if specified" do
|
138
|
+
source = TableImporter::Source.new({:content => File.open([Dir.pwd, "/spec/files/csv/null_values.csv"].join), :headers_present => false, :user_headers => { "email" => "5" }, :type => "csv", :column_separator => "", :record_separator => "", :compulsory_headers => { email: true }, remove_nil_values: true })
|
139
|
+
expect(source.get_chunks[0][:lines].first[:column_4]).to eql(nil)
|
140
|
+
end
|
141
|
+
|
142
|
+
it "Doesn't skip the null values if not specified" do
|
143
|
+
source = TableImporter::Source.new({:content => File.open([Dir.pwd, "/spec/files/csv/null_values.csv"].join), :headers_present => false, :user_headers => { "email" => "5" }, :type => "csv", :column_separator => "", :record_separator => "", :compulsory_headers => { email: true }, remove_nil_values: false })
|
144
|
+
expect(source.get_chunks[0][:lines].first[:column_4]).to eql("NULL")
|
145
|
+
end
|
146
|
+
end
|
134
147
|
end
|
data/spec/sources/excel_spec.rb
CHANGED
@@ -238,5 +238,18 @@ describe TableImporter::Source do
|
|
238
238
|
@source = nil
|
239
239
|
end
|
240
240
|
end
|
241
|
+
|
242
|
+
context 'when source has NULL values in it' do
|
243
|
+
|
244
|
+
it "Skips the null values if specified" do
|
245
|
+
source = TableImporter::Source.new({ content: File.open([Dir.pwd, "/spec/files/excel/null_values.xls"].join), :headers_present => false, :user_headers => { "email" => "0" }, :type => "xls", :column_separator => "", :record_separator => "", :compulsory_headers => { email: true }, remove_nil_values: true })
|
246
|
+
expect(source.get_preview_lines.first[:column_2]).to eql(nil)
|
247
|
+
end
|
248
|
+
|
249
|
+
it "Doesn't skip the null values if not specified" do
|
250
|
+
source = TableImporter::Source.new({:content => File.open([Dir.pwd, "/spec/files/excel/null_values.xls"].join), :headers_present => false, :user_headers => { "email" => "0" }, :type => "xls", :column_separator => "⁄", :record_separator => "", :compulsory_headers => { email: true }, remove_nil_values: false })
|
251
|
+
expect(source.get_preview_lines.first[:column_2]).to eql("NULL")
|
252
|
+
end
|
253
|
+
end
|
241
254
|
end
|
242
255
|
end
|
data/spec/sources/google_spec.rb
CHANGED
@@ -12,12 +12,11 @@ describe TableImporter::Source do
|
|
12
12
|
:content => 'CLIENT_ID, ACCESS_TOKEN',
|
13
13
|
:headers_present => true,
|
14
14
|
:user_headers => nil,
|
15
|
-
:user_headers => nil,
|
16
15
|
:type => "google",
|
17
16
|
:column_separator => "",
|
18
17
|
:record_separator => "",
|
19
18
|
:compulsory_headers =>
|
20
|
-
{:email => true}
|
19
|
+
{ :email => true }
|
21
20
|
})
|
22
21
|
end
|
23
22
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: table_importer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.3
|
4
|
+
version: 0.2.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Nick Dowse
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-04-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: spreadsheet
|
@@ -222,6 +222,7 @@ files:
|
|
222
222
|
- spec/files/csv/empty_lines_at_start.csv
|
223
223
|
- spec/files/csv/mexico2013_pressdoc.csv
|
224
224
|
- spec/files/csv/no_content.csv
|
225
|
+
- spec/files/csv/null_values.csv
|
225
226
|
- spec/files/csv/partway.csv
|
226
227
|
- spec/files/csv/with_headers.csv
|
227
228
|
- spec/files/csv/without_headers.csv
|
@@ -229,6 +230,7 @@ files:
|
|
229
230
|
- spec/files/excel/empty_lines.xlsx
|
230
231
|
- spec/files/excel/empty_lines_at_start.xlsx
|
231
232
|
- spec/files/excel/no_content.xlsx
|
233
|
+
- spec/files/excel/null_values.xls
|
232
234
|
- spec/files/excel/one_line.xls
|
233
235
|
- spec/files/excel/premapped_1.xls
|
234
236
|
- spec/files/excel/premapped_2.xls
|
@@ -273,6 +275,7 @@ test_files:
|
|
273
275
|
- spec/files/csv/empty_lines_at_start.csv
|
274
276
|
- spec/files/csv/mexico2013_pressdoc.csv
|
275
277
|
- spec/files/csv/no_content.csv
|
278
|
+
- spec/files/csv/null_values.csv
|
276
279
|
- spec/files/csv/partway.csv
|
277
280
|
- spec/files/csv/with_headers.csv
|
278
281
|
- spec/files/csv/without_headers.csv
|
@@ -280,6 +283,7 @@ test_files:
|
|
280
283
|
- spec/files/excel/empty_lines.xlsx
|
281
284
|
- spec/files/excel/empty_lines_at_start.xlsx
|
282
285
|
- spec/files/excel/no_content.xlsx
|
286
|
+
- spec/files/excel/null_values.xls
|
283
287
|
- spec/files/excel/one_line.xls
|
284
288
|
- spec/files/excel/premapped_1.xls
|
285
289
|
- spec/files/excel/premapped_2.xls
|