table_importer 0.2.3 → 0.2.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +8 -3
- data/lib/table_importer/copy_and_paste.rb +3 -0
- data/lib/table_importer/csv.rb +15 -3
- data/lib/table_importer/excel.rb +4 -0
- data/lib/table_importer/google.rb +10 -3
- data/lib/table_importer/source.rb +21 -8
- data/lib/table_importer/version.rb +1 -1
- data/spec/files/csv/null_values.csv +10 -0
- data/spec/files/excel/null_values.xls +0 -0
- data/spec/sources/csv_spec.rb +13 -0
- data/spec/sources/excel_spec.rb +13 -0
- data/spec/sources/google_spec.rb +1 -2
- metadata +6 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 84f4728211cdfa3a07bfec96d6e3dbf35554ec98
|
4
|
+
data.tar.gz: 08fb3b8daf2428a44348b798d335d741ef210dfd
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f4de9554e43a187a8e09820ff4ee9b6dc3b3c3203648a9866aef834de8e23e951d7c1ecf6d7cdb12468b31f5ab58886820d554ad5342e557df22208118311e02
|
7
|
+
data.tar.gz: 67fd83970aa94616152593e6160bb2311da1174526c1f23b6717c7e3eb42d77b01bcb1c19a254f8d9d1008761daa865f71723d4ae799d43fcabd388494fc82bf
|
data/README.md
CHANGED
@@ -64,13 +64,18 @@ The options you pass in are:
|
|
64
64
|
:compulsory_headers => {
|
65
65
|
:email => true, false # Does each record require an email address to be valid?
|
66
66
|
}
|
67
|
+
|
68
|
+
# Whether string placeholders for nil (i.e. strings that equal "NULL", "null", "nil", or "undefined") should be replaced with actual nil values.
|
69
|
+
:remove_nil_values => true
|
70
|
+
:remove_nil_values => false
|
67
71
|
|
68
72
|
```
|
69
73
|
|
70
74
|
There are a few ways to interact with the table importer:
|
71
75
|
|
72
76
|
```
|
73
|
-
|
77
|
+
options = { type: "csv", }
|
78
|
+
importer = TableImporter::Source.new(options)
|
74
79
|
|
75
80
|
# get the type
|
76
81
|
puts importer.get_type
|
@@ -88,7 +93,7 @@ There are a few ways to interact with the table importer:
|
|
88
93
|
puts importer.get_headers
|
89
94
|
=> "column_1, column_2, column_3"
|
90
95
|
|
91
|
-
# Get the first 8 lines (useful for providing a matching option for the user to map their own headers, like
|
96
|
+
# Get the first 8 lines (useful for providing a matching option for the user to map their own headers, like Mailchimp's contact import).
|
92
97
|
puts importer.get_preview_lines
|
93
98
|
=> [{:column_1 => "r1c1", :column_2 => "r1c2", :column_3 => "r1c3"}, {:column_1 => "r2c1", :column_2 => "r2c2", :column_3 => "r2c3"} etc]
|
94
99
|
|
@@ -99,7 +104,7 @@ There are a few ways to interact with the table importer:
|
|
99
104
|
puts importer.get_chunks(25)
|
100
105
|
=> All input chunked into 25 line blocks.
|
101
106
|
|
102
|
-
# The format for the returned chunks is not a simple array of hashes, like get_preview_lines
|
107
|
+
# Unlike get_preview_lines, get_chunks does not return a simple array of hashes: each chunk holds its :lines plus the per-row :errors
|
103
108
|
puts importer.get_chunks(2)
|
104
109
|
=> [{:lines => [{:column_1 => "r1c1", :column_2 => "r1c2", :column_3 => "r1c3"}, {:column_1 => "r2c1", :column_2 => "r2c2", :column_3 => "r2c3"}], :errors => []}, {:lines => [{:column_1 => "r3c1", :column_2 => "r3c2", :column_3 => "r3c3"}, {:column_1 => "r4c1", :column_2 => "r4c2", :column_3 => "r4c3"}], :errors => []}]
|
105
110
|
|
@@ -2,6 +2,8 @@ module TableImporter
|
|
2
2
|
|
3
3
|
class CopyAndPaste < Source
|
4
4
|
|
5
|
+
attr_accessor :remove_nil_values
|
6
|
+
|
5
7
|
def initialize(data)
|
6
8
|
@data = assign_data(data[:content])
|
7
9
|
@column_separator, @record_separator = assign_separators(data[:column_separator], data[:record_separator])
|
@@ -9,6 +11,7 @@ module TableImporter
|
|
9
11
|
@mapping = data[:user_headers]
|
10
12
|
@compulsory_headers = data[:compulsory_headers]
|
11
13
|
@delete_empty_columns = @data.length < 50000
|
14
|
+
@remove_nil_values = data[:remove_nil_values] == true
|
12
15
|
end
|
13
16
|
|
14
17
|
def assign_data(content)
|
data/lib/table_importer/csv.rb
CHANGED
@@ -5,12 +5,16 @@ module TableImporter
|
|
5
5
|
|
6
6
|
class CSV < Source
|
7
7
|
|
8
|
+
attr_accessor :remove_nil_values
|
9
|
+
|
8
10
|
def initialize(data)
|
9
11
|
@headers_present = data[:headers_present] # user has indicated headers are provided
|
10
12
|
@column_separator, @record_separator = initialize_separators(data[:column_separator], data[:record_separator])
|
11
13
|
@compulsory_headers = data[:compulsory_headers]
|
12
14
|
@file = data[:content]
|
13
15
|
@delete_empty_columns = File.size(@file) < 100000
|
16
|
+
@remove_nil_values = data[:remove_nil_values] == true
|
17
|
+
|
14
18
|
begin
|
15
19
|
first_line = get_first_line
|
16
20
|
if first_line == 0
|
@@ -145,9 +149,17 @@ module TableImporter
|
|
145
149
|
end
|
146
150
|
|
147
151
|
def default_options(options = {})
|
148
|
-
{
|
149
|
-
:
|
150
|
-
:
|
152
|
+
{
|
153
|
+
:col_sep => @column_separator,
|
154
|
+
:row_sep => @record_separator,
|
155
|
+
:force_simple_split => true,
|
156
|
+
:strip_chars_from_headers => /[\-"]/,
|
157
|
+
:remove_empty_values => false,
|
158
|
+
:verbose => false,
|
159
|
+
:headers_in_file => @headers_present,
|
160
|
+
:convert_values_to_numeric => false,
|
161
|
+
:user_provided_headers => @headers_present ? (@headers == nil || @headers == {} ? nil : @headers) : default_headers(100)
|
162
|
+
}.merge(options)
|
151
163
|
end
|
152
164
|
|
153
165
|
def clean_file(file)
|
data/lib/table_importer/excel.rb
CHANGED
@@ -2,6 +2,8 @@ module TableImporter
|
|
2
2
|
|
3
3
|
class Excel < RooSpreadsheetSource
|
4
4
|
|
5
|
+
attr_accessor :remove_nil_values
|
6
|
+
|
5
7
|
def initialize(data)
|
6
8
|
begin
|
7
9
|
@type = File.extname(data[:content]) == ".xls" ? "xls" : "xlsx"
|
@@ -10,6 +12,8 @@ module TableImporter
|
|
10
12
|
@compulsory_headers = data[:compulsory_headers]
|
11
13
|
@delete_empty_columns = (File.size(data[:content].path) < 100000)
|
12
14
|
@mapping = data[:user_headers]
|
15
|
+
@remove_nil_values = data[:remove_nil_values] == true
|
16
|
+
|
13
17
|
raise TableImporter::EmptyFileImportError.new if !@file.first_row
|
14
18
|
@headers = @headers_present ? @file.row(1).map.with_index { |header, index| header.present? ? header.to_sym : "column_#{index}"} : default_headers
|
15
19
|
rescue NoMethodError
|
@@ -2,14 +2,21 @@ module TableImporter
|
|
2
2
|
|
3
3
|
class Google < RooSpreadsheetSource
|
4
4
|
|
5
|
+
attr_accessor :remove_nil_values
|
6
|
+
|
5
7
|
def initialize(data)
|
6
8
|
begin
|
7
|
-
@headers_present = data[:headers_present]
|
8
9
|
@file = get_file(data[:content].split(", ")[1], data[:content].split(", ")[0])
|
10
|
+
|
11
|
+
@headers_present = data[:headers_present]
|
9
12
|
@compulsory_headers = data[:compulsory_headers]
|
13
|
+
@mapping = data[:user_headers] if data[:user_headers].present?
|
14
|
+
@remove_nil_values = data[:remove_nil_values] == true
|
15
|
+
|
10
16
|
@delete_empty_columns = false
|
11
|
-
|
17
|
+
|
12
18
|
raise TableImporter::EmptyFileImportError.new if !@file.first_row
|
19
|
+
|
13
20
|
@headers = @headers_present ? @file.row(1).map.with_index { |header, index| header.present? ? header.to_sym : "column_#{index}"} : default_headers
|
14
21
|
rescue NoMethodError
|
15
22
|
raise TableImporter::HeaderMismatchError.new
|
@@ -18,7 +25,7 @@ module TableImporter
|
|
18
25
|
|
19
26
|
def get_file(file_key, access_token)
|
20
27
|
begin
|
21
|
-
Roo::Google.new(file_key, {:access_token => access_token})
|
28
|
+
Roo::Google.new(file_key, { :access_token => access_token })
|
22
29
|
rescue TypeError
|
23
30
|
raise TableImporter::IncorrectFileError.new
|
24
31
|
end
|
@@ -1,10 +1,15 @@
|
|
1
1
|
require 'roo'
|
2
|
+
require 'roo-xls'
|
3
|
+
require 'roo-google'
|
2
4
|
|
3
5
|
module TableImporter
|
4
6
|
|
5
7
|
class Source
|
6
8
|
|
7
|
-
|
9
|
+
|
10
|
+
NIL_VALUES = %w( NULL null nil undefined )
|
11
|
+
|
12
|
+
SEPARATORS = { comma: ",", space: " ", tab: "\t", newline_mac: "\n", semicolon: ";", newline_windows: "\r\n", old_newline_mac: "\r" }
|
8
13
|
|
9
14
|
def initialize (data)
|
10
15
|
case data[:type]
|
@@ -108,26 +113,34 @@ module TableImporter
|
|
108
113
|
def check_empty_headers(line, empty_headers)
|
109
114
|
line.each do |key, value|
|
110
115
|
if value.present? && value.to_s.gsub(/[^A-Za-z0-9]/, '').present?
|
111
|
-
empty_headers.delete(
|
116
|
+
empty_headers.delete(clean_key(key).to_sym)
|
112
117
|
end
|
113
118
|
end
|
114
119
|
end
|
115
120
|
|
116
121
|
def line_empty?(line)
|
117
122
|
line = clean_line(line)
|
118
|
-
return line, line.all?{ |
|
123
|
+
return line, line.all?{ |_, item_value| line_item_is_garbage?(item_value) } && line.all?{ |item_key, _| line_item_is_garbage?(item_key) }
|
119
124
|
end
|
120
125
|
|
121
126
|
def clean_line(line)
|
122
127
|
map = {}
|
123
|
-
line.each_pair do |key,value|
|
124
|
-
map[
|
128
|
+
line.each_pair do |key, value|
|
129
|
+
map[clean_key(key).to_sym] = clean_value(value)
|
130
|
+
end
|
131
|
+
return map
|
132
|
+
end
|
133
|
+
|
134
|
+
def clean_value(item)
|
135
|
+
if remove_nil_values == true && NIL_VALUES.include?(item.to_s)
|
136
|
+
return nil
|
125
137
|
end
|
126
|
-
|
138
|
+
|
139
|
+
return item.to_s.delete("\u0000").to_s.delete("\x00")
|
127
140
|
end
|
128
141
|
|
129
|
-
def
|
130
|
-
item.to_s.delete("\u0000").to_s.delete("\x00")
|
142
|
+
def clean_key(item)
|
143
|
+
return item.to_s.delete("\u0000").to_s.delete("\x00")
|
131
144
|
end
|
132
145
|
|
133
146
|
def check_compulsory_headers?(line, compulsory_headers)
|
@@ -0,0 +1,10 @@
|
|
1
|
+
Country;Medium;Salutation;First name;Last name;Email;Phone number;Tags
|
2
|
+
Hong Kong;Men's Uno (Hong Kong Edition);Ms;Noelle;NULL;info@chicgroups.com;-28544930;Ressort_Business
|
3
|
+
Hong Kong;Hong Kong Commercial Daily;Mr;Ching Wai
|
4
|
+
Hong Kong;South China Morning Post;Mr;John;Cremer;john.cremer@scmp.com;-25651370;Ressort_Business
|
5
|
+
Hong Kong;Hong Kong Economic Journal;Mr;Toby;Yiu;tcsyiu@hkej.com;-28566702;Ressort_Business
|
6
|
+
Hong Kong;Hong KongTrader;Ms;Regina;Deluna;regina.deluna@hktdc.org;-25843524;Ressort_Business
|
7
|
+
Hong Kong;GS1 Hong Kong;Ms;Stella;Cheang;info@gs1hk.org;-28611967;Ressort_Business
|
8
|
+
Hong Kong;Playtimes;Ms;Jo;Allun;jo.allum@ppp.com.hk;-22018864;gfgsdfgs;Ressort_Business
|
9
|
+
Hong Kong;China Tourism;Ms;Mickey;Ching;edit-c@hkctp.com.hk;-25617149;Ressort_Business
|
10
|
+
Hong Kong;Metro Daily (Hong Kong);Mr;Jeff;Lee;news@metrohk.com.hk;-31960748;Ressort_Business
|
Binary file
|
data/spec/sources/csv_spec.rb
CHANGED
@@ -131,4 +131,17 @@ describe TableImporter::Source do
|
|
131
131
|
@source = nil
|
132
132
|
end
|
133
133
|
end
|
134
|
+
|
135
|
+
context 'when source has NULL values in it' do
|
136
|
+
|
137
|
+
it "Skips the null values if specified" do
|
138
|
+
source = TableImporter::Source.new({:content => File.open([Dir.pwd, "/spec/files/csv/null_values.csv"].join), :headers_present => false, :user_headers => { "email" => "5" }, :type => "csv", :column_separator => "", :record_separator => "", :compulsory_headers => { email: true }, remove_nil_values: true })
|
139
|
+
expect(source.get_chunks[0][:lines].first[:column_4]).to eql(nil)
|
140
|
+
end
|
141
|
+
|
142
|
+
it "Doesn't skip the null values if not specified" do
|
143
|
+
source = TableImporter::Source.new({:content => File.open([Dir.pwd, "/spec/files/csv/null_values.csv"].join), :headers_present => false, :user_headers => { "email" => "5" }, :type => "csv", :column_separator => "", :record_separator => "", :compulsory_headers => { email: true }, remove_nil_values: false })
|
144
|
+
expect(source.get_chunks[0][:lines].first[:column_4]).to eql("NULL")
|
145
|
+
end
|
146
|
+
end
|
134
147
|
end
|
data/spec/sources/excel_spec.rb
CHANGED
@@ -238,5 +238,18 @@ describe TableImporter::Source do
|
|
238
238
|
@source = nil
|
239
239
|
end
|
240
240
|
end
|
241
|
+
|
242
|
+
context 'when source has NULL values in it' do
|
243
|
+
|
244
|
+
it "Skips the null values if specified" do
|
245
|
+
source = TableImporter::Source.new({ content: File.open([Dir.pwd, "/spec/files/excel/null_values.xls"].join), :headers_present => false, :user_headers => { "email" => "0" }, :type => "xls", :column_separator => "", :record_separator => "", :compulsory_headers => { email: true }, remove_nil_values: true })
|
246
|
+
expect(source.get_preview_lines.first[:column_2]).to eql(nil)
|
247
|
+
end
|
248
|
+
|
249
|
+
it "Doesn't skip the null values if not specified" do
|
250
|
+
source = TableImporter::Source.new({:content => File.open([Dir.pwd, "/spec/files/excel/null_values.xls"].join), :headers_present => false, :user_headers => { "email" => "0" }, :type => "xls", :column_separator => "⁄", :record_separator => "", :compulsory_headers => { email: true }, remove_nil_values: false })
|
251
|
+
expect(source.get_preview_lines.first[:column_2]).to eql("NULL")
|
252
|
+
end
|
253
|
+
end
|
241
254
|
end
|
242
255
|
end
|
data/spec/sources/google_spec.rb
CHANGED
@@ -12,12 +12,11 @@ describe TableImporter::Source do
|
|
12
12
|
:content => 'CLIENT_ID, ACCESS_TOKEN',
|
13
13
|
:headers_present => true,
|
14
14
|
:user_headers => nil,
|
15
|
-
:user_headers => nil,
|
16
15
|
:type => "google",
|
17
16
|
:column_separator => "",
|
18
17
|
:record_separator => "",
|
19
18
|
:compulsory_headers =>
|
20
|
-
{:email => true}
|
19
|
+
{ :email => true }
|
21
20
|
})
|
22
21
|
end
|
23
22
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: table_importer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.3
|
4
|
+
version: 0.2.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Nick Dowse
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-04-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: spreadsheet
|
@@ -222,6 +222,7 @@ files:
|
|
222
222
|
- spec/files/csv/empty_lines_at_start.csv
|
223
223
|
- spec/files/csv/mexico2013_pressdoc.csv
|
224
224
|
- spec/files/csv/no_content.csv
|
225
|
+
- spec/files/csv/null_values.csv
|
225
226
|
- spec/files/csv/partway.csv
|
226
227
|
- spec/files/csv/with_headers.csv
|
227
228
|
- spec/files/csv/without_headers.csv
|
@@ -229,6 +230,7 @@ files:
|
|
229
230
|
- spec/files/excel/empty_lines.xlsx
|
230
231
|
- spec/files/excel/empty_lines_at_start.xlsx
|
231
232
|
- spec/files/excel/no_content.xlsx
|
233
|
+
- spec/files/excel/null_values.xls
|
232
234
|
- spec/files/excel/one_line.xls
|
233
235
|
- spec/files/excel/premapped_1.xls
|
234
236
|
- spec/files/excel/premapped_2.xls
|
@@ -273,6 +275,7 @@ test_files:
|
|
273
275
|
- spec/files/csv/empty_lines_at_start.csv
|
274
276
|
- spec/files/csv/mexico2013_pressdoc.csv
|
275
277
|
- spec/files/csv/no_content.csv
|
278
|
+
- spec/files/csv/null_values.csv
|
276
279
|
- spec/files/csv/partway.csv
|
277
280
|
- spec/files/csv/with_headers.csv
|
278
281
|
- spec/files/csv/without_headers.csv
|
@@ -280,6 +283,7 @@ test_files:
|
|
280
283
|
- spec/files/excel/empty_lines.xlsx
|
281
284
|
- spec/files/excel/empty_lines_at_start.xlsx
|
282
285
|
- spec/files/excel/no_content.xlsx
|
286
|
+
- spec/files/excel/null_values.xls
|
283
287
|
- spec/files/excel/one_line.xls
|
284
288
|
- spec/files/excel/premapped_1.xls
|
285
289
|
- spec/files/excel/premapped_2.xls
|