table_importer 0.2.3 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 865e1844a77e5b2deb00d9f8d60c3a3c03c6889d
4
- data.tar.gz: 5efeeabd463f044f90c0abf2792ca026c80b9998
3
+ metadata.gz: 84f4728211cdfa3a07bfec96d6e3dbf35554ec98
4
+ data.tar.gz: 08fb3b8daf2428a44348b798d335d741ef210dfd
5
5
  SHA512:
6
- metadata.gz: e554a0f3ba76457f86a42da86c006462b990b627dbae53c2a35c7c0554499160a282dc3b0df0290f72ae29bb2630a72b3fa0a391a04e0e5647ec8cbcf0b0a6c9
7
- data.tar.gz: 76d89f4db35c740e6c1dd7a72d99dac88af2c498c5c047d2ea8bb451ba9e8b529105a421ca7946816e45144085be677eec8b1959d5f91f95b943b2144268cdef
6
+ metadata.gz: f4de9554e43a187a8e09820ff4ee9b6dc3b3c3203648a9866aef834de8e23e951d7c1ecf6d7cdb12468b31f5ab58886820d554ad5342e557df22208118311e02
7
+ data.tar.gz: 67fd83970aa94616152593e6160bb2311da1174526c1f23b6717c7e3eb42d77b01bcb1c19a254f8d9d1008761daa865f71723d4ae799d43fcabd388494fc82bf
data/README.md CHANGED
@@ -64,13 +64,18 @@ The options you pass in are:
64
64
  :compulsory_headers => {
65
65
  :email => true, false # Does each record require an email address to be valid?
66
66
  }
67
+
68
+ # Whether string placeholders for nil (i.e. strings that equal "NULL", "null", "nil", or "undefined") should be replaced with actual nil values.
69
+ :remove_nil_values => true
70
+ :remove_nil_values => false
67
71
 
68
72
  ```
69
73
 
70
74
  There are a few ways to interact with the table importer:
71
75
 
72
76
  ```
73
- importer = TableImporter::Source.new({options})
77
+ options = { type: "csv", }
78
+ importer = TableImporter::Source.new(options)
74
79
 
75
80
  # get the type
76
81
  puts importer.get_type
@@ -88,7 +93,7 @@ There are a few ways to interact with the table importer:
88
93
  puts importer.get_headers
89
94
  => "column_1, column_2, column_3"
90
95
 
91
- # Get the first 8 lines (useful for providing a matching option for the user to map their own headers, like mailchimps contact import.
96
+ # Get the first 8 lines (useful for providing a matching option for the user to map their own headers, like Mailchimp's contact import).
92
97
  puts importer.get_preview_lines
93
98
  => [{:column_1 => "r1c1", :column_2 => "r1c2", :column_3 => "r1c3"}, {:column_1 => "r2c1", :column_2 => "r2c2", :column_3 => "r2c3"} etc]
94
99
 
@@ -99,7 +104,7 @@ There are a few ways to interact with the table importer:
99
104
  puts importer.get_chunks(25)
100
105
  => All input chunked into 25 line blocks.
101
106
 
102
- # The format for the returned chunks is not a simple array of hashes, like get_preview_lines
107
+ # Unlike get_preview_lines, the returned chunks are not a simple array of hashes, as each chunk also includes per-row errors
103
108
  puts importer.get_chunks(2)
104
109
  => [{:lines => [{:column_1 => "r1c1", :column_2 => "r1c2", :column_3 => "r1c3"}, {:column_1 => "r2c1", :column_2 => "r2c2", :column_3 => "r2c3"}], :errors => []}, {:lines => [{:column_1 => "r3c1", :column_2 => "r3c2", :column_3 => "r3c3"}, {:column_1 => "r4c1", :column_2 => "r4c2", :column_3 => "r4c3"}], :errors => []}]
105
110
 
@@ -2,6 +2,8 @@ module TableImporter
2
2
 
3
3
  class CopyAndPaste < Source
4
4
 
5
+ attr_accessor :remove_nil_values
6
+
5
7
  def initialize(data)
6
8
  @data = assign_data(data[:content])
7
9
  @column_separator, @record_separator = assign_separators(data[:column_separator], data[:record_separator])
@@ -9,6 +11,7 @@ module TableImporter
9
11
  @mapping = data[:user_headers]
10
12
  @compulsory_headers = data[:compulsory_headers]
11
13
  @delete_empty_columns = @data.length < 50000
14
+ @remove_nil_values = data[:remove_nil_values] == true
12
15
  end
13
16
 
14
17
  def assign_data(content)
@@ -5,12 +5,16 @@ module TableImporter
5
5
 
6
6
  class CSV < Source
7
7
 
8
+ attr_accessor :remove_nil_values
9
+
8
10
  def initialize(data)
9
11
  @headers_present = data[:headers_present] # user has indicated headers are provided
10
12
  @column_separator, @record_separator = initialize_separators(data[:column_separator], data[:record_separator])
11
13
  @compulsory_headers = data[:compulsory_headers]
12
14
  @file = data[:content]
13
15
  @delete_empty_columns = File.size(@file) < 100000
16
+ @remove_nil_values = data[:remove_nil_values] == true
17
+
14
18
  begin
15
19
  first_line = get_first_line
16
20
  if first_line == 0
@@ -145,9 +149,17 @@ module TableImporter
145
149
  end
146
150
 
147
151
  def default_options(options = {})
148
- {:col_sep => @column_separator, :row_sep => @record_separator, :force_simple_split => true, :strip_chars_from_headers => /[\-"]/, :remove_empty_values => false,
149
- :verbose => false, :headers_in_file => @headers_present, :convert_values_to_numeric => false,
150
- :user_provided_headers => @headers_present ? (@headers == nil || @headers == {} ? nil : @headers) : default_headers(100)}.merge(options)
152
+ {
153
+ :col_sep => @column_separator,
154
+ :row_sep => @record_separator,
155
+ :force_simple_split => true,
156
+ :strip_chars_from_headers => /[\-"]/,
157
+ :remove_empty_values => false,
158
+ :verbose => false,
159
+ :headers_in_file => @headers_present,
160
+ :convert_values_to_numeric => false,
161
+ :user_provided_headers => @headers_present ? (@headers == nil || @headers == {} ? nil : @headers) : default_headers(100)
162
+ }.merge(options)
151
163
  end
152
164
 
153
165
  def clean_file(file)
@@ -2,6 +2,8 @@ module TableImporter
2
2
 
3
3
  class Excel < RooSpreadsheetSource
4
4
 
5
+ attr_accessor :remove_nil_values
6
+
5
7
  def initialize(data)
6
8
  begin
7
9
  @type = File.extname(data[:content]) == ".xls" ? "xls" : "xlsx"
@@ -10,6 +12,8 @@ module TableImporter
10
12
  @compulsory_headers = data[:compulsory_headers]
11
13
  @delete_empty_columns = (File.size(data[:content].path) < 100000)
12
14
  @mapping = data[:user_headers]
15
+ @remove_nil_values = data[:remove_nil_values] == true
16
+
13
17
  raise TableImporter::EmptyFileImportError.new if !@file.first_row
14
18
  @headers = @headers_present ? @file.row(1).map.with_index { |header, index| header.present? ? header.to_sym : "column_#{index}"} : default_headers
15
19
  rescue NoMethodError
@@ -2,14 +2,21 @@ module TableImporter
2
2
 
3
3
  class Google < RooSpreadsheetSource
4
4
 
5
+ attr_accessor :remove_nil_values
6
+
5
7
  def initialize(data)
6
8
  begin
7
- @headers_present = data[:headers_present]
8
9
  @file = get_file(data[:content].split(", ")[1], data[:content].split(", ")[0])
10
+
11
+ @headers_present = data[:headers_present]
9
12
  @compulsory_headers = data[:compulsory_headers]
13
+ @mapping = data[:user_headers] if data[:user_headers].present?
14
+ @remove_nil_values = data[:remove_nil_values] == true
15
+
10
16
  @delete_empty_columns = false
11
- @mapping = data[:user_headers] if data[:user_headers].present?
17
+
12
18
  raise TableImporter::EmptyFileImportError.new if !@file.first_row
19
+
13
20
  @headers = @headers_present ? @file.row(1).map.with_index { |header, index| header.present? ? header.to_sym : "column_#{index}"} : default_headers
14
21
  rescue NoMethodError
15
22
  raise TableImporter::HeaderMismatchError.new
@@ -18,7 +25,7 @@ module TableImporter
18
25
 
19
26
  def get_file(file_key, access_token)
20
27
  begin
21
- Roo::Google.new(file_key, {:access_token => access_token})
28
+ Roo::Google.new(file_key, { :access_token => access_token })
22
29
  rescue TypeError
23
30
  raise TableImporter::IncorrectFileError.new
24
31
  end
@@ -1,10 +1,15 @@
1
1
  require 'roo'
2
+ require 'roo-xls'
3
+ require 'roo-google'
2
4
 
3
5
  module TableImporter
4
6
 
5
7
  class Source
6
8
 
7
- SEPARATORS = {comma: ",", space: " ", tab: "\t", newline_mac: "\n", semicolon: ";", newline_windows: "\r\n", old_newline_mac: "\r"}
9
+
10
+ NIL_VALUES = %w( NULL null nil undefined )
11
+
12
+ SEPARATORS = { comma: ",", space: " ", tab: "\t", newline_mac: "\n", semicolon: ";", newline_windows: "\r\n", old_newline_mac: "\r" }
8
13
 
9
14
  def initialize (data)
10
15
  case data[:type]
@@ -108,26 +113,34 @@ module TableImporter
108
113
  def check_empty_headers(line, empty_headers)
109
114
  line.each do |key, value|
110
115
  if value.present? && value.to_s.gsub(/[^A-Za-z0-9]/, '').present?
111
- empty_headers.delete(clean_item(key).to_sym)
116
+ empty_headers.delete(clean_key(key).to_sym)
112
117
  end
113
118
  end
114
119
  end
115
120
 
116
121
  def line_empty?(line)
117
122
  line = clean_line(line)
118
- return line, line.all?{ |item_key, item_value| line_item_is_garbage?(item_value)} && line.all?{ |item_key, item_value| line_item_is_garbage?(item_key)}
123
+ return line, line.all?{ |_, item_value| line_item_is_garbage?(item_value) } && line.all?{ |item_key, _| line_item_is_garbage?(item_key) }
119
124
  end
120
125
 
121
126
  def clean_line(line)
122
127
  map = {}
123
- line.each_pair do |key,value|
124
- map[clean_item(key).to_sym] = clean_item(value)
128
+ line.each_pair do |key, value|
129
+ map[clean_key(key).to_sym] = clean_value(value)
130
+ end
131
+ return map
132
+ end
133
+
134
+ def clean_value(item)
135
+ if remove_nil_values == true && NIL_VALUES.include?(item.to_s)
136
+ return nil
125
137
  end
126
- map
138
+
139
+ return item.to_s.delete("\u0000").to_s.delete("\x00")
127
140
  end
128
141
 
129
- def clean_item(item)
130
- item.to_s.delete("\u0000").to_s.delete("\x00")
142
+ def clean_key(item)
143
+ return item.to_s.delete("\u0000").to_s.delete("\x00")
131
144
  end
132
145
 
133
146
  def check_compulsory_headers?(line, compulsory_headers)
@@ -1,3 +1,3 @@
1
1
  module TableImporter
2
- VERSION = "0.2.3"
2
+ VERSION = "0.2.4"
3
3
  end
@@ -0,0 +1,10 @@
1
+ Country;Medium;Salutation;First name;Last name;Email;Phone number;Tags
2
+ Hong Kong;Men's Uno (Hong Kong Edition);Ms;Noelle;NULL;info@chicgroups.com;-28544930;Ressort_Business
3
+ Hong Kong;Hong Kong Commercial Daily;Mr;Ching Wai
4
+ Hong Kong;South China Morning Post;Mr;John;Cremer;john.cremer@scmp.com;-25651370;Ressort_Business
5
+ Hong Kong;Hong Kong Economic Journal;Mr;Toby;Yiu;tcsyiu@hkej.com;-28566702;Ressort_Business
6
+ Hong Kong;Hong KongTrader;Ms;Regina;Deluna;regina.deluna@hktdc.org;-25843524;Ressort_Business
7
+ Hong Kong;GS1 Hong Kong;Ms;Stella;Cheang;info@gs1hk.org;-28611967;Ressort_Business
8
+ Hong Kong;Playtimes;Ms;Jo;Allun;jo.allum@ppp.com.hk;-22018864;gfgsdfgs;Ressort_Business
9
+ Hong Kong;China Tourism;Ms;Mickey;Ching;edit-c@hkctp.com.hk;-25617149;Ressort_Business
10
+ Hong Kong;Metro Daily (Hong Kong);Mr;Jeff;Lee;news@metrohk.com.hk;-31960748;Ressort_Business
Binary file
@@ -131,4 +131,17 @@ describe TableImporter::Source do
131
131
  @source = nil
132
132
  end
133
133
  end
134
+
135
+ context 'when source has NULL values in it' do
136
+
137
+ it "Skips the null values if specified" do
138
+ source = TableImporter::Source.new({:content => File.open([Dir.pwd, "/spec/files/csv/null_values.csv"].join), :headers_present => false, :user_headers => { "email" => "5" }, :type => "csv", :column_separator => "", :record_separator => "", :compulsory_headers => { email: true }, remove_nil_values: true })
139
+ expect(source.get_chunks[0][:lines].first[:column_4]).to eql(nil)
140
+ end
141
+
142
+ it "Doesn't skip the null values if not specified" do
143
+ source = TableImporter::Source.new({:content => File.open([Dir.pwd, "/spec/files/csv/null_values.csv"].join), :headers_present => false, :user_headers => { "email" => "5" }, :type => "csv", :column_separator => "", :record_separator => "", :compulsory_headers => { email: true }, remove_nil_values: false })
144
+ expect(source.get_chunks[0][:lines].first[:column_4]).to eql("NULL")
145
+ end
146
+ end
134
147
  end
@@ -238,5 +238,18 @@ describe TableImporter::Source do
238
238
  @source = nil
239
239
  end
240
240
  end
241
+
242
+ context 'when source has NULL values in it' do
243
+
244
+ it "Skips the null values if specified" do
245
+ source = TableImporter::Source.new({ content: File.open([Dir.pwd, "/spec/files/excel/null_values.xls"].join), :headers_present => false, :user_headers => { "email" => "0" }, :type => "xls", :column_separator => "", :record_separator => "", :compulsory_headers => { email: true }, remove_nil_values: true })
246
+ expect(source.get_preview_lines.first[:column_2]).to eql(nil)
247
+ end
248
+
249
+ it "Doesn't skip the null values if not specified" do
250
+ source = TableImporter::Source.new({:content => File.open([Dir.pwd, "/spec/files/excel/null_values.xls"].join), :headers_present => false, :user_headers => { "email" => "0" }, :type => "xls", :column_separator => "⁄", :record_separator => "", :compulsory_headers => { email: true }, remove_nil_values: false })
251
+ expect(source.get_preview_lines.first[:column_2]).to eql("NULL")
252
+ end
253
+ end
241
254
  end
242
255
  end
@@ -12,12 +12,11 @@ describe TableImporter::Source do
12
12
  :content => 'CLIENT_ID, ACCESS_TOKEN',
13
13
  :headers_present => true,
14
14
  :user_headers => nil,
15
- :user_headers => nil,
16
15
  :type => "google",
17
16
  :column_separator => "",
18
17
  :record_separator => "",
19
18
  :compulsory_headers =>
20
- {:email => true}
19
+ { :email => true }
21
20
  })
22
21
  end
23
22
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: table_importer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.3
4
+ version: 0.2.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nick Dowse
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-03-15 00:00:00.000000000 Z
11
+ date: 2018-04-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: spreadsheet
@@ -222,6 +222,7 @@ files:
222
222
  - spec/files/csv/empty_lines_at_start.csv
223
223
  - spec/files/csv/mexico2013_pressdoc.csv
224
224
  - spec/files/csv/no_content.csv
225
+ - spec/files/csv/null_values.csv
225
226
  - spec/files/csv/partway.csv
226
227
  - spec/files/csv/with_headers.csv
227
228
  - spec/files/csv/without_headers.csv
@@ -229,6 +230,7 @@ files:
229
230
  - spec/files/excel/empty_lines.xlsx
230
231
  - spec/files/excel/empty_lines_at_start.xlsx
231
232
  - spec/files/excel/no_content.xlsx
233
+ - spec/files/excel/null_values.xls
232
234
  - spec/files/excel/one_line.xls
233
235
  - spec/files/excel/premapped_1.xls
234
236
  - spec/files/excel/premapped_2.xls
@@ -273,6 +275,7 @@ test_files:
273
275
  - spec/files/csv/empty_lines_at_start.csv
274
276
  - spec/files/csv/mexico2013_pressdoc.csv
275
277
  - spec/files/csv/no_content.csv
278
+ - spec/files/csv/null_values.csv
276
279
  - spec/files/csv/partway.csv
277
280
  - spec/files/csv/with_headers.csv
278
281
  - spec/files/csv/without_headers.csv
@@ -280,6 +283,7 @@ test_files:
280
283
  - spec/files/excel/empty_lines.xlsx
281
284
  - spec/files/excel/empty_lines_at_start.xlsx
282
285
  - spec/files/excel/no_content.xlsx
286
+ - spec/files/excel/null_values.xls
283
287
  - spec/files/excel/one_line.xls
284
288
  - spec/files/excel/premapped_1.xls
285
289
  - spec/files/excel/premapped_2.xls