table_importer 0.2.3 → 0.2.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 865e1844a77e5b2deb00d9f8d60c3a3c03c6889d
4
- data.tar.gz: 5efeeabd463f044f90c0abf2792ca026c80b9998
3
+ metadata.gz: 84f4728211cdfa3a07bfec96d6e3dbf35554ec98
4
+ data.tar.gz: 08fb3b8daf2428a44348b798d335d741ef210dfd
5
5
  SHA512:
6
- metadata.gz: e554a0f3ba76457f86a42da86c006462b990b627dbae53c2a35c7c0554499160a282dc3b0df0290f72ae29bb2630a72b3fa0a391a04e0e5647ec8cbcf0b0a6c9
7
- data.tar.gz: 76d89f4db35c740e6c1dd7a72d99dac88af2c498c5c047d2ea8bb451ba9e8b529105a421ca7946816e45144085be677eec8b1959d5f91f95b943b2144268cdef
6
+ metadata.gz: f4de9554e43a187a8e09820ff4ee9b6dc3b3c3203648a9866aef834de8e23e951d7c1ecf6d7cdb12468b31f5ab58886820d554ad5342e557df22208118311e02
7
+ data.tar.gz: 67fd83970aa94616152593e6160bb2311da1174526c1f23b6717c7e3eb42d77b01bcb1c19a254f8d9d1008761daa865f71723d4ae799d43fcabd388494fc82bf
data/README.md CHANGED
@@ -64,13 +64,18 @@ The options you pass in are:
64
64
  :compulsory_headers => {
65
65
  :email => true, false # Does each record require an email address to be valid?
66
66
  }
67
+
68
+ # Whether string placeholders for nil (i.e. values equal to "NULL", "null", "nil", or "undefined") should be replaced with actual nil values.
69
+ :remove_nil_values => true
70
+ :remove_nil_values => false
67
71
 
68
72
  ```
69
73
 
70
74
  There are a few ways to interact with the table importer:
71
75
 
72
76
  ```
73
- importer = TableImporter::Source.new({options})
77
+ options = { type: "csv", }
78
+ importer = TableImporter::Source.new(options)
74
79
 
75
80
  # get the type
76
81
  puts importer.get_type
@@ -88,7 +93,7 @@ There are a few ways to interact with the table importer:
88
93
  puts importer.get_headers
89
94
  => "column_1, column_2, column_3"
90
95
 
91
- # Get the first 8 lines (useful for providing a matching option for the user to map their own headers, like mailchimps contact import.
96
+ # Get the first 8 lines (useful for providing a matching option for the user to map their own headers, like Mailchimp's contact import).
92
97
  puts importer.get_preview_lines
93
98
  => [{:column_1 => "r1c1", :column_2 => "r1c2", :column_3 => "r1c3"}, {:column_1 => "r2c1", :column_2 => "r2c2", :column_3 => "r2c3"} etc]
94
99
 
@@ -99,7 +104,7 @@ There are a few ways to interact with the table importer:
99
104
  puts importer.get_chunks(25)
100
105
  => All input chunked into 25 line blocks.
101
106
 
102
- # The format for the returned chunks is not a simple array of hashes, like get_preview_lines
107
+ # The format for the returned chunks is not a simple array of hashes, like get_preview_lines, as it also includes per-row errors
103
108
  puts importer.get_chunks(2)
104
109
  => [{:lines => [{:column_1 => "r1c1", :column_2 => "r1c2", :column_3 => "r1c3"}, {:column_1 => "r2c1", :column_2 => "r2c2", :column_3 => "r2c3"}], :errors => []}, {:lines => [{:column_1 => "r3c1", :column_2 => "r3c2", :column_3 => "r3c3"}, {:column_1 => "r4c1", :column_2 => "r4c2", :column_3 => "r4c3"}], :errors => []}]
105
110
 
@@ -2,6 +2,8 @@ module TableImporter
2
2
 
3
3
  class CopyAndPaste < Source
4
4
 
5
+ attr_accessor :remove_nil_values
6
+
5
7
  def initialize(data)
6
8
  @data = assign_data(data[:content])
7
9
  @column_separator, @record_separator = assign_separators(data[:column_separator], data[:record_separator])
@@ -9,6 +11,7 @@ module TableImporter
9
11
  @mapping = data[:user_headers]
10
12
  @compulsory_headers = data[:compulsory_headers]
11
13
  @delete_empty_columns = @data.length < 50000
14
+ @remove_nil_values = data[:remove_nil_values] == true
12
15
  end
13
16
 
14
17
  def assign_data(content)
@@ -5,12 +5,16 @@ module TableImporter
5
5
 
6
6
  class CSV < Source
7
7
 
8
+ attr_accessor :remove_nil_values
9
+
8
10
  def initialize(data)
9
11
  @headers_present = data[:headers_present] # user has indicated headers are provided
10
12
  @column_separator, @record_separator = initialize_separators(data[:column_separator], data[:record_separator])
11
13
  @compulsory_headers = data[:compulsory_headers]
12
14
  @file = data[:content]
13
15
  @delete_empty_columns = File.size(@file) < 100000
16
+ @remove_nil_values = data[:remove_nil_values] == true
17
+
14
18
  begin
15
19
  first_line = get_first_line
16
20
  if first_line == 0
@@ -145,9 +149,17 @@ module TableImporter
145
149
  end
146
150
 
147
151
  def default_options(options = {})
148
- {:col_sep => @column_separator, :row_sep => @record_separator, :force_simple_split => true, :strip_chars_from_headers => /[\-"]/, :remove_empty_values => false,
149
- :verbose => false, :headers_in_file => @headers_present, :convert_values_to_numeric => false,
150
- :user_provided_headers => @headers_present ? (@headers == nil || @headers == {} ? nil : @headers) : default_headers(100)}.merge(options)
152
+ {
153
+ :col_sep => @column_separator,
154
+ :row_sep => @record_separator,
155
+ :force_simple_split => true,
156
+ :strip_chars_from_headers => /[\-"]/,
157
+ :remove_empty_values => false,
158
+ :verbose => false,
159
+ :headers_in_file => @headers_present,
160
+ :convert_values_to_numeric => false,
161
+ :user_provided_headers => @headers_present ? (@headers == nil || @headers == {} ? nil : @headers) : default_headers(100)
162
+ }.merge(options)
151
163
  end
152
164
 
153
165
  def clean_file(file)
@@ -2,6 +2,8 @@ module TableImporter
2
2
 
3
3
  class Excel < RooSpreadsheetSource
4
4
 
5
+ attr_accessor :remove_nil_values
6
+
5
7
  def initialize(data)
6
8
  begin
7
9
  @type = File.extname(data[:content]) == ".xls" ? "xls" : "xlsx"
@@ -10,6 +12,8 @@ module TableImporter
10
12
  @compulsory_headers = data[:compulsory_headers]
11
13
  @delete_empty_columns = (File.size(data[:content].path) < 100000)
12
14
  @mapping = data[:user_headers]
15
+ @remove_nil_values = data[:remove_nil_values] == true
16
+
13
17
  raise TableImporter::EmptyFileImportError.new if !@file.first_row
14
18
  @headers = @headers_present ? @file.row(1).map.with_index { |header, index| header.present? ? header.to_sym : "column_#{index}"} : default_headers
15
19
  rescue NoMethodError
@@ -2,14 +2,21 @@ module TableImporter
2
2
 
3
3
  class Google < RooSpreadsheetSource
4
4
 
5
+ attr_accessor :remove_nil_values
6
+
5
7
  def initialize(data)
6
8
  begin
7
- @headers_present = data[:headers_present]
8
9
  @file = get_file(data[:content].split(", ")[1], data[:content].split(", ")[0])
10
+
11
+ @headers_present = data[:headers_present]
9
12
  @compulsory_headers = data[:compulsory_headers]
13
+ @mapping = data[:user_headers] if data[:user_headers].present?
14
+ @remove_nil_values = data[:remove_nil_values] == true
15
+
10
16
  @delete_empty_columns = false
11
- @mapping = data[:user_headers] if data[:user_headers].present?
17
+
12
18
  raise TableImporter::EmptyFileImportError.new if !@file.first_row
19
+
13
20
  @headers = @headers_present ? @file.row(1).map.with_index { |header, index| header.present? ? header.to_sym : "column_#{index}"} : default_headers
14
21
  rescue NoMethodError
15
22
  raise TableImporter::HeaderMismatchError.new
@@ -18,7 +25,7 @@ module TableImporter
18
25
 
19
26
  def get_file(file_key, access_token)
20
27
  begin
21
- Roo::Google.new(file_key, {:access_token => access_token})
28
+ Roo::Google.new(file_key, { :access_token => access_token })
22
29
  rescue TypeError
23
30
  raise TableImporter::IncorrectFileError.new
24
31
  end
@@ -1,10 +1,15 @@
1
1
  require 'roo'
2
+ require 'roo-xls'
3
+ require 'roo-google'
2
4
 
3
5
  module TableImporter
4
6
 
5
7
  class Source
6
8
 
7
- SEPARATORS = {comma: ",", space: " ", tab: "\t", newline_mac: "\n", semicolon: ";", newline_windows: "\r\n", old_newline_mac: "\r"}
9
+
10
+ NIL_VALUES = %w( NULL null nil undefined )
11
+
12
+ SEPARATORS = { comma: ",", space: " ", tab: "\t", newline_mac: "\n", semicolon: ";", newline_windows: "\r\n", old_newline_mac: "\r" }
8
13
 
9
14
  def initialize (data)
10
15
  case data[:type]
@@ -108,26 +113,34 @@ module TableImporter
108
113
  def check_empty_headers(line, empty_headers)
109
114
  line.each do |key, value|
110
115
  if value.present? && value.to_s.gsub(/[^A-Za-z0-9]/, '').present?
111
- empty_headers.delete(clean_item(key).to_sym)
116
+ empty_headers.delete(clean_key(key).to_sym)
112
117
  end
113
118
  end
114
119
  end
115
120
 
116
121
  def line_empty?(line)
117
122
  line = clean_line(line)
118
- return line, line.all?{ |item_key, item_value| line_item_is_garbage?(item_value)} && line.all?{ |item_key, item_value| line_item_is_garbage?(item_key)}
123
+ return line, line.all?{ |_, item_value| line_item_is_garbage?(item_value) } && line.all?{ |item_key, _| line_item_is_garbage?(item_key) }
119
124
  end
120
125
 
121
126
  def clean_line(line)
122
127
  map = {}
123
- line.each_pair do |key,value|
124
- map[clean_item(key).to_sym] = clean_item(value)
128
+ line.each_pair do |key, value|
129
+ map[clean_key(key).to_sym] = clean_value(value)
130
+ end
131
+ return map
132
+ end
133
+
134
+ def clean_value(item)
135
+ if remove_nil_values == true && NIL_VALUES.include?(item.to_s)
136
+ return nil
125
137
  end
126
- map
138
+
139
+ return item.to_s.delete("\u0000").to_s.delete("\x00")
127
140
  end
128
141
 
129
- def clean_item(item)
130
- item.to_s.delete("\u0000").to_s.delete("\x00")
142
+ def clean_key(item)
143
+ return item.to_s.delete("\u0000").to_s.delete("\x00")
131
144
  end
132
145
 
133
146
  def check_compulsory_headers?(line, compulsory_headers)
@@ -1,3 +1,3 @@
1
1
  module TableImporter
2
- VERSION = "0.2.3"
2
+ VERSION = "0.2.4"
3
3
  end
@@ -0,0 +1,10 @@
1
+ Country;Medium;Salutation;First name;Last name;Email;Phone number;Tags
2
+ Hong Kong;Men's Uno (Hong Kong Edition);Ms;Noelle;NULL;info@chicgroups.com;-28544930;Ressort_Business
3
+ Hong Kong;Hong Kong Commercial Daily;Mr;Ching Wai
4
+ Hong Kong;South China Morning Post;Mr;John;Cremer;john.cremer@scmp.com;-25651370;Ressort_Business
5
+ Hong Kong;Hong Kong Economic Journal;Mr;Toby;Yiu;tcsyiu@hkej.com;-28566702;Ressort_Business
6
+ Hong Kong;Hong KongTrader;Ms;Regina;Deluna;regina.deluna@hktdc.org;-25843524;Ressort_Business
7
+ Hong Kong;GS1 Hong Kong;Ms;Stella;Cheang;info@gs1hk.org;-28611967;Ressort_Business
8
+ Hong Kong;Playtimes;Ms;Jo;Allun;jo.allum@ppp.com.hk;-22018864;gfgsdfgs;Ressort_Business
9
+ Hong Kong;China Tourism;Ms;Mickey;Ching;edit-c@hkctp.com.hk;-25617149;Ressort_Business
10
+ Hong Kong;Metro Daily (Hong Kong);Mr;Jeff;Lee;news@metrohk.com.hk;-31960748;Ressort_Business
Binary file
@@ -131,4 +131,17 @@ describe TableImporter::Source do
131
131
  @source = nil
132
132
  end
133
133
  end
134
+
135
+ context 'when source has NULL values in it' do
136
+
137
+ it "Skips the null values if specified" do
138
+ source = TableImporter::Source.new({:content => File.open([Dir.pwd, "/spec/files/csv/null_values.csv"].join), :headers_present => false, :user_headers => { "email" => "5" }, :type => "csv", :column_separator => "", :record_separator => "", :compulsory_headers => { email: true }, remove_nil_values: true })
139
+ expect(source.get_chunks[0][:lines].first[:column_4]).to eql(nil)
140
+ end
141
+
142
+ it "Doesn't skip the null values if not specified" do
143
+ source = TableImporter::Source.new({:content => File.open([Dir.pwd, "/spec/files/csv/null_values.csv"].join), :headers_present => false, :user_headers => { "email" => "5" }, :type => "csv", :column_separator => "", :record_separator => "", :compulsory_headers => { email: true }, remove_nil_values: false })
144
+ expect(source.get_chunks[0][:lines].first[:column_4]).to eql("NULL")
145
+ end
146
+ end
134
147
  end
@@ -238,5 +238,18 @@ describe TableImporter::Source do
238
238
  @source = nil
239
239
  end
240
240
  end
241
+
242
+ context 'when source has NULL values in it' do
243
+
244
+ it "Skips the null values if specified" do
245
+ source = TableImporter::Source.new({ content: File.open([Dir.pwd, "/spec/files/excel/null_values.xls"].join), :headers_present => false, :user_headers => { "email" => "0" }, :type => "xls", :column_separator => "", :record_separator => "", :compulsory_headers => { email: true }, remove_nil_values: true })
246
+ expect(source.get_preview_lines.first[:column_2]).to eql(nil)
247
+ end
248
+
249
+ it "Doesn't skip the null values if not specified" do
250
+ source = TableImporter::Source.new({:content => File.open([Dir.pwd, "/spec/files/excel/null_values.xls"].join), :headers_present => false, :user_headers => { "email" => "0" }, :type => "xls", :column_separator => "⁄", :record_separator => "", :compulsory_headers => { email: true }, remove_nil_values: false })
251
+ expect(source.get_preview_lines.first[:column_2]).to eql("NULL")
252
+ end
253
+ end
241
254
  end
242
255
  end
@@ -12,12 +12,11 @@ describe TableImporter::Source do
12
12
  :content => 'CLIENT_ID, ACCESS_TOKEN',
13
13
  :headers_present => true,
14
14
  :user_headers => nil,
15
- :user_headers => nil,
16
15
  :type => "google",
17
16
  :column_separator => "",
18
17
  :record_separator => "",
19
18
  :compulsory_headers =>
20
- {:email => true}
19
+ { :email => true }
21
20
  })
22
21
  end
23
22
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: table_importer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.3
4
+ version: 0.2.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nick Dowse
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-03-15 00:00:00.000000000 Z
11
+ date: 2018-04-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: spreadsheet
@@ -222,6 +222,7 @@ files:
222
222
  - spec/files/csv/empty_lines_at_start.csv
223
223
  - spec/files/csv/mexico2013_pressdoc.csv
224
224
  - spec/files/csv/no_content.csv
225
+ - spec/files/csv/null_values.csv
225
226
  - spec/files/csv/partway.csv
226
227
  - spec/files/csv/with_headers.csv
227
228
  - spec/files/csv/without_headers.csv
@@ -229,6 +230,7 @@ files:
229
230
  - spec/files/excel/empty_lines.xlsx
230
231
  - spec/files/excel/empty_lines_at_start.xlsx
231
232
  - spec/files/excel/no_content.xlsx
233
+ - spec/files/excel/null_values.xls
232
234
  - spec/files/excel/one_line.xls
233
235
  - spec/files/excel/premapped_1.xls
234
236
  - spec/files/excel/premapped_2.xls
@@ -273,6 +275,7 @@ test_files:
273
275
  - spec/files/csv/empty_lines_at_start.csv
274
276
  - spec/files/csv/mexico2013_pressdoc.csv
275
277
  - spec/files/csv/no_content.csv
278
+ - spec/files/csv/null_values.csv
276
279
  - spec/files/csv/partway.csv
277
280
  - spec/files/csv/with_headers.csv
278
281
  - spec/files/csv/without_headers.csv
@@ -280,6 +283,7 @@ test_files:
280
283
  - spec/files/excel/empty_lines.xlsx
281
284
  - spec/files/excel/empty_lines_at_start.xlsx
282
285
  - spec/files/excel/no_content.xlsx
286
+ - spec/files/excel/null_values.xls
283
287
  - spec/files/excel/one_line.xls
284
288
  - spec/files/excel/premapped_1.xls
285
289
  - spec/files/excel/premapped_2.xls