ndr_import 8.0.0 → 8.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f74d60e93cf1f1d72baeebcbe931fc4225c3e74c11aaa13c324578b261561008
4
- data.tar.gz: 70657ad1f113089bd2f719de5b59317310212c730ba34e762d99f9395da8fff2
3
+ metadata.gz: 214f73452462c2ea7182deaa8c90e4818d8ab6caf3fd604af4a64833e2518b41
4
+ data.tar.gz: 5d15e20e7b0c13fa3d3cd9169ad70c9028cc64d688906dd965a09ce791e97022
5
5
  SHA512:
6
- metadata.gz: 91bced100d286a44df6fa193cad58a2ad85f6733d9b8cbbf1970e438ed8a3a7d9785a6d0e8b16396713511d045edd0a6aba00090a903c217e4c71b8e2fde7174
7
- data.tar.gz: 4e2e968084f78f25fffebbc793806402fefd515605afc0ac9c81a5d535f0cb7f27a50af3c4dcb733ff5a9a8bc17311084c65593a2fb971ac35c5a4937bbfe777
6
+ metadata.gz: 7d4080dc6d86e553f8f6a07d2fe4876c6d014ecc8b5b3e729ac089147223ecf2d18f9f9bddfbc64c692e73deb08e27a12e09793fbd155c2a859b29c7efff7c3b
7
+ data.tar.gz: 46b267832414e13f1580da5fc386bebe34c5debb79d03e288c3f793b6c28158e362eb3971d2f275f68c398a4f8e3293321a585df2be2595369197bb70bf32eef
@@ -1,6 +1,11 @@
1
1
  ## [Unreleased]
2
2
  *no unreleased changes*
3
3
 
4
+ ## 8.1.0 / 2019-01-08
5
+ ### Added
6
+ * Support `liberal_parsing` when using the delimited helper
7
+ * Support for Ruby 2.6. Rails 5.0 / Ruby 2.4 is now the minimum.
8
+
4
9
  ## 8.0.0 / 2018-11-26
5
10
  ### Changed
6
11
  * Strip non tabular captured values by default (#28)
@@ -8,6 +13,7 @@
8
13
  ### Added
9
14
  * Add validations to field mappings (#27)
10
15
  * control `ndr_table` serialise order with `encode_with` method (#30)
16
+ * Added a 7-Zip file handler
11
17
 
12
18
  ## 7.0.0 / 2018-11-09
13
19
  ### Changed
@@ -15,11 +15,11 @@ file safety:
15
15
  ".travis.yml":
16
16
  comments:
17
17
  reviewed_by: josh.pencheon
18
- safe_revision: 71979e0a602ca5a0ce415c194f10add9959f0116
18
+ safe_revision: 607c0668f1fffd70d181bc1a31c4f56eed5f6189
19
19
  CHANGELOG.md:
20
20
  comments:
21
21
  reviewed_by: josh.pencheon
22
- safe_revision: 952e45599240ab96b22d7ce223a7551853e50ae5
22
+ safe_revision: 5ca2f7e9cb0e79134e5fa54ca2316c584696b34a
23
23
  CODE_OF_CONDUCT.md:
24
24
  comments:
25
25
  reviewed_by: timgentry
@@ -44,10 +44,6 @@ file safety:
44
44
  comments:
45
45
  reviewed_by: josh.pencheon
46
46
  safe_revision: 5a7f26cecaabab20f4e666776f9166dcc3fa6bfe
47
- gemfiles/Gemfile.rails42:
48
- comments:
49
- reviewed_by: timgentry
50
- safe_revision: 0c5967732100f43e7b60a97a7f0aae60d3641791
51
47
  gemfiles/Gemfile.rails50:
52
48
  comments:
53
49
  reviewed_by: josh.pencheon
@@ -79,7 +75,7 @@ file safety:
79
75
  lib/ndr_import/file/delimited.rb:
80
76
  comments:
81
77
  reviewed_by: josh.pencheon
82
- safe_revision: 71979e0a602ca5a0ce415c194f10add9959f0116
78
+ safe_revision: 607c0668f1fffd70d181bc1a31c4f56eed5f6189
83
79
  lib/ndr_import/file/docx.rb:
84
80
  comments:
85
81
  reviewed_by: josh.pencheon
@@ -127,7 +123,7 @@ file safety:
127
123
  lib/ndr_import/helpers/file/delimited.rb:
128
124
  comments:
129
125
  reviewed_by: josh.pencheon
130
- safe_revision: be12e57519d3737e8d3901d7b01485c6995708dd
126
+ safe_revision: 607c0668f1fffd70d181bc1a31c4f56eed5f6189
131
127
  lib/ndr_import/helpers/file/excel.rb:
132
128
  comments:
133
129
  reviewed_by: joshpencheon
@@ -199,7 +195,7 @@ file safety:
199
195
  lib/ndr_import/version.rb:
200
196
  comments: another check?
201
197
  reviewed_by: josh.pencheon
202
- safe_revision: 952e45599240ab96b22d7ce223a7551853e50ae5
198
+ safe_revision: 5ca2f7e9cb0e79134e5fa54ca2316c584696b34a
203
199
  lib/ndr_import/xml/table.rb:
204
200
  comments:
205
201
  reviewed_by: josh.pencheon
@@ -207,7 +203,7 @@ file safety:
207
203
  ndr_import.gemspec:
208
204
  comments:
209
205
  reviewed_by: josh.pencheon
210
- safe_revision: eca44583e9989159b45e90021dd1c65228447180
206
+ safe_revision: 607c0668f1fffd70d181bc1a31c4f56eed5f6189
211
207
  test/file/base_test.rb:
212
208
  comments:
213
209
  reviewed_by: timgentry
@@ -215,7 +211,7 @@ file safety:
215
211
  test/file/delimited_test.rb:
216
212
  comments:
217
213
  reviewed_by: josh.pencheon
218
- safe_revision: ab9f926a53e84f8ffa826abdd967abee455c89df
214
+ safe_revision: 607c0668f1fffd70d181bc1a31c4f56eed5f6189
219
215
  test/file/docx_test.rb:
220
216
  comments:
221
217
  reviewed_by: josh.pencheon
@@ -258,8 +254,8 @@ file safety:
258
254
  safe_revision: dfc958d44b6c58355445fa395db08a62213ee709
259
255
  test/helpers/file/delimited_test.rb:
260
256
  comments:
261
- reviewed_by: joshpencheon
262
- safe_revision: 377c060bd1e012d6e162a4ab8923a3609aacdf57
257
+ reviewed_by: josh.pencheon
258
+ safe_revision: 607c0668f1fffd70d181bc1a31c4f56eed5f6189
263
259
  test/helpers/file/excel_test.rb:
264
260
  comments:
265
261
  reviewed_by: joshpencheon
@@ -1,5 +1,6 @@
1
1
  require 'ndr_support/safe_file'
2
2
  require 'ndr_import/csv_library'
3
+ require 'ndr_import/helpers/file/delimited'
3
4
  require_relative 'registry'
4
5
 
5
6
  module NdrImport
@@ -8,6 +9,8 @@ module NdrImport
8
9
  module File
9
10
  # This class is a delimited file handler that returns a single table.
10
11
  class Delimited < Base
12
+ include Helpers::File::Delimited
13
+
11
14
  DELIMITED_COL_SEP = {
12
15
  'csv' => nil
13
16
  }
@@ -23,64 +26,16 @@ module NdrImport
23
26
  def rows
24
27
  return enum_for(:rows) unless block_given?
25
28
 
26
- safe_path = SafeFile.safepath_to_string(@filename)
27
-
28
- # By now, we know `encodings` should let us read the whole
29
- # file succesfully; if there are problems, we should crash.
30
- CSVLibrary.foreach(safe_path, encodings(safe_path)) do |line|
31
- yield line.map(&:to_s)
32
- end
33
- end
29
+ col_sep = @options['col_sep']
30
+ liberal = @options['liberal_parsing'].presence
34
31
 
35
- # Cache the determined encodings, so rewinding the enumerator doesn't
36
- # have to redo this, but equally it is still done lazily:
37
- def encodings(safe_path)
38
- @encodings ||= determine_encodings!(safe_path)
32
+ delimited_rows(@filename, col_sep, liberal) { |row| yield row }
39
33
  end
40
34
 
41
- # Derive the source encoding by trying all supported encodings.
42
- # Returns first set of working options, or raises if none could be found.
43
- def determine_encodings!(safe_path)
44
- # delimiter encoding => # FasterCSV encoding string
45
- supported_encodings = {
46
- 'UTF-8' => 'r:bom|utf-8',
47
- 'Windows-1252' => 'r:windows-1252:utf-8'
48
- }
49
-
50
- successful_options = nil
51
- supported_encodings.each do |delimiter_encoding, access_mode|
52
- begin
53
- col_sep = @options['col_sep']
54
- options = {
55
- col_sep: (col_sep || ',').force_encoding(delimiter_encoding),
56
- mode: access_mode,
57
- liberal_parsing: @options['liberal_parsing'].presence
58
- }
59
-
60
- row_num = 0
61
- # Iterate through the file; if we reach the end, this encoding worked:
62
- CSVLibrary.foreach(safe_path, options) { |_line| row_num += 1 }
63
- rescue ArgumentError => e
64
- next if e.message =~ /invalid byte sequence/ # This encoding didn't work
65
- raise(e)
66
- rescue CSVLibrary::MalformedCSVError => e
67
- description = (col_sep ? col_sep.inspect + ' delimited' : 'CSV')
68
-
69
- raise(CSVLibrary::MalformedCSVError, "Invalid #{description} format " \
70
- "on row #{row_num + 1} of #{::File.basename(safe_path)}. Original: #{e.message}")
71
- end
72
-
73
- # We got this far => encoding choice worked:
74
- successful_options = options
75
- break
76
- end
77
-
78
- # We tried them all, and none worked:
79
- unless successful_options
80
- raise "None of the encodings #{supported_encodings.values.inspect} were successful!"
81
- end
82
-
83
- successful_options
35
+ # Cache working encodings, so that resetting the enumerator
36
+ # doesn't mean the need to recalculate this:
37
+ def determine_encodings!(*)
38
+ @encoding ||= super
84
39
  end
85
40
  end
86
41
 
@@ -7,32 +7,32 @@ module NdrImport
7
7
  # This mixin adds delimited file functionality to unified importers.
8
8
  module Delimited
9
9
  # Read a plain text CSV file, return an array of the content
10
- def read_csv_file(path)
10
+ def read_csv_file(path, liberal = false)
11
11
  # Read the page below when encountering "CSV::IllegalFormatError" error caused by CSV
12
12
  # files generated on Mac OS
13
13
  # http://stackoverflow.com/questions/1549139/ruby-cannot-parse-excel-file-exported-as-csv-in-os-x
14
14
 
15
- read_delimited_file(path)
15
+ read_delimited_file(path, nil, liberal)
16
16
  end
17
17
 
18
18
  # Slurp the entire file into an array of lines.
19
- def read_delimited_file(path, col_sep = nil)
20
- delimited_rows(path, col_sep).to_a
19
+ def read_delimited_file(path, col_sep = nil, liberal = false)
20
+ delimited_rows(path, col_sep, liberal).to_a
21
21
  end
22
22
 
23
23
  # Iterate through the file table by table, yielding each one in turn.
24
- def delimited_tables(path, col_sep = nil)
25
- return enum_for(:delimited_tables, path, col_sep) unless block_given?
24
+ def delimited_tables(path, col_sep = nil, liberal = false)
25
+ return enum_for(:delimited_tables, path, col_sep, liberal) unless block_given?
26
26
 
27
- yield nil, delimited_rows(path, col_sep)
27
+ yield nil, delimited_rows(path, col_sep, liberal)
28
28
  end
29
29
 
30
30
  # Iterate through the file line by line, yielding each one in turn.
31
- def delimited_rows(path, col_sep = nil)
32
- return enum_for(:delimited_rows, path, col_sep) unless block_given?
31
+ def delimited_rows(path, col_sep = nil, liberal = false)
32
+ return enum_for(:delimited_rows, path, col_sep, liberal) unless block_given?
33
33
 
34
34
  safe_path = SafeFile.safepath_to_string(path)
35
- encodings = determine_encodings!(safe_path, col_sep)
35
+ encodings = determine_encodings!(safe_path, col_sep, liberal)
36
36
 
37
37
  # By now, we know `encodings` should let us read the whole
38
38
  # file successfully; if there are problems, we should crash.
@@ -45,45 +45,59 @@ module NdrImport
45
45
 
46
46
  # Derive the source encoding by trying all supported encodings.
47
47
  # Returns first set of working options, or raises if none could be found.
48
- def determine_encodings!(safe_path, col_sep = nil)
48
+ def determine_encodings!(safe_path, col_sep, liberal)
49
49
  # delimiter encoding => # FasterCSV encoding string
50
50
  supported_encodings = {
51
51
  'UTF-8' => 'r:bom|utf-8',
52
52
  'Windows-1252' => 'r:windows-1252:utf-8'
53
53
  }
54
54
 
55
- successful_options = nil
55
+ successful_options = try_each_encoding(safe_path, col_sep, liberal, supported_encodings)
56
+
57
+ # We tried them all, and none worked:
58
+ unless successful_options
59
+ raise "None of the encodings #{supported_encodings.values.inspect} were successful!"
60
+ end
61
+
62
+ successful_options
63
+ end
64
+
65
+ def try_each_encoding(safe_path, col_sep, liberal, supported_encodings)
56
66
  supported_encodings.each do |delimiter_encoding, access_mode|
57
67
  begin
58
68
  options = {
59
- :col_sep => (col_sep || ',').force_encoding(delimiter_encoding),
60
- :mode => access_mode
69
+ col_sep: (col_sep || ',').force_encoding(delimiter_encoding),
70
+ liberal_parsing: liberal,
71
+ mode: access_mode
61
72
  }
62
73
 
63
74
  row_num = 0
64
75
  # Iterate through the file; if we reach the end, this encoding worked:
65
76
  CSVLibrary.foreach(safe_path, options) { |_line| row_num += 1 }
77
+ return options
66
78
  rescue ArgumentError => e
67
79
  next if e.message =~ /invalid byte sequence/ # This encoding didn't work
68
80
  raise(e)
81
+ rescue RegexpError => e
82
+ next if e.message =~ /invalid multibyte character/ # This encoding didn't work
83
+ raise(e)
69
84
  rescue CSVLibrary::MalformedCSVError => e
70
- description = (col_sep ? col_sep.inspect + ' delimited' : 'CSV')
71
-
72
- raise(CSVLibrary::MalformedCSVError, "Invalid #{description} format " \
73
- "on row #{row_num + 1} of #{::File.basename(safe_path)}. Original: #{e.message}")
85
+ next if e.message =~ /Invalid byte sequence/ # This encoding didn't work
86
+ raise malformed_csv_error(e, col_sep, row_num + 1, safe_path)
74
87
  end
75
-
76
- # We got this far => encoding choice worked:
77
- successful_options = options
78
- break
79
88
  end
89
+ end
80
90
 
81
- # We tried them all, and none worked:
82
- unless successful_options
83
- fail "None of the encodings #{supported_encodings.values.inspect} were successful!"
84
- end
91
+ def malformed_csv_error(exception, col_sep, line, safe_path)
92
+ type = (col_sep ? col_sep.inspect + ' delimited' : 'CSV')
93
+ message = "Invalid #{type} format on row #{line} of #{::File.basename(safe_path)}"
85
94
 
86
- successful_options
95
+ if exception.respond_to?(:line_number)
96
+ base_message = exception.message.chomp(" in line #{exception.line_number}.")
97
+ exception.class.new("#{message}. Original: #{base_message}", exception.line_number)
98
+ else
99
+ exception.class.new("#{message}. Original: #{exception.message}")
100
+ end
87
101
  end
88
102
  end
89
103
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
  # This stores the current version of the NdrImport gem
3
3
  module NdrImport
4
- VERSION = '8.0.0'.freeze
4
+ VERSION = '8.1.0'.freeze
5
5
  end
@@ -21,7 +21,7 @@ Gem::Specification.new do |spec|
21
21
  spec.require_paths = ['lib']
22
22
 
23
23
  spec.add_dependency 'activemodel'
24
- spec.add_dependency 'activesupport', '>= 3.2.18', '< 5.3'
24
+ spec.add_dependency 'activesupport', '~> 5.0'
25
25
  spec.add_dependency 'ndr_support', '>= 5.3.2', '< 6'
26
26
 
27
27
  spec.add_dependency 'rubyzip', '~> 1.2', '>= 1.2.2'
@@ -185,9 +185,9 @@ module NdrImport
185
185
  end
186
186
  end
187
187
 
188
- msg = 'Invalid CSV format on row 2 of broken.csv. ' \
189
- 'Original: Missing or stray quote in line 2'
190
- assert_equal msg, exception.message
188
+ assert_match(/Invalid CSV format on row 2 of broken\.csv\./, exception.message)
189
+ assert_match(/(Missing or stray quote|col_sep_split)/, exception.message)
190
+ assert_match(/in line 2/, exception.message)
191
191
  end
192
192
 
193
193
  test 'should report addition details upon failure to read csv line-by-line' do
@@ -207,16 +207,16 @@ module NdrImport
207
207
 
208
208
  assert rows_yielded.empty?, 'no rows should have been yielded'
209
209
 
210
- msg = 'Invalid CSV format on row 2 of broken.csv. ' \
211
- 'Original: Missing or stray quote in line 2'
212
- assert_equal msg, exception.message
210
+ assert_match(/Invalid CSV format on row 2 of broken\.csv\./, exception.message)
211
+ assert_match(/(Missing or stray quote|col_sep_split)/, exception.message)
212
+ assert_match(/in line 2/, exception.message)
213
213
  end
214
214
 
215
215
  test 'should only determine encodings once' do
216
216
  file_path = @permanent_test_files.join('normal.csv')
217
217
  handler = NdrImport::File::Delimited.new(file_path, 'csv', 'col_sep' => nil)
218
218
 
219
- handler.expects(determine_encodings!: { mode: 'r:bom|utf-8', col_sep: ',' }).once
219
+ handler.expects(try_each_encoding: { mode: 'r:bom|utf-8', col_sep: ',' }).once
220
220
 
221
221
  2.times do
222
222
  handler.tables.each do |tablename, sheet|
@@ -59,8 +59,22 @@ class DelimitedTest < ActiveSupport::TestCase
59
59
  @importer.read_delimited_file(@permanent_test_files.join('broken.csv'), nil)
60
60
  end
61
61
 
62
- msg = 'Invalid CSV format on row 2 of broken.csv. Original: Missing or stray quote in line 2'
63
- assert_equal msg, exception.message
62
+ assert_match(/Invalid CSV format on row 2 of broken\.csv\./, exception.message)
63
+ assert_match(/(Missing or stray quote|col_sep_split)/, exception.message)
64
+ assert_match(/in line 2/, exception.message)
65
+ end
66
+
67
+ test 'should be able to use liberal parsing to overcome minor CSV errors' do
68
+ file_path = @permanent_test_files.join('malformed.csv')
69
+ assert_raises(CSVLibrary::MalformedCSVError) do
70
+ @importer.read_delimited_file(file_path, nil)
71
+ end
72
+
73
+ rows = @importer.read_delimited_file(file_path, nil, true)
74
+
75
+ expected_row = ['2'] * 25
76
+ expected_row << '2"malformed"'
77
+ assert_equal expected_row, rows[2].sort
64
78
  end
65
79
 
66
80
  test 'should report addition details upon failure to read csv line-by-line' do
@@ -73,8 +87,9 @@ class DelimitedTest < ActiveSupport::TestCase
73
87
 
74
88
  assert rows_yielded.empty?, 'no rows should have been yielded'
75
89
 
76
- msg = 'Invalid CSV format on row 2 of broken.csv. Original: Missing or stray quote in line 2'
77
- assert_equal msg, exception.message
90
+ assert_match(/Invalid CSV format on row 2 of broken\.csv\./, exception.message)
91
+ assert_match(/(Missing or stray quote|col_sep_split)/, exception.message)
92
+ assert_match(/in line 2/, exception.message)
78
93
  end
79
94
 
80
95
  test 'delimited_tables should read table correctly' do
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ndr_import
3
3
  version: !ruby/object:Gem::Version
4
- version: 8.0.0
4
+ version: 8.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - NCRS Development Team
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-11-26 00:00:00.000000000 Z
11
+ date: 2019-01-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activemodel
@@ -28,22 +28,16 @@ dependencies:
28
28
  name: activesupport
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - ">="
32
- - !ruby/object:Gem::Version
33
- version: 3.2.18
34
- - - "<"
31
+ - - "~>"
35
32
  - !ruby/object:Gem::Version
36
- version: '5.3'
33
+ version: '5.0'
37
34
  type: :runtime
38
35
  prerelease: false
39
36
  version_requirements: !ruby/object:Gem::Requirement
40
37
  requirements:
41
- - - ">="
42
- - !ruby/object:Gem::Version
43
- version: 3.2.18
44
- - - "<"
38
+ - - "~>"
45
39
  - !ruby/object:Gem::Version
46
- version: '5.3'
40
+ version: '5.0'
47
41
  - !ruby/object:Gem::Dependency
48
42
  name: ndr_support
49
43
  requirement: !ruby/object:Gem::Requirement
@@ -365,7 +359,6 @@ files:
365
359
  - README.md
366
360
  - Rakefile
367
361
  - code_safety.yml
368
- - gemfiles/Gemfile.rails42
369
362
  - gemfiles/Gemfile.rails50
370
363
  - gemfiles/Gemfile.rails51
371
364
  - gemfiles/Gemfile.rails52
@@ -497,8 +490,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
497
490
  - !ruby/object:Gem::Version
498
491
  version: '0'
499
492
  requirements: []
500
- rubyforge_project:
501
- rubygems_version: 2.7.6
493
+ rubygems_version: 3.0.1
502
494
  signing_key:
503
495
  specification_version: 4
504
496
  summary: NDR Import
@@ -1,5 +0,0 @@
1
- source 'https://rubygems.org'
2
-
3
- gemspec path: '..'
4
-
5
- gem 'activesupport', '~> 4.2.0'