ndr_import 8.0.0 → 8.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f74d60e93cf1f1d72baeebcbe931fc4225c3e74c11aaa13c324578b261561008
4
- data.tar.gz: 70657ad1f113089bd2f719de5b59317310212c730ba34e762d99f9395da8fff2
3
+ metadata.gz: 214f73452462c2ea7182deaa8c90e4818d8ab6caf3fd604af4a64833e2518b41
4
+ data.tar.gz: 5d15e20e7b0c13fa3d3cd9169ad70c9028cc64d688906dd965a09ce791e97022
5
5
  SHA512:
6
- metadata.gz: 91bced100d286a44df6fa193cad58a2ad85f6733d9b8cbbf1970e438ed8a3a7d9785a6d0e8b16396713511d045edd0a6aba00090a903c217e4c71b8e2fde7174
7
- data.tar.gz: 4e2e968084f78f25fffebbc793806402fefd515605afc0ac9c81a5d535f0cb7f27a50af3c4dcb733ff5a9a8bc17311084c65593a2fb971ac35c5a4937bbfe777
6
+ metadata.gz: 7d4080dc6d86e553f8f6a07d2fe4876c6d014ecc8b5b3e729ac089147223ecf2d18f9f9bddfbc64c692e73deb08e27a12e09793fbd155c2a859b29c7efff7c3b
7
+ data.tar.gz: 46b267832414e13f1580da5fc386bebe34c5debb79d03e288c3f793b6c28158e362eb3971d2f275f68c398a4f8e3293321a585df2be2595369197bb70bf32eef
@@ -1,6 +1,11 @@
1
1
  ## [Unreleased]
2
2
  *no unreleased changes*
3
3
 
4
+ ## 8.1.0 / 2019-01-08
5
+ ### Added
6
+ * Support `liberal_parsing` when using the delimited helper
7
+ * Support for Ruby 2.6. Rails 5.0 and Ruby 2.4 are now the minimum supported versions.
8
+
4
9
  ## 8.0.0 / 2018-11-26
5
10
  ### Changed
6
11
  * Strip non tabular captured values by default (#28)
@@ -8,6 +13,7 @@
8
13
  ### Added
9
14
  * Add validations to field mappings (#27)
10
15
  * control `ndr_table` serialise order with `encode_with` method (#30)
16
+ * Added a 7-Zip file handler
11
17
 
12
18
  ## 7.0.0 / 2018-11-09
13
19
  ### Changed
@@ -15,11 +15,11 @@ file safety:
15
15
  ".travis.yml":
16
16
  comments:
17
17
  reviewed_by: josh.pencheon
18
- safe_revision: 71979e0a602ca5a0ce415c194f10add9959f0116
18
+ safe_revision: 607c0668f1fffd70d181bc1a31c4f56eed5f6189
19
19
  CHANGELOG.md:
20
20
  comments:
21
21
  reviewed_by: josh.pencheon
22
- safe_revision: 952e45599240ab96b22d7ce223a7551853e50ae5
22
+ safe_revision: 5ca2f7e9cb0e79134e5fa54ca2316c584696b34a
23
23
  CODE_OF_CONDUCT.md:
24
24
  comments:
25
25
  reviewed_by: timgentry
@@ -44,10 +44,6 @@ file safety:
44
44
  comments:
45
45
  reviewed_by: josh.pencheon
46
46
  safe_revision: 5a7f26cecaabab20f4e666776f9166dcc3fa6bfe
47
- gemfiles/Gemfile.rails42:
48
- comments:
49
- reviewed_by: timgentry
50
- safe_revision: 0c5967732100f43e7b60a97a7f0aae60d3641791
51
47
  gemfiles/Gemfile.rails50:
52
48
  comments:
53
49
  reviewed_by: josh.pencheon
@@ -79,7 +75,7 @@ file safety:
79
75
  lib/ndr_import/file/delimited.rb:
80
76
  comments:
81
77
  reviewed_by: josh.pencheon
82
- safe_revision: 71979e0a602ca5a0ce415c194f10add9959f0116
78
+ safe_revision: 607c0668f1fffd70d181bc1a31c4f56eed5f6189
83
79
  lib/ndr_import/file/docx.rb:
84
80
  comments:
85
81
  reviewed_by: josh.pencheon
@@ -127,7 +123,7 @@ file safety:
127
123
  lib/ndr_import/helpers/file/delimited.rb:
128
124
  comments:
129
125
  reviewed_by: josh.pencheon
130
- safe_revision: be12e57519d3737e8d3901d7b01485c6995708dd
126
+ safe_revision: 607c0668f1fffd70d181bc1a31c4f56eed5f6189
131
127
  lib/ndr_import/helpers/file/excel.rb:
132
128
  comments:
133
129
  reviewed_by: joshpencheon
@@ -199,7 +195,7 @@ file safety:
199
195
  lib/ndr_import/version.rb:
200
196
  comments: another check?
201
197
  reviewed_by: josh.pencheon
202
- safe_revision: 952e45599240ab96b22d7ce223a7551853e50ae5
198
+ safe_revision: 5ca2f7e9cb0e79134e5fa54ca2316c584696b34a
203
199
  lib/ndr_import/xml/table.rb:
204
200
  comments:
205
201
  reviewed_by: josh.pencheon
@@ -207,7 +203,7 @@ file safety:
207
203
  ndr_import.gemspec:
208
204
  comments:
209
205
  reviewed_by: josh.pencheon
210
- safe_revision: eca44583e9989159b45e90021dd1c65228447180
206
+ safe_revision: 607c0668f1fffd70d181bc1a31c4f56eed5f6189
211
207
  test/file/base_test.rb:
212
208
  comments:
213
209
  reviewed_by: timgentry
@@ -215,7 +211,7 @@ file safety:
215
211
  test/file/delimited_test.rb:
216
212
  comments:
217
213
  reviewed_by: josh.pencheon
218
- safe_revision: ab9f926a53e84f8ffa826abdd967abee455c89df
214
+ safe_revision: 607c0668f1fffd70d181bc1a31c4f56eed5f6189
219
215
  test/file/docx_test.rb:
220
216
  comments:
221
217
  reviewed_by: josh.pencheon
@@ -258,8 +254,8 @@ file safety:
258
254
  safe_revision: dfc958d44b6c58355445fa395db08a62213ee709
259
255
  test/helpers/file/delimited_test.rb:
260
256
  comments:
261
- reviewed_by: joshpencheon
262
- safe_revision: 377c060bd1e012d6e162a4ab8923a3609aacdf57
257
+ reviewed_by: josh.pencheon
258
+ safe_revision: 607c0668f1fffd70d181bc1a31c4f56eed5f6189
263
259
  test/helpers/file/excel_test.rb:
264
260
  comments:
265
261
  reviewed_by: joshpencheon
@@ -1,5 +1,6 @@
1
1
  require 'ndr_support/safe_file'
2
2
  require 'ndr_import/csv_library'
3
+ require 'ndr_import/helpers/file/delimited'
3
4
  require_relative 'registry'
4
5
 
5
6
  module NdrImport
@@ -8,6 +9,8 @@ module NdrImport
8
9
  module File
9
10
  # This class is a delimited file handler that returns a single table.
10
11
  class Delimited < Base
12
+ include Helpers::File::Delimited
13
+
11
14
  DELIMITED_COL_SEP = {
12
15
  'csv' => nil
13
16
  }
@@ -23,64 +26,16 @@ module NdrImport
23
26
  def rows
24
27
  return enum_for(:rows) unless block_given?
25
28
 
26
- safe_path = SafeFile.safepath_to_string(@filename)
27
-
28
- # By now, we know `encodings` should let us read the whole
29
- # file successfully; if there are problems, we should crash.
30
- CSVLibrary.foreach(safe_path, encodings(safe_path)) do |line|
31
- yield line.map(&:to_s)
32
- end
33
- end
29
+ col_sep = @options['col_sep']
30
+ liberal = @options['liberal_parsing'].presence
34
31
 
35
- # Cache the determined encodings, so rewinding the enumerator doesn't
36
- # have to redo this, but equally it is still done lazily:
37
- def encodings(safe_path)
38
- @encodings ||= determine_encodings!(safe_path)
32
+ delimited_rows(@filename, col_sep, liberal) { |row| yield row }
39
33
  end
40
34
 
41
- # Derive the source encoding by trying all supported encodings.
42
- # Returns first set of working options, or raises if none could be found.
43
- def determine_encodings!(safe_path)
44
- # delimiter encoding => # FasterCSV encoding string
45
- supported_encodings = {
46
- 'UTF-8' => 'r:bom|utf-8',
47
- 'Windows-1252' => 'r:windows-1252:utf-8'
48
- }
49
-
50
- successful_options = nil
51
- supported_encodings.each do |delimiter_encoding, access_mode|
52
- begin
53
- col_sep = @options['col_sep']
54
- options = {
55
- col_sep: (col_sep || ',').force_encoding(delimiter_encoding),
56
- mode: access_mode,
57
- liberal_parsing: @options['liberal_parsing'].presence
58
- }
59
-
60
- row_num = 0
61
- # Iterate through the file; if we reach the end, this encoding worked:
62
- CSVLibrary.foreach(safe_path, options) { |_line| row_num += 1 }
63
- rescue ArgumentError => e
64
- next if e.message =~ /invalid byte sequence/ # This encoding didn't work
65
- raise(e)
66
- rescue CSVLibrary::MalformedCSVError => e
67
- description = (col_sep ? col_sep.inspect + ' delimited' : 'CSV')
68
-
69
- raise(CSVLibrary::MalformedCSVError, "Invalid #{description} format " \
70
- "on row #{row_num + 1} of #{::File.basename(safe_path)}. Original: #{e.message}")
71
- end
72
-
73
- # We got this far => encoding choice worked:
74
- successful_options = options
75
- break
76
- end
77
-
78
- # We tried them all, and none worked:
79
- unless successful_options
80
- raise "None of the encodings #{supported_encodings.values.inspect} were successful!"
81
- end
82
-
83
- successful_options
35
+ # Cache working encodings, so that resetting the enumerator
36
+ # doesn't require recalculating this:
37
+ def determine_encodings!(*)
38
+ @encoding ||= super
84
39
  end
85
40
  end
86
41
 
@@ -7,32 +7,32 @@ module NdrImport
7
7
  # This mixin adds delimited file functionality to unified importers.
8
8
  module Delimited
9
9
  # Read a plain text CSV file, return an array of the content
10
- def read_csv_file(path)
10
+ def read_csv_file(path, liberal = false)
11
11
  # Read the page below when encountering "CSV::IllegalFormatError" error caused by CSV
12
12
  # file generated at MAC OS
13
13
  # http://stackoverflow.com/questions/1549139/ruby-cannot-parse-excel-file-exported-as-csv-in-os-x
14
14
 
15
- read_delimited_file(path)
15
+ read_delimited_file(path, nil, liberal)
16
16
  end
17
17
 
18
18
  # Slurp the entire file into an array of lines.
19
- def read_delimited_file(path, col_sep = nil)
20
- delimited_rows(path, col_sep).to_a
19
+ def read_delimited_file(path, col_sep = nil, liberal = false)
20
+ delimited_rows(path, col_sep, liberal).to_a
21
21
  end
22
22
 
23
23
  # Iterate through the file table by table, yielding each one in turn.
24
- def delimited_tables(path, col_sep = nil)
25
- return enum_for(:delimited_tables, path, col_sep) unless block_given?
24
+ def delimited_tables(path, col_sep = nil, liberal = false)
25
+ return enum_for(:delimited_tables, path, col_sep, liberal) unless block_given?
26
26
 
27
- yield nil, delimited_rows(path, col_sep)
27
+ yield nil, delimited_rows(path, col_sep, liberal)
28
28
  end
29
29
 
30
30
  # Iterate through the file line by line, yielding each one in turn.
31
- def delimited_rows(path, col_sep = nil)
32
- return enum_for(:delimited_rows, path, col_sep) unless block_given?
31
+ def delimited_rows(path, col_sep = nil, liberal = false)
32
+ return enum_for(:delimited_rows, path, col_sep, liberal) unless block_given?
33
33
 
34
34
  safe_path = SafeFile.safepath_to_string(path)
35
- encodings = determine_encodings!(safe_path, col_sep)
35
+ encodings = determine_encodings!(safe_path, col_sep, liberal)
36
36
 
37
37
  # By now, we know `encodings` should let us read the whole
38
38
  # file successfully; if there are problems, we should crash.
@@ -45,45 +45,59 @@ module NdrImport
45
45
 
46
46
  # Derive the source encoding by trying all supported encodings.
47
47
  # Returns first set of working options, or raises if none could be found.
48
- def determine_encodings!(safe_path, col_sep = nil)
48
+ def determine_encodings!(safe_path, col_sep, liberal)
49
49
  # delimiter encoding => # FasterCSV encoding string
50
50
  supported_encodings = {
51
51
  'UTF-8' => 'r:bom|utf-8',
52
52
  'Windows-1252' => 'r:windows-1252:utf-8'
53
53
  }
54
54
 
55
- successful_options = nil
55
+ successful_options = try_each_encoding(safe_path, col_sep, liberal, supported_encodings)
56
+
57
+ # We tried them all, and none worked:
58
+ unless successful_options
59
+ raise "None of the encodings #{supported_encodings.values.inspect} were successful!"
60
+ end
61
+
62
+ successful_options
63
+ end
64
+
65
+ def try_each_encoding(safe_path, col_sep, liberal, supported_encodings)
56
66
  supported_encodings.each do |delimiter_encoding, access_mode|
57
67
  begin
58
68
  options = {
59
- :col_sep => (col_sep || ',').force_encoding(delimiter_encoding),
60
- :mode => access_mode
69
+ col_sep: (col_sep || ',').force_encoding(delimiter_encoding),
70
+ liberal_parsing: liberal,
71
+ mode: access_mode
61
72
  }
62
73
 
63
74
  row_num = 0
64
75
  # Iterate through the file; if we reach the end, this encoding worked:
65
76
  CSVLibrary.foreach(safe_path, options) { |_line| row_num += 1 }
77
+ return options
66
78
  rescue ArgumentError => e
67
79
  next if e.message =~ /invalid byte sequence/ # This encoding didn't work
68
80
  raise(e)
81
+ rescue RegexpError => e
82
+ next if e.message =~ /invalid multibyte character/ # This encoding didn't work
83
+ raise(e)
69
84
  rescue CSVLibrary::MalformedCSVError => e
70
- description = (col_sep ? col_sep.inspect + ' delimited' : 'CSV')
71
-
72
- raise(CSVLibrary::MalformedCSVError, "Invalid #{description} format " \
73
- "on row #{row_num + 1} of #{::File.basename(safe_path)}. Original: #{e.message}")
85
+ next if e.message =~ /Invalid byte sequence/ # This encoding didn't work
86
+ raise malformed_csv_error(e, col_sep, row_num + 1, safe_path)
74
87
  end
75
-
76
- # We got this far => encoding choice worked:
77
- successful_options = options
78
- break
79
88
  end
89
+ end
80
90
 
81
- # We tried them all, and none worked:
82
- unless successful_options
83
- fail "None of the encodings #{supported_encodings.values.inspect} were successful!"
84
- end
91
+ def malformed_csv_error(exception, col_sep, line, safe_path)
92
+ type = (col_sep ? col_sep.inspect + ' delimited' : 'CSV')
93
+ message = "Invalid #{type} format on row #{line} of #{::File.basename(safe_path)}"
85
94
 
86
- successful_options
95
+ if exception.respond_to?(:line_number)
96
+ base_message = exception.message.chomp(" in line #{exception.line_number}.")
97
+ exception.class.new("#{message}. Original: #{base_message}", exception.line_number)
98
+ else
99
+ exception.class.new("#{message}. Original: #{exception.message}")
100
+ end
87
101
  end
88
102
  end
89
103
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
  # This stores the current version of the NdrImport gem
3
3
  module NdrImport
4
- VERSION = '8.0.0'.freeze
4
+ VERSION = '8.1.0'.freeze
5
5
  end
@@ -21,7 +21,7 @@ Gem::Specification.new do |spec|
21
21
  spec.require_paths = ['lib']
22
22
 
23
23
  spec.add_dependency 'activemodel'
24
- spec.add_dependency 'activesupport', '>= 3.2.18', '< 5.3'
24
+ spec.add_dependency 'activesupport', '~> 5.0'
25
25
  spec.add_dependency 'ndr_support', '>= 5.3.2', '< 6'
26
26
 
27
27
  spec.add_dependency 'rubyzip', '~> 1.2', '>= 1.2.2'
@@ -185,9 +185,9 @@ module NdrImport
185
185
  end
186
186
  end
187
187
 
188
- msg = 'Invalid CSV format on row 2 of broken.csv. ' \
189
- 'Original: Missing or stray quote in line 2'
190
- assert_equal msg, exception.message
188
+ assert_match(/Invalid CSV format on row 2 of broken\.csv\./, exception.message)
189
+ assert_match(/(Missing or stray quote|col_sep_split)/, exception.message)
190
+ assert_match(/in line 2/, exception.message)
191
191
  end
192
192
 
193
193
  test 'should report addition details upon failure to read csv line-by-line' do
@@ -207,16 +207,16 @@ module NdrImport
207
207
 
208
208
  assert rows_yielded.empty?, 'no rows should have been yielded'
209
209
 
210
- msg = 'Invalid CSV format on row 2 of broken.csv. ' \
211
- 'Original: Missing or stray quote in line 2'
212
- assert_equal msg, exception.message
210
+ assert_match(/Invalid CSV format on row 2 of broken\.csv\./, exception.message)
211
+ assert_match(/(Missing or stray quote|col_sep_split)/, exception.message)
212
+ assert_match(/in line 2/, exception.message)
213
213
  end
214
214
 
215
215
  test 'should only determine encodings once' do
216
216
  file_path = @permanent_test_files.join('normal.csv')
217
217
  handler = NdrImport::File::Delimited.new(file_path, 'csv', 'col_sep' => nil)
218
218
 
219
- handler.expects(determine_encodings!: { mode: 'r:bom|utf-8', col_sep: ',' }).once
219
+ handler.expects(try_each_encoding: { mode: 'r:bom|utf-8', col_sep: ',' }).once
220
220
 
221
221
  2.times do
222
222
  handler.tables.each do |tablename, sheet|
@@ -59,8 +59,22 @@ class DelimitedTest < ActiveSupport::TestCase
59
59
  @importer.read_delimited_file(@permanent_test_files.join('broken.csv'), nil)
60
60
  end
61
61
 
62
- msg = 'Invalid CSV format on row 2 of broken.csv. Original: Missing or stray quote in line 2'
63
- assert_equal msg, exception.message
62
+ assert_match(/Invalid CSV format on row 2 of broken\.csv\./, exception.message)
63
+ assert_match(/(Missing or stray quote|col_sep_split)/, exception.message)
64
+ assert_match(/in line 2/, exception.message)
65
+ end
66
+
67
+ test 'should be able to use liberal parsing to overcome minor CSV errors' do
68
+ file_path = @permanent_test_files.join('malformed.csv')
69
+ assert_raises(CSVLibrary::MalformedCSVError) do
70
+ @importer.read_delimited_file(file_path, nil)
71
+ end
72
+
73
+ rows = @importer.read_delimited_file(file_path, nil, true)
74
+
75
+ expected_row = ['2'] * 25
76
+ expected_row << '2"malformed"'
77
+ assert_equal expected_row, rows[2].sort
64
78
  end
65
79
 
66
80
  test 'should report addition details upon failure to read csv line-by-line' do
@@ -73,8 +87,9 @@ class DelimitedTest < ActiveSupport::TestCase
73
87
 
74
88
  assert rows_yielded.empty?, 'no rows should have been yielded'
75
89
 
76
- msg = 'Invalid CSV format on row 2 of broken.csv. Original: Missing or stray quote in line 2'
77
- assert_equal msg, exception.message
90
+ assert_match(/Invalid CSV format on row 2 of broken\.csv\./, exception.message)
91
+ assert_match(/(Missing or stray quote|col_sep_split)/, exception.message)
92
+ assert_match(/in line 2/, exception.message)
78
93
  end
79
94
 
80
95
  test 'delimited_tables should read table correctly' do
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ndr_import
3
3
  version: !ruby/object:Gem::Version
4
- version: 8.0.0
4
+ version: 8.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - NCRS Development Team
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-11-26 00:00:00.000000000 Z
11
+ date: 2019-01-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activemodel
@@ -28,22 +28,16 @@ dependencies:
28
28
  name: activesupport
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - ">="
32
- - !ruby/object:Gem::Version
33
- version: 3.2.18
34
- - - "<"
31
+ - - "~>"
35
32
  - !ruby/object:Gem::Version
36
- version: '5.3'
33
+ version: '5.0'
37
34
  type: :runtime
38
35
  prerelease: false
39
36
  version_requirements: !ruby/object:Gem::Requirement
40
37
  requirements:
41
- - - ">="
42
- - !ruby/object:Gem::Version
43
- version: 3.2.18
44
- - - "<"
38
+ - - "~>"
45
39
  - !ruby/object:Gem::Version
46
- version: '5.3'
40
+ version: '5.0'
47
41
  - !ruby/object:Gem::Dependency
48
42
  name: ndr_support
49
43
  requirement: !ruby/object:Gem::Requirement
@@ -365,7 +359,6 @@ files:
365
359
  - README.md
366
360
  - Rakefile
367
361
  - code_safety.yml
368
- - gemfiles/Gemfile.rails42
369
362
  - gemfiles/Gemfile.rails50
370
363
  - gemfiles/Gemfile.rails51
371
364
  - gemfiles/Gemfile.rails52
@@ -497,8 +490,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
497
490
  - !ruby/object:Gem::Version
498
491
  version: '0'
499
492
  requirements: []
500
- rubyforge_project:
501
- rubygems_version: 2.7.6
493
+ rubygems_version: 3.0.1
502
494
  signing_key:
503
495
  specification_version: 4
504
496
  summary: NDR Import
@@ -1,5 +0,0 @@
1
- source 'https://rubygems.org'
2
-
3
- gemspec path: '..'
4
-
5
- gem 'activesupport', '~> 4.2.0'