ndr_import 9.1.0 → 10.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: aaf3c826acb51f4d579fb956e9606b5b86e120f3bd561db762d72081f27a1098
4
- data.tar.gz: 2b11ddf7dc9b748b4a1ac9cc91b47c47b99677e4bde990c44d1ec20e76efbfc0
3
+ metadata.gz: '0080a6cf08b7832f1af6261ff2c774a0fd8629b02c363869ac852389f239731c'
4
+ data.tar.gz: f1f986e3e1d59d65cb260cd555e5c17716228cc3ba61ff58fc2df40fc9abb19e
5
5
  SHA512:
6
- metadata.gz: 3faaf744255693f04425b6e5ed1ec7198e912d2854b3f482a39d9bc53fe3859ed3bb6ef7855cdfd03959042a530f3a2f528239548676c7159c9029d69b6c160d
7
- data.tar.gz: 1aa9d1895d7f874499823c4b87f1874e025b376f239030f3baf40bab5f5ed3ed09f80721205e271446893a9070652df3f26fceb48b59a9eb86ac121fdbfd92c6
6
+ metadata.gz: bf67d9b21781db778b92e245af52182dab5e1cc41d1637a87f82df953aaeae017fc14254b3e3caa3dcb9e6459489e3661f5610277f04af79e6aaedd00109b299
7
+ data.tar.gz: '08dc2fe4d3dc4b0dcd34cc361285f04eeacf11afb244fc117d1b6abc125218305f092676c89b94547aa9f6711bcc9682671a74e0c5ed39714e8651b98ad9dfc6'
@@ -1,6 +1,13 @@
1
1
  name: Test
2
2
 
3
- on: [push]
3
+ on:
4
+ # Run on all master branch commits
5
+ push:
6
+ branches:
7
+ - master
8
+
9
+ # Run against all PRs (from the main repo, or forks)
10
+ pull_request:
4
11
 
5
12
  jobs:
6
13
  test:
data/CHANGELOG.md CHANGED
@@ -1,6 +1,23 @@
1
1
  ## [Unreleased]
2
2
  *no unreleased changes*
3
3
 
4
+ ## 10.1.2 / 2021-09-28
5
+ ### Fixed
6
+ * Bump `nokogiri` to address CVE-2021-41098
7
+ * Bump `seven_zip_ruby` requirement for Ruby 2.7 support
8
+
9
+ ## 10.1.1 / 2021-03-15
10
+ ### Fixed
11
+ * XML: ensure invalid control character *references* are also escaped (#64)
12
+
13
+ ## 10.1.0 / 2021-03-08
14
+ ### Added
15
+ * Allow optional `last_data_column` in NdrImport::Table mappings (#61)
16
+
17
+ ## 10.0.0 / 2021-02-22
18
+ ### Changed
19
+ * By default, escape any control characters found in XML (#60)
20
+
4
21
  ## 9.1.0 / 2021-02-01
5
22
  ### Added
6
23
  * `CSVLibrary` is now deprecated.
data/code_safety.yml CHANGED
@@ -10,8 +10,8 @@ file safety:
10
10
  safe_revision: b64ff21375dcde2b8fefe622ee9861f0fea21487
11
11
  ".github/workflows/test.yml":
12
12
  comments:
13
- reviewed_by: ollietulloch
14
- safe_revision: b64ff21375dcde2b8fefe622ee9861f0fea21487
13
+ reviewed_by: ollietulloch
14
+ safe_revision: c3dd24e8abefe61f04fa9d3bb71ec9d0ac109bbe
15
15
  ".gitignore":
16
16
  comments: whole file re-reviewed
17
17
  reviewed_by: josh.pencheon
@@ -27,7 +27,7 @@ file safety:
27
27
  CHANGELOG.md:
28
28
  comments:
29
29
  reviewed_by: ollietulloch
30
- safe_revision: d88ded7c260da37200610e4f0b204a4ea2e481f9
30
+ safe_revision: e938689d115b75074313541ec9d6b2bc60475add
31
31
  CODE_OF_CONDUCT.md:
32
32
  comments:
33
33
  reviewed_by: timgentry
@@ -67,7 +67,7 @@ file safety:
67
67
  docs/Gemfile.lock:
68
68
  comments:
69
69
  reviewed_by: ollietulloch
70
- safe_revision: ea0149c7739676463a252ffd9fbe4af238762b2b
70
+ safe_revision: 75877ddc14e99a10e5e751b7034964a9c7a9d1ef
71
71
  docs/_config.yml:
72
72
  comments:
73
73
  reviewed_by: josh.pencheon
@@ -238,8 +238,8 @@ file safety:
238
238
  safe_revision: 45da71ebd3acbc0fe53755bcd75483ba17cb6924
239
239
  lib/ndr_import/helpers/file/xml.rb:
240
240
  comments:
241
- reviewed_by: ollietulloch
242
- safe_revision: 4d337bd233f7e60cf9d363c92400f21269a28da7
241
+ reviewed_by: josh.pencheon
242
+ safe_revision: 9a6cc769abce5f9bfa5b4f8bd5cda52dfe18b12b
243
243
  lib/ndr_import/helpers/file/xml_streaming.rb:
244
244
  comments: uses SafePath and Shellwords when accessing filesystem, or making system
245
245
  calls
@@ -279,8 +279,8 @@ file safety:
279
279
  safe_revision: bb44ade56a2151706eede2c31142440ccf49e6f6
280
280
  lib/ndr_import/non_tabular/table.rb:
281
281
  comments:
282
- reviewed_by: josh.pencheon
283
- safe_revision: 71979e0a602ca5a0ce415c194f10add9959f0116
282
+ reviewed_by: ollietulloch
283
+ safe_revision: 66cff59af2f078152f7459c436d51b57cb93f28e
284
284
  lib/ndr_import/non_tabular_file_helper.rb:
285
285
  comments:
286
286
  reviewed_by: josh.pencheon
@@ -295,12 +295,12 @@ file safety:
295
295
  safe_revision: 3c7f827d17aacbf7b811eea67e27553f3b039070
296
296
  lib/ndr_import/table.rb:
297
297
  comments: uses File.basename
298
- reviewed_by: josh.pencheon
299
- safe_revision: a69d4a57ddcf13cdc13c27bd2eb91a395fa7ea36
298
+ reviewed_by: ollietulloch
299
+ safe_revision: 66cff59af2f078152f7459c436d51b57cb93f28e
300
300
  lib/ndr_import/universal_importer_helper.rb:
301
301
  comments:
302
- reviewed_by: ollietulloch
303
- safe_revision: ee2e74e4ceda4ff48cbda6872a6bdf0874212c21
302
+ reviewed_by: josh.pencheon
303
+ safe_revision: 85869d99ae93252b7f3ef2d0a4db817c88d35c9e
304
304
  lib/ndr_import/unmapped_data_error.rb:
305
305
  comments:
306
306
  reviewed_by: josh.pencheon
@@ -308,15 +308,19 @@ file safety:
308
308
  lib/ndr_import/version.rb:
309
309
  comments: another check?
310
310
  reviewed_by: ollietulloch
311
- safe_revision: d88ded7c260da37200610e4f0b204a4ea2e481f9
311
+ safe_revision: da0aed5e8a8659c5f70254f6d76264f8e780d835
312
+ lib/ndr_import/xml/control_char_escaper.rb:
313
+ comments:
314
+ reviewed_by: josh.pencheon
315
+ safe_revision: 9a6cc769abce5f9bfa5b4f8bd5cda52dfe18b12b
312
316
  lib/ndr_import/xml/table.rb:
313
317
  comments:
314
318
  reviewed_by: josh.pencheon
315
319
  safe_revision: 4ab72f84201c2d5f0147b7dfd041f488f6ff0422
316
320
  ndr_import.gemspec:
317
321
  comments:
318
- reviewed_by: josh.pencheon
319
- safe_revision: 95e6ee9997d06471fe6f2f169c3c701471086371
322
+ reviewed_by: ollietulloch
323
+ safe_revision: 3b437f0ca271fa962121edecd4559017c2446a3a
320
324
  test/csv_library_test.rb:
321
325
  comments:
322
326
  reviewed_by: ollietulloch
@@ -395,8 +399,8 @@ file safety:
395
399
  safe_revision: ae75fb49baf028ac8ce08e4bedcd3625ff3ff0cd
396
400
  test/helpers/file/xml_test.rb:
397
401
  comments:
398
- reviewed_by: timgentry
399
- safe_revision: 137170d443ea6bcc0afb18f62202c285ae6501eb
402
+ reviewed_by: josh.pencheon
403
+ safe_revision: 9a6cc769abce5f9bfa5b4f8bd5cda52dfe18b12b
400
404
  test/helpers/file/zip_test.rb:
401
405
  comments:
402
406
  reviewed_by: josh.pencheon
@@ -507,8 +511,8 @@ file safety:
507
511
  safe_revision: 71979e0a602ca5a0ce415c194f10add9959f0116
508
512
  test/resources/malformed.xml:
509
513
  comments:
510
- reviewed_by: timgentry
511
- safe_revision: 137170d443ea6bcc0afb18f62202c285ae6501eb
514
+ reviewed_by: joshpencheon
515
+ safe_revision: 3947f13e0cbd17f449eba292ad343eeb82116fe9
512
516
  test/resources/malformed_pipe.csv:
513
517
  comments:
514
518
  reviewed_by: josh.pencheon
@@ -621,6 +625,22 @@ file safety:
621
625
  comments:
622
626
  reviewed_by: timgentry
623
627
  safe_revision: f755c6960182f7dd460c18866cccfdf09178e860
628
+ test/resources/with-control-char-references-in-cdata.xml:
629
+ comments:
630
+ reviewed_by: josh.pencheon
631
+ safe_revision: 9a6cc769abce5f9bfa5b4f8bd5cda52dfe18b12b
632
+ test/resources/with-control-char-references.xml:
633
+ comments:
634
+ reviewed_by: josh.pencheon
635
+ safe_revision: 9a6cc769abce5f9bfa5b4f8bd5cda52dfe18b12b
636
+ test/resources/with-control-chars.xml:
637
+ comments:
638
+ reviewed_by: joshpencheon
639
+ safe_revision: 3947f13e0cbd17f449eba292ad343eeb82116fe9
640
+ test/resources/with-non-control-char-references.xml:
641
+ comments:
642
+ reviewed_by: josh.pencheon
643
+ safe_revision: 9a6cc769abce5f9bfa5b4f8bd5cda52dfe18b12b
624
644
  test/resources/xlsx_file_xls_extension.xls:
625
645
  comments:
626
646
  reviewed_by: timgentry
@@ -632,16 +652,20 @@ file safety:
632
652
  test/table_test.rb:
633
653
  comments:
634
654
  reviewed_by: josh.pencheon
635
- safe_revision: a69d4a57ddcf13cdc13c27bd2eb91a395fa7ea36
655
+ safe_revision: 3cf7473181f7f835b3dfe7822f6833d751805eaf
636
656
  test/test_helper.rb:
637
657
  comments:
638
658
  reviewed_by: josh.pencheon
639
659
  safe_revision: 93ccee82fc2165d1ca2d9b03d146ae03e769ea96
640
660
  test/universal_importer_helper_test.rb:
641
661
  comments:
642
- reviewed_by: ollietulloch
643
- safe_revision: 830de0f8cb139c5f61525652b424423935cfc7ac
644
- test/xml/table_test.rb:
662
+ reviewed_by: josh.pencheon
663
+ safe_revision: 85869d99ae93252b7f3ef2d0a4db817c88d35c9e
664
+ test/xml/control_char_escaper_test.rb:
645
665
  comments:
646
666
  reviewed_by: josh.pencheon
647
- safe_revision: 4ab72f84201c2d5f0147b7dfd041f488f6ff0422
667
+ safe_revision: 9a6cc769abce5f9bfa5b4f8bd5cda52dfe18b12b
668
+ test/xml/table_test.rb:
669
+ comments:
670
+ reviewed_by: ollietulloch
671
+ safe_revision: 66cff59af2f078152f7459c436d51b57cb93f28e
@@ -1,3 +1,4 @@
1
+ require 'ndr_import/xml/control_char_escaper'
1
2
  require 'ndr_support/safe_file'
2
3
  require 'ndr_support/utf8_encoding'
3
4
 
@@ -10,16 +11,21 @@ module NdrImport
10
11
 
11
12
  private
12
13
 
13
- def read_xml_file(path)
14
- file_data = SafeFile.new(path).read
14
+ # By default, escapes any control characters found in the XML
15
+ # - their use is forbidden in XML 1.0, and highly discouraged
16
+ # in XML 1.1; any found are most likely to be erroneous.
17
+ def read_xml_file(path, preserve_control_chars: false)
18
+ file_data = ensure_utf8!(SafeFile.read(path))
15
19
 
16
20
  require 'nokogiri'
17
21
 
18
- doc = Nokogiri::XML((ensure_utf8! file_data)) do |config|
19
- config.huge
22
+ doc = nil
23
+
24
+ escaping_control_chars_if_necessary(preserve_control_chars, file_data) do
25
+ doc = Nokogiri::XML(file_data, &:huge)
26
+ doc.encoding = 'UTF-8'
27
+ emulate_strict_mode_fatal_check!(doc)
20
28
  end
21
- doc.encoding = 'UTF-8'
22
- emulate_strict_mode_fatal_check!(doc)
23
29
 
24
30
  doc
25
31
  end
@@ -40,11 +46,27 @@ module NdrImport
40
46
  end
41
47
 
42
48
  return unless fatal_errors.any?
49
+
43
50
  raise Nokogiri::XML::SyntaxError, <<~MSG
44
51
  The file had #{fatal_errors.length} fatal error(s)!"
45
52
  #{fatal_errors.join("\n")}
46
53
  MSG
47
54
  end
55
+
56
+ def escaping_control_chars_if_necessary(preserve_control_chars, file_data)
57
+ return yield if preserve_control_chars
58
+
59
+ tried_escaping = false
60
+ begin
61
+ yield
62
+ rescue Nokogiri::XML::SyntaxError => e
63
+ raise e if tried_escaping
64
+
65
+ NdrImport::Xml::ControlCharEscaper.new(file_data).escape!
66
+ tried_escaping = true
67
+ retry
68
+ end
69
+ end
48
70
  end
49
71
  end
50
72
  end
@@ -16,8 +16,8 @@ module NdrImport
16
16
 
17
17
  include UTF8Encoding
18
18
 
19
- TABULAR_ONLY_OPTIONS = %w[delimiter liberal_parsing tablename_pattern
20
- header_lines footer_lines xml_record_xpath].freeze
19
+ TABULAR_ONLY_OPTIONS = %w[delimiter last_data_column liberal_parsing tablename_pattern
20
+ header_lines footer_lines xml_record_xpath slurp].freeze
21
21
 
22
22
  NON_TABULAR_OPTIONS = %w[capture_end_line capture_start_line start_line_pattern
23
23
  end_line_pattern remove_lines start_in_a_record
@@ -10,8 +10,9 @@ module NdrImport
10
10
  include NdrImport::Mapper
11
11
 
12
12
  def self.all_valid_options
13
- %w[canonical_name delimiter liberal_parsing filename_pattern file_password tablename_pattern
14
- header_lines footer_lines format klass columns xml_record_xpath row_identifier]
13
+ %w[canonical_name delimiter liberal_parsing filename_pattern file_password last_data_column
14
+ tablename_pattern header_lines footer_lines format klass columns xml_record_xpath slurp
15
+ row_identifier]
15
16
  end
16
17
 
17
18
  def all_valid_options
@@ -50,8 +51,9 @@ module NdrImport
50
51
  @header_best_guess = nil
51
52
  @notifier.try(:started)
52
53
 
54
+ last_col = last_column_to_transform
53
55
  skip_footer_lines(lines, footer_lines).each do |line|
54
- process_line(line, &block)
56
+ line.is_a?(Array) ? process_line(line[0..last_col], &block) : process_line(line, &block)
55
57
  end
56
58
 
57
59
  @notifier.try(:finished)
@@ -226,5 +228,26 @@ module NdrImport
226
228
  def column_names(column_mappings)
227
229
  column_mappings.map { |c| (c['column'] || c['standard_mapping']).downcase }
228
230
  end
231
+
232
+ # If specified in the mapping, stop transforming data at a given index (column)
233
+ def last_column_to_transform
234
+ return -1 if last_data_column.nil?
235
+ return last_data_column - 1 if last_data_column.is_a?(Integer)
236
+
237
+ error = "Unknown 'last_data_column' format: #{last_data_column} " \
238
+ "(#{last_data_column.class})"
239
+ raise error unless last_data_column.is_a?(String) && last_data_column =~ /\A[A-Z]+\z/i
240
+
241
+ # If it's an excel column label (eg 'K', 'AF', 'DDE'), convert it to an index
242
+ index_from_column_label
243
+ end
244
+
245
+ def index_from_column_label
246
+ alphabet_index_hash = ('A'..'Z').map.with_index.to_h
247
+ index = last_data_column.upcase.chars.inject(0) do |char_index, char|
248
+ (char_index * 26) + (alphabet_index_hash[char] + 1)
249
+ end
250
+ index - 1
251
+ end
229
252
  end # class Table
230
253
  end
@@ -48,9 +48,7 @@ module NdrImport
48
48
  def extract(source_file, &block)
49
49
  return enum_for(:extract, source_file) unless block
50
50
 
51
- files = NdrImport::File::Registry.files(source_file,
52
- 'unzip_path' => unzip_path)
53
- files.each do |filename|
51
+ NdrImport::File::Registry.files(source_file, 'unzip_path' => unzip_path).each do |filename|
54
52
  # now at the individual file level, can we find the table mapping?
55
53
  table_mapping = get_table_mapping(filename, nil)
56
54
 
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
  # This stores the current version of the NdrImport gem
3
3
  module NdrImport
4
- VERSION = '9.1.0'.freeze
4
+ VERSION = '10.1.2'
5
5
  end
@@ -0,0 +1,51 @@
1
+ require 'ndr_support/utf8_encoding'
2
+
3
+ module NdrImport
4
+ module Xml
5
+ # A class to escape control characters, and XML character references representing them
6
+ class ControlCharEscaper
7
+ include UTF8Encoding
8
+
9
+ # Matches XML character reference entities
10
+ CHARACTER_REFERENCES = /&#(?:(?<decimal>\d+)|x(?<hex>\h+));/.freeze
11
+
12
+ attr_reader :data
13
+
14
+ def initialize(data)
15
+ @data = data
16
+ end
17
+
18
+ def escape!
19
+ unescape_control_char_references!(data)
20
+ escape_control_chars!(data)
21
+ end
22
+
23
+ private
24
+
25
+ def unescape_control_char_references!(data)
26
+ data.gsub!(CHARACTER_REFERENCES) do |reference|
27
+ char = try_to_extract_char_from(Regexp.last_match)
28
+
29
+ if char&.match?(CONTROL_CHARACTERS)
30
+ escape_control_chars!(char)
31
+ else
32
+ reference
33
+ end
34
+ end
35
+ end
36
+
37
+ def try_to_extract_char_from(match)
38
+ if match.nil?
39
+ nil
40
+ elsif match[:decimal]
41
+ match[:decimal].to_i(10).chr
42
+ elsif match[:hex]
43
+ match[:hex].to_i(16).chr
44
+ end
45
+ rescue RangeError
46
+ # Return everything if the match was against junk:
47
+ match.to_s
48
+ end
49
+ end
50
+ end
51
+ end
data/ndr_import.gemspec CHANGED
@@ -35,7 +35,7 @@ Gem::Specification.new do |spec|
35
35
  spec.add_dependency 'ooxml_decrypt'
36
36
  spec.add_dependency 'pdf-reader', '~> 2.1'
37
37
  spec.add_dependency 'roo-xls'
38
- spec.add_dependency 'seven_zip_ruby', '~> 1.2'
38
+ spec.add_dependency 'seven_zip_ruby', '~> 1.3'
39
39
  spec.add_dependency 'spreadsheet', '1.2.6'
40
40
 
41
41
  spec.required_ruby_version = '>= 2.5'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ndr_import
3
3
  version: !ruby/object:Gem::Version
4
- version: 9.1.0
4
+ version: 10.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - NCRS Development Team
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-02-01 00:00:00.000000000 Z
11
+ date: 2021-09-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activemodel
@@ -188,14 +188,14 @@ dependencies:
188
188
  requirements:
189
189
  - - "~>"
190
190
  - !ruby/object:Gem::Version
191
- version: '1.2'
191
+ version: '1.3'
192
192
  type: :runtime
193
193
  prerelease: false
194
194
  version_requirements: !ruby/object:Gem::Requirement
195
195
  requirements:
196
196
  - - "~>"
197
197
  - !ruby/object:Gem::Version
198
- version: '1.2'
198
+ version: '1.3'
199
199
  - !ruby/object:Gem::Dependency
200
200
  name: spreadsheet
201
201
  requirement: !ruby/object:Gem::Requirement
@@ -427,6 +427,7 @@ files:
427
427
  - lib/ndr_import/universal_importer_helper.rb
428
428
  - lib/ndr_import/unmapped_data_error.rb
429
429
  - lib/ndr_import/version.rb
430
+ - lib/ndr_import/xml/control_char_escaper.rb
430
431
  - lib/ndr_import/xml/table.rb
431
432
  - ndr_import.gemspec
432
433
  homepage: https://github.com/PublicHealthEngland/ndr_import