ndr_import 9.1.0 → 10.1.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: aaf3c826acb51f4d579fb956e9606b5b86e120f3bd561db762d72081f27a1098
4
- data.tar.gz: 2b11ddf7dc9b748b4a1ac9cc91b47c47b99677e4bde990c44d1ec20e76efbfc0
3
+ metadata.gz: '0080a6cf08b7832f1af6261ff2c774a0fd8629b02c363869ac852389f239731c'
4
+ data.tar.gz: f1f986e3e1d59d65cb260cd555e5c17716228cc3ba61ff58fc2df40fc9abb19e
5
5
  SHA512:
6
- metadata.gz: 3faaf744255693f04425b6e5ed1ec7198e912d2854b3f482a39d9bc53fe3859ed3bb6ef7855cdfd03959042a530f3a2f528239548676c7159c9029d69b6c160d
7
- data.tar.gz: 1aa9d1895d7f874499823c4b87f1874e025b376f239030f3baf40bab5f5ed3ed09f80721205e271446893a9070652df3f26fceb48b59a9eb86ac121fdbfd92c6
6
+ metadata.gz: bf67d9b21781db778b92e245af52182dab5e1cc41d1637a87f82df953aaeae017fc14254b3e3caa3dcb9e6459489e3661f5610277f04af79e6aaedd00109b299
7
+ data.tar.gz: '08dc2fe4d3dc4b0dcd34cc361285f04eeacf11afb244fc117d1b6abc125218305f092676c89b94547aa9f6711bcc9682671a74e0c5ed39714e8651b98ad9dfc6'
@@ -1,6 +1,13 @@
1
1
  name: Test
2
2
 
3
- on: [push]
3
+ on:
4
+ # Run on all master branch commits
5
+ push:
6
+ branches:
7
+ - master
8
+
9
+ # Run against all PRs (from the main repo, or forks)
10
+ pull_request:
4
11
 
5
12
  jobs:
6
13
  test:
data/CHANGELOG.md CHANGED
@@ -1,6 +1,23 @@
1
1
  ## [Unreleased]
2
2
  *no unreleased changes*
3
3
 
4
+ ## 10.1.2 / 2021-09-28
5
+ ### Fixed
6
+ * Bump `nokogiri` to address CVE-2021-41098
7
+ * Bump `seven_zip_ruby` requirement for Ruby 2.7 support
8
+
9
+ ## 10.1.1 / 2021-03-15
10
+ ### Fixed
11
+ * XML: ensure invalid control character *references* are also escaped (#64)
12
+
13
+ ## 10.1.0 / 2021-03-08
14
+ ### Added
15
+ * Allow optional `last_data_column` in NdrImport::Table mappings (#61)
16
+
17
+ ## 10.0.0 / 2021-02-22
18
+ ### Changed
19
+ * By default, escape any control characters found in XML (#60)
20
+
4
21
  ## 9.1.0 / 2021-02-01
5
22
  ### Added
6
23
  * `CSVLibrary` is now deprecated.
data/code_safety.yml CHANGED
@@ -10,8 +10,8 @@ file safety:
10
10
  safe_revision: b64ff21375dcde2b8fefe622ee9861f0fea21487
11
11
  ".github/workflows/test.yml":
12
12
  comments:
13
- reviewed_by: ollietulloch
14
- safe_revision: b64ff21375dcde2b8fefe622ee9861f0fea21487
13
+ reviewed_by: ollietulloch
14
+ safe_revision: c3dd24e8abefe61f04fa9d3bb71ec9d0ac109bbe
15
15
  ".gitignore":
16
16
  comments: whole file re-reviewed
17
17
  reviewed_by: josh.pencheon
@@ -27,7 +27,7 @@ file safety:
27
27
  CHANGELOG.md:
28
28
  comments:
29
29
  reviewed_by: ollietulloch
30
- safe_revision: d88ded7c260da37200610e4f0b204a4ea2e481f9
30
+ safe_revision: e938689d115b75074313541ec9d6b2bc60475add
31
31
  CODE_OF_CONDUCT.md:
32
32
  comments:
33
33
  reviewed_by: timgentry
@@ -67,7 +67,7 @@ file safety:
67
67
  docs/Gemfile.lock:
68
68
  comments:
69
69
  reviewed_by: ollietulloch
70
- safe_revision: ea0149c7739676463a252ffd9fbe4af238762b2b
70
+ safe_revision: 75877ddc14e99a10e5e751b7034964a9c7a9d1ef
71
71
  docs/_config.yml:
72
72
  comments:
73
73
  reviewed_by: josh.pencheon
@@ -238,8 +238,8 @@ file safety:
238
238
  safe_revision: 45da71ebd3acbc0fe53755bcd75483ba17cb6924
239
239
  lib/ndr_import/helpers/file/xml.rb:
240
240
  comments:
241
- reviewed_by: ollietulloch
242
- safe_revision: 4d337bd233f7e60cf9d363c92400f21269a28da7
241
+ reviewed_by: josh.pencheon
242
+ safe_revision: 9a6cc769abce5f9bfa5b4f8bd5cda52dfe18b12b
243
243
  lib/ndr_import/helpers/file/xml_streaming.rb:
244
244
  comments: uses SafePath and Shellwords when accessing filesystem, or making system
245
245
  calls
@@ -279,8 +279,8 @@ file safety:
279
279
  safe_revision: bb44ade56a2151706eede2c31142440ccf49e6f6
280
280
  lib/ndr_import/non_tabular/table.rb:
281
281
  comments:
282
- reviewed_by: josh.pencheon
283
- safe_revision: 71979e0a602ca5a0ce415c194f10add9959f0116
282
+ reviewed_by: ollietulloch
283
+ safe_revision: 66cff59af2f078152f7459c436d51b57cb93f28e
284
284
  lib/ndr_import/non_tabular_file_helper.rb:
285
285
  comments:
286
286
  reviewed_by: josh.pencheon
@@ -295,12 +295,12 @@ file safety:
295
295
  safe_revision: 3c7f827d17aacbf7b811eea67e27553f3b039070
296
296
  lib/ndr_import/table.rb:
297
297
  comments: uses File.basename
298
- reviewed_by: josh.pencheon
299
- safe_revision: a69d4a57ddcf13cdc13c27bd2eb91a395fa7ea36
298
+ reviewed_by: ollietulloch
299
+ safe_revision: 66cff59af2f078152f7459c436d51b57cb93f28e
300
300
  lib/ndr_import/universal_importer_helper.rb:
301
301
  comments:
302
- reviewed_by: ollietulloch
303
- safe_revision: ee2e74e4ceda4ff48cbda6872a6bdf0874212c21
302
+ reviewed_by: josh.pencheon
303
+ safe_revision: 85869d99ae93252b7f3ef2d0a4db817c88d35c9e
304
304
  lib/ndr_import/unmapped_data_error.rb:
305
305
  comments:
306
306
  reviewed_by: josh.pencheon
@@ -308,15 +308,19 @@ file safety:
308
308
  lib/ndr_import/version.rb:
309
309
  comments: another check?
310
310
  reviewed_by: ollietulloch
311
- safe_revision: d88ded7c260da37200610e4f0b204a4ea2e481f9
311
+ safe_revision: da0aed5e8a8659c5f70254f6d76264f8e780d835
312
+ lib/ndr_import/xml/control_char_escaper.rb:
313
+ comments:
314
+ reviewed_by: josh.pencheon
315
+ safe_revision: 9a6cc769abce5f9bfa5b4f8bd5cda52dfe18b12b
312
316
  lib/ndr_import/xml/table.rb:
313
317
  comments:
314
318
  reviewed_by: josh.pencheon
315
319
  safe_revision: 4ab72f84201c2d5f0147b7dfd041f488f6ff0422
316
320
  ndr_import.gemspec:
317
321
  comments:
318
- reviewed_by: josh.pencheon
319
- safe_revision: 95e6ee9997d06471fe6f2f169c3c701471086371
322
+ reviewed_by: ollietulloch
323
+ safe_revision: 3b437f0ca271fa962121edecd4559017c2446a3a
320
324
  test/csv_library_test.rb:
321
325
  comments:
322
326
  reviewed_by: ollietulloch
@@ -395,8 +399,8 @@ file safety:
395
399
  safe_revision: ae75fb49baf028ac8ce08e4bedcd3625ff3ff0cd
396
400
  test/helpers/file/xml_test.rb:
397
401
  comments:
398
- reviewed_by: timgentry
399
- safe_revision: 137170d443ea6bcc0afb18f62202c285ae6501eb
402
+ reviewed_by: josh.pencheon
403
+ safe_revision: 9a6cc769abce5f9bfa5b4f8bd5cda52dfe18b12b
400
404
  test/helpers/file/zip_test.rb:
401
405
  comments:
402
406
  reviewed_by: josh.pencheon
@@ -507,8 +511,8 @@ file safety:
507
511
  safe_revision: 71979e0a602ca5a0ce415c194f10add9959f0116
508
512
  test/resources/malformed.xml:
509
513
  comments:
510
- reviewed_by: timgentry
511
- safe_revision: 137170d443ea6bcc0afb18f62202c285ae6501eb
514
+ reviewed_by: joshpencheon
515
+ safe_revision: 3947f13e0cbd17f449eba292ad343eeb82116fe9
512
516
  test/resources/malformed_pipe.csv:
513
517
  comments:
514
518
  reviewed_by: josh.pencheon
@@ -621,6 +625,22 @@ file safety:
621
625
  comments:
622
626
  reviewed_by: timgentry
623
627
  safe_revision: f755c6960182f7dd460c18866cccfdf09178e860
628
+ test/resources/with-control-char-references-in-cdata.xml:
629
+ comments:
630
+ reviewed_by: josh.pencheon
631
+ safe_revision: 9a6cc769abce5f9bfa5b4f8bd5cda52dfe18b12b
632
+ test/resources/with-control-char-references.xml:
633
+ comments:
634
+ reviewed_by: josh.pencheon
635
+ safe_revision: 9a6cc769abce5f9bfa5b4f8bd5cda52dfe18b12b
636
+ test/resources/with-control-chars.xml:
637
+ comments:
638
+ reviewed_by: joshpencheon
639
+ safe_revision: 3947f13e0cbd17f449eba292ad343eeb82116fe9
640
+ test/resources/with-non-control-char-references.xml:
641
+ comments:
642
+ reviewed_by: josh.pencheon
643
+ safe_revision: 9a6cc769abce5f9bfa5b4f8bd5cda52dfe18b12b
624
644
  test/resources/xlsx_file_xls_extension.xls:
625
645
  comments:
626
646
  reviewed_by: timgentry
@@ -632,16 +652,20 @@ file safety:
632
652
  test/table_test.rb:
633
653
  comments:
634
654
  reviewed_by: josh.pencheon
635
- safe_revision: a69d4a57ddcf13cdc13c27bd2eb91a395fa7ea36
655
+ safe_revision: 3cf7473181f7f835b3dfe7822f6833d751805eaf
636
656
  test/test_helper.rb:
637
657
  comments:
638
658
  reviewed_by: josh.pencheon
639
659
  safe_revision: 93ccee82fc2165d1ca2d9b03d146ae03e769ea96
640
660
  test/universal_importer_helper_test.rb:
641
661
  comments:
642
- reviewed_by: ollietulloch
643
- safe_revision: 830de0f8cb139c5f61525652b424423935cfc7ac
644
- test/xml/table_test.rb:
662
+ reviewed_by: josh.pencheon
663
+ safe_revision: 85869d99ae93252b7f3ef2d0a4db817c88d35c9e
664
+ test/xml/control_char_escaper_test.rb:
645
665
  comments:
646
666
  reviewed_by: josh.pencheon
647
- safe_revision: 4ab72f84201c2d5f0147b7dfd041f488f6ff0422
667
+ safe_revision: 9a6cc769abce5f9bfa5b4f8bd5cda52dfe18b12b
668
+ test/xml/table_test.rb:
669
+ comments:
670
+ reviewed_by: ollietulloch
671
+ safe_revision: 66cff59af2f078152f7459c436d51b57cb93f28e
@@ -1,3 +1,4 @@
1
+ require 'ndr_import/xml/control_char_escaper'
1
2
  require 'ndr_support/safe_file'
2
3
  require 'ndr_support/utf8_encoding'
3
4
 
@@ -10,16 +11,21 @@ module NdrImport
10
11
 
11
12
  private
12
13
 
13
- def read_xml_file(path)
14
- file_data = SafeFile.new(path).read
14
+ # By default, escapes any control characters found in the XML
15
+ # - their use is forbidden in XML 1.0, and highly discouraged
16
+ # in XML 1.1; any found are most likely to be erroneous.
17
+ def read_xml_file(path, preserve_control_chars: false)
18
+ file_data = ensure_utf8!(SafeFile.read(path))
15
19
 
16
20
  require 'nokogiri'
17
21
 
18
- doc = Nokogiri::XML((ensure_utf8! file_data)) do |config|
19
- config.huge
22
+ doc = nil
23
+
24
+ escaping_control_chars_if_necessary(preserve_control_chars, file_data) do
25
+ doc = Nokogiri::XML(file_data, &:huge)
26
+ doc.encoding = 'UTF-8'
27
+ emulate_strict_mode_fatal_check!(doc)
20
28
  end
21
- doc.encoding = 'UTF-8'
22
- emulate_strict_mode_fatal_check!(doc)
23
29
 
24
30
  doc
25
31
  end
@@ -40,11 +46,27 @@ module NdrImport
40
46
  end
41
47
 
42
48
  return unless fatal_errors.any?
49
+
43
50
  raise Nokogiri::XML::SyntaxError, <<~MSG
44
51
  The file had #{fatal_errors.length} fatal error(s)!"
45
52
  #{fatal_errors.join("\n")}
46
53
  MSG
47
54
  end
55
+
56
+ def escaping_control_chars_if_necessary(preserve_control_chars, file_data)
57
+ return yield if preserve_control_chars
58
+
59
+ tried_escaping = false
60
+ begin
61
+ yield
62
+ rescue Nokogiri::XML::SyntaxError => e
63
+ raise e if tried_escaping
64
+
65
+ NdrImport::Xml::ControlCharEscaper.new(file_data).escape!
66
+ tried_escaping = true
67
+ retry
68
+ end
69
+ end
48
70
  end
49
71
  end
50
72
  end
@@ -16,8 +16,8 @@ module NdrImport
16
16
 
17
17
  include UTF8Encoding
18
18
 
19
- TABULAR_ONLY_OPTIONS = %w[delimiter liberal_parsing tablename_pattern
20
- header_lines footer_lines xml_record_xpath].freeze
19
+ TABULAR_ONLY_OPTIONS = %w[delimiter last_data_column liberal_parsing tablename_pattern
20
+ header_lines footer_lines xml_record_xpath slurp].freeze
21
21
 
22
22
  NON_TABULAR_OPTIONS = %w[capture_end_line capture_start_line start_line_pattern
23
23
  end_line_pattern remove_lines start_in_a_record
@@ -10,8 +10,9 @@ module NdrImport
10
10
  include NdrImport::Mapper
11
11
 
12
12
  def self.all_valid_options
13
- %w[canonical_name delimiter liberal_parsing filename_pattern file_password tablename_pattern
14
- header_lines footer_lines format klass columns xml_record_xpath row_identifier]
13
+ %w[canonical_name delimiter liberal_parsing filename_pattern file_password last_data_column
14
+ tablename_pattern header_lines footer_lines format klass columns xml_record_xpath slurp
15
+ row_identifier]
15
16
  end
16
17
 
17
18
  def all_valid_options
@@ -50,8 +51,9 @@ module NdrImport
50
51
  @header_best_guess = nil
51
52
  @notifier.try(:started)
52
53
 
54
+ last_col = last_column_to_transform
53
55
  skip_footer_lines(lines, footer_lines).each do |line|
54
- process_line(line, &block)
56
+ line.is_a?(Array) ? process_line(line[0..last_col], &block) : process_line(line, &block)
55
57
  end
56
58
 
57
59
  @notifier.try(:finished)
@@ -226,5 +228,26 @@ module NdrImport
226
228
  def column_names(column_mappings)
227
229
  column_mappings.map { |c| (c['column'] || c['standard_mapping']).downcase }
228
230
  end
231
+
232
+ # If specified in the mapping, stop transforming data at a given index (column)
233
+ def last_column_to_transform
234
+ return -1 if last_data_column.nil?
235
+ return last_data_column - 1 if last_data_column.is_a?(Integer)
236
+
237
+ error = "Unknown 'last_data_column' format: #{last_data_column} " \
238
+ "(#{last_data_column.class})"
239
+ raise error unless last_data_column.is_a?(String) && last_data_column =~ /\A[A-Z]+\z/i
240
+
241
+ # If it's an excel column label (eg 'K', 'AF', 'DDE'), convert it to an index
242
+ index_from_column_label
243
+ end
244
+
245
+ def index_from_column_label
246
+ alphabet_index_hash = ('A'..'Z').map.with_index.to_h
247
+ index = last_data_column.upcase.chars.inject(0) do |char_index, char|
248
+ (char_index * 26) + (alphabet_index_hash[char] + 1)
249
+ end
250
+ index - 1
251
+ end
229
252
  end # class Table
230
253
  end
@@ -48,9 +48,7 @@ module NdrImport
48
48
  def extract(source_file, &block)
49
49
  return enum_for(:extract, source_file) unless block
50
50
 
51
- files = NdrImport::File::Registry.files(source_file,
52
- 'unzip_path' => unzip_path)
53
- files.each do |filename|
51
+ NdrImport::File::Registry.files(source_file, 'unzip_path' => unzip_path).each do |filename|
54
52
  # now at the individual file level, can we find the table mapping?
55
53
  table_mapping = get_table_mapping(filename, nil)
56
54
 
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
  # This stores the current version of the NdrImport gem
3
3
  module NdrImport
4
- VERSION = '9.1.0'.freeze
4
+ VERSION = '10.1.2'
5
5
  end
@@ -0,0 +1,51 @@
1
+ require 'ndr_support/utf8_encoding'
2
+
3
+ module NdrImport
4
+ module Xml
5
+ # A class to remove control characters, and XML entities representing them
6
+ class ControlCharEscaper
7
+ include UTF8Encoding
8
+
9
+ # Matches XML character reference entities
10
+ CHARACTER_REFERENCES = /&#(?:(?<decimal>\d+)|x(?<hex>\h+));/.freeze
11
+
12
+ attr_reader :data
13
+
14
+ def initialize(data)
15
+ @data = data
16
+ end
17
+
18
+ def escape!
19
+ unescape_control_char_references!(data)
20
+ escape_control_chars!(data)
21
+ end
22
+
23
+ private
24
+
25
+ def unescape_control_char_references!(data)
26
+ data.gsub!(CHARACTER_REFERENCES) do |reference|
27
+ char = try_to_extract_char_from(Regexp.last_match)
28
+
29
+ if char&.match?(CONTROL_CHARACTERS)
30
+ escape_control_chars!(char)
31
+ else
32
+ reference
33
+ end
34
+ end
35
+ end
36
+
37
+ def try_to_extract_char_from(match)
38
+ if match.nil?
39
+ nil
40
+ elsif match[:decimal]
41
+ match[:decimal].to_i(10).chr
42
+ elsif match[:hex]
43
+ match[:hex].to_i(16).chr
44
+ end
45
+ rescue RangeError
46
+ # Return everything if the match was against junk:
47
+ match.to_s
48
+ end
49
+ end
50
+ end
51
+ end
data/ndr_import.gemspec CHANGED
@@ -35,7 +35,7 @@ Gem::Specification.new do |spec|
35
35
  spec.add_dependency 'ooxml_decrypt'
36
36
  spec.add_dependency 'pdf-reader', '~> 2.1'
37
37
  spec.add_dependency 'roo-xls'
38
- spec.add_dependency 'seven_zip_ruby', '~> 1.2'
38
+ spec.add_dependency 'seven_zip_ruby', '~> 1.3'
39
39
  spec.add_dependency 'spreadsheet', '1.2.6'
40
40
 
41
41
  spec.required_ruby_version = '>= 2.5'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ndr_import
3
3
  version: !ruby/object:Gem::Version
4
- version: 9.1.0
4
+ version: 10.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - NCRS Development Team
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-02-01 00:00:00.000000000 Z
11
+ date: 2021-09-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activemodel
@@ -188,14 +188,14 @@ dependencies:
188
188
  requirements:
189
189
  - - "~>"
190
190
  - !ruby/object:Gem::Version
191
- version: '1.2'
191
+ version: '1.3'
192
192
  type: :runtime
193
193
  prerelease: false
194
194
  version_requirements: !ruby/object:Gem::Requirement
195
195
  requirements:
196
196
  - - "~>"
197
197
  - !ruby/object:Gem::Version
198
- version: '1.2'
198
+ version: '1.3'
199
199
  - !ruby/object:Gem::Dependency
200
200
  name: spreadsheet
201
201
  requirement: !ruby/object:Gem::Requirement
@@ -427,6 +427,7 @@ files:
427
427
  - lib/ndr_import/universal_importer_helper.rb
428
428
  - lib/ndr_import/unmapped_data_error.rb
429
429
  - lib/ndr_import/version.rb
430
+ - lib/ndr_import/xml/control_char_escaper.rb
430
431
  - lib/ndr_import/xml/table.rb
431
432
  - ndr_import.gemspec
432
433
  homepage: https://github.com/PublicHealthEngland/ndr_import