ndr_import 10.1 → 10.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 16cdebb2e3a6809255d5d66971a2db6d5c954731f6def3c73fe0f89d8ea0b7e9
4
- data.tar.gz: 575ba2c40ae01f99ebd48f75ff42c2cdf40871cbed87bfb2399de45d9f402e73
3
+ metadata.gz: 2463ac35899a6db81e345b75b0ea10186530f559460dfca1211ba7694f52b760
4
+ data.tar.gz: 2f2cf39e959beeb3cfe6bcad033eb2c0695486dce3b640fe3de8b0c2b9b88a2f
5
5
  SHA512:
6
- metadata.gz: 37e9bcfe3b8a5cab98bd68fc846eafd386595e0b6b79b9b4bf10ba8bc44a6d24ff998f5ab249964009bb0eebebfe0700bd6a39ddd0dfefc1260b529ff5543e63
7
- data.tar.gz: c144108ad4d2f63c918e43ef540f30d49be26897d6c4d82bccda6128aaff5b408c3d93425612bedaef73f865292a3d2a408f0c792e35a9f548e50363804c8d10
6
+ metadata.gz: c6eca601043ff01ebe910375a36131bdf55ebd3f18664e3db3c07180f007b0f06dc50cc3016634a12764befc1e28d621a09a7b382392f67608aa4d21c83c7f2d
7
+ data.tar.gz: dc568494bfc4b39b7ee47c7738511274cb85ad9532e380f3c27870682a1a21d2acda4d7d8d855597abdd0643b8afe0ed6e161b74f201fa53640b5c9641f895c5
data/CHANGELOG.md CHANGED
@@ -1,10 +1,15 @@
1
1
  ## [Unreleased]
2
- * no relevant changes
2
+ *no unreleased changes*
3
3
 
4
- ## 10.1 / 2021-03-08
4
+ ## 10.1.1 / 2021-03-15
5
+ ### Fixed
6
+ * XML: ensure invalid control character *references* are also escaped (#64)
7
+
8
+ ## 10.1.0 / 2021-03-08
9
+ ### Added
5
10
  * Allow optional `last_data_column` in NdrImport::Table mappings (#61)
6
11
 
7
- ## 10.0 / 2021-02-22
12
+ ## 10.0.0 / 2021-02-22
8
13
  ### Changed
9
14
  * By default, escape any control characters found in XML (#60)
10
15
 
data/code_safety.yml CHANGED
@@ -26,8 +26,8 @@ file safety:
26
26
  safe_revision: b09e268ff9c8349b914aa1b7ba888e1d39f97e4a
27
27
  CHANGELOG.md:
28
28
  comments:
29
- reviewed_by: ollietulloch
30
- safe_revision: 2d093cc57a699b527a7d0159e77b91f4409a6e0b
29
+ reviewed_by: josh.pencheon
30
+ safe_revision: 47fa3633ec2e48f1ee9fb12aad03e817e73c54bf
31
31
  CODE_OF_CONDUCT.md:
32
32
  comments:
33
33
  reviewed_by: timgentry
@@ -238,8 +238,8 @@ file safety:
238
238
  safe_revision: 45da71ebd3acbc0fe53755bcd75483ba17cb6924
239
239
  lib/ndr_import/helpers/file/xml.rb:
240
240
  comments:
241
- reviewed_by: joshpencheon
242
- safe_revision: 3947f13e0cbd17f449eba292ad343eeb82116fe9
241
+ reviewed_by: josh.pencheon
242
+ safe_revision: 9a6cc769abce5f9bfa5b4f8bd5cda52dfe18b12b
243
243
  lib/ndr_import/helpers/file/xml_streaming.rb:
244
244
  comments: uses SafePath and Shellwords when accessing filesystem, or making system
245
245
  calls
@@ -279,7 +279,7 @@ file safety:
279
279
  safe_revision: bb44ade56a2151706eede2c31142440ccf49e6f6
280
280
  lib/ndr_import/non_tabular/table.rb:
281
281
  comments:
282
- reviewed_by: ollietulloch
282
+ reviewed_by: josh.pencheon
283
283
  safe_revision: f9df064adcfd38f09d83ad8c5496c84188faed98
284
284
  lib/ndr_import/non_tabular_file_helper.rb:
285
285
  comments:
@@ -295,11 +295,11 @@ file safety:
295
295
  safe_revision: 3c7f827d17aacbf7b811eea67e27553f3b039070
296
296
  lib/ndr_import/table.rb:
297
297
  comments: uses File.basename
298
- reviewed_by: ollietulloch
298
+ reviewed_by: josh.pencheon
299
299
  safe_revision: 3cf7473181f7f835b3dfe7822f6833d751805eaf
300
300
  lib/ndr_import/universal_importer_helper.rb:
301
301
  comments:
302
- reviewed_by: ollietulloch
302
+ reviewed_by: josh.pencheon
303
303
  safe_revision: 85869d99ae93252b7f3ef2d0a4db817c88d35c9e
304
304
  lib/ndr_import/unmapped_data_error.rb:
305
305
  comments:
@@ -307,8 +307,12 @@ file safety:
307
307
  safe_revision: 5cd2cd0b3a1e254d30d4acc28c6731825a1f84f5
308
308
  lib/ndr_import/version.rb:
309
309
  comments: another check?
310
- reviewed_by: ollietulloch
311
- safe_revision: 2d093cc57a699b527a7d0159e77b91f4409a6e0b
310
+ reviewed_by: josh.pencheon
311
+ safe_revision: 47fa3633ec2e48f1ee9fb12aad03e817e73c54bf
312
+ lib/ndr_import/xml/control_char_escaper.rb:
313
+ comments:
314
+ reviewed_by: josh.pencheon
315
+ safe_revision: 9a6cc769abce5f9bfa5b4f8bd5cda52dfe18b12b
312
316
  lib/ndr_import/xml/table.rb:
313
317
  comments:
314
318
  reviewed_by: josh.pencheon
@@ -395,8 +399,8 @@ file safety:
395
399
  safe_revision: ae75fb49baf028ac8ce08e4bedcd3625ff3ff0cd
396
400
  test/helpers/file/xml_test.rb:
397
401
  comments:
398
- reviewed_by: joshpencheon
399
- safe_revision: 3947f13e0cbd17f449eba292ad343eeb82116fe9
402
+ reviewed_by: josh.pencheon
403
+ safe_revision: 9a6cc769abce5f9bfa5b4f8bd5cda52dfe18b12b
400
404
  test/helpers/file/zip_test.rb:
401
405
  comments:
402
406
  reviewed_by: josh.pencheon
@@ -621,10 +625,22 @@ file safety:
621
625
  comments:
622
626
  reviewed_by: timgentry
623
627
  safe_revision: f755c6960182f7dd460c18866cccfdf09178e860
628
+ test/resources/with-control-char-references-in-cdata.xml:
629
+ comments:
630
+ reviewed_by: josh.pencheon
631
+ safe_revision: 9a6cc769abce5f9bfa5b4f8bd5cda52dfe18b12b
632
+ test/resources/with-control-char-references.xml:
633
+ comments:
634
+ reviewed_by: josh.pencheon
635
+ safe_revision: 9a6cc769abce5f9bfa5b4f8bd5cda52dfe18b12b
624
636
  test/resources/with-control-chars.xml:
625
637
  comments:
626
638
  reviewed_by: joshpencheon
627
639
  safe_revision: 3947f13e0cbd17f449eba292ad343eeb82116fe9
640
+ test/resources/with-non-control-char-references.xml:
641
+ comments:
642
+ reviewed_by: josh.pencheon
643
+ safe_revision: 9a6cc769abce5f9bfa5b4f8bd5cda52dfe18b12b
628
644
  test/resources/xlsx_file_xls_extension.xls:
629
645
  comments:
630
646
  reviewed_by: timgentry
@@ -635,7 +651,7 @@ file safety:
635
651
  safe_revision: 3c7f827d17aacbf7b811eea67e27553f3b039070
636
652
  test/table_test.rb:
637
653
  comments:
638
- reviewed_by: ollietulloch
654
+ reviewed_by: josh.pencheon
639
655
  safe_revision: 3cf7473181f7f835b3dfe7822f6833d751805eaf
640
656
  test/test_helper.rb:
641
657
  comments:
@@ -643,8 +659,12 @@ file safety:
643
659
  safe_revision: 93ccee82fc2165d1ca2d9b03d146ae03e769ea96
644
660
  test/universal_importer_helper_test.rb:
645
661
  comments:
646
- reviewed_by: ollietulloch
662
+ reviewed_by: josh.pencheon
647
663
  safe_revision: 85869d99ae93252b7f3ef2d0a4db817c88d35c9e
664
+ test/xml/control_char_escaper_test.rb:
665
+ comments:
666
+ reviewed_by: josh.pencheon
667
+ safe_revision: 9a6cc769abce5f9bfa5b4f8bd5cda52dfe18b12b
648
668
  test/xml/table_test.rb:
649
669
  comments:
650
670
  reviewed_by: josh.pencheon
@@ -1,3 +1,4 @@
1
+ require 'ndr_import/xml/control_char_escaper'
1
2
  require 'ndr_support/safe_file'
2
3
  require 'ndr_support/utf8_encoding'
3
4
 
@@ -15,13 +16,16 @@ module NdrImport
15
16
  # in XML 1.1; any found are most likely to be erroneous.
16
17
  def read_xml_file(path, preserve_control_chars: false)
17
18
  file_data = ensure_utf8!(SafeFile.read(path))
18
- escape_xml_control_chars!(file_data) unless preserve_control_chars
19
19
 
20
20
  require 'nokogiri'
21
21
 
22
- doc = Nokogiri::XML(file_data, &:huge)
23
- doc.encoding = 'UTF-8'
24
- emulate_strict_mode_fatal_check!(doc)
22
+ doc = nil
23
+
24
+ escaping_control_chars_if_necessary(preserve_control_chars, file_data) do
25
+ doc = Nokogiri::XML(file_data, &:huge)
26
+ doc.encoding = 'UTF-8'
27
+ emulate_strict_mode_fatal_check!(doc)
28
+ end
25
29
 
26
30
  doc
27
31
  end
@@ -49,11 +53,19 @@ module NdrImport
49
53
  MSG
50
54
  end
51
55
 
52
- # In place, escape out any control chars that would cause
53
- # libxml to crash. Very few are allowable in XML 1.0, and
54
- # remain heavily discouraged in XML 1.1.
55
- def escape_xml_control_chars!(data)
56
- escape_control_chars!(data)
56
# Runs the given block, returning its value. Unless +preserve_control_chars+
# is truthy, a Nokogiri::XML::SyntaxError raised by the first run triggers
# one round of escaping on +file_data+ (via NdrImport::Xml::ControlCharEscaper)
# followed by a single re-run of the block. A syntax error from that second
# run is re-raised to the caller.
#
# When +preserve_control_chars+ is truthy the block runs exactly once and
# any error propagates untouched.
def escaping_control_chars_if_necessary(preserve_control_chars, file_data)
  return yield if preserve_control_chars

  attempts = 0
  begin
    attempts += 1
    yield
  rescue Nokogiri::XML::SyntaxError
    # Only one rescue attempt is allowed; a second failure is final.
    raise if attempts > 1

    # NOTE(review): relies on escape! rewriting file_data in place, so the
    # re-run of the block sees the escaped content - confirm against the escaper.
    NdrImport::Xml::ControlCharEscaper.new(file_data).escape!
    retry
  end
end
58
70
  end
59
71
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
  # This stores the current version of the NdrImport gem
3
3
  module NdrImport
4
- VERSION = '10.1'
4
+ VERSION = '10.1.1'
5
5
  end
@@ -0,0 +1,51 @@
1
+ require 'ndr_support/utf8_encoding'
2
+
3
module NdrImport
  module Xml
    # Escapes control characters held in a string of XML, covering both the
    # raw characters and numeric character references that stand for them
    # (decimal `&#NN;` or hexadecimal `&#xHH;`).
    #
    # NOTE(review): CONTROL_CHARACTERS and escape_control_chars! are not
    # defined here; presumably they come from the included UTF8Encoding
    # module - confirm against ndr_support.
    class ControlCharEscaper
      include UTF8Encoding

      # Numeric character references; the digits are captured under
      # `decimal` or `hex` depending on the form used.
      CHARACTER_REFERENCES = /&#(?:(?<decimal>\d+)|x(?<hex>\h+));/.freeze

      attr_reader :data

      # data - the String to be escaped; it is modified by #escape!.
      def initialize(data)
        @data = data
      end

      # Processes character references first, then hands the whole string
      # to escape_control_chars!, whose result is returned.
      def escape!
        unescape_control_char_references!(data)
        escape_control_chars!(data)
      end

      private

      # In place, swaps each character reference that decodes to a control
      # character for the escaped form of that character. All other
      # references are left exactly as they were written.
      def unescape_control_char_references!(data)
        data.gsub!(CHARACTER_REFERENCES) do |reference|
          char = try_to_extract_char_from(Regexp.last_match)
          next reference unless char&.match?(CONTROL_CHARACTERS)

          escape_control_chars!(char)
        end
      end

      # Decodes the matched reference into the character it denotes.
      # Returns nil when there is no match or neither capture is present,
      # and the full matched text when Integer#chr rejects the code point.
      def try_to_extract_char_from(match)
        return if match.nil?

        digits, base = match[:decimal] ? [match[:decimal], 10] : [match[:hex], 16]
        digits&.to_i(base)&.chr
      rescue RangeError
        # Out-of-range code point: hand back the reference text untouched.
        match.to_s
      end
    end
  end
end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ndr_import
3
3
  version: !ruby/object:Gem::Version
4
- version: '10.1'
4
+ version: 10.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - NCRS Development Team
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-03-08 00:00:00.000000000 Z
11
+ date: 2021-03-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activemodel
@@ -427,6 +427,7 @@ files:
427
427
  - lib/ndr_import/universal_importer_helper.rb
428
428
  - lib/ndr_import/unmapped_data_error.rb
429
429
  - lib/ndr_import/version.rb
430
+ - lib/ndr_import/xml/control_char_escaper.rb
430
431
  - lib/ndr_import/xml/table.rb
431
432
  - ndr_import.gemspec
432
433
  homepage: https://github.com/PublicHealthEngland/ndr_import