ndr_import 10.1 → 10.1.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 16cdebb2e3a6809255d5d66971a2db6d5c954731f6def3c73fe0f89d8ea0b7e9
4
- data.tar.gz: 575ba2c40ae01f99ebd48f75ff42c2cdf40871cbed87bfb2399de45d9f402e73
3
+ metadata.gz: 2463ac35899a6db81e345b75b0ea10186530f559460dfca1211ba7694f52b760
4
+ data.tar.gz: 2f2cf39e959beeb3cfe6bcad033eb2c0695486dce3b640fe3de8b0c2b9b88a2f
5
5
  SHA512:
6
- metadata.gz: 37e9bcfe3b8a5cab98bd68fc846eafd386595e0b6b79b9b4bf10ba8bc44a6d24ff998f5ab249964009bb0eebebfe0700bd6a39ddd0dfefc1260b529ff5543e63
7
- data.tar.gz: c144108ad4d2f63c918e43ef540f30d49be26897d6c4d82bccda6128aaff5b408c3d93425612bedaef73f865292a3d2a408f0c792e35a9f548e50363804c8d10
6
+ metadata.gz: c6eca601043ff01ebe910375a36131bdf55ebd3f18664e3db3c07180f007b0f06dc50cc3016634a12764befc1e28d621a09a7b382392f67608aa4d21c83c7f2d
7
+ data.tar.gz: dc568494bfc4b39b7ee47c7738511274cb85ad9532e380f3c27870682a1a21d2acda4d7d8d855597abdd0643b8afe0ed6e161b74f201fa53640b5c9641f895c5
data/CHANGELOG.md CHANGED
@@ -1,10 +1,15 @@
1
1
  ## [Unreleased]
2
- * no relevant changes
2
+ *no unreleased changes*
3
3
 
4
- ## 10.1 / 2021-03-08
4
+ ## 10.1.1 / 2021-03-15
5
+ ### Fixed
6
+ * XML: ensure invalid control character *references* are also escaped (#64)
7
+
8
+ ## 10.1.0 / 2021-03-08
9
+ ### Added
5
10
  * Allow optional `last_data_column` in NdrImport::Table mappings (#61)
6
11
 
7
- ## 10.0 / 2021-02-22
12
+ ## 10.0.0 / 2021-02-22
8
13
  ### Changed
9
14
  * By default, escape any control characters found in XML (#60)
10
15
 
data/code_safety.yml CHANGED
@@ -26,8 +26,8 @@ file safety:
26
26
  safe_revision: b09e268ff9c8349b914aa1b7ba888e1d39f97e4a
27
27
  CHANGELOG.md:
28
28
  comments:
29
- reviewed_by: ollietulloch
30
- safe_revision: 2d093cc57a699b527a7d0159e77b91f4409a6e0b
29
+ reviewed_by: josh.pencheon
30
+ safe_revision: 47fa3633ec2e48f1ee9fb12aad03e817e73c54bf
31
31
  CODE_OF_CONDUCT.md:
32
32
  comments:
33
33
  reviewed_by: timgentry
@@ -238,8 +238,8 @@ file safety:
238
238
  safe_revision: 45da71ebd3acbc0fe53755bcd75483ba17cb6924
239
239
  lib/ndr_import/helpers/file/xml.rb:
240
240
  comments:
241
- reviewed_by: joshpencheon
242
- safe_revision: 3947f13e0cbd17f449eba292ad343eeb82116fe9
241
+ reviewed_by: josh.pencheon
242
+ safe_revision: 9a6cc769abce5f9bfa5b4f8bd5cda52dfe18b12b
243
243
  lib/ndr_import/helpers/file/xml_streaming.rb:
244
244
  comments: uses SafePath and Shellwords when accessing filesystem, or making system
245
245
  calls
@@ -279,7 +279,7 @@ file safety:
279
279
  safe_revision: bb44ade56a2151706eede2c31142440ccf49e6f6
280
280
  lib/ndr_import/non_tabular/table.rb:
281
281
  comments:
282
- reviewed_by: ollietulloch
282
+ reviewed_by: josh.pencheon
283
283
  safe_revision: f9df064adcfd38f09d83ad8c5496c84188faed98
284
284
  lib/ndr_import/non_tabular_file_helper.rb:
285
285
  comments:
@@ -295,11 +295,11 @@ file safety:
295
295
  safe_revision: 3c7f827d17aacbf7b811eea67e27553f3b039070
296
296
  lib/ndr_import/table.rb:
297
297
  comments: uses File.basename
298
- reviewed_by: ollietulloch
298
+ reviewed_by: josh.pencheon
299
299
  safe_revision: 3cf7473181f7f835b3dfe7822f6833d751805eaf
300
300
  lib/ndr_import/universal_importer_helper.rb:
301
301
  comments:
302
- reviewed_by: ollietulloch
302
+ reviewed_by: josh.pencheon
303
303
  safe_revision: 85869d99ae93252b7f3ef2d0a4db817c88d35c9e
304
304
  lib/ndr_import/unmapped_data_error.rb:
305
305
  comments:
@@ -307,8 +307,12 @@ file safety:
307
307
  safe_revision: 5cd2cd0b3a1e254d30d4acc28c6731825a1f84f5
308
308
  lib/ndr_import/version.rb:
309
309
  comments: another check?
310
- reviewed_by: ollietulloch
311
- safe_revision: 2d093cc57a699b527a7d0159e77b91f4409a6e0b
310
+ reviewed_by: josh.pencheon
311
+ safe_revision: 47fa3633ec2e48f1ee9fb12aad03e817e73c54bf
312
+ lib/ndr_import/xml/control_char_escaper.rb:
313
+ comments:
314
+ reviewed_by: josh.pencheon
315
+ safe_revision: 9a6cc769abce5f9bfa5b4f8bd5cda52dfe18b12b
312
316
  lib/ndr_import/xml/table.rb:
313
317
  comments:
314
318
  reviewed_by: josh.pencheon
@@ -395,8 +399,8 @@ file safety:
395
399
  safe_revision: ae75fb49baf028ac8ce08e4bedcd3625ff3ff0cd
396
400
  test/helpers/file/xml_test.rb:
397
401
  comments:
398
- reviewed_by: joshpencheon
399
- safe_revision: 3947f13e0cbd17f449eba292ad343eeb82116fe9
402
+ reviewed_by: josh.pencheon
403
+ safe_revision: 9a6cc769abce5f9bfa5b4f8bd5cda52dfe18b12b
400
404
  test/helpers/file/zip_test.rb:
401
405
  comments:
402
406
  reviewed_by: josh.pencheon
@@ -621,10 +625,22 @@ file safety:
621
625
  comments:
622
626
  reviewed_by: timgentry
623
627
  safe_revision: f755c6960182f7dd460c18866cccfdf09178e860
628
+ test/resources/with-control-char-references-in-cdata.xml:
629
+ comments:
630
+ reviewed_by: josh.pencheon
631
+ safe_revision: 9a6cc769abce5f9bfa5b4f8bd5cda52dfe18b12b
632
+ test/resources/with-control-char-references.xml:
633
+ comments:
634
+ reviewed_by: josh.pencheon
635
+ safe_revision: 9a6cc769abce5f9bfa5b4f8bd5cda52dfe18b12b
624
636
  test/resources/with-control-chars.xml:
625
637
  comments:
626
638
  reviewed_by: joshpencheon
627
639
  safe_revision: 3947f13e0cbd17f449eba292ad343eeb82116fe9
640
+ test/resources/with-non-control-char-references.xml:
641
+ comments:
642
+ reviewed_by: josh.pencheon
643
+ safe_revision: 9a6cc769abce5f9bfa5b4f8bd5cda52dfe18b12b
628
644
  test/resources/xlsx_file_xls_extension.xls:
629
645
  comments:
630
646
  reviewed_by: timgentry
@@ -635,7 +651,7 @@ file safety:
635
651
  safe_revision: 3c7f827d17aacbf7b811eea67e27553f3b039070
636
652
  test/table_test.rb:
637
653
  comments:
638
- reviewed_by: ollietulloch
654
+ reviewed_by: josh.pencheon
639
655
  safe_revision: 3cf7473181f7f835b3dfe7822f6833d751805eaf
640
656
  test/test_helper.rb:
641
657
  comments:
@@ -643,8 +659,12 @@ file safety:
643
659
  safe_revision: 93ccee82fc2165d1ca2d9b03d146ae03e769ea96
644
660
  test/universal_importer_helper_test.rb:
645
661
  comments:
646
- reviewed_by: ollietulloch
662
+ reviewed_by: josh.pencheon
647
663
  safe_revision: 85869d99ae93252b7f3ef2d0a4db817c88d35c9e
664
+ test/xml/control_char_escaper_test.rb:
665
+ comments:
666
+ reviewed_by: josh.pencheon
667
+ safe_revision: 9a6cc769abce5f9bfa5b4f8bd5cda52dfe18b12b
648
668
  test/xml/table_test.rb:
649
669
  comments:
650
670
  reviewed_by: josh.pencheon
@@ -1,3 +1,4 @@
1
+ require 'ndr_import/xml/control_char_escaper'
1
2
  require 'ndr_support/safe_file'
2
3
  require 'ndr_support/utf8_encoding'
3
4
 
@@ -15,13 +16,16 @@ module NdrImport
15
16
  # in XML 1.1; any found are most likely to be erroneous.
16
17
  def read_xml_file(path, preserve_control_chars: false)
17
18
  file_data = ensure_utf8!(SafeFile.read(path))
18
- escape_xml_control_chars!(file_data) unless preserve_control_chars
19
19
 
20
20
  require 'nokogiri'
21
21
 
22
- doc = Nokogiri::XML(file_data, &:huge)
23
- doc.encoding = 'UTF-8'
24
- emulate_strict_mode_fatal_check!(doc)
22
+ doc = nil
23
+
24
+ escaping_control_chars_if_necessary(preserve_control_chars, file_data) do
25
+ doc = Nokogiri::XML(file_data, &:huge)
26
+ doc.encoding = 'UTF-8'
27
+ emulate_strict_mode_fatal_check!(doc)
28
+ end
25
29
 
26
30
  doc
27
31
  end
@@ -49,11 +53,19 @@ module NdrImport
49
53
  MSG
50
54
  end
51
55
 
52
- # In place, escape out any control chars that would cause
53
- # libxml to crash. Very few are allowable in XML 1.0, and
54
- # remain heavily discouraged in XML 1.1.
55
- def escape_xml_control_chars!(data)
56
- escape_control_chars!(data)
56
+ def escaping_control_chars_if_necessary(preserve_control_chars, file_data)
57
+ return yield if preserve_control_chars
58
+
59
+ tried_escaping = false
60
+ begin
61
+ yield
62
+ rescue Nokogiri::XML::SyntaxError => e
63
+ raise e if tried_escaping
64
+
65
+ NdrImport::Xml::ControlCharEscaper.new(file_data).escape!
66
+ tried_escaping = true
67
+ retry
68
+ end
57
69
  end
58
70
  end
59
71
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
  # This stores the current version of the NdrImport gem
3
3
  module NdrImport
4
- VERSION = '10.1'
4
+ VERSION = '10.1.1'
5
5
  end
@@ -0,0 +1,51 @@
1
+ require 'ndr_support/utf8_encoding'
2
+
3
+ module NdrImport
4
+ module Xml
5
+ # A class to remove control characters, and XML entities representing them
6
+ class ControlCharEscaper
7
+ include UTF8Encoding
8
+
9
+ # Matches XML character reference entities
10
+ CHARACTER_REFERENCES = /&#(?:(?<decimal>\d+)|x(?<hex>\h+));/.freeze
11
+
12
+ attr_reader :data
13
+
14
+ def initialize(data)
15
+ @data = data
16
+ end
17
+
18
+ def escape!
19
+ unescape_control_char_references!(data)
20
+ escape_control_chars!(data)
21
+ end
22
+
23
+ private
24
+
25
+ def unescape_control_char_references!(data)
26
+ data.gsub!(CHARACTER_REFERENCES) do |reference|
27
+ char = try_to_extract_char_from(Regexp.last_match)
28
+
29
+ if char&.match?(CONTROL_CHARACTERS)
30
+ escape_control_chars!(char)
31
+ else
32
+ reference
33
+ end
34
+ end
35
+ end
36
+
37
+ def try_to_extract_char_from(match)
38
+ if match.nil?
39
+ nil
40
+ elsif match[:decimal]
41
+ match[:decimal].to_i(10).chr
42
+ elsif match[:hex]
43
+ match[:hex].to_i(16).chr
44
+ end
45
+ rescue RangeError
46
+ # Return everything if the match was against junk:
47
+ match.to_s
48
+ end
49
+ end
50
+ end
51
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ndr_import
3
3
  version: !ruby/object:Gem::Version
4
- version: '10.1'
4
+ version: 10.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - NCRS Development Team
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-03-08 00:00:00.000000000 Z
11
+ date: 2021-03-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activemodel
@@ -427,6 +427,7 @@ files:
427
427
  - lib/ndr_import/universal_importer_helper.rb
428
428
  - lib/ndr_import/unmapped_data_error.rb
429
429
  - lib/ndr_import/version.rb
430
+ - lib/ndr_import/xml/control_char_escaper.rb
430
431
  - lib/ndr_import/xml/table.rb
431
432
  - ndr_import.gemspec
432
433
  homepage: https://github.com/PublicHealthEngland/ndr_import