ndr_import 10.1 → 10.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -3
- data/code_safety.yml +33 -13
- data/lib/ndr_import/helpers/file/xml.rb +21 -9
- data/lib/ndr_import/version.rb +1 -1
- data/lib/ndr_import/xml/control_char_escaper.rb +51 -0
- metadata +3 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 2463ac35899a6db81e345b75b0ea10186530f559460dfca1211ba7694f52b760
|
|
4
|
+
data.tar.gz: 2f2cf39e959beeb3cfe6bcad033eb2c0695486dce3b640fe3de8b0c2b9b88a2f
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: c6eca601043ff01ebe910375a36131bdf55ebd3f18664e3db3c07180f007b0f06dc50cc3016634a12764befc1e28d621a09a7b382392f67608aa4d21c83c7f2d
|
|
7
|
+
data.tar.gz: dc568494bfc4b39b7ee47c7738511274cb85ad9532e380f3c27870682a1a21d2acda4d7d8d855597abdd0643b8afe0ed6e161b74f201fa53640b5c9641f895c5
|
data/CHANGELOG.md
CHANGED
|
@@ -1,10 +1,15 @@
|
|
|
1
1
|
## [Unreleased]
|
|
2
|
-
*
|
|
2
|
+
*no unreleased changes*
|
|
3
3
|
|
|
4
|
-
## 10.1 / 2021-03-
|
|
4
|
+
## 10.1.1 / 2021-03-15
|
|
5
|
+
### Fixed
|
|
6
|
+
* XML: ensure invalid control character *references* are also escaped (#64)
|
|
7
|
+
|
|
8
|
+
## 10.1.0 / 2021-03-08
|
|
9
|
+
### Added
|
|
5
10
|
* Allow optional `last_data_column` in NdrImport::Table mappings (#61)
|
|
6
11
|
|
|
7
|
-
## 10.0 / 2021-02-22
|
|
12
|
+
## 10.0.0 / 2021-02-22
|
|
8
13
|
### Changed
|
|
9
14
|
* By default, escape any control characters found in XML (#60)
|
|
10
15
|
|
data/code_safety.yml
CHANGED
|
@@ -26,8 +26,8 @@ file safety:
|
|
|
26
26
|
safe_revision: b09e268ff9c8349b914aa1b7ba888e1d39f97e4a
|
|
27
27
|
CHANGELOG.md:
|
|
28
28
|
comments:
|
|
29
|
-
reviewed_by:
|
|
30
|
-
safe_revision:
|
|
29
|
+
reviewed_by: josh.pencheon
|
|
30
|
+
safe_revision: 47fa3633ec2e48f1ee9fb12aad03e817e73c54bf
|
|
31
31
|
CODE_OF_CONDUCT.md:
|
|
32
32
|
comments:
|
|
33
33
|
reviewed_by: timgentry
|
|
@@ -238,8 +238,8 @@ file safety:
|
|
|
238
238
|
safe_revision: 45da71ebd3acbc0fe53755bcd75483ba17cb6924
|
|
239
239
|
lib/ndr_import/helpers/file/xml.rb:
|
|
240
240
|
comments:
|
|
241
|
-
reviewed_by:
|
|
242
|
-
safe_revision:
|
|
241
|
+
reviewed_by: josh.pencheon
|
|
242
|
+
safe_revision: 9a6cc769abce5f9bfa5b4f8bd5cda52dfe18b12b
|
|
243
243
|
lib/ndr_import/helpers/file/xml_streaming.rb:
|
|
244
244
|
comments: uses SafePath and Shellwords when accessing filesystem, or making system
|
|
245
245
|
calls
|
|
@@ -279,7 +279,7 @@ file safety:
|
|
|
279
279
|
safe_revision: bb44ade56a2151706eede2c31142440ccf49e6f6
|
|
280
280
|
lib/ndr_import/non_tabular/table.rb:
|
|
281
281
|
comments:
|
|
282
|
-
reviewed_by:
|
|
282
|
+
reviewed_by: josh.pencheon
|
|
283
283
|
safe_revision: f9df064adcfd38f09d83ad8c5496c84188faed98
|
|
284
284
|
lib/ndr_import/non_tabular_file_helper.rb:
|
|
285
285
|
comments:
|
|
@@ -295,11 +295,11 @@ file safety:
|
|
|
295
295
|
safe_revision: 3c7f827d17aacbf7b811eea67e27553f3b039070
|
|
296
296
|
lib/ndr_import/table.rb:
|
|
297
297
|
comments: uses File.basename
|
|
298
|
-
reviewed_by:
|
|
298
|
+
reviewed_by: josh.pencheon
|
|
299
299
|
safe_revision: 3cf7473181f7f835b3dfe7822f6833d751805eaf
|
|
300
300
|
lib/ndr_import/universal_importer_helper.rb:
|
|
301
301
|
comments:
|
|
302
|
-
reviewed_by:
|
|
302
|
+
reviewed_by: josh.pencheon
|
|
303
303
|
safe_revision: 85869d99ae93252b7f3ef2d0a4db817c88d35c9e
|
|
304
304
|
lib/ndr_import/unmapped_data_error.rb:
|
|
305
305
|
comments:
|
|
@@ -307,8 +307,12 @@ file safety:
|
|
|
307
307
|
safe_revision: 5cd2cd0b3a1e254d30d4acc28c6731825a1f84f5
|
|
308
308
|
lib/ndr_import/version.rb:
|
|
309
309
|
comments: another check?
|
|
310
|
-
reviewed_by:
|
|
311
|
-
safe_revision:
|
|
310
|
+
reviewed_by: josh.pencheon
|
|
311
|
+
safe_revision: 47fa3633ec2e48f1ee9fb12aad03e817e73c54bf
|
|
312
|
+
lib/ndr_import/xml/control_char_escaper.rb:
|
|
313
|
+
comments:
|
|
314
|
+
reviewed_by: josh.pencheon
|
|
315
|
+
safe_revision: 9a6cc769abce5f9bfa5b4f8bd5cda52dfe18b12b
|
|
312
316
|
lib/ndr_import/xml/table.rb:
|
|
313
317
|
comments:
|
|
314
318
|
reviewed_by: josh.pencheon
|
|
@@ -395,8 +399,8 @@ file safety:
|
|
|
395
399
|
safe_revision: ae75fb49baf028ac8ce08e4bedcd3625ff3ff0cd
|
|
396
400
|
test/helpers/file/xml_test.rb:
|
|
397
401
|
comments:
|
|
398
|
-
reviewed_by:
|
|
399
|
-
safe_revision:
|
|
402
|
+
reviewed_by: josh.pencheon
|
|
403
|
+
safe_revision: 9a6cc769abce5f9bfa5b4f8bd5cda52dfe18b12b
|
|
400
404
|
test/helpers/file/zip_test.rb:
|
|
401
405
|
comments:
|
|
402
406
|
reviewed_by: josh.pencheon
|
|
@@ -621,10 +625,22 @@ file safety:
|
|
|
621
625
|
comments:
|
|
622
626
|
reviewed_by: timgentry
|
|
623
627
|
safe_revision: f755c6960182f7dd460c18866cccfdf09178e860
|
|
628
|
+
test/resources/with-control-char-references-in-cdata.xml:
|
|
629
|
+
comments:
|
|
630
|
+
reviewed_by: josh.pencheon
|
|
631
|
+
safe_revision: 9a6cc769abce5f9bfa5b4f8bd5cda52dfe18b12b
|
|
632
|
+
test/resources/with-control-char-references.xml:
|
|
633
|
+
comments:
|
|
634
|
+
reviewed_by: josh.pencheon
|
|
635
|
+
safe_revision: 9a6cc769abce5f9bfa5b4f8bd5cda52dfe18b12b
|
|
624
636
|
test/resources/with-control-chars.xml:
|
|
625
637
|
comments:
|
|
626
638
|
reviewed_by: joshpencheon
|
|
627
639
|
safe_revision: 3947f13e0cbd17f449eba292ad343eeb82116fe9
|
|
640
|
+
test/resources/with-non-control-char-references.xml:
|
|
641
|
+
comments:
|
|
642
|
+
reviewed_by: josh.pencheon
|
|
643
|
+
safe_revision: 9a6cc769abce5f9bfa5b4f8bd5cda52dfe18b12b
|
|
628
644
|
test/resources/xlsx_file_xls_extension.xls:
|
|
629
645
|
comments:
|
|
630
646
|
reviewed_by: timgentry
|
|
@@ -635,7 +651,7 @@ file safety:
|
|
|
635
651
|
safe_revision: 3c7f827d17aacbf7b811eea67e27553f3b039070
|
|
636
652
|
test/table_test.rb:
|
|
637
653
|
comments:
|
|
638
|
-
reviewed_by:
|
|
654
|
+
reviewed_by: josh.pencheon
|
|
639
655
|
safe_revision: 3cf7473181f7f835b3dfe7822f6833d751805eaf
|
|
640
656
|
test/test_helper.rb:
|
|
641
657
|
comments:
|
|
@@ -643,8 +659,12 @@ file safety:
|
|
|
643
659
|
safe_revision: 93ccee82fc2165d1ca2d9b03d146ae03e769ea96
|
|
644
660
|
test/universal_importer_helper_test.rb:
|
|
645
661
|
comments:
|
|
646
|
-
reviewed_by:
|
|
662
|
+
reviewed_by: josh.pencheon
|
|
647
663
|
safe_revision: 85869d99ae93252b7f3ef2d0a4db817c88d35c9e
|
|
664
|
+
test/xml/control_char_escaper_test.rb:
|
|
665
|
+
comments:
|
|
666
|
+
reviewed_by: josh.pencheon
|
|
667
|
+
safe_revision: 9a6cc769abce5f9bfa5b4f8bd5cda52dfe18b12b
|
|
648
668
|
test/xml/table_test.rb:
|
|
649
669
|
comments:
|
|
650
670
|
reviewed_by: josh.pencheon
|
data/lib/ndr_import/helpers/file/xml.rb
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
require 'ndr_import/xml/control_char_escaper'
|
|
1
2
|
require 'ndr_support/safe_file'
|
|
2
3
|
require 'ndr_support/utf8_encoding'
|
|
3
4
|
|
|
@@ -15,13 +16,16 @@ module NdrImport
|
|
|
15
16
|
# in XML 1.1; any found are most likely to be erroneous.
|
|
16
17
|
def read_xml_file(path, preserve_control_chars: false)
|
|
17
18
|
file_data = ensure_utf8!(SafeFile.read(path))
|
|
18
|
-
escape_xml_control_chars!(file_data) unless preserve_control_chars
|
|
19
19
|
|
|
20
20
|
require 'nokogiri'
|
|
21
21
|
|
|
22
|
-
doc =
|
|
23
|
-
|
|
24
|
-
|
|
22
|
+
doc = nil
|
|
23
|
+
|
|
24
|
+
escaping_control_chars_if_necessary(preserve_control_chars, file_data) do
|
|
25
|
+
doc = Nokogiri::XML(file_data, &:huge)
|
|
26
|
+
doc.encoding = 'UTF-8'
|
|
27
|
+
emulate_strict_mode_fatal_check!(doc)
|
|
28
|
+
end
|
|
25
29
|
|
|
26
30
|
doc
|
|
27
31
|
end
|
|
@@ -49,11 +53,19 @@ module NdrImport
|
|
|
49
53
|
MSG
|
|
50
54
|
end
|
|
51
55
|
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
56
|
+
def escaping_control_chars_if_necessary(preserve_control_chars, file_data)
|
|
57
|
+
return yield if preserve_control_chars
|
|
58
|
+
|
|
59
|
+
tried_escaping = false
|
|
60
|
+
begin
|
|
61
|
+
yield
|
|
62
|
+
rescue Nokogiri::XML::SyntaxError => e
|
|
63
|
+
raise e if tried_escaping
|
|
64
|
+
|
|
65
|
+
NdrImport::Xml::ControlCharEscaper.new(file_data).escape!
|
|
66
|
+
tried_escaping = true
|
|
67
|
+
retry
|
|
68
|
+
end
|
|
57
69
|
end
|
|
58
70
|
end
|
|
59
71
|
end
|
data/lib/ndr_import/version.rb
CHANGED
data/lib/ndr_import/xml/control_char_escaper.rb
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
require 'ndr_support/utf8_encoding'
|
|
2
|
+
|
|
3
|
+
module NdrImport
|
|
4
|
+
module Xml
|
|
5
|
+
# A class to remove control characters, and XML entities representing them
|
|
6
|
+
class ControlCharEscaper
|
|
7
|
+
include UTF8Encoding
|
|
8
|
+
|
|
9
|
+
# Matches XML character reference entities
|
|
10
|
+
CHARACTER_REFERENCES = /&#(?:(?<decimal>\d+)|x(?<hex>\h+));/.freeze
|
|
11
|
+
|
|
12
|
+
attr_reader :data
|
|
13
|
+
|
|
14
|
+
def initialize(data)
|
|
15
|
+
@data = data
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
def escape!
|
|
19
|
+
unescape_control_char_references!(data)
|
|
20
|
+
escape_control_chars!(data)
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
private
|
|
24
|
+
|
|
25
|
+
def unescape_control_char_references!(data)
|
|
26
|
+
data.gsub!(CHARACTER_REFERENCES) do |reference|
|
|
27
|
+
char = try_to_extract_char_from(Regexp.last_match)
|
|
28
|
+
|
|
29
|
+
if char&.match?(CONTROL_CHARACTERS)
|
|
30
|
+
escape_control_chars!(char)
|
|
31
|
+
else
|
|
32
|
+
reference
|
|
33
|
+
end
|
|
34
|
+
end
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
def try_to_extract_char_from(match)
|
|
38
|
+
if match.nil?
|
|
39
|
+
nil
|
|
40
|
+
elsif match[:decimal]
|
|
41
|
+
match[:decimal].to_i(10).chr
|
|
42
|
+
elsif match[:hex]
|
|
43
|
+
match[:hex].to_i(16).chr
|
|
44
|
+
end
|
|
45
|
+
rescue RangeError
|
|
46
|
+
# Return everything if the match was against junk:
|
|
47
|
+
match.to_s
|
|
48
|
+
end
|
|
49
|
+
end
|
|
50
|
+
end
|
|
51
|
+
end
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: ndr_import
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version:
|
|
4
|
+
version: 10.1.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- NCRS Development Team
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2021-03-
|
|
11
|
+
date: 2021-03-15 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: activemodel
|
|
@@ -427,6 +427,7 @@ files:
|
|
|
427
427
|
- lib/ndr_import/universal_importer_helper.rb
|
|
428
428
|
- lib/ndr_import/unmapped_data_error.rb
|
|
429
429
|
- lib/ndr_import/version.rb
|
|
430
|
+
- lib/ndr_import/xml/control_char_escaper.rb
|
|
430
431
|
- lib/ndr_import/xml/table.rb
|
|
431
432
|
- ndr_import.gemspec
|
|
432
433
|
homepage: https://github.com/PublicHealthEngland/ndr_import
|