ndr_import 10.1 → 10.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -3
- data/code_safety.yml +33 -13
- data/lib/ndr_import/helpers/file/xml.rb +21 -9
- data/lib/ndr_import/version.rb +1 -1
- data/lib/ndr_import/xml/control_char_escaper.rb +51 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2463ac35899a6db81e345b75b0ea10186530f559460dfca1211ba7694f52b760
|
4
|
+
data.tar.gz: 2f2cf39e959beeb3cfe6bcad033eb2c0695486dce3b640fe3de8b0c2b9b88a2f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c6eca601043ff01ebe910375a36131bdf55ebd3f18664e3db3c07180f007b0f06dc50cc3016634a12764befc1e28d621a09a7b382392f67608aa4d21c83c7f2d
|
7
|
+
data.tar.gz: dc568494bfc4b39b7ee47c7738511274cb85ad9532e380f3c27870682a1a21d2acda4d7d8d855597abdd0643b8afe0ed6e161b74f201fa53640b5c9641f895c5
|
data/CHANGELOG.md
CHANGED
@@ -1,10 +1,15 @@
|
|
1
1
|
## [Unreleased]
|
2
|
-
*
|
2
|
+
*no unreleased changes*
|
3
3
|
|
4
|
-
## 10.1 / 2021-03-08
|
4
|
+
## 10.1.1 / 2021-03-15
|
5
|
+
### Fixed
|
6
|
+
* XML: ensure invalid control character *references* are also escaped (#64)
|
7
|
+
|
8
|
+
## 10.1.0 / 2021-03-08
|
9
|
+
### Added
|
5
10
|
* Allow optional `last_data_column` in NdrImport::Table mappings (#61)
|
6
11
|
|
7
|
-
## 10.0 / 2021-02-22
|
12
|
+
## 10.0.0 / 2021-02-22
|
8
13
|
### Changed
|
9
14
|
* By default, escape any control characters found in XML (#60)
|
10
15
|
|
data/code_safety.yml
CHANGED
@@ -26,8 +26,8 @@ file safety:
|
|
26
26
|
safe_revision: b09e268ff9c8349b914aa1b7ba888e1d39f97e4a
|
27
27
|
CHANGELOG.md:
|
28
28
|
comments:
|
29
|
-
reviewed_by:
|
30
|
-
safe_revision:
|
29
|
+
reviewed_by: josh.pencheon
|
30
|
+
safe_revision: 47fa3633ec2e48f1ee9fb12aad03e817e73c54bf
|
31
31
|
CODE_OF_CONDUCT.md:
|
32
32
|
comments:
|
33
33
|
reviewed_by: timgentry
|
@@ -238,8 +238,8 @@ file safety:
|
|
238
238
|
safe_revision: 45da71ebd3acbc0fe53755bcd75483ba17cb6924
|
239
239
|
lib/ndr_import/helpers/file/xml.rb:
|
240
240
|
comments:
|
241
|
-
reviewed_by:
|
242
|
-
safe_revision:
|
241
|
+
reviewed_by: josh.pencheon
|
242
|
+
safe_revision: 9a6cc769abce5f9bfa5b4f8bd5cda52dfe18b12b
|
243
243
|
lib/ndr_import/helpers/file/xml_streaming.rb:
|
244
244
|
comments: uses SafePath and Shellwords when accessing filesystem, or making system
|
245
245
|
calls
|
@@ -279,7 +279,7 @@ file safety:
|
|
279
279
|
safe_revision: bb44ade56a2151706eede2c31142440ccf49e6f6
|
280
280
|
lib/ndr_import/non_tabular/table.rb:
|
281
281
|
comments:
|
282
|
-
reviewed_by:
|
282
|
+
reviewed_by: josh.pencheon
|
283
283
|
safe_revision: f9df064adcfd38f09d83ad8c5496c84188faed98
|
284
284
|
lib/ndr_import/non_tabular_file_helper.rb:
|
285
285
|
comments:
|
@@ -295,11 +295,11 @@ file safety:
|
|
295
295
|
safe_revision: 3c7f827d17aacbf7b811eea67e27553f3b039070
|
296
296
|
lib/ndr_import/table.rb:
|
297
297
|
comments: uses File.basename
|
298
|
-
reviewed_by:
|
298
|
+
reviewed_by: josh.pencheon
|
299
299
|
safe_revision: 3cf7473181f7f835b3dfe7822f6833d751805eaf
|
300
300
|
lib/ndr_import/universal_importer_helper.rb:
|
301
301
|
comments:
|
302
|
-
reviewed_by:
|
302
|
+
reviewed_by: josh.pencheon
|
303
303
|
safe_revision: 85869d99ae93252b7f3ef2d0a4db817c88d35c9e
|
304
304
|
lib/ndr_import/unmapped_data_error.rb:
|
305
305
|
comments:
|
@@ -307,8 +307,12 @@ file safety:
|
|
307
307
|
safe_revision: 5cd2cd0b3a1e254d30d4acc28c6731825a1f84f5
|
308
308
|
lib/ndr_import/version.rb:
|
309
309
|
comments: another check?
|
310
|
-
reviewed_by:
|
311
|
-
safe_revision:
|
310
|
+
reviewed_by: josh.pencheon
|
311
|
+
safe_revision: 47fa3633ec2e48f1ee9fb12aad03e817e73c54bf
|
312
|
+
lib/ndr_import/xml/control_char_escaper.rb:
|
313
|
+
comments:
|
314
|
+
reviewed_by: josh.pencheon
|
315
|
+
safe_revision: 9a6cc769abce5f9bfa5b4f8bd5cda52dfe18b12b
|
312
316
|
lib/ndr_import/xml/table.rb:
|
313
317
|
comments:
|
314
318
|
reviewed_by: josh.pencheon
|
@@ -395,8 +399,8 @@ file safety:
|
|
395
399
|
safe_revision: ae75fb49baf028ac8ce08e4bedcd3625ff3ff0cd
|
396
400
|
test/helpers/file/xml_test.rb:
|
397
401
|
comments:
|
398
|
-
reviewed_by:
|
399
|
-
safe_revision:
|
402
|
+
reviewed_by: josh.pencheon
|
403
|
+
safe_revision: 9a6cc769abce5f9bfa5b4f8bd5cda52dfe18b12b
|
400
404
|
test/helpers/file/zip_test.rb:
|
401
405
|
comments:
|
402
406
|
reviewed_by: josh.pencheon
|
@@ -621,10 +625,22 @@ file safety:
|
|
621
625
|
comments:
|
622
626
|
reviewed_by: timgentry
|
623
627
|
safe_revision: f755c6960182f7dd460c18866cccfdf09178e860
|
628
|
+
test/resources/with-control-char-references-in-cdata.xml:
|
629
|
+
comments:
|
630
|
+
reviewed_by: josh.pencheon
|
631
|
+
safe_revision: 9a6cc769abce5f9bfa5b4f8bd5cda52dfe18b12b
|
632
|
+
test/resources/with-control-char-references.xml:
|
633
|
+
comments:
|
634
|
+
reviewed_by: josh.pencheon
|
635
|
+
safe_revision: 9a6cc769abce5f9bfa5b4f8bd5cda52dfe18b12b
|
624
636
|
test/resources/with-control-chars.xml:
|
625
637
|
comments:
|
626
638
|
reviewed_by: joshpencheon
|
627
639
|
safe_revision: 3947f13e0cbd17f449eba292ad343eeb82116fe9
|
640
|
+
test/resources/with-non-control-char-references.xml:
|
641
|
+
comments:
|
642
|
+
reviewed_by: josh.pencheon
|
643
|
+
safe_revision: 9a6cc769abce5f9bfa5b4f8bd5cda52dfe18b12b
|
628
644
|
test/resources/xlsx_file_xls_extension.xls:
|
629
645
|
comments:
|
630
646
|
reviewed_by: timgentry
|
@@ -635,7 +651,7 @@ file safety:
|
|
635
651
|
safe_revision: 3c7f827d17aacbf7b811eea67e27553f3b039070
|
636
652
|
test/table_test.rb:
|
637
653
|
comments:
|
638
|
-
reviewed_by:
|
654
|
+
reviewed_by: josh.pencheon
|
639
655
|
safe_revision: 3cf7473181f7f835b3dfe7822f6833d751805eaf
|
640
656
|
test/test_helper.rb:
|
641
657
|
comments:
|
@@ -643,8 +659,12 @@ file safety:
|
|
643
659
|
safe_revision: 93ccee82fc2165d1ca2d9b03d146ae03e769ea96
|
644
660
|
test/universal_importer_helper_test.rb:
|
645
661
|
comments:
|
646
|
-
reviewed_by:
|
662
|
+
reviewed_by: josh.pencheon
|
647
663
|
safe_revision: 85869d99ae93252b7f3ef2d0a4db817c88d35c9e
|
664
|
+
test/xml/control_char_escaper_test.rb:
|
665
|
+
comments:
|
666
|
+
reviewed_by: josh.pencheon
|
667
|
+
safe_revision: 9a6cc769abce5f9bfa5b4f8bd5cda52dfe18b12b
|
648
668
|
test/xml/table_test.rb:
|
649
669
|
comments:
|
650
670
|
reviewed_by: josh.pencheon
|
data/lib/ndr_import/helpers/file/xml.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
require 'ndr_import/xml/control_char_escaper'
|
1
2
|
require 'ndr_support/safe_file'
|
2
3
|
require 'ndr_support/utf8_encoding'
|
3
4
|
|
@@ -15,13 +16,16 @@ module NdrImport
|
|
15
16
|
# in XML 1.1; any found are most likely to be erroneous.
|
16
17
|
def read_xml_file(path, preserve_control_chars: false)
|
17
18
|
file_data = ensure_utf8!(SafeFile.read(path))
|
18
|
-
escape_xml_control_chars!(file_data) unless preserve_control_chars
|
19
19
|
|
20
20
|
require 'nokogiri'
|
21
21
|
|
22
|
-
doc =
|
23
|
-
|
24
|
-
|
22
|
+
doc = nil
|
23
|
+
|
24
|
+
escaping_control_chars_if_necessary(preserve_control_chars, file_data) do
|
25
|
+
doc = Nokogiri::XML(file_data, &:huge)
|
26
|
+
doc.encoding = 'UTF-8'
|
27
|
+
emulate_strict_mode_fatal_check!(doc)
|
28
|
+
end
|
25
29
|
|
26
30
|
doc
|
27
31
|
end
|
@@ -49,11 +53,19 @@ module NdrImport
|
|
49
53
|
MSG
|
50
54
|
end
|
51
55
|
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
56
|
+
def escaping_control_chars_if_necessary(preserve_control_chars, file_data)
|
57
|
+
return yield if preserve_control_chars
|
58
|
+
|
59
|
+
tried_escaping = false
|
60
|
+
begin
|
61
|
+
yield
|
62
|
+
rescue Nokogiri::XML::SyntaxError => e
|
63
|
+
raise e if tried_escaping
|
64
|
+
|
65
|
+
NdrImport::Xml::ControlCharEscaper.new(file_data).escape!
|
66
|
+
tried_escaping = true
|
67
|
+
retry
|
68
|
+
end
|
57
69
|
end
|
58
70
|
end
|
59
71
|
end
|
data/lib/ndr_import/version.rb
CHANGED
data/lib/ndr_import/xml/control_char_escaper.rb
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
require 'ndr_support/utf8_encoding'
|
2
|
+
|
3
|
+
module NdrImport
|
4
|
+
module Xml
|
5
|
+
# A class to remove control characters, and XML entities representing them
|
6
|
+
class ControlCharEscaper
|
7
|
+
include UTF8Encoding
|
8
|
+
|
9
|
+
# Matches XML character reference entities
|
10
|
+
CHARACTER_REFERENCES = /&#(?:(?<decimal>\d+)|x(?<hex>\h+));/.freeze
|
11
|
+
|
12
|
+
attr_reader :data
|
13
|
+
|
14
|
+
def initialize(data)
|
15
|
+
@data = data
|
16
|
+
end
|
17
|
+
|
18
|
+
def escape!
|
19
|
+
unescape_control_char_references!(data)
|
20
|
+
escape_control_chars!(data)
|
21
|
+
end
|
22
|
+
|
23
|
+
private
|
24
|
+
|
25
|
+
def unescape_control_char_references!(data)
|
26
|
+
data.gsub!(CHARACTER_REFERENCES) do |reference|
|
27
|
+
char = try_to_extract_char_from(Regexp.last_match)
|
28
|
+
|
29
|
+
if char&.match?(CONTROL_CHARACTERS)
|
30
|
+
escape_control_chars!(char)
|
31
|
+
else
|
32
|
+
reference
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
def try_to_extract_char_from(match)
|
38
|
+
if match.nil?
|
39
|
+
nil
|
40
|
+
elsif match[:decimal]
|
41
|
+
match[:decimal].to_i(10).chr
|
42
|
+
elsif match[:hex]
|
43
|
+
match[:hex].to_i(16).chr
|
44
|
+
end
|
45
|
+
rescue RangeError
|
46
|
+
# Return everything if the match was against junk:
|
47
|
+
match.to_s
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ndr_import
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 10.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- NCRS Development Team
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-03-
|
11
|
+
date: 2021-03-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activemodel
|
@@ -427,6 +427,7 @@ files:
|
|
427
427
|
- lib/ndr_import/universal_importer_helper.rb
|
428
428
|
- lib/ndr_import/unmapped_data_error.rb
|
429
429
|
- lib/ndr_import/version.rb
|
430
|
+
- lib/ndr_import/xml/control_char_escaper.rb
|
430
431
|
- lib/ndr_import/xml/table.rb
|
431
432
|
- ndr_import.gemspec
|
432
433
|
homepage: https://github.com/PublicHealthEngland/ndr_import
|