ndr_import 9.1.0 → 10.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/test.yml +8 -1
- data/CHANGELOG.md +17 -0
- data/code_safety.yml +48 -24
- data/lib/ndr_import/helpers/file/xml.rb +28 -6
- data/lib/ndr_import/non_tabular/table.rb +2 -2
- data/lib/ndr_import/table.rb +26 -3
- data/lib/ndr_import/universal_importer_helper.rb +1 -3
- data/lib/ndr_import/version.rb +1 -1
- data/lib/ndr_import/xml/control_char_escaper.rb +51 -0
- data/ndr_import.gemspec +1 -1
- metadata +5 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: '0080a6cf08b7832f1af6261ff2c774a0fd8629b02c363869ac852389f239731c'
|
4
|
+
data.tar.gz: f1f986e3e1d59d65cb260cd555e5c17716228cc3ba61ff58fc2df40fc9abb19e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: bf67d9b21781db778b92e245af52182dab5e1cc41d1637a87f82df953aaeae017fc14254b3e3caa3dcb9e6459489e3661f5610277f04af79e6aaedd00109b299
|
7
|
+
data.tar.gz: '08dc2fe4d3dc4b0dcd34cc361285f04eeacf11afb244fc117d1b6abc125218305f092676c89b94547aa9f6711bcc9682671a74e0c5ed39714e8651b98ad9dfc6'
|
data/.github/workflows/test.yml
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,6 +1,23 @@
|
|
1
1
|
## [Unreleased]
|
2
2
|
*no unreleased changes*
|
3
3
|
|
4
|
+
## 10.1.2 / 2021-09-28
|
5
|
+
### Fixed
|
6
|
+
* Bump to `nokogiri` to address CVE-2021-41098
|
7
|
+
* Bump `seven_zip_ruby` requirement for Ruby 2.7 support
|
8
|
+
|
9
|
+
## 10.1.1 / 2021-03-15
|
10
|
+
### Fixed
|
11
|
+
* XML: ensure invalid control character *references* are also escaped (#64)
|
12
|
+
|
13
|
+
## 10.1.0 / 2021-03-08
|
14
|
+
### Added
|
15
|
+
* Allow optional `last_data_column` in NdrImport::Table mappings (#61)
|
16
|
+
|
17
|
+
## 10.0.0 / 2021-02-22
|
18
|
+
### Changed
|
19
|
+
* By default, escape any control characters found in XML (#60)
|
20
|
+
|
4
21
|
## 9.1.0 / 2021-02-01
|
5
22
|
### Added
|
6
23
|
* `CSVLibrary` is now deprecated.
|
data/code_safety.yml
CHANGED
@@ -10,8 +10,8 @@ file safety:
|
|
10
10
|
safe_revision: b64ff21375dcde2b8fefe622ee9861f0fea21487
|
11
11
|
".github/workflows/test.yml":
|
12
12
|
comments:
|
13
|
-
reviewed_by:
|
14
|
-
safe_revision:
|
13
|
+
reviewed_by: ollietulllch
|
14
|
+
safe_revision: c3dd24e8abefe61f04fa9d3bb71ec9d0ac109bbe
|
15
15
|
".gitignore":
|
16
16
|
comments: whole file re-reviewed
|
17
17
|
reviewed_by: josh.pencheon
|
@@ -27,7 +27,7 @@ file safety:
|
|
27
27
|
CHANGELOG.md:
|
28
28
|
comments:
|
29
29
|
reviewed_by: ollietulloch
|
30
|
-
safe_revision:
|
30
|
+
safe_revision: e938689d115b75074313541ec9d6b2bc60475add
|
31
31
|
CODE_OF_CONDUCT.md:
|
32
32
|
comments:
|
33
33
|
reviewed_by: timgentry
|
@@ -67,7 +67,7 @@ file safety:
|
|
67
67
|
docs/Gemfile.lock:
|
68
68
|
comments:
|
69
69
|
reviewed_by: ollietulloch
|
70
|
-
safe_revision:
|
70
|
+
safe_revision: 75877ddc14e99a10e5e751b7034964a9c7a9d1ef
|
71
71
|
docs/_config.yml:
|
72
72
|
comments:
|
73
73
|
reviewed_by: josh.pencheon
|
@@ -238,8 +238,8 @@ file safety:
|
|
238
238
|
safe_revision: 45da71ebd3acbc0fe53755bcd75483ba17cb6924
|
239
239
|
lib/ndr_import/helpers/file/xml.rb:
|
240
240
|
comments:
|
241
|
-
reviewed_by:
|
242
|
-
safe_revision:
|
241
|
+
reviewed_by: josh.pencheon
|
242
|
+
safe_revision: 9a6cc769abce5f9bfa5b4f8bd5cda52dfe18b12b
|
243
243
|
lib/ndr_import/helpers/file/xml_streaming.rb:
|
244
244
|
comments: uses SafePath and Shellwords when accessing filesystem, or making system
|
245
245
|
calls
|
@@ -279,8 +279,8 @@ file safety:
|
|
279
279
|
safe_revision: bb44ade56a2151706eede2c31142440ccf49e6f6
|
280
280
|
lib/ndr_import/non_tabular/table.rb:
|
281
281
|
comments:
|
282
|
-
reviewed_by:
|
283
|
-
safe_revision:
|
282
|
+
reviewed_by: ollietulloch
|
283
|
+
safe_revision: 66cff59af2f078152f7459c436d51b57cb93f28e
|
284
284
|
lib/ndr_import/non_tabular_file_helper.rb:
|
285
285
|
comments:
|
286
286
|
reviewed_by: josh.pencheon
|
@@ -295,12 +295,12 @@ file safety:
|
|
295
295
|
safe_revision: 3c7f827d17aacbf7b811eea67e27553f3b039070
|
296
296
|
lib/ndr_import/table.rb:
|
297
297
|
comments: uses File.basename
|
298
|
-
reviewed_by:
|
299
|
-
safe_revision:
|
298
|
+
reviewed_by: ollietulloch
|
299
|
+
safe_revision: 66cff59af2f078152f7459c436d51b57cb93f28e
|
300
300
|
lib/ndr_import/universal_importer_helper.rb:
|
301
301
|
comments:
|
302
|
-
reviewed_by:
|
303
|
-
safe_revision:
|
302
|
+
reviewed_by: josh.pencheon
|
303
|
+
safe_revision: 85869d99ae93252b7f3ef2d0a4db817c88d35c9e
|
304
304
|
lib/ndr_import/unmapped_data_error.rb:
|
305
305
|
comments:
|
306
306
|
reviewed_by: josh.pencheon
|
@@ -308,15 +308,19 @@ file safety:
|
|
308
308
|
lib/ndr_import/version.rb:
|
309
309
|
comments: another check?
|
310
310
|
reviewed_by: ollietulloch
|
311
|
-
safe_revision:
|
311
|
+
safe_revision: da0aed5e8a8659c5f70254f6d76264f8e780d835
|
312
|
+
lib/ndr_import/xml/control_char_escaper.rb:
|
313
|
+
comments:
|
314
|
+
reviewed_by: josh.pencheon
|
315
|
+
safe_revision: 9a6cc769abce5f9bfa5b4f8bd5cda52dfe18b12b
|
312
316
|
lib/ndr_import/xml/table.rb:
|
313
317
|
comments:
|
314
318
|
reviewed_by: josh.pencheon
|
315
319
|
safe_revision: 4ab72f84201c2d5f0147b7dfd041f488f6ff0422
|
316
320
|
ndr_import.gemspec:
|
317
321
|
comments:
|
318
|
-
reviewed_by:
|
319
|
-
safe_revision:
|
322
|
+
reviewed_by: ollietulloch
|
323
|
+
safe_revision: 3b437f0ca271fa962121edecd4559017c2446a3a
|
320
324
|
test/csv_library_test.rb:
|
321
325
|
comments:
|
322
326
|
reviewed_by: ollietulloch
|
@@ -395,8 +399,8 @@ file safety:
|
|
395
399
|
safe_revision: ae75fb49baf028ac8ce08e4bedcd3625ff3ff0cd
|
396
400
|
test/helpers/file/xml_test.rb:
|
397
401
|
comments:
|
398
|
-
reviewed_by:
|
399
|
-
safe_revision:
|
402
|
+
reviewed_by: josh.pencheon
|
403
|
+
safe_revision: 9a6cc769abce5f9bfa5b4f8bd5cda52dfe18b12b
|
400
404
|
test/helpers/file/zip_test.rb:
|
401
405
|
comments:
|
402
406
|
reviewed_by: josh.pencheon
|
@@ -507,8 +511,8 @@ file safety:
|
|
507
511
|
safe_revision: 71979e0a602ca5a0ce415c194f10add9959f0116
|
508
512
|
test/resources/malformed.xml:
|
509
513
|
comments:
|
510
|
-
reviewed_by:
|
511
|
-
safe_revision:
|
514
|
+
reviewed_by: joshpencheon
|
515
|
+
safe_revision: 3947f13e0cbd17f449eba292ad343eeb82116fe9
|
512
516
|
test/resources/malformed_pipe.csv:
|
513
517
|
comments:
|
514
518
|
reviewed_by: josh.pencheon
|
@@ -621,6 +625,22 @@ file safety:
|
|
621
625
|
comments:
|
622
626
|
reviewed_by: timgentry
|
623
627
|
safe_revision: f755c6960182f7dd460c18866cccfdf09178e860
|
628
|
+
test/resources/with-control-char-references-in-cdata.xml:
|
629
|
+
comments:
|
630
|
+
reviewed_by: josh.pencheon
|
631
|
+
safe_revision: 9a6cc769abce5f9bfa5b4f8bd5cda52dfe18b12b
|
632
|
+
test/resources/with-control-char-references.xml:
|
633
|
+
comments:
|
634
|
+
reviewed_by: josh.pencheon
|
635
|
+
safe_revision: 9a6cc769abce5f9bfa5b4f8bd5cda52dfe18b12b
|
636
|
+
test/resources/with-control-chars.xml:
|
637
|
+
comments:
|
638
|
+
reviewed_by: joshpencheon
|
639
|
+
safe_revision: 3947f13e0cbd17f449eba292ad343eeb82116fe9
|
640
|
+
test/resources/with-non-control-char-references.xml:
|
641
|
+
comments:
|
642
|
+
reviewed_by: josh.pencheon
|
643
|
+
safe_revision: 9a6cc769abce5f9bfa5b4f8bd5cda52dfe18b12b
|
624
644
|
test/resources/xlsx_file_xls_extension.xls:
|
625
645
|
comments:
|
626
646
|
reviewed_by: timgentry
|
@@ -632,16 +652,20 @@ file safety:
|
|
632
652
|
test/table_test.rb:
|
633
653
|
comments:
|
634
654
|
reviewed_by: josh.pencheon
|
635
|
-
safe_revision:
|
655
|
+
safe_revision: 3cf7473181f7f835b3dfe7822f6833d751805eaf
|
636
656
|
test/test_helper.rb:
|
637
657
|
comments:
|
638
658
|
reviewed_by: josh.pencheon
|
639
659
|
safe_revision: 93ccee82fc2165d1ca2d9b03d146ae03e769ea96
|
640
660
|
test/universal_importer_helper_test.rb:
|
641
661
|
comments:
|
642
|
-
reviewed_by:
|
643
|
-
safe_revision:
|
644
|
-
test/xml/
|
662
|
+
reviewed_by: josh.pencheon
|
663
|
+
safe_revision: 85869d99ae93252b7f3ef2d0a4db817c88d35c9e
|
664
|
+
test/xml/control_char_escaper_test.rb:
|
645
665
|
comments:
|
646
666
|
reviewed_by: josh.pencheon
|
647
|
-
safe_revision:
|
667
|
+
safe_revision: 9a6cc769abce5f9bfa5b4f8bd5cda52dfe18b12b
|
668
|
+
test/xml/table_test.rb:
|
669
|
+
comments:
|
670
|
+
reviewed_by: ollietulloch
|
671
|
+
safe_revision: 66cff59af2f078152f7459c436d51b57cb93f28e
|
@@ -1,3 +1,4 @@
|
|
1
|
+
require 'ndr_import/xml/control_char_escaper'
|
1
2
|
require 'ndr_support/safe_file'
|
2
3
|
require 'ndr_support/utf8_encoding'
|
3
4
|
|
@@ -10,16 +11,21 @@ module NdrImport
|
|
10
11
|
|
11
12
|
private
|
12
13
|
|
13
|
-
|
14
|
-
|
14
|
+
# By default, escapes any control characters found in the XML
|
15
|
+
# - their use is forbidden in XML 1.0, and highly discouraged
|
16
|
+
# in XML 1.1; any found are most likely to be erroneous.
|
17
|
+
def read_xml_file(path, preserve_control_chars: false)
|
18
|
+
file_data = ensure_utf8!(SafeFile.read(path))
|
15
19
|
|
16
20
|
require 'nokogiri'
|
17
21
|
|
18
|
-
doc =
|
19
|
-
|
22
|
+
doc = nil
|
23
|
+
|
24
|
+
escaping_control_chars_if_necessary(preserve_control_chars, file_data) do
|
25
|
+
doc = Nokogiri::XML(file_data, &:huge)
|
26
|
+
doc.encoding = 'UTF-8'
|
27
|
+
emulate_strict_mode_fatal_check!(doc)
|
20
28
|
end
|
21
|
-
doc.encoding = 'UTF-8'
|
22
|
-
emulate_strict_mode_fatal_check!(doc)
|
23
29
|
|
24
30
|
doc
|
25
31
|
end
|
@@ -40,11 +46,27 @@ module NdrImport
|
|
40
46
|
end
|
41
47
|
|
42
48
|
return unless fatal_errors.any?
|
49
|
+
|
43
50
|
raise Nokogiri::XML::SyntaxError, <<~MSG
|
44
51
|
The file had #{fatal_errors.length} fatal error(s)!"
|
45
52
|
#{fatal_errors.join("\n")}
|
46
53
|
MSG
|
47
54
|
end
|
55
|
+
|
56
|
+
def escaping_control_chars_if_necessary(preserve_control_chars, file_data)
|
57
|
+
return yield if preserve_control_chars
|
58
|
+
|
59
|
+
tried_escaping = false
|
60
|
+
begin
|
61
|
+
yield
|
62
|
+
rescue Nokogiri::XML::SyntaxError => e
|
63
|
+
raise e if tried_escaping
|
64
|
+
|
65
|
+
NdrImport::Xml::ControlCharEscaper.new(file_data).escape!
|
66
|
+
tried_escaping = true
|
67
|
+
retry
|
68
|
+
end
|
69
|
+
end
|
48
70
|
end
|
49
71
|
end
|
50
72
|
end
|
@@ -16,8 +16,8 @@ module NdrImport
|
|
16
16
|
|
17
17
|
include UTF8Encoding
|
18
18
|
|
19
|
-
TABULAR_ONLY_OPTIONS = %w[delimiter liberal_parsing tablename_pattern
|
20
|
-
header_lines footer_lines xml_record_xpath].freeze
|
19
|
+
TABULAR_ONLY_OPTIONS = %w[delimiter last_data_column liberal_parsing tablename_pattern
|
20
|
+
header_lines footer_lines xml_record_xpath slurp].freeze
|
21
21
|
|
22
22
|
NON_TABULAR_OPTIONS = %w[capture_end_line capture_start_line start_line_pattern
|
23
23
|
end_line_pattern remove_lines start_in_a_record
|
data/lib/ndr_import/table.rb
CHANGED
@@ -10,8 +10,9 @@ module NdrImport
|
|
10
10
|
include NdrImport::Mapper
|
11
11
|
|
12
12
|
def self.all_valid_options
|
13
|
-
%w[canonical_name delimiter liberal_parsing filename_pattern file_password
|
14
|
-
header_lines footer_lines format klass columns xml_record_xpath
|
13
|
+
%w[canonical_name delimiter liberal_parsing filename_pattern file_password last_data_column
|
14
|
+
tablename_pattern header_lines footer_lines format klass columns xml_record_xpath slurp
|
15
|
+
row_identifier]
|
15
16
|
end
|
16
17
|
|
17
18
|
def all_valid_options
|
@@ -50,8 +51,9 @@ module NdrImport
|
|
50
51
|
@header_best_guess = nil
|
51
52
|
@notifier.try(:started)
|
52
53
|
|
54
|
+
last_col = last_column_to_transform
|
53
55
|
skip_footer_lines(lines, footer_lines).each do |line|
|
54
|
-
process_line(line, &block)
|
56
|
+
line.is_a?(Array) ? process_line(line[0..last_col], &block) : process_line(line, &block)
|
55
57
|
end
|
56
58
|
|
57
59
|
@notifier.try(:finished)
|
@@ -226,5 +228,26 @@ module NdrImport
|
|
226
228
|
def column_names(column_mappings)
|
227
229
|
column_mappings.map { |c| (c['column'] || c['standard_mapping']).downcase }
|
228
230
|
end
|
231
|
+
|
232
|
+
# If specified in the mapping, stop transforming data at a given index (column)
|
233
|
+
def last_column_to_transform
|
234
|
+
return -1 if last_data_column.nil?
|
235
|
+
return last_data_column - 1 if last_data_column.is_a?(Integer)
|
236
|
+
|
237
|
+
error = "Unknown 'last_data_column' format: #{last_data_column} " \
|
238
|
+
"(#{last_data_column.class})"
|
239
|
+
raise error unless last_data_column.is_a?(String) && last_data_column =~ /\A[A-Z]+\z/i
|
240
|
+
|
241
|
+
# If it's an excel column label (eg 'K', 'AF', 'DDE'), convert it to an index
|
242
|
+
index_from_column_label
|
243
|
+
end
|
244
|
+
|
245
|
+
def index_from_column_label
|
246
|
+
alphabet_index_hash = ('A'..'Z').map.with_index.to_h
|
247
|
+
index = last_data_column.upcase.chars.inject(0) do |char_index, char|
|
248
|
+
(char_index * 26) + (alphabet_index_hash[char] + 1)
|
249
|
+
end
|
250
|
+
index - 1
|
251
|
+
end
|
229
252
|
end # class Table
|
230
253
|
end
|
@@ -48,9 +48,7 @@ module NdrImport
|
|
48
48
|
def extract(source_file, &block)
|
49
49
|
return enum_for(:extract, source_file) unless block
|
50
50
|
|
51
|
-
|
52
|
-
'unzip_path' => unzip_path)
|
53
|
-
files.each do |filename|
|
51
|
+
NdrImport::File::Registry.files(source_file, 'unzip_path' => unzip_path).each do |filename|
|
54
52
|
# now at the individual file level, can we find the table mapping?
|
55
53
|
table_mapping = get_table_mapping(filename, nil)
|
56
54
|
|
data/lib/ndr_import/version.rb
CHANGED
@@ -0,0 +1,51 @@
|
|
1
|
+
require 'ndr_support/utf8_encoding'
|
2
|
+
|
3
|
+
module NdrImport
|
4
|
+
module Xml
|
5
|
+
# A class to remove control characters, and XML entities representing them
|
6
|
+
class ControlCharEscaper
|
7
|
+
include UTF8Encoding
|
8
|
+
|
9
|
+
# Matches XML character reference entities
|
10
|
+
CHARACTER_REFERENCES = /&#(?:(?<decimal>\d+)|x(?<hex>\h+));/.freeze
|
11
|
+
|
12
|
+
attr_reader :data
|
13
|
+
|
14
|
+
def initialize(data)
|
15
|
+
@data = data
|
16
|
+
end
|
17
|
+
|
18
|
+
def escape!
|
19
|
+
unescape_control_char_references!(data)
|
20
|
+
escape_control_chars!(data)
|
21
|
+
end
|
22
|
+
|
23
|
+
private
|
24
|
+
|
25
|
+
def unescape_control_char_references!(data)
|
26
|
+
data.gsub!(CHARACTER_REFERENCES) do |reference|
|
27
|
+
char = try_to_extract_char_from(Regexp.last_match)
|
28
|
+
|
29
|
+
if char&.match?(CONTROL_CHARACTERS)
|
30
|
+
escape_control_chars!(char)
|
31
|
+
else
|
32
|
+
reference
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
def try_to_extract_char_from(match)
|
38
|
+
if match.nil?
|
39
|
+
nil
|
40
|
+
elsif match[:decimal]
|
41
|
+
match[:decimal].to_i(10).chr
|
42
|
+
elsif match[:hex]
|
43
|
+
match[:hex].to_i(16).chr
|
44
|
+
end
|
45
|
+
rescue RangeError
|
46
|
+
# Return everything if the match was against junk:
|
47
|
+
match.to_s
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
data/ndr_import.gemspec
CHANGED
@@ -35,7 +35,7 @@ Gem::Specification.new do |spec|
|
|
35
35
|
spec.add_dependency 'ooxml_decrypt'
|
36
36
|
spec.add_dependency 'pdf-reader', '~> 2.1'
|
37
37
|
spec.add_dependency 'roo-xls'
|
38
|
-
spec.add_dependency 'seven_zip_ruby', '~> 1.
|
38
|
+
spec.add_dependency 'seven_zip_ruby', '~> 1.3'
|
39
39
|
spec.add_dependency 'spreadsheet', '1.2.6'
|
40
40
|
|
41
41
|
spec.required_ruby_version = '>= 2.5'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ndr_import
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 10.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- NCRS Development Team
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-09-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activemodel
|
@@ -188,14 +188,14 @@ dependencies:
|
|
188
188
|
requirements:
|
189
189
|
- - "~>"
|
190
190
|
- !ruby/object:Gem::Version
|
191
|
-
version: '1.
|
191
|
+
version: '1.3'
|
192
192
|
type: :runtime
|
193
193
|
prerelease: false
|
194
194
|
version_requirements: !ruby/object:Gem::Requirement
|
195
195
|
requirements:
|
196
196
|
- - "~>"
|
197
197
|
- !ruby/object:Gem::Version
|
198
|
-
version: '1.
|
198
|
+
version: '1.3'
|
199
199
|
- !ruby/object:Gem::Dependency
|
200
200
|
name: spreadsheet
|
201
201
|
requirement: !ruby/object:Gem::Requirement
|
@@ -427,6 +427,7 @@ files:
|
|
427
427
|
- lib/ndr_import/universal_importer_helper.rb
|
428
428
|
- lib/ndr_import/unmapped_data_error.rb
|
429
429
|
- lib/ndr_import/version.rb
|
430
|
+
- lib/ndr_import/xml/control_char_escaper.rb
|
430
431
|
- lib/ndr_import/xml/table.rb
|
431
432
|
- ndr_import.gemspec
|
432
433
|
homepage: https://github.com/PublicHealthEngland/ndr_import
|