ndr_import 11.1.0 → 11.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/lib/ndr_import/file/base.rb +7 -5
- data/lib/ndr_import/file/xml.rb +60 -5
- data/lib/ndr_import/helpers/file/xml_streaming.rb +8 -7
- data/lib/ndr_import/table.rb +1 -1
- data/lib/ndr_import/universal_importer_helper.rb +4 -3
- data/lib/ndr_import/version.rb +1 -1
- data/lib/ndr_import/xml/table.rb +2 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e2869dc7651ab267d05332b78b6e18827ec8a2b2cf971ec023fba442df836f87
|
4
|
+
data.tar.gz: b8cc3ca305d12f282ad566b65d304af7d1ac527ced8f0bda7171cda5e3baeeac
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 607860deb2f9a9f35491b96c280e21070f123c8a70de6c2796db308aeb5dff7b4ee9a65416ff0708d9ea0a587d749a2f756d59c6446df55b78ac3eee0ebf3b63
|
7
|
+
data.tar.gz: 6513f63649016ec5025b3b7f000f76b57be3db1c6670c066b632f9fbb2198bbf2c700668575026bf903f44f866ed3377a99c93686e2eac94036b666254b4abc8
|
data/CHANGELOG.md
CHANGED
data/lib/ndr_import/file/base.rb
CHANGED
@@ -9,6 +9,8 @@ module NdrImport
|
|
9
9
|
module File
|
10
10
|
# All common base file handler logic is defined here.
|
11
11
|
class Base
|
12
|
+
attr_accessor :file_metadata
|
13
|
+
|
12
14
|
def initialize(filename, format, options = {})
|
13
15
|
@filename = filename
|
14
16
|
@format = format
|
@@ -32,10 +34,10 @@ module NdrImport
|
|
32
34
|
yield @filename
|
33
35
|
end
|
34
36
|
|
35
|
-
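# This method iterates over the tables in the given file and yields with two arguments:
|
36
|
-
# a tablename and a row enumerator (for that table). For a spreadsheet it may yield for
|
37
|
-
# every worksheet in the file and for a CSV file it will only yield once (the entire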
|
38
|
-
# file is one table).
|
37
|
+
# This method iterates over the tables in the given file and yields with three arguments:
|
38
|
+
# a tablename, a row enumerator (for that table) and any file metadata.
|
39
|
+
# For a spreadsheet it may yield for every worksheet in the file and for a CSV file it
|
40
|
+
# will only yield once (the entire file is one table).
|
39
41
|
#
|
40
42
|
# As single table files are in the majority, the Base implementation is defined for
|
41
43
|
# single table handlers and you will only need to implement the rows iterator. If your
|
@@ -45,7 +47,7 @@ module NdrImport
|
|
45
47
|
def tables
|
46
48
|
return enum_for(:tables) unless block_given?
|
47
49
|
|
48
|
-
yield nil, rows
|
50
|
+
yield nil, rows, file_metadata
|
49
51
|
end
|
50
52
|
|
51
53
|
private
|
data/lib/ndr_import/file/xml.rb
CHANGED
@@ -16,18 +16,73 @@ module NdrImport
|
|
16
16
|
super
|
17
17
|
|
18
18
|
@pattern_match_xpath = @options['pattern_match_record_xpath']
|
19
|
+
@xml_file_metadata = @options['xml_file_metadata']
|
20
|
+
@options['slurp'] ? prepare_slurped_file : prepare_streamed_file
|
19
21
|
end
|
20
22
|
|
21
23
|
private
|
22
24
|
|
25
|
+
def prepare_slurped_file
|
26
|
+
@doc = read_xml_file(@filename)
|
27
|
+
slurp_metadata_values
|
28
|
+
end
|
29
|
+
|
30
|
+
def prepare_streamed_file
|
31
|
+
with_encoding_check(@filename) do |stream, encoding|
|
32
|
+
@stream = stream
|
33
|
+
@encoding = encoding
|
34
|
+
end
|
35
|
+
stream_metadata_values
|
36
|
+
end
|
37
|
+
|
38
|
+
def slurp_metadata_values
|
39
|
+
return unless @xml_file_metadata.is_a?(Hash)
|
40
|
+
|
41
|
+
self.file_metadata = @xml_file_metadata.transform_values do |xpath|
|
42
|
+
@doc.xpath(xpath).inner_text
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
def stream_metadata_values
|
47
|
+
return unless @xml_file_metadata.is_a?(Hash)
|
48
|
+
|
49
|
+
self.file_metadata = @xml_file_metadata.transform_values.with_index do |xpath, index|
|
50
|
+
# Ensure we're at the start of the stream each time
|
51
|
+
@stream.rewind unless index.zero?
|
52
|
+
|
53
|
+
metadata_from_stream(xpath)
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
57
|
+
def metadata_from_stream(xpath)
|
58
|
+
cursor = Cursor.new(xpath, false)
|
59
|
+
|
60
|
+
# If markup isn't well-formed, try to work around it:
|
61
|
+
options = Nokogiri::XML::ParseOptions::RECOVER
|
62
|
+
reader = Nokogiri::XML::Reader(@stream, nil, @encoding, options)
|
63
|
+
|
64
|
+
reader.each do |node|
|
65
|
+
case node.node_type
|
66
|
+
when Nokogiri::XML::Reader::TYPE_ELEMENT # "opening tag"
|
67
|
+
raise NestingError, node if cursor.in?(node)
|
68
|
+
|
69
|
+
cursor.enter(node)
|
70
|
+
return cursor.inner_text if cursor.send(:current_stack_match?)
|
71
|
+
when Nokogiri::XML::Reader::TYPE_END_ELEMENT # "closing tag"
|
72
|
+
cursor.leave(node)
|
73
|
+
end
|
74
|
+
end
|
75
|
+
end
|
76
|
+
|
23
77
|
# Iterate through the file, yielding each 'xml_record_xpath' element in turn.
|
24
78
|
def rows(&block)
|
25
79
|
return enum_for(:rows) unless block
|
26
80
|
|
27
81
|
if @options['slurp']
|
28
|
-
record_elements
|
82
|
+
record_elements.each(&block)
|
29
83
|
else
|
30
|
-
|
84
|
+
@stream.rewind
|
85
|
+
each_node(@stream, @encoding, xml_record_xpath, @pattern_match_xpath, &block)
|
31
86
|
end
|
32
87
|
end
|
33
88
|
|
@@ -35,13 +90,13 @@ module NdrImport
|
|
35
90
|
@pattern_match_xpath ? @options['xml_record_xpath'] : "*/#{@options['xml_record_xpath']}"
|
36
91
|
end
|
37
92
|
|
38
|
-
def record_elements
|
93
|
+
def record_elements
|
39
94
|
if @pattern_match_xpath
|
40
|
-
doc.root.children.find_all do |element|
|
95
|
+
@doc.root.children.find_all do |element|
|
41
96
|
element.name =~ Regexp.new(@options['xml_record_xpath'])
|
42
97
|
end
|
43
98
|
else
|
44
|
-
doc.root.xpath(@options['xml_record_xpath'])
|
99
|
+
@doc.root.xpath(@options['xml_record_xpath'])
|
45
100
|
end
|
46
101
|
end
|
47
102
|
end
|
data/lib/ndr_import/helpers/file/xml_streaming.rb
CHANGED
@@ -78,6 +78,10 @@ module NdrImport
|
|
78
78
|
match
|
79
79
|
end
|
80
80
|
|
81
|
+
def inner_text
|
82
|
+
dom_stubs[@stack].xpath(@xpath)&.inner_text
|
83
|
+
end
|
84
|
+
|
81
85
|
private
|
82
86
|
|
83
87
|
def in_empty_element?
|
@@ -134,21 +138,18 @@ module NdrImport
|
|
134
138
|
|
135
139
|
include UTF8Encoding
|
136
140
|
|
137
|
-
#
|
138
|
-
# each element matching `xpath` as they're found.
|
141
|
+
# Yields each element matching `xpath` from `stream` as they're found.
|
139
142
|
#
|
140
143
|
# In the case of dodgy encoding, may fall back to slurping the
|
141
144
|
# file, but will still use stream parsing for XML.
|
142
145
|
#
|
143
146
|
# Optionally pattern match the xpath
|
144
|
-
def each_node(
|
145
|
-
return enum_for(:each_node,
|
147
|
+
def each_node(stream, encoding, xpath, pattern_match_xpath = nil, &block)
|
148
|
+
return enum_for(:each_node, stream, encoding, xpath, pattern_match_xpath) unless block
|
146
149
|
|
147
150
|
require 'nokogiri'
|
148
151
|
|
149
|
-
|
150
|
-
stream_xml_nodes(stream, xpath, pattern_match_xpath, encoding, &block)
|
151
|
-
end
|
152
|
+
stream_xml_nodes(stream, xpath, pattern_match_xpath, encoding, &block)
|
152
153
|
end
|
153
154
|
|
154
155
|
private
|
data/lib/ndr_import/table.rb
CHANGED
data/lib/ndr_import/universal_importer_helper.rb
CHANGED
@@ -59,7 +59,8 @@ module NdrImport
|
|
59
59
|
'xml_record_xpath' => table_mapping.try(:xml_record_xpath),
|
60
60
|
'slurp' => table_mapping.try(:slurp),
|
61
61
|
'yield_xml_record' => table_mapping.try(:yield_xml_record),
|
62
|
-
'pattern_match_record_xpath' => table_mapping.try(:pattern_match_record_xpath)
|
62
|
+
'pattern_match_record_xpath' => table_mapping.try(:pattern_match_record_xpath),
|
63
|
+
'xml_file_metadata' => table_mapping.try(:xml_file_metadata) }
|
63
64
|
|
64
65
|
tables = NdrImport::File::Registry.tables(filename, table_mapping.try(:format), options)
|
65
66
|
yield_tables_and_their_content(filename, tables, &block)
|
@@ -71,12 +72,12 @@ module NdrImport
|
|
71
72
|
def yield_tables_and_their_content(filename, tables, &block)
|
72
73
|
return enum_for(:yield_tables_and_their_content, filename, tables) unless block_given?
|
73
74
|
|
74
|
-
tables.each do |tablename, table_content|
|
75
|
+
tables.each do |tablename, table_content, file_metadata|
|
75
76
|
mapping = get_table_mapping(filename, tablename)
|
76
77
|
next if mapping.nil?
|
77
78
|
|
78
79
|
mapping.notifier = get_notifier(record_total(filename, table_content))
|
79
|
-
|
80
|
+
mapping.table_metadata = file_metadata || {}
|
80
81
|
yield(mapping, table_content)
|
81
82
|
end
|
82
83
|
end
|
data/lib/ndr_import/version.rb
CHANGED
data/lib/ndr_import/xml/table.rb
CHANGED
@@ -10,7 +10,8 @@ module NdrImport
|
|
10
10
|
require 'ndr_import/xml/column_mapping'
|
11
11
|
require 'ndr_import/xml/masked_mappings'
|
12
12
|
|
13
|
-
XML_OPTIONS = %w[pattern_match_record_xpath xml_record_xpath
|
13
|
+
XML_OPTIONS = %w[pattern_match_record_xpath xml_file_metadata xml_record_xpath
|
14
|
+
yield_xml_record].freeze
|
14
15
|
|
15
16
|
def self.all_valid_options
|
16
17
|
super - %w[delimiter header_lines footer_lines] + XML_OPTIONS
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ndr_import
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 11.1.0
|
4
|
+
version: 11.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- NCRS Development Team
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-04-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activemodel
|