ndr_import 11.0.2 → 11.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ab2194b39d6226b9c8883f89ef30045c714fe276cdd5312f0b53f643cebbc003
4
- data.tar.gz: 54433f513c6a6fb546f2f1b217d711214e44e1c30f488d1d00f9a67f41b94c0b
3
+ metadata.gz: e2869dc7651ab267d05332b78b6e18827ec8a2b2cf971ec023fba442df836f87
4
+ data.tar.gz: b8cc3ca305d12f282ad566b65d304af7d1ac527ced8f0bda7171cda5e3baeeac
5
5
  SHA512:
6
- metadata.gz: 6c60e8a7a9abd7e30ff754898e6c332f150133488ab5837cdf030bff483db6b3eda6196bb42a2bb42099104690d2621c1b9ad39580313ff82c66f813043f2843
7
- data.tar.gz: 4094561f3f994b0a82ee974f5eba00f6e70fdf219c311534ab070dc5da8f861a4dbe73d67842db2231766a18d0a1721cd13e9a8e46a9910987e450f388f06369
6
+ metadata.gz: 607860deb2f9a9f35491b96c280e21070f123c8a70de6c2796db308aeb5dff7b4ee9a65416ff0708d9ea0a587d749a2f756d59c6446df55b78ac3eee0ebf3b63
7
+ data.tar.gz: 6513f63649016ec5025b3b7f000f76b57be3db1c6670c066b632f9fbb2198bbf2c700668575026bf903f44f866ed3377a99c93686e2eac94036b666254b4abc8
data/CHANGELOG.md CHANGED
@@ -1,6 +1,14 @@
1
1
  ## [Unreleased]
2
2
  =======
3
- * no unreleased changes*
3
+ * no unreleased changes *
4
+
5
+ ## 11.2.0 / 2024-04-10
6
+ ### Added
7
+ * XML file/table metadata storage
8
+
9
+ ## 11.1.0 / 2024-03-07
10
+ ### Added
11
+ * XML table should not expect column mappings for empty nodes/elements
4
12
 
5
13
  ## 11.0.2 / 2024-02-06
6
14
  ### Fixed
@@ -9,6 +9,8 @@ module NdrImport
9
9
  module File
10
10
  # All common base file handler logic is defined here.
11
11
  class Base
12
+ attr_accessor :file_metadata
13
+
12
14
  def initialize(filename, format, options = {})
13
15
  @filename = filename
14
16
  @format = format
@@ -32,10 +34,10 @@ module NdrImport
32
34
  yield @filename
33
35
  end
34
36
 
35
- # This method iterates over the tables in the given file and yields with two arguments:
36
- # a tablename and a row enumerator (for that table). For a spreadsheet it may yield for
37
- # every worksheet in the file and for a CSV file it will only yield once (the entire
38
- # file is one table).
37
+ # This method iterates over the tables in the given file and yields with three arguments:
38
+ # a tablename, a row enumerator (for that table) and any file metadata.
39
+ # For a spreadsheet it may yield for every worksheet in the file and for a CSV file it
40
+ # will only yield once (the entire file is one table).
39
41
  #
40
42
  # As single table files are in the majority, the Base implementation is defined for
41
43
  # single table handlers and you will only need to implement the rows iterator. If your
@@ -45,7 +47,7 @@ module NdrImport
45
47
  def tables
46
48
  return enum_for(:tables) unless block_given?
47
49
 
48
- yield nil, rows
50
+ yield nil, rows, file_metadata
49
51
  end
50
52
 
51
53
  private
@@ -16,18 +16,73 @@ module NdrImport
16
16
  super
17
17
 
18
18
  @pattern_match_xpath = @options['pattern_match_record_xpath']
19
+ @xml_file_metadata = @options['xml_file_metadata']
20
+ @options['slurp'] ? prepare_slurped_file : prepare_streamed_file
19
21
  end
20
22
 
21
23
  private
22
24
 
25
+ def prepare_slurped_file
26
+ @doc = read_xml_file(@filename)
27
+ slurp_metadata_values
28
+ end
29
+
30
+ def prepare_streamed_file
31
+ with_encoding_check(@filename) do |stream, encoding|
32
+ @stream = stream
33
+ @encoding = encoding
34
+ end
35
+ stream_metadata_values
36
+ end
37
+
38
+ def slurp_metadata_values
39
+ return unless @xml_file_metadata.is_a?(Hash)
40
+
41
+ self.file_metadata = @xml_file_metadata.transform_values do |xpath|
42
+ @doc.xpath(xpath).inner_text
43
+ end
44
+ end
45
+
46
+ def stream_metadata_values
47
+ return unless @xml_file_metadata.is_a?(Hash)
48
+
49
+ self.file_metadata = @xml_file_metadata.transform_values.with_index do |xpath, index|
50
+ # Ensure we're at the start of the stream each time
51
+ @stream.rewind unless index.zero?
52
+
53
+ metadata_from_stream(xpath)
54
+ end
55
+ end
56
+
57
+ def metadata_from_stream(xpath)
58
+ cursor = Cursor.new(xpath, false)
59
+
60
+ # If markup isn't well-formed, try to work around it:
61
+ options = Nokogiri::XML::ParseOptions::RECOVER
62
+ reader = Nokogiri::XML::Reader(@stream, nil, @encoding, options)
63
+
64
+ reader.each do |node|
65
+ case node.node_type
66
+ when Nokogiri::XML::Reader::TYPE_ELEMENT # "opening tag"
67
+ raise NestingError, node if cursor.in?(node)
68
+
69
+ cursor.enter(node)
70
+ return cursor.inner_text if cursor.send(:current_stack_match?)
71
+ when Nokogiri::XML::Reader::TYPE_END_ELEMENT # "closing tag"
72
+ cursor.leave(node)
73
+ end
74
+ end
75
+ end
76
+
23
77
  # Iterate through the file, yielding each 'xml_record_xpath' element in turn.
24
78
  def rows(&block)
25
79
  return enum_for(:rows) unless block
26
80
 
27
81
  if @options['slurp']
28
- record_elements(read_xml_file(@filename)).each(&block)
82
+ record_elements.each(&block)
29
83
  else
30
- each_node(@filename, xml_record_xpath, @pattern_match_xpath, &block)
84
+ @stream.rewind
85
+ each_node(@stream, @encoding, xml_record_xpath, @pattern_match_xpath, &block)
31
86
  end
32
87
  end
33
88
 
@@ -35,13 +90,13 @@ module NdrImport
35
90
  @pattern_match_xpath ? @options['xml_record_xpath'] : "*/#{@options['xml_record_xpath']}"
36
91
  end
37
92
 
38
- def record_elements(doc)
93
+ def record_elements
39
94
  if @pattern_match_xpath
40
- doc.root.children.find_all do |element|
95
+ @doc.root.children.find_all do |element|
41
96
  element.name =~ Regexp.new(@options['xml_record_xpath'])
42
97
  end
43
98
  else
44
- doc.root.xpath(@options['xml_record_xpath'])
99
+ @doc.root.xpath(@options['xml_record_xpath'])
45
100
  end
46
101
  end
47
102
  end
@@ -78,6 +78,10 @@ module NdrImport
78
78
  match
79
79
  end
80
80
 
81
+ def inner_text
82
+ dom_stubs[@stack].xpath(@xpath)&.inner_text
83
+ end
84
+
81
85
  private
82
86
 
83
87
  def in_empty_element?
@@ -134,21 +138,18 @@ module NdrImport
134
138
 
135
139
  include UTF8Encoding
136
140
 
137
- # Streams the contents of the given `safe_path`, and yields
138
- # each element matching `xpath` as they're found.
141
+ # Yields each element matching `xpath` from `stream` as they're found.
139
142
  #
140
143
  # In the case of dodgy encoding, may fall back to slurping the
141
144
  # file, but will still use stream parsing for XML.
142
145
  #
143
146
  # Optionally pattern match the xpath
144
- def each_node(safe_path, xpath, pattern_match_xpath = nil, &block)
145
- return enum_for(:each_node, safe_path, xpath, pattern_match_xpath) unless block
147
+ def each_node(stream, encoding, xpath, pattern_match_xpath = nil, &block)
148
+ return enum_for(:each_node, stream, encoding, xpath, pattern_match_xpath) unless block
146
149
 
147
150
  require 'nokogiri'
148
151
 
149
- with_encoding_check(safe_path) do |stream, encoding|
150
- stream_xml_nodes(stream, xpath, pattern_match_xpath, encoding, &block)
151
- end
152
+ stream_xml_nodes(stream, xpath, pattern_match_xpath, encoding, &block)
152
153
  end
153
154
 
154
155
  private
@@ -21,7 +21,7 @@ module NdrImport
21
21
  end
22
22
 
23
23
  attr_reader(*all_valid_options)
24
- attr_accessor :notifier
24
+ attr_accessor :notifier, :table_metadata
25
25
 
26
26
  def initialize(options = {})
27
27
  options.stringify_keys! if options.is_a?(Hash)
@@ -59,7 +59,8 @@ module NdrImport
59
59
  'xml_record_xpath' => table_mapping.try(:xml_record_xpath),
60
60
  'slurp' => table_mapping.try(:slurp),
61
61
  'yield_xml_record' => table_mapping.try(:yield_xml_record),
62
- 'pattern_match_record_xpath' => table_mapping.try(:pattern_match_record_xpath) }
62
+ 'pattern_match_record_xpath' => table_mapping.try(:pattern_match_record_xpath),
63
+ 'xml_file_metadata' => table_mapping.try(:xml_file_metadata) }
63
64
 
64
65
  tables = NdrImport::File::Registry.tables(filename, table_mapping.try(:format), options)
65
66
  yield_tables_and_their_content(filename, tables, &block)
@@ -71,12 +72,12 @@ module NdrImport
71
72
  def yield_tables_and_their_content(filename, tables, &block)
72
73
  return enum_for(:yield_tables_and_their_content, filename, tables) unless block_given?
73
74
 
74
- tables.each do |tablename, table_content|
75
+ tables.each do |tablename, table_content, file_metadata|
75
76
  mapping = get_table_mapping(filename, tablename)
76
77
  next if mapping.nil?
77
78
 
78
79
  mapping.notifier = get_notifier(record_total(filename, table_content))
79
-
80
+ mapping.table_metadata = file_metadata || {}
80
81
  yield(mapping, table_content)
81
82
  end
82
83
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
  # This stores the current version of the NdrImport gem
3
3
  module NdrImport
4
- VERSION = '11.0.2'
4
+ VERSION = '11.2.0'
5
5
  end
@@ -10,7 +10,8 @@ module NdrImport
10
10
  require 'ndr_import/xml/column_mapping'
11
11
  require 'ndr_import/xml/masked_mappings'
12
12
 
13
- XML_OPTIONS = %w[pattern_match_record_xpath xml_record_xpath yield_xml_record].freeze
13
+ XML_OPTIONS = %w[pattern_match_record_xpath xml_file_metadata xml_record_xpath
14
+ yield_xml_record].freeze
14
15
 
15
16
  def self.all_valid_options
16
17
  super - %w[delimiter header_lines footer_lines] + XML_OPTIONS
@@ -169,6 +170,8 @@ module NdrImport
169
170
  xpaths = []
170
171
 
171
172
  line.xpath('.//*[not(child::*)]').each do |node|
173
+ next unless populated?(node)
174
+
172
175
  xpath = node.path.sub("#{line.path}/", '')
173
176
  if node.attributes.any?
174
177
  node.attributes.each_key { |key| xpaths << "#{xpath}/@#{key}" }
@@ -179,6 +182,12 @@ module NdrImport
179
182
  xpaths
180
183
  end
181
184
 
185
+ def populated?(node)
186
+ node.element_children.empty? &&
187
+ !node.is_a?(Nokogiri::XML::Comment) && !node.text? && !node.cdata? &&
188
+ !(node.attributes.empty? && node.content.strip.blank?)
189
+ end
190
+
182
191
  def build_xpath_from(column)
183
192
  column_name = column_name_from(column)
184
193
  column['xml_cell'].presence ? relative_path_from(column, column_name) : column_name
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ndr_import
3
3
  version: !ruby/object:Gem::Version
4
- version: 11.0.2
4
+ version: 11.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - NCRS Development Team
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-02-06 00:00:00.000000000 Z
11
+ date: 2024-04-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activemodel
@@ -465,7 +465,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
465
465
  - !ruby/object:Gem::Version
466
466
  version: '0'
467
467
  requirements: []
468
- rubygems_version: 3.2.33
468
+ rubygems_version: 3.2.3
469
469
  signing_key:
470
470
  specification_version: 4
471
471
  summary: NDR Import