ndr_import 11.1.0 → 11.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1acf9db0301b6da991e890e74db10573ff666dd732eda4b58ba90e03370bb32a
4
- data.tar.gz: eba9da2d2d9f5fa98c3a7f8745086f3945c363844f4d1f8fc1dc6c38ab4bcb5b
3
+ metadata.gz: e2869dc7651ab267d05332b78b6e18827ec8a2b2cf971ec023fba442df836f87
4
+ data.tar.gz: b8cc3ca305d12f282ad566b65d304af7d1ac527ced8f0bda7171cda5e3baeeac
5
5
  SHA512:
6
- metadata.gz: 8509511d4f54f36344e09d380da53f168f8fa94efd4eec2d73e29c80811bd644d853bb82a835bf7ddf2820202b8abad28cbe0d265de3557e2af8adca4f12b178
7
- data.tar.gz: 384d8365d4e6ba741bf6b76a1ecfe1bf1bb92c6fd0e737489d2d3a13047d9e557b883c339ff2f89f372071140a071bcbedfdd303b588dda79d58870bab0be2e6
6
+ metadata.gz: 607860deb2f9a9f35491b96c280e21070f123c8a70de6c2796db308aeb5dff7b4ee9a65416ff0708d9ea0a587d749a2f756d59c6446df55b78ac3eee0ebf3b63
7
+ data.tar.gz: 6513f63649016ec5025b3b7f000f76b57be3db1c6670c066b632f9fbb2198bbf2c700668575026bf903f44f866ed3377a99c93686e2eac94036b666254b4abc8
data/CHANGELOG.md CHANGED
@@ -2,6 +2,10 @@
2
2
  =======
3
3
  * no unreleased changes *
4
4
 
5
+ ## 11.2.0 / 2024-04-10
6
+ ### Added
7
+ * XML file/table metadata storage
8
+
5
9
  ## 11.1.0 / 2024-03-07
6
10
  ### Added
7
11
  * XML table should not expect column mappings for empty nodes/elements
@@ -9,6 +9,8 @@ module NdrImport
9
9
  module File
10
10
  # All common base file handler logic is defined here.
11
11
  class Base
12
+ attr_accessor :file_metadata
13
+
12
14
  def initialize(filename, format, options = {})
13
15
  @filename = filename
14
16
  @format = format
@@ -32,10 +34,10 @@ module NdrImport
32
34
  yield @filename
33
35
  end
34
36
 
35
- # This method iterates over the tables in the given file and yields with two arguments:
36
- # a tablename and a row enumerator (for that table). For a spreadsheet it may yield for
37
- # every worksheet in the file and for a CSV file it will only yield once (the entire
38
- # file is one table).
37
+ # This method iterates over the tables in the given file and yields with three arguments:
38
+ # a tablename, a row enumerator (for that table) and any file metadata.
39
+ # For a spreadsheet it may yield for every worksheet in the file and for a CSV file it
40
+ # will only yield once (the entire file is one table).
39
41
  #
40
42
  # As single table files are in the majority, the Base implementation is defined for
41
43
  # single table handlers and you will only need to implement the rows iterator. If your
@@ -45,7 +47,7 @@ module NdrImport
45
47
  def tables
46
48
  return enum_for(:tables) unless block_given?
47
49
 
48
- yield nil, rows
50
+ yield nil, rows, file_metadata
49
51
  end
50
52
 
51
53
  private
@@ -16,18 +16,73 @@ module NdrImport
16
16
  super
17
17
 
18
18
  @pattern_match_xpath = @options['pattern_match_record_xpath']
19
+ @xml_file_metadata = @options['xml_file_metadata']
20
+ @options['slurp'] ? prepare_slurped_file : prepare_streamed_file
19
21
  end
20
22
 
21
23
  private
22
24
 
25
+ def prepare_slurped_file
26
+ @doc = read_xml_file(@filename)
27
+ slurp_metadata_values
28
+ end
29
+
30
+ def prepare_streamed_file
31
+ with_encoding_check(@filename) do |stream, encoding|
32
+ @stream = stream
33
+ @encoding = encoding
34
+ end
35
+ stream_metadata_values
36
+ end
37
+
38
+ def slurp_metadata_values
39
+ return unless @xml_file_metadata.is_a?(Hash)
40
+
41
+ self.file_metadata = @xml_file_metadata.transform_values do |xpath|
42
+ @doc.xpath(xpath).inner_text
43
+ end
44
+ end
45
+
46
+ def stream_metadata_values
47
+ return unless @xml_file_metadata.is_a?(Hash)
48
+
49
+ self.file_metadata = @xml_file_metadata.transform_values.with_index do |xpath, index|
50
+ # Ensure we're at the start of the stream each time
51
+ @stream.rewind unless index.zero?
52
+
53
+ metadata_from_stream(xpath)
54
+ end
55
+ end
56
+
57
+ def metadata_from_stream(xpath)
58
+ cursor = Cursor.new(xpath, false)
59
+
60
+ # If markup isn't well-formed, try to work around it:
61
+ options = Nokogiri::XML::ParseOptions::RECOVER
62
+ reader = Nokogiri::XML::Reader(@stream, nil, @encoding, options)
63
+
64
+ reader.each do |node|
65
+ case node.node_type
66
+ when Nokogiri::XML::Reader::TYPE_ELEMENT # "opening tag"
67
+ raise NestingError, node if cursor.in?(node)
68
+
69
+ cursor.enter(node)
70
+ return cursor.inner_text if cursor.send(:current_stack_match?)
71
+ when Nokogiri::XML::Reader::TYPE_END_ELEMENT # "closing tag"
72
+ cursor.leave(node)
73
+ end
74
+ end
75
+ end
76
+
23
77
  # Iterate through the file, yielding each 'xml_record_xpath' element in turn.
24
78
  def rows(&block)
25
79
  return enum_for(:rows) unless block
26
80
 
27
81
  if @options['slurp']
28
- record_elements(read_xml_file(@filename)).each(&block)
82
+ record_elements.each(&block)
29
83
  else
30
- each_node(@filename, xml_record_xpath, @pattern_match_xpath, &block)
84
+ @stream.rewind
85
+ each_node(@stream, @encoding, xml_record_xpath, @pattern_match_xpath, &block)
31
86
  end
32
87
  end
33
88
 
@@ -35,13 +90,13 @@ module NdrImport
35
90
  @pattern_match_xpath ? @options['xml_record_xpath'] : "*/#{@options['xml_record_xpath']}"
36
91
  end
37
92
 
38
- def record_elements(doc)
93
+ def record_elements
39
94
  if @pattern_match_xpath
40
- doc.root.children.find_all do |element|
95
+ @doc.root.children.find_all do |element|
41
96
  element.name =~ Regexp.new(@options['xml_record_xpath'])
42
97
  end
43
98
  else
44
- doc.root.xpath(@options['xml_record_xpath'])
99
+ @doc.root.xpath(@options['xml_record_xpath'])
45
100
  end
46
101
  end
47
102
  end
@@ -78,6 +78,10 @@ module NdrImport
78
78
  match
79
79
  end
80
80
 
81
+ def inner_text
82
+ dom_stubs[@stack].xpath(@xpath)&.inner_text
83
+ end
84
+
81
85
  private
82
86
 
83
87
  def in_empty_element?
@@ -134,21 +138,18 @@ module NdrImport
134
138
 
135
139
  include UTF8Encoding
136
140
 
137
- # Streams the contents of the given `safe_path`, and yields
138
- # each element matching `xpath` as they're found.
141
+ # Yields each element matching `xpath` from `stream` as they're found.
139
142
  #
140
143
  # In the case of dodgy encoding, may fall back to slurping the
141
144
  # file, but will still use stream parsing for XML.
142
145
  #
143
146
  # Optionally pattern match the xpath
144
- def each_node(safe_path, xpath, pattern_match_xpath = nil, &block)
145
- return enum_for(:each_node, safe_path, xpath, pattern_match_xpath) unless block
147
+ def each_node(stream, encoding, xpath, pattern_match_xpath = nil, &block)
148
+ return enum_for(:each_node, stream, encoding, xpath, pattern_match_xpath) unless block
146
149
 
147
150
  require 'nokogiri'
148
151
 
149
- with_encoding_check(safe_path) do |stream, encoding|
150
- stream_xml_nodes(stream, xpath, pattern_match_xpath, encoding, &block)
151
- end
152
+ stream_xml_nodes(stream, xpath, pattern_match_xpath, encoding, &block)
152
153
  end
153
154
 
154
155
  private
@@ -21,7 +21,7 @@ module NdrImport
21
21
  end
22
22
 
23
23
  attr_reader(*all_valid_options)
24
- attr_accessor :notifier
24
+ attr_accessor :notifier, :table_metadata
25
25
 
26
26
  def initialize(options = {})
27
27
  options.stringify_keys! if options.is_a?(Hash)
@@ -59,7 +59,8 @@ module NdrImport
59
59
  'xml_record_xpath' => table_mapping.try(:xml_record_xpath),
60
60
  'slurp' => table_mapping.try(:slurp),
61
61
  'yield_xml_record' => table_mapping.try(:yield_xml_record),
62
- 'pattern_match_record_xpath' => table_mapping.try(:pattern_match_record_xpath) }
62
+ 'pattern_match_record_xpath' => table_mapping.try(:pattern_match_record_xpath),
63
+ 'xml_file_metadata' => table_mapping.try(:xml_file_metadata) }
63
64
 
64
65
  tables = NdrImport::File::Registry.tables(filename, table_mapping.try(:format), options)
65
66
  yield_tables_and_their_content(filename, tables, &block)
@@ -71,12 +72,12 @@ module NdrImport
71
72
  def yield_tables_and_their_content(filename, tables, &block)
72
73
  return enum_for(:yield_tables_and_their_content, filename, tables) unless block_given?
73
74
 
74
- tables.each do |tablename, table_content|
75
+ tables.each do |tablename, table_content, file_metadata|
75
76
  mapping = get_table_mapping(filename, tablename)
76
77
  next if mapping.nil?
77
78
 
78
79
  mapping.notifier = get_notifier(record_total(filename, table_content))
79
-
80
+ mapping.table_metadata = file_metadata || {}
80
81
  yield(mapping, table_content)
81
82
  end
82
83
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
  # This stores the current version of the NdrImport gem
3
3
  module NdrImport
4
- VERSION = '11.1.0'
4
+ VERSION = '11.2.0'
5
5
  end
@@ -10,7 +10,8 @@ module NdrImport
10
10
  require 'ndr_import/xml/column_mapping'
11
11
  require 'ndr_import/xml/masked_mappings'
12
12
 
13
- XML_OPTIONS = %w[pattern_match_record_xpath xml_record_xpath yield_xml_record].freeze
13
+ XML_OPTIONS = %w[pattern_match_record_xpath xml_file_metadata xml_record_xpath
14
+ yield_xml_record].freeze
14
15
 
15
16
  def self.all_valid_options
16
17
  super - %w[delimiter header_lines footer_lines] + XML_OPTIONS
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ndr_import
3
3
  version: !ruby/object:Gem::Version
4
- version: 11.1.0
4
+ version: 11.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - NCRS Development Team
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-03-07 00:00:00.000000000 Z
11
+ date: 2024-04-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activemodel