ndr_import 10.3.0 → 11.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 877346774d65eccb73f913081d75a48345a92b9e4c11e6f87702c9e0d59ebc3d
4
- data.tar.gz: e59f23137e7568ce5e6eb1e4734b9046aed8f8fa93f392a9f2d82091013c3c04
3
+ metadata.gz: 52ae4b12ab514a7bda584c93b3124fdeab03cea5292424c0e380c92f6e3a3c1a
4
+ data.tar.gz: 4b9dff76aa434bb87e542bcf694d4f99a91b2cd69e2eeaf6730a32b78fe61cc9
5
5
  SHA512:
6
- metadata.gz: 5bd15b2baf53b654a9f2be306d45dd3a509c298427790c139431f9f77df84b14f6506d625620b6781e9d44315fb8ada40307865a059c2ae3e78945293065dc9f
7
- data.tar.gz: 06070cd9f9d5311835a523f8f926c938e0c65561c863d8bc972a1250a278ccf06991013773c31f877173873b600bbee40e6ce9faa6d3da167dd93a4fc11b4965
6
+ metadata.gz: 27e3c4578ab466ae9977727de5b972e8e5bf7e2f9fa62ab53cd60437e4d7232e5da3ce3afbcf6b94dd943b219b051ccc6177b73189e736ecf8d10f582f6f0bc9
7
+ data.tar.gz: b06ef69bba53f56314574ff9749011289180624d0f73f7660537610ba29e3ec01114810bc781ccb8c67440c30f1406a05c56e508299a026a7467bb7968d4cb16
data/CHANGELOG.md CHANGED
@@ -2,6 +2,12 @@
2
2
  =======
3
3
  *no unreleased changes*
4
4
 
5
+ ## 11.0.0 / 2023-10-27
6
+ ### Changed
7
+ * XML enhancements. Breaking change, the enhancements are not backward compatible
8
+ ### Fixed
9
+ * Replace unsupported seven_zip_ruby gem with seven-zip fork
10
+
5
11
  ## 10.3.0 / 2023-09-07
6
12
  ### Added
7
13
  * VCF file support
@@ -12,22 +12,37 @@ module NdrImport
12
12
  include NdrImport::Helpers::File::Xml
13
13
  include NdrImport::Helpers::File::XmlStreaming
14
14
 
15
+ def initialize(*)
16
+ super
17
+
18
+ @pattern_match_xpath = @options['pattern_match_record_xpath']
19
+ end
20
+
15
21
  private
16
22
 
17
23
  # Iterate through the file, yielding each 'xml_record_xpath' element in turn.
18
24
  def rows(&block)
19
25
  return enum_for(:rows) unless block
20
26
 
21
- xpath = @options['xml_record_xpath']
22
-
23
27
  if @options['slurp']
24
- doc = read_xml_file(@filename)
25
- doc.xpath(xpath).each(&block)
28
+ record_elements(read_xml_file(@filename)).each(&block)
29
+ else
30
+ each_node(@filename, xml_record_xpath, @pattern_match_xpath, &block)
31
+ end
32
+ end
33
+
34
+ def xml_record_xpath
35
+ @pattern_match_xpath ? @options['xml_record_xpath'] : "*/#{@options['xml_record_xpath']}"
36
+ end
37
+
38
+ def record_elements(doc)
39
+ if @pattern_match_xpath
40
+ doc.root.children.find_all do |element|
41
+ element.name =~ Regexp.new(@options['xml_record_xpath'])
42
+ end
26
43
  else
27
- each_node(@filename, xpath, &block)
44
+ doc.root.xpath(@options['xml_record_xpath'])
28
45
  end
29
- rescue StandardError => e
30
- raise("#{SafeFile.basename(@filename)} [#{e.class}: #{e.message}]")
31
46
  end
32
47
  end
33
48
  # Not all xml files may want to be registered, so 'xml' is not registered by design.
@@ -33,15 +33,20 @@ module NdrImport
33
33
  # wrapper to hold a representation of each element we descend into:
34
34
  StackItem = Struct.new(:name, :attrs, :empty)
35
35
 
36
- def initialize(xpath)
36
+ def initialize(xpath, pattern_match_xpath)
37
37
  @xpath = xpath
38
+ @pattern_match_xpath = pattern_match_xpath
38
39
  @stack = []
39
40
  @match_depth = nil
40
41
  end
41
42
 
42
43
  # Has this cursor already passed inside a similar node?
44
+ # attribute comparison allows for e.g.:
45
+ # <SameName>
46
+ # <SameName code="N"/>
47
+ # </SameName>
43
48
  def in?(node)
44
- @stack.detect { |item| item.name == node.name }
49
+ @stack.detect { |item| item.name == node.name && item.attrs == node.attributes }
45
50
  end
46
51
 
47
52
  def enter(node)
@@ -85,9 +90,27 @@ module NdrImport
85
90
  def current_stack_match?
86
91
  parent_stack = @stack[0..-2]
87
92
 
88
- return false unless dom_stubs[@stack].at_xpath(@xpath)
93
+ stack_match = if @pattern_match_xpath
94
+ dom_stubs[@stack].root.children.find_all do |node|
95
+ node.name =~ Regexp.new(@xpath)
96
+ end.first
97
+ else
98
+ dom_stubs[@stack].at_xpath(@xpath)
99
+ end
89
100
 
90
- parent_stack.empty? || !dom_stubs[parent_stack].at_xpath(@xpath)
101
+ return false unless stack_match
102
+
103
+ parent_stack.empty? || xpath_not_in_parent_document?(dom_stubs[parent_stack])
104
+ end
105
+
106
+ def xpath_not_in_parent_document?(parent_document)
107
+ if @pattern_match_xpath
108
+ parent_document.root.children.find_all do |node|
109
+ node.name =~ Regexp.new(@xpath)
110
+ end.first.nil?
111
+ else
112
+ !parent_document.at_xpath(@xpath)
113
+ end
91
114
  end
92
115
 
93
116
  # A cached collection of DOM fragments, to represent the structure
@@ -116,13 +139,15 @@ module NdrImport
116
139
  #
117
140
  # In the case of dodgy encoding, may fall back to slurping the
118
141
  # file, but will still use stream parsing for XML.
119
- def each_node(safe_path, xpath, &block)
120
- return enum_for(:each_node, safe_path, xpath) unless block
142
+ #
143
+ # Optionally pattern match the xpath
144
+ def each_node(safe_path, xpath, pattern_match_xpath = nil, &block)
145
+ return enum_for(:each_node, safe_path, xpath, pattern_match_xpath) unless block
121
146
 
122
147
  require 'nokogiri'
123
148
 
124
149
  with_encoding_check(safe_path) do |stream, encoding|
125
- stream_xml_nodes(stream, xpath, encoding, &block)
150
+ stream_xml_nodes(stream, xpath, pattern_match_xpath, encoding, &block)
126
151
  end
127
152
  end
128
153
 
@@ -153,9 +178,9 @@ module NdrImport
153
178
  system("iconv -f UTF-8 #{Shellwords.escape(path)} > /dev/null 2>&1")
154
179
  end
155
180
 
156
- def stream_xml_nodes(io, node_xpath, encoding = nil)
181
+ def stream_xml_nodes(io, node_xpath, pattern_match_xpath, encoding = nil)
157
182
  # Track nesting as the cursor moves through the document:
158
- cursor = Cursor.new(node_xpath)
183
+ cursor = Cursor.new(node_xpath, pattern_match_xpath)
159
184
 
160
185
  # If markup isn't well-formed, try to work around it:
161
186
  options = Nokogiri::XML::ParseOptions::RECOVER
@@ -17,7 +17,7 @@ module NdrImport
17
17
  include UTF8Encoding
18
18
 
19
19
  TABULAR_ONLY_OPTIONS = %w[delimiter last_data_column liberal_parsing tablename_pattern
20
- header_lines footer_lines xml_record_xpath slurp].freeze
20
+ header_lines footer_lines slurp].freeze
21
21
 
22
22
  NON_TABULAR_OPTIONS = %w[capture_end_line capture_start_line start_line_pattern
23
23
  end_line_pattern remove_lines start_in_a_record
@@ -6,13 +6,14 @@ module NdrImport
6
6
  # required to transform a table of data into "records". Particular attention
7
7
  # has been made to use enumerables throughout to help with the transformation
8
8
  # of large quantities of data.
9
+ # rubocop:disable Metrics/ClassLength
9
10
  class Table
10
11
  include NdrImport::Mapper
11
12
 
12
13
  def self.all_valid_options
13
14
  %w[canonical_name delimiter liberal_parsing filename_pattern file_password last_data_column
14
- tablename_pattern header_lines footer_lines format klass columns xml_record_xpath slurp
15
- row_identifier significant_mapped_fields]
15
+ tablename_pattern header_lines footer_lines format klass columns slurp row_identifier
16
+ significant_mapped_fields]
16
17
  end
17
18
 
18
19
  def all_valid_options
@@ -250,4 +251,5 @@ module NdrImport
250
251
  index - 1
251
252
  end
252
253
  end # class Table
254
+ # rubocop:enable Metrics/ClassLength
253
255
  end
@@ -52,14 +52,14 @@ module NdrImport
52
52
  # now at the individual file level, can we find the table mapping?
53
53
  table_mapping = get_table_mapping(filename, nil)
54
54
 
55
- options = {
56
- 'unzip_path' => unzip_path,
57
- 'col_sep' => table_mapping.try(:delimiter),
58
- 'file_password' => table_mapping.try(:file_password),
59
- 'liberal_parsing' => table_mapping.try(:liberal_parsing),
60
- 'xml_record_xpath' => table_mapping.try(:xml_record_xpath),
61
- 'slurp' => table_mapping.try(:slurp)
62
- }
55
+ options = { 'unzip_path' => unzip_path,
56
+ 'col_sep' => table_mapping.try(:delimiter),
57
+ 'file_password' => table_mapping.try(:file_password),
58
+ 'liberal_parsing' => table_mapping.try(:liberal_parsing),
59
+ 'xml_record_xpath' => table_mapping.try(:xml_record_xpath),
60
+ 'slurp' => table_mapping.try(:slurp),
61
+ 'yield_xml_record' => table_mapping.try(:yield_xml_record),
62
+ 'pattern_match_record_xpath' => table_mapping.try(:pattern_match_record_xpath) }
63
63
 
64
64
  tables = NdrImport::File::Registry.tables(filename, table_mapping.try(:format), options)
65
65
  yield_tables_and_their_content(filename, tables, &block)
@@ -6,7 +6,7 @@ module NdrImport
6
6
  # All other Table logic is inherited from `NdrImport::Table`
7
7
  class Table < ::NdrImport::Table
8
8
  def self.all_valid_options
9
- super - %w[delimiter header_lines footer_lines xml_record_xpath]
9
+ super - %w[delimiter header_lines footer_lines]
10
10
  end
11
11
 
12
12
  def header_lines
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
  # This stores the current version of the NdrImport gem
3
3
  module NdrImport
4
- VERSION = '10.3.0'
4
+ VERSION = '11.0.0'
5
5
  end
@@ -0,0 +1,87 @@
1
+ module NdrImport
2
+ module Xml
3
+ # This class generates new XML column mappings where repeating columns/sections have been
4
+ # identified in the xml.
5
+ # This avoids the need for mappings to verbosely define repeating columns/sections
6
+ class ColumnMapping
7
+ attr_accessor :existing_column, :unmapped_node_parts, :klass_increment, :xml_line, :klass,
8
+ :repeating_item, :increment_field_name, :build_new_record, :klass_section_xpath
9
+
10
+ def initialize(existing_column, unmapped_node_parts, klass_increment, xml_line, klass)
11
+ @existing_column = existing_column
12
+ @unmapped_node_parts = unmapped_node_parts
13
+ @klass_increment = klass_increment
14
+ @xml_line = xml_line
15
+ @klass = klass
16
+ @repeating_item = existing_column.dig('xml_cell', 'multiple')
17
+ @increment_field_name = existing_column.dig('xml_cell', 'increment_field_name')
18
+ @build_new_record = existing_column.dig('xml_cell', 'build_new_record')
19
+ @klass_section_xpath = existing_column.dig('xml_cell', 'klass_section')
20
+ end
21
+
22
+ def call
23
+ new_column = existing_column.deep_dup
24
+ new_column['column'] = unmapped_node_parts[:column_name]
25
+ new_column['xml_cell']['relative_path'] = unmapped_node_parts[:column_relative_path]
26
+
27
+ # create unique rawtext names for repeating sections within a record
28
+ apply_new_rawtext_and_mapped_names_to(new_column) if repeating_item
29
+
30
+ return new_column unless incremented_klass_needed?
31
+
32
+ new_column['klass'] = incremented_klass
33
+ new_column
34
+ end
35
+
36
+ private
37
+
38
+ # If a table level klass is defined, there is nothing to increment at the column level.
39
+ # Similarly, not all repeating sections/items require a separate record.
40
+ # No need to create new records for a single occurrence of a repeating section
41
+ def incremented_klass_needed?
42
+ return false if klass.present?
43
+ # Column mapping needs to explicitly flag when additionals should not be made
44
+ return false if build_new_record == false
45
+ return false if xml_line.xpath(klass_section_xpath).one? && repeating_item
46
+
47
+ true
48
+ end
49
+
50
+ def incremented_klass
51
+ if existing_column['klass'].is_a?(Array)
52
+ existing_column['klass'].map do |column_klass|
53
+ column_klass + "##{klass_increment}"
54
+ end
55
+ else
56
+ existing_column['klass'] + "##{klass_increment}"
57
+ end
58
+ end
59
+
60
+ # Append "_1", "_2" etc to repeating rawtext and optionally mapped field names within a
61
+ # single record, so data is not overwritten
62
+ def apply_new_rawtext_and_mapped_names_to(new_column)
63
+ existing_rawtext = existing_column['rawtext_name'] || existing_column['column']
64
+ column_name_increment = new_column['column'].scan(/\[(\d+)\]/)
65
+ relative_path_increment = new_column.dig('xml_cell', 'relative_path').scan(/\[(\d+)\]/)
66
+
67
+ # Find all the increments (e.g. [1], [2]) from the new column and use their sum
68
+ # as the rawtext and column name increment
69
+ increment = (column_name_increment + relative_path_increment).flatten.map(&:to_i).sum
70
+ new_column['rawtext_name'] = existing_rawtext + "_#{increment}" unless increment.zero?
71
+
72
+ return unless !increment.zero? && increment_field_name
73
+
74
+ new_column['mappings'] = incremented_mappings_for(new_column, increment)
75
+ end
76
+
77
+ # Increment the mapped `field` names
78
+ def incremented_mappings_for(new_column, increment)
79
+ new_column['mappings'].map do |mapping|
80
+ mapping['field'] = "#{mapping['field']}_#{increment}"
81
+
82
+ mapping
83
+ end
84
+ end
85
+ end
86
+ end
87
+ end
@@ -0,0 +1,74 @@
1
+ module NdrImport
2
+ module Xml
3
+ # This class applies a do_not_capture mask to those mappings that do not relate to each klass.
4
+ # Overriding the NdrImport::Table method to avoid memoizing. This is by design, as column mappings
5
+ # can change if new mappings are added on the fly where repeating sections are present
6
+ class MaskedMappings
7
+ attr_accessor :klass, :augmented_columns
8
+
9
+ def initialize(klass, augmented_columns)
10
+ @klass = klass
11
+ @augmented_columns = augmented_columns
12
+ end
13
+
14
+ def call
15
+ return { klass => augmented_columns } if klass.present?
16
+
17
+ masked_mappings = column_level_klass_masked_mappings
18
+
19
+ augmented_masked_mappings = masked_mappings
20
+ # Remove any masked klasses where additional column mappings
21
+ # have been added for repeated sections
22
+ # e.g. SomeTestKlass column mappings are not needed if SomeTestKlass#1
23
+ # have been added
24
+ masked_mappings.each do |masked_key, columns|
25
+ # There may be occasions where e.g. SomeTestKlass should be kept,
26
+ # This can be flagged in one of the klass's column mappings
27
+ next if columns.any? { |column| column.dig('xml_cell', 'keep_klass') }
28
+
29
+ if masked_mappings.keys.any? { |key| key =~ /\A#{masked_key}#\d+\z/ }
30
+ augmented_masked_mappings.delete(masked_key)
31
+ end
32
+ end
33
+
34
+ augmented_masked_mappings
35
+ end
36
+
37
+ private
38
+
39
+ # This method duplicates the mappings and applies a do_not_capture mask to those that do not
40
+ # relate to this klass, returning the masked mappings
41
+ def mask_mappings_by_klass(klass)
42
+ augmented_columns.deep_dup.map do |mapping|
43
+ Array(mapping['klass']).flatten.include?(klass) ? mapping : { 'do_not_capture' => true }
44
+ end
45
+ end
46
+
47
+ def column_level_klass_masked_mappings
48
+ ensure_mappings_define_klass
49
+
50
+ # Loop through each klass
51
+ masked_mappings = {}
52
+ augmented_columns.pluck('klass').flatten.compact.uniq.each do |klass|
53
+ # Do not capture fields that relate to other klasses
54
+ masked_mappings[klass] = mask_mappings_by_klass(klass)
55
+ end
56
+ masked_mappings
57
+ end
58
+
59
+ # This method ensures that every column mapping defines a klass (unless it is a column that
60
+ # we do not capture). It is only used where a table level klass is not defined.
61
+ def ensure_mappings_define_klass
62
+ klassless_mappings = augmented_columns.
63
+ select { |mapping| mapping.nil? || mapping['klass'].nil? }.
64
+ reject { |mapping| mapping['do_not_capture'] }.
65
+ map { |mapping| mapping['column'] || mapping['standard_mapping'] }
66
+
67
+ return if klassless_mappings.empty?
68
+
69
+ # All column mappings for the single item file require a klass definition.
70
+ raise "Missing klass for column(s): #{klassless_mappings.to_sentence}"
71
+ end
72
+ end
73
+ end
74
+ end
@@ -7,10 +7,17 @@ module NdrImport
7
7
  # attention has been made to use enumerables throughout to help with the
8
8
  # transformation of large quantities of data.
9
9
  class Table < ::NdrImport::Table
10
+ require 'ndr_import/xml/column_mapping'
11
+ require 'ndr_import/xml/masked_mappings'
12
+
13
+ XML_OPTIONS = %w[pattern_match_record_xpath xml_record_xpath yield_xml_record].freeze
14
+
10
15
  def self.all_valid_options
11
- super - %w[delimiter header_lines footer_lines]
16
+ super - %w[delimiter header_lines footer_lines] + XML_OPTIONS
12
17
  end
13
18
 
19
+ attr_reader(*XML_OPTIONS)
20
+
14
21
  def header_lines
15
22
  0
16
23
  end
@@ -24,26 +31,130 @@ module NdrImport
24
31
  # and fields for each mapped klass.
25
32
  def transform_line(line, index)
26
33
  return enum_for(:transform_line, line, index) unless block_given?
27
-
28
34
  raise 'Not an Nokogiri::XML::Element!' unless line.is_a? Nokogiri::XML::Element
29
35
 
30
- validate_column_mappings(line)
36
+ augmented_masked_mappings = augment_and_validate_column_mappings_for(line)
31
37
 
32
- xml_line = column_xpaths.map { |column_xpath| line.xpath(column_xpath).inner_text }
38
+ xml_line = xml_line_from(line)
33
39
 
34
- masked_mappings.each do |klass, klass_mappings|
40
+ records_from_xml_line = []
41
+ augmented_masked_mappings.each do |klass, klass_mappings|
35
42
  fields = mapped_line(xml_line, klass_mappings)
43
+
36
44
  next if fields[:skip].to_s == 'true'.freeze
37
- yield(klass, fields, index)
45
+
46
+ if yield_xml_record
47
+ records_from_xml_line << [klass, fields, index]
48
+ else
49
+ yield(klass, fields, index)
50
+ end
38
51
  end
52
+ yield(records_from_xml_line.compact) if yield_xml_record
39
53
  end
40
54
 
41
55
  private
42
56
 
57
+ def augment_and_validate_column_mappings_for(line)
58
+ augment_column_mappings_for(line)
59
+ validate_column_mappings(line)
60
+
61
+ NdrImport::Xml::MaskedMappings.new(@klass, @augmented_columns.deep_dup).call
62
+ end
63
+
64
+ # Add missing column mappings (and column_xpaths) where
65
+ # repeating sections / data items appear
66
+ def augment_column_mappings_for(line)
67
+ # Start with a fresh set of @augmented_columns for each line, adding new mappings as
68
+ # required for each `line`
69
+ @augmented_columns = @columns.deep_dup
70
+ @augmented_column_xpaths = column_xpaths.deep_dup
71
+
72
+ unmapped_xpaths(line).each do |unmapped_xpath|
73
+ existing_column = find_existing_column_for(unmapped_xpath.dup)
74
+ next unless existing_column
75
+
76
+ unmapped_xpath_hash = labelled_xpath_components_from(unmapped_xpath)
77
+ klass_increment_match = unmapped_xpath.match(/\[(\d+)\]/)
78
+ raise "could not identify klass for #{unmapped_xpath}" unless klass_increment_match
79
+
80
+ new_column = NdrImport::Xml::ColumnMapping.new(existing_column, unmapped_xpath_hash,
81
+ klass_increment_match[1], line,
82
+ @klass).call
83
+ @augmented_columns << new_column
84
+ @augmented_column_xpaths << build_xpath_from(new_column)
85
+ end
86
+ end
87
+
88
+ def xml_line_from(line)
89
+ @augmented_column_xpaths.map do |column_xpath|
90
+ # Augmenting the column mappings should account for repeating sections/items
91
+ # TODO: Is this needed now that we removed "duplicated" klass mappings?
92
+ line.xpath(column_xpath).count > 1 ? '' : line.xpath(column_xpath).inner_text
93
+ end
94
+ end
95
+
96
+ def find_existing_column_for(unmapped_xpath)
97
+ # Remove any e.g. [2] which will be present on repeating sections
98
+ unmapped_xpath.gsub!(/\[\d+\]/, '')
99
+ unmapped_xpath_hash = labelled_xpath_components_from(unmapped_xpath)
100
+ columns.detect do |column|
101
+ column['column'] == unmapped_xpath_hash[:column_name] &&
102
+ column.dig('xml_cell', 'relative_path') == unmapped_xpath_hash[:column_relative_path] &&
103
+ column.dig('xml_cell', 'attribute') == unmapped_xpath_hash[:column_attribute]
104
+ end
105
+ end
106
+
107
+ # Returns a Hash containing labelled components for the given `unmapped_xpath`
108
+ # For example, an `unmapped_xpath` of "Record/Demographics/Sex/@code" would result in:
109
+ # { column_attribute: '@code',
110
+ # column_name: 'Sex',
111
+ # column_relative_path: 'Record/Demographics' }
112
+ def labelled_xpath_components_from(unmapped_xpath)
113
+ xpath_components = unmapped_xpath.split('/')
114
+ column_attribute = new_column_attribute_from(xpath_components)
115
+ # I dislike the `EnforcedShorthandSyntax`, code is less readable
116
+ # rubocop:disable Style::HashSyntax
117
+ { column_attribute: column_attribute,
118
+ column_name: new_column_name_from(xpath_components, column_attribute),
119
+ column_relative_path: new_relative_path_from(xpath_components, column_attribute) }
120
+ # rubocop:enable Style::HashSyntax
121
+ end
122
+
123
+ def new_column_attribute_from(xpath_components)
124
+ xpath_components.last.starts_with?('@') ? xpath_components.last[1...] : nil
125
+ end
126
+
127
+ def new_column_name_from(xpath_components, column_attribute)
128
+ return xpath_components[-2] if column_attribute.present?
129
+
130
+ xpath_components.last
131
+ end
132
+
133
+ # xpaths can be e.g. Record/Demographics/Sex/@code or Record/Demographics/Surname
134
+ # `xpath_components` is an array of the xpath's components, for example:
135
+ # Record/Demographics/Sex/@code => ['Record', 'Demographics', 'Sex', '@code']
136
+ #
137
+ # For the relative path, we want to return Record/Demographics.
138
+ # The upper_limit removes the "field name" (Sex or Surname here) and optionally the
139
+ # attribute (@code here) if present, from `xpath_components`.
140
+ # The resulting array is joined back together to form the relative path.
141
+ def new_relative_path_from(xpath_components, column_attribute)
142
+ upper_limit = column_attribute.present? ? -3 : -2
143
+ xpath_components.count > 1 ? xpath_components[0..upper_limit].join('/') : nil
144
+ end
145
+
43
146
  # Ensure every leaf is accounted for in the column mappings
44
147
  def validate_column_mappings(line)
45
- missing_nodes = mappable_xpaths_from(line) - column_xpaths
46
- raise "Unmapped data! #{missing_nodes}" unless missing_nodes.empty?
148
+ missing_xpaths = unmapped_xpaths(line)
149
+ return if missing_xpaths.none?
150
+
151
+ raise(NdrImport::Xml::UnmappedXpathError, missing_xpaths.to_sentence)
152
+ end
153
+
154
+ # Not memoized by design: we want to re-calculate unmapped xpaths after
155
+ # `@augmented_column_xpaths` have been augmented for each `line`
156
+ def unmapped_xpaths(line)
157
+ mappable_xpaths_from(line) - (@augmented_column_xpaths || column_xpaths)
47
158
  end
48
159
 
49
160
  def column_name_from(column)
@@ -58,9 +169,12 @@ module NdrImport
58
169
  xpaths = []
59
170
 
60
171
  line.xpath('.//*[not(child::*)]').each do |node|
61
- xpath = node.path.sub(line.path + '/', '')
62
- xpaths << xpath
63
- node.attributes.each_key { |key| xpaths << "#{xpath}/@#{key}" }
172
+ xpath = node.path.sub("#{line.path}/", '')
173
+ if node.attributes.any?
174
+ node.attributes.each_key { |key| xpaths << "#{xpath}/@#{key}" }
175
+ else
176
+ xpaths << xpath
177
+ end
64
178
  end
65
179
  xpaths
66
180
  end
@@ -0,0 +1,15 @@
1
+ module NdrImport
2
+ module Xml
3
+ # Raised if an unmapped xpath is identified
4
+ class UnmappedXpathError < StandardError
5
+ attr_reader :missing_xpaths
6
+
7
+ def initialize(missing_xpaths)
8
+ @missing_xpaths = missing_xpaths
9
+ message = "Unmapped xpath(s): #{missing_xpaths}"
10
+
11
+ super(message)
12
+ end
13
+ end
14
+ end
15
+ end
data/lib/ndr_import.rb CHANGED
@@ -13,6 +13,7 @@ require 'ndr_import/avro/table'
13
13
  require 'ndr_import/vcf/table'
14
14
  require 'ndr_import/unmapped_data_error'
15
15
  require 'ndr_import/acroform_reader'
16
+ require 'ndr_import/xml/unmapped_xpath_error'
16
17
 
17
18
  module NdrImport
18
19
  def self.root
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ndr_import
3
3
  version: !ruby/object:Gem::Version
4
- version: 10.3.0
4
+ version: 11.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - NCRS Development Team
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-09-07 00:00:00.000000000 Z
11
+ date: 2023-10-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activemodel
@@ -211,19 +211,19 @@ dependencies:
211
211
  - !ruby/object:Gem::Version
212
212
  version: '0'
213
213
  - !ruby/object:Gem::Dependency
214
- name: seven_zip_ruby
214
+ name: seven-zip
215
215
  requirement: !ruby/object:Gem::Requirement
216
216
  requirements:
217
217
  - - "~>"
218
218
  - !ruby/object:Gem::Version
219
- version: '1.3'
219
+ version: '1.4'
220
220
  type: :runtime
221
221
  prerelease: false
222
222
  version_requirements: !ruby/object:Gem::Requirement
223
223
  requirements:
224
224
  - - "~>"
225
225
  - !ruby/object:Gem::Version
226
- version: '1.3'
226
+ version: '1.4'
227
227
  - !ruby/object:Gem::Dependency
228
228
  name: spreadsheet
229
229
  requirement: !ruby/object:Gem::Requirement
@@ -441,8 +441,11 @@ files:
441
441
  - lib/ndr_import/unmapped_data_error.rb
442
442
  - lib/ndr_import/vcf/table.rb
443
443
  - lib/ndr_import/version.rb
444
+ - lib/ndr_import/xml/column_mapping.rb
444
445
  - lib/ndr_import/xml/control_char_escaper.rb
446
+ - lib/ndr_import/xml/masked_mappings.rb
445
447
  - lib/ndr_import/xml/table.rb
448
+ - lib/ndr_import/xml/unmapped_xpath_error.rb
446
449
  homepage: https://github.com/NHSDigital/ndr_import
447
450
  licenses:
448
451
  - MIT