ndr_import 10.3.0 → 11.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 877346774d65eccb73f913081d75a48345a92b9e4c11e6f87702c9e0d59ebc3d
4
- data.tar.gz: e59f23137e7568ce5e6eb1e4734b9046aed8f8fa93f392a9f2d82091013c3c04
3
+ metadata.gz: 52ae4b12ab514a7bda584c93b3124fdeab03cea5292424c0e380c92f6e3a3c1a
4
+ data.tar.gz: 4b9dff76aa434bb87e542bcf694d4f99a91b2cd69e2eeaf6730a32b78fe61cc9
5
5
  SHA512:
6
- metadata.gz: 5bd15b2baf53b654a9f2be306d45dd3a509c298427790c139431f9f77df84b14f6506d625620b6781e9d44315fb8ada40307865a059c2ae3e78945293065dc9f
7
- data.tar.gz: 06070cd9f9d5311835a523f8f926c938e0c65561c863d8bc972a1250a278ccf06991013773c31f877173873b600bbee40e6ce9faa6d3da167dd93a4fc11b4965
6
+ metadata.gz: 27e3c4578ab466ae9977727de5b972e8e5bf7e2f9fa62ab53cd60437e4d7232e5da3ce3afbcf6b94dd943b219b051ccc6177b73189e736ecf8d10f582f6f0bc9
7
+ data.tar.gz: b06ef69bba53f56314574ff9749011289180624d0f73f7660537610ba29e3ec01114810bc781ccb8c67440c30f1406a05c56e508299a026a7467bb7968d4cb16
data/CHANGELOG.md CHANGED
@@ -2,6 +2,12 @@
2
2
  =======
3
3
  *no unreleased changes*
4
4
 
5
+ ## 11.0.0 / 2023-10-27
6
+ ### Changed
7
+ * XML enhancements. Breaking change, the enhancements are not backward compatible
8
+ ### Fixed
9
+ * Replace unsupported seven_zip_ruby gem with seven-zip fork
10
+
5
11
  ## 10.3.0 / 2023-09-07
6
12
  ### Added
7
13
  * VCF file support
@@ -12,22 +12,37 @@ module NdrImport
12
12
  include NdrImport::Helpers::File::Xml
13
13
  include NdrImport::Helpers::File::XmlStreaming
14
14
 
15
+ def initialize(*)
16
+ super
17
+
18
+ @pattern_match_xpath = @options['pattern_match_record_xpath']
19
+ end
20
+
15
21
  private
16
22
 
17
23
  # Iterate through the file, yielding each 'xml_record_xpath' element in turn.
18
24
  def rows(&block)
19
25
  return enum_for(:rows) unless block
20
26
 
21
- xpath = @options['xml_record_xpath']
22
-
23
27
  if @options['slurp']
24
- doc = read_xml_file(@filename)
25
- doc.xpath(xpath).each(&block)
28
+ record_elements(read_xml_file(@filename)).each(&block)
29
+ else
30
+ each_node(@filename, xml_record_xpath, @pattern_match_xpath, &block)
31
+ end
32
+ end
33
+
34
+ def xml_record_xpath
35
+ @pattern_match_xpath ? @options['xml_record_xpath'] : "*/#{@options['xml_record_xpath']}"
36
+ end
37
+
38
+ def record_elements(doc)
39
+ if @pattern_match_xpath
40
+ doc.root.children.find_all do |element|
41
+ element.name =~ Regexp.new(@options['xml_record_xpath'])
42
+ end
26
43
  else
27
- each_node(@filename, xpath, &block)
44
+ doc.root.xpath(@options['xml_record_xpath'])
28
45
  end
29
- rescue StandardError => e
30
- raise("#{SafeFile.basename(@filename)} [#{e.class}: #{e.message}]")
31
46
  end
32
47
  end
33
48
  # Not all xml files may want to be registered, so 'xml' is not registered by design.
@@ -33,15 +33,20 @@ module NdrImport
33
33
  # wrapper to hold a representation of each element we descend into:
34
34
  StackItem = Struct.new(:name, :attrs, :empty)
35
35
 
36
- def initialize(xpath)
36
+ def initialize(xpath, pattern_match_xpath)
37
37
  @xpath = xpath
38
+ @pattern_match_xpath = pattern_match_xpath
38
39
  @stack = []
39
40
  @match_depth = nil
40
41
  end
41
42
 
42
43
  # Has this cursor already passed inside a similar node?
44
+ # attribute comparison allows for e.g.:
45
+ # <SameName>
46
+ # <SameName code="N"/>
47
+ # </SameName>
43
48
  def in?(node)
44
- @stack.detect { |item| item.name == node.name }
49
+ @stack.detect { |item| item.name == node.name && item.attrs == node.attributes }
45
50
  end
46
51
 
47
52
  def enter(node)
@@ -85,9 +90,27 @@ module NdrImport
85
90
  def current_stack_match?
86
91
  parent_stack = @stack[0..-2]
87
92
 
88
- return false unless dom_stubs[@stack].at_xpath(@xpath)
93
+ stack_match = if @pattern_match_xpath
94
+ dom_stubs[@stack].root.children.find_all do |node|
95
+ node.name =~ Regexp.new(@xpath)
96
+ end.first
97
+ else
98
+ dom_stubs[@stack].at_xpath(@xpath)
99
+ end
89
100
 
90
- parent_stack.empty? || !dom_stubs[parent_stack].at_xpath(@xpath)
101
+ return false unless stack_match
102
+
103
+ parent_stack.empty? || xpath_not_in_parent_document?(dom_stubs[parent_stack])
104
+ end
105
+
106
+ def xpath_not_in_parent_document?(parent_document)
107
+ if @pattern_match_xpath
108
+ parent_document.root.children.find_all do |node|
109
+ node.name =~ Regexp.new(@xpath)
110
+ end.first.nil?
111
+ else
112
+ !parent_document.at_xpath(@xpath)
113
+ end
91
114
  end
92
115
 
93
116
  # A cached collection of DOM fragments, to represent the structure
@@ -116,13 +139,15 @@ module NdrImport
116
139
  #
117
140
  # In the case of dodgy encoding, may fall back to slurping the
118
141
  # file, but will still use stream parsing for XML.
119
- def each_node(safe_path, xpath, &block)
120
- return enum_for(:each_node, safe_path, xpath) unless block
142
+ #
143
+ # Optionally pattern match the xpath
144
+ def each_node(safe_path, xpath, pattern_match_xpath = nil, &block)
145
+ return enum_for(:each_node, safe_path, xpath, pattern_match_xpath) unless block
121
146
 
122
147
  require 'nokogiri'
123
148
 
124
149
  with_encoding_check(safe_path) do |stream, encoding|
125
- stream_xml_nodes(stream, xpath, encoding, &block)
150
+ stream_xml_nodes(stream, xpath, pattern_match_xpath, encoding, &block)
126
151
  end
127
152
  end
128
153
 
@@ -153,9 +178,9 @@ module NdrImport
153
178
  system("iconv -f UTF-8 #{Shellwords.escape(path)} > /dev/null 2>&1")
154
179
  end
155
180
 
156
- def stream_xml_nodes(io, node_xpath, encoding = nil)
181
+ def stream_xml_nodes(io, node_xpath, pattern_match_xpath, encoding = nil)
157
182
  # Track nesting as the cursor moves through the document:
158
- cursor = Cursor.new(node_xpath)
183
+ cursor = Cursor.new(node_xpath, pattern_match_xpath)
159
184
 
160
185
  # If markup isn't well-formed, try to work around it:
161
186
  options = Nokogiri::XML::ParseOptions::RECOVER
@@ -17,7 +17,7 @@ module NdrImport
17
17
  include UTF8Encoding
18
18
 
19
19
  TABULAR_ONLY_OPTIONS = %w[delimiter last_data_column liberal_parsing tablename_pattern
20
- header_lines footer_lines xml_record_xpath slurp].freeze
20
+ header_lines footer_lines slurp].freeze
21
21
 
22
22
  NON_TABULAR_OPTIONS = %w[capture_end_line capture_start_line start_line_pattern
23
23
  end_line_pattern remove_lines start_in_a_record
@@ -6,13 +6,14 @@ module NdrImport
6
6
  # required to transform a table of data into "records". Particular attention
7
7
  # has been made to use enumerables throughout to help with the transformation
8
8
  # of large quantities of data.
9
+ # rubocop:disable Metrics/ClassLength
9
10
  class Table
10
11
  include NdrImport::Mapper
11
12
 
12
13
  def self.all_valid_options
13
14
  %w[canonical_name delimiter liberal_parsing filename_pattern file_password last_data_column
14
- tablename_pattern header_lines footer_lines format klass columns xml_record_xpath slurp
15
- row_identifier significant_mapped_fields]
15
+ tablename_pattern header_lines footer_lines format klass columns slurp row_identifier
16
+ significant_mapped_fields]
16
17
  end
17
18
 
18
19
  def all_valid_options
@@ -250,4 +251,5 @@ module NdrImport
250
251
  index - 1
251
252
  end
252
253
  end # class Table
254
+ # rubocop:enable Metrics/ClassLength
253
255
  end
@@ -52,14 +52,14 @@ module NdrImport
52
52
  # now at the individual file level, can we find the table mapping?
53
53
  table_mapping = get_table_mapping(filename, nil)
54
54
 
55
- options = {
56
- 'unzip_path' => unzip_path,
57
- 'col_sep' => table_mapping.try(:delimiter),
58
- 'file_password' => table_mapping.try(:file_password),
59
- 'liberal_parsing' => table_mapping.try(:liberal_parsing),
60
- 'xml_record_xpath' => table_mapping.try(:xml_record_xpath),
61
- 'slurp' => table_mapping.try(:slurp)
62
- }
55
+ options = { 'unzip_path' => unzip_path,
56
+ 'col_sep' => table_mapping.try(:delimiter),
57
+ 'file_password' => table_mapping.try(:file_password),
58
+ 'liberal_parsing' => table_mapping.try(:liberal_parsing),
59
+ 'xml_record_xpath' => table_mapping.try(:xml_record_xpath),
60
+ 'slurp' => table_mapping.try(:slurp),
61
+ 'yield_xml_record' => table_mapping.try(:yield_xml_record),
62
+ 'pattern_match_record_xpath' => table_mapping.try(:pattern_match_record_xpath) }
63
63
 
64
64
  tables = NdrImport::File::Registry.tables(filename, table_mapping.try(:format), options)
65
65
  yield_tables_and_their_content(filename, tables, &block)
@@ -6,7 +6,7 @@ module NdrImport
6
6
  # All other Table logic is inherited from `NdrImport::Table`
7
7
  class Table < ::NdrImport::Table
8
8
  def self.all_valid_options
9
- super - %w[delimiter header_lines footer_lines xml_record_xpath]
9
+ super - %w[delimiter header_lines footer_lines]
10
10
  end
11
11
 
12
12
  def header_lines
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
  # This stores the current version of the NdrImport gem
3
3
  module NdrImport
4
- VERSION = '10.3.0'
4
+ VERSION = '11.0.0'
5
5
  end
@@ -0,0 +1,87 @@
1
+ module NdrImport
2
+ module Xml
3
+ # This class generates new XML column mappings where repeating columns/sections have been
4
+ # identified in the xml.
5
+ # This avoids the need for mappings to verbosely define repeating columns/sections
6
+ class ColumnMapping
7
+ attr_accessor :existing_column, :unmapped_node_parts, :klass_increment, :xml_line, :klass,
8
+ :repeating_item, :increment_field_name, :build_new_record, :klass_section_xpath
9
+
10
+ def initialize(existing_column, unmapped_node_parts, klass_increment, xml_line, klass)
11
+ @existing_column = existing_column
12
+ @unmapped_node_parts = unmapped_node_parts
13
+ @klass_increment = klass_increment
14
+ @xml_line = xml_line
15
+ @klass = klass
16
+ @repeating_item = existing_column.dig('xml_cell', 'multiple')
17
+ @increment_field_name = existing_column.dig('xml_cell', 'increment_field_name')
18
+ @build_new_record = existing_column.dig('xml_cell', 'build_new_record')
19
+ @klass_section_xpath = existing_column.dig('xml_cell', 'klass_section')
20
+ end
21
+
22
+ def call
23
+ new_column = existing_column.deep_dup
24
+ new_column['column'] = unmapped_node_parts[:column_name]
25
+ new_column['xml_cell']['relative_path'] = unmapped_node_parts[:column_relative_path]
26
+
27
+ # create unique rawtext names for repeating sections within a record
28
+ apply_new_rawtext_and_mapped_names_to(new_column) if repeating_item
29
+
30
+ return new_column unless incremented_klass_needed?
31
+
32
+ new_column['klass'] = incremented_klass
33
+ new_column
34
+ end
35
+
36
+ private
37
+
38
+ # If a table level klass is defined, there is nothing to increment at the column level.
39
+ # Similarly, not all repeating sections/items require a separate record.
40
+ # No need to create new records for a single occurrence of a repeating section
41
+ def incremented_klass_needed?
42
+ return false if klass.present?
43
+ # Column mapping needs to explicitly flag when additionals should not be made
44
+ return false if build_new_record == false
45
+ return false if xml_line.xpath(klass_section_xpath).one? && repeating_item
46
+
47
+ true
48
+ end
49
+
50
+ def incremented_klass
51
+ if existing_column['klass'].is_a?(Array)
52
+ existing_column['klass'].map do |column_klass|
53
+ column_klass + "##{klass_increment}"
54
+ end
55
+ else
56
+ existing_column['klass'] + "##{klass_increment}"
57
+ end
58
+ end
59
+
60
+ # Append "_1", "_2" etc to repeating rawtext and optionally mapped field names within a
61
+ # single record, so data is not overwritten
62
+ def apply_new_rawtext_and_mapped_names_to(new_column)
63
+ existing_rawtext = existing_column['rawtext_name'] || existing_column['column']
64
+ column_name_increment = new_column['column'].scan(/\[(\d+)\]/)
65
+ relative_path_increment = new_column.dig('xml_cell', 'relative_path').scan(/\[(\d+)\]/)
66
+
67
+ # Find all the increments (e.g. [1], [2]) from the new column and use their sum
68
+ # as the rawtext and column name increment
69
+ increment = (column_name_increment + relative_path_increment).flatten.map(&:to_i).sum
70
+ new_column['rawtext_name'] = existing_rawtext + "_#{increment}" unless increment.zero?
71
+
72
+ return unless !increment.zero? && increment_field_name
73
+
74
+ new_column['mappings'] = incremented_mappings_for(new_column, increment)
75
+ end
76
+
77
+ # Increment the mapped `field` names
78
+ def incremented_mappings_for(new_column, increment)
79
+ new_column['mappings'].map do |mapping|
80
+ mapping['field'] = "#{mapping['field']}_#{increment}"
81
+
82
+ mapping
83
+ end
84
+ end
85
+ end
86
+ end
87
+ end
@@ -0,0 +1,74 @@
1
+ module NdrImport
2
+ module Xml
3
+ # This class applies a do_not_capture mask to those mappings that do not relate to each klass.
4
+ # Overriding the NdrImport::Table method to avoid memoizing. This is by design, column mappings
5
+ # can change if new mappings are added on the fly where repeating sections are present
6
+ class MaskedMappings
7
+ attr_accessor :klass, :augmented_columns
8
+
9
+ def initialize(klass, augmented_columns)
10
+ @klass = klass
11
+ @augmented_columns = augmented_columns
12
+ end
13
+
14
+ def call
15
+ return { klass => augmented_columns } if klass.present?
16
+
17
+ masked_mappings = column_level_klass_masked_mappings
18
+
19
+ augmented_masked_mappings = masked_mappings
20
+ # Remove any masked klasses where additional columns mappings
21
+ # have been added for repeated sections
22
+ # e.g. SomeTestKlass column mappings are not needed if SomeTestKlass#1
23
+ # have been added
24
+ masked_mappings.each do |masked_key, columns|
25
+ # There may be occasions where the e.g. SomeTestKlass should be kept,
26
+ # This can be flagged in one of the klass's column mappings
27
+ next if columns.any? { |column| column.dig('xml_cell', 'keep_klass') }
28
+
29
+ if masked_mappings.keys.any? { |key| key =~ /\A#{masked_key}#\d+\z/ }
30
+ augmented_masked_mappings.delete(masked_key)
31
+ end
32
+ end
33
+
34
+ augmented_masked_mappings
35
+ end
36
+
37
+ private
38
+
39
+ # This method duplicates the mappings and applies a do_not_capture mask to those that do not
40
+ # relate to this klass, returning the masked mappings
41
+ def mask_mappings_by_klass(klass)
42
+ augmented_columns.deep_dup.map do |mapping|
43
+ Array(mapping['klass']).flatten.include?(klass) ? mapping : { 'do_not_capture' => true }
44
+ end
45
+ end
46
+
47
+ def column_level_klass_masked_mappings
48
+ ensure_mappings_define_klass
49
+
50
+ # Loop through each klass
51
+ masked_mappings = {}
52
+ augmented_columns.pluck('klass').flatten.compact.uniq.each do |klass|
53
+ # Do not capture fields that relate to other klasses
54
+ masked_mappings[klass] = mask_mappings_by_klass(klass)
55
+ end
56
+ masked_mappings
57
+ end
58
+
59
+ # This method ensures that every column mapping defines a klass (unless it is a column that
60
+ # we do not capture). It is only used where a table level klass is not defined.
61
+ def ensure_mappings_define_klass
62
+ klassless_mappings = augmented_columns.
63
+ select { |mapping| mapping.nil? || mapping['klass'].nil? }.
64
+ reject { |mapping| mapping['do_not_capture'] }.
65
+ map { |mapping| mapping['column'] || mapping['standard_mapping'] }
66
+
67
+ return if klassless_mappings.empty?
68
+
69
+ # All column mappings for the single item file require a klass definition.
70
+ raise "Missing klass for column(s): #{klassless_mappings.to_sentence}"
71
+ end
72
+ end
73
+ end
74
+ end
@@ -7,10 +7,17 @@ module NdrImport
7
7
  # attention has been made to use enumerables throughout to help with the
8
8
  # transformation of large quantities of data.
9
9
  class Table < ::NdrImport::Table
10
+ require 'ndr_import/xml/column_mapping'
11
+ require 'ndr_import/xml/masked_mappings'
12
+
13
+ XML_OPTIONS = %w[pattern_match_record_xpath xml_record_xpath yield_xml_record].freeze
14
+
10
15
  def self.all_valid_options
11
- super - %w[delimiter header_lines footer_lines]
16
+ super - %w[delimiter header_lines footer_lines] + XML_OPTIONS
12
17
  end
13
18
 
19
+ attr_reader(*XML_OPTIONS)
20
+
14
21
  def header_lines
15
22
  0
16
23
  end
@@ -24,26 +31,130 @@ module NdrImport
24
31
  # and fields for each mapped klass.
25
32
  def transform_line(line, index)
26
33
  return enum_for(:transform_line, line, index) unless block_given?
27
-
28
34
  raise 'Not an Nokogiri::XML::Element!' unless line.is_a? Nokogiri::XML::Element
29
35
 
30
- validate_column_mappings(line)
36
+ augmented_masked_mappings = augment_and_validate_column_mappings_for(line)
31
37
 
32
- xml_line = column_xpaths.map { |column_xpath| line.xpath(column_xpath).inner_text }
38
+ xml_line = xml_line_from(line)
33
39
 
34
- masked_mappings.each do |klass, klass_mappings|
40
+ records_from_xml_line = []
41
+ augmented_masked_mappings.each do |klass, klass_mappings|
35
42
  fields = mapped_line(xml_line, klass_mappings)
43
+
36
44
  next if fields[:skip].to_s == 'true'.freeze
37
- yield(klass, fields, index)
45
+
46
+ if yield_xml_record
47
+ records_from_xml_line << [klass, fields, index]
48
+ else
49
+ yield(klass, fields, index)
50
+ end
38
51
  end
52
+ yield(records_from_xml_line.compact) if yield_xml_record
39
53
  end
40
54
 
41
55
  private
42
56
 
57
+ def augment_and_validate_column_mappings_for(line)
58
+ augment_column_mappings_for(line)
59
+ validate_column_mappings(line)
60
+
61
+ NdrImport::Xml::MaskedMappings.new(@klass, @augmented_columns.deep_dup).call
62
+ end
63
+
64
+ # Add missing column mappings (and column_xpaths) where
65
+ # repeating sections / data items appear
66
+ def augment_column_mappings_for(line)
67
+ # Start with a fresh set of @augmented_columns for each line, adding new mappings as
68
+ # required for each `line`
69
+ @augmented_columns = @columns.deep_dup
70
+ @augmented_column_xpaths = column_xpaths.deep_dup
71
+
72
+ unmapped_xpaths(line).each do |unmapped_xpath|
73
+ existing_column = find_existing_column_for(unmapped_xpath.dup)
74
+ next unless existing_column
75
+
76
+ unmapped_xpath_hash = labelled_xpath_components_from(unmapped_xpath)
77
+ klass_increment_match = unmapped_xpath.match(/\[(\d+)\]/)
78
+ raise "could not identify klass for #{unmapped_xpath}" unless klass_increment_match
79
+
80
+ new_column = NdrImport::Xml::ColumnMapping.new(existing_column, unmapped_xpath_hash,
81
+ klass_increment_match[1], line,
82
+ @klass).call
83
+ @augmented_columns << new_column
84
+ @augmented_column_xpaths << build_xpath_from(new_column)
85
+ end
86
+ end
87
+
88
+ def xml_line_from(line)
89
+ @augmented_column_xpaths.map do |column_xpath|
90
+ # Augmenting the column mappings should account for repeating sections/items
91
+ # TODO: Is this needed now that we removed "duplicated" klass mappings?
92
+ line.xpath(column_xpath).count > 1 ? '' : line.xpath(column_xpath).inner_text
93
+ end
94
+ end
95
+
96
+ def find_existing_column_for(unmapped_xpath)
97
+ # Remove any e.g. [2] which will be present on repeating sections
98
+ unmapped_xpath.gsub!(/\[\d+\]/, '')
99
+ unmapped_xpath_hash = labelled_xpath_components_from(unmapped_xpath)
100
+ columns.detect do |column|
101
+ column['column'] == unmapped_xpath_hash[:column_name] &&
102
+ column.dig('xml_cell', 'relative_path') == unmapped_xpath_hash[:column_relative_path] &&
103
+ column.dig('xml_cell', 'attribute') == unmapped_xpath_hash[:column_attribute]
104
+ end
105
+ end
106
+
107
+ # Returns a Hash containing labelled components for the given `unmapped_xpath`
108
+ # For example, an `unmapped_xpath` of "Record/Demographics/Sex/@code" would result in:
109
+ # { column_attribute: '@code',
110
+ # column_name: 'Sex',
111
+ # column_relative_path: 'Record/Demographics' }
112
+ def labelled_xpath_components_from(unmapped_xpath)
113
+ xpath_components = unmapped_xpath.split('/')
114
+ column_attribute = new_column_attribute_from(xpath_components)
115
+ # I dislike the `EnforcedShorthandSyntax`, code is less readable
116
+ # rubocop:disable Style::HashSyntax
117
+ { column_attribute: column_attribute,
118
+ column_name: new_column_name_from(xpath_components, column_attribute),
119
+ column_relative_path: new_relative_path_from(xpath_components, column_attribute) }
120
+ # rubocop:enable Style::HashSyntax
121
+ end
122
+
123
+ def new_column_attribute_from(xpath_components)
124
+ xpath_components.last.starts_with?('@') ? xpath_components.last[1...] : nil
125
+ end
126
+
127
+ def new_column_name_from(xpath_components, column_attribute)
128
+ return xpath_components[-2] if column_attribute.present?
129
+
130
+ xpath_components.last
131
+ end
132
+
133
+ # xpaths can be e.g. Record/Demographics/Sex/@code or Record/Demographics/Surname
134
+ # `xpath_components` is an array of the xpath's components, for example:
135
+ # Record/Demographics/Sex/@code => ['Record', 'Demographics', 'Sex', '@code']
136
+ #
137
+ # For the relative path, we want to return Record/Demographics.
138
+ # The upper_limit removes the "field name" (Sex or Surname here) and optionally the
139
+ # attribute (@code here) if present, from `xpath_components`.
140
+ # The resulting array is joined back together to form the relative path.
141
+ def new_relative_path_from(xpath_components, column_attribute)
142
+ upper_limit = column_attribute.present? ? -3 : -2
143
+ xpath_components.count > 1 ? xpath_components[0..upper_limit].join('/') : nil
144
+ end
145
+
43
146
  # Ensure every leaf is accounted for in the column mappings
44
147
  def validate_column_mappings(line)
45
- missing_nodes = mappable_xpaths_from(line) - column_xpaths
46
- raise "Unmapped data! #{missing_nodes}" unless missing_nodes.empty?
148
+ missing_xpaths = unmapped_xpaths(line)
149
+ return if missing_xpaths.none?
150
+
151
+ raise(NdrImport::Xml::UnmappedXpathError, missing_xpaths.to_sentence)
152
+ end
153
+
154
+ # Not memoized by design, we want to re-calculate unmapped xpaths after
155
+ # `@augmented_column_xpaths` have been augmented for each `line`
156
+ def unmapped_xpaths(line)
157
+ mappable_xpaths_from(line) - (@augmented_column_xpaths || column_xpaths)
47
158
  end
48
159
 
49
160
  def column_name_from(column)
@@ -58,9 +169,12 @@ module NdrImport
58
169
  xpaths = []
59
170
 
60
171
  line.xpath('.//*[not(child::*)]').each do |node|
61
- xpath = node.path.sub(line.path + '/', '')
62
- xpaths << xpath
63
- node.attributes.each_key { |key| xpaths << "#{xpath}/@#{key}" }
172
+ xpath = node.path.sub("#{line.path}/", '')
173
+ if node.attributes.any?
174
+ node.attributes.each_key { |key| xpaths << "#{xpath}/@#{key}" }
175
+ else
176
+ xpaths << xpath
177
+ end
64
178
  end
65
179
  xpaths
66
180
  end
@@ -0,0 +1,15 @@
1
+ module NdrImport
2
+ module Xml
3
+ # Raised if an unmapped xpath is identified
4
+ class UnmappedXpathError < StandardError
5
+ attr_reader :missing_xpaths
6
+
7
+ def initialize(missing_xpaths)
8
+ @missing_xpaths = missing_xpaths
9
+ message = "Unmapped xpath(s): #{missing_xpaths}"
10
+
11
+ super(message)
12
+ end
13
+ end
14
+ end
15
+ end
data/lib/ndr_import.rb CHANGED
@@ -13,6 +13,7 @@ require 'ndr_import/avro/table'
13
13
  require 'ndr_import/vcf/table'
14
14
  require 'ndr_import/unmapped_data_error'
15
15
  require 'ndr_import/acroform_reader'
16
+ require 'ndr_import/xml/unmapped_xpath_error'
16
17
 
17
18
  module NdrImport
18
19
  def self.root
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ndr_import
3
3
  version: !ruby/object:Gem::Version
4
- version: 10.3.0
4
+ version: 11.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - NCRS Development Team
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-09-07 00:00:00.000000000 Z
11
+ date: 2023-10-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activemodel
@@ -211,19 +211,19 @@ dependencies:
211
211
  - !ruby/object:Gem::Version
212
212
  version: '0'
213
213
  - !ruby/object:Gem::Dependency
214
- name: seven_zip_ruby
214
+ name: seven-zip
215
215
  requirement: !ruby/object:Gem::Requirement
216
216
  requirements:
217
217
  - - "~>"
218
218
  - !ruby/object:Gem::Version
219
- version: '1.3'
219
+ version: '1.4'
220
220
  type: :runtime
221
221
  prerelease: false
222
222
  version_requirements: !ruby/object:Gem::Requirement
223
223
  requirements:
224
224
  - - "~>"
225
225
  - !ruby/object:Gem::Version
226
- version: '1.3'
226
+ version: '1.4'
227
227
  - !ruby/object:Gem::Dependency
228
228
  name: spreadsheet
229
229
  requirement: !ruby/object:Gem::Requirement
@@ -441,8 +441,11 @@ files:
441
441
  - lib/ndr_import/unmapped_data_error.rb
442
442
  - lib/ndr_import/vcf/table.rb
443
443
  - lib/ndr_import/version.rb
444
+ - lib/ndr_import/xml/column_mapping.rb
444
445
  - lib/ndr_import/xml/control_char_escaper.rb
446
+ - lib/ndr_import/xml/masked_mappings.rb
445
447
  - lib/ndr_import/xml/table.rb
448
+ - lib/ndr_import/xml/unmapped_xpath_error.rb
446
449
  homepage: https://github.com/NHSDigital/ndr_import
447
450
  licenses:
448
451
  - MIT