ndr_import 6.2.0 → 6.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 33380c2e5d1abc43cb56a040c8b6255ec6315bb9860506c173dcd573df777f11
4
- data.tar.gz: fe9a51d153f9f64cdeaf872198f06887ba0c0becf9877e9a0f6cbb511160ae10
3
+ metadata.gz: 9ae1ff625cdd7352f4b3306f9a49905b2563557b487762611dfd0391735177ee
4
+ data.tar.gz: a7bdc413db721c0f813b9fd7ef862beff3cf6a9a76f042eef15d141b2526f5a7
5
5
  SHA512:
6
- metadata.gz: e11257edc7d89f30f05943c9eb992f7c3660d66a15100092639840768843456e15a8b3babe16194f04387bd890f399fab58238842276e1a8c40209cfc127ddf4
7
- data.tar.gz: 27f159c69d12780b967caa9c5223f8ab91a33fa6cd21b62ebaa3811ab7a8d115d388fc084a8e2501663cbb24d81a97ae4cf090bd377ae4ccd1bbdb617e0ea9ab
6
+ metadata.gz: 3d572905a49329c295a9ab701de99a4f82a9165a399b05ca6f80e48218e2c603fddca14768d4aad2e9139ac07cdbaa6c5ba1f1851527cc50f7f6f4b45ed2392e
7
+ data.tar.gz: 3414e9efa2b16b60e9d532075fca1df095bdcab20516738243fc9ed9ac5b30540b3ad1f9139feaedfd3f5c3182c049e2deb5f5cd878a67360bdefaeffd9c6452
data/code_safety.yml CHANGED
@@ -15,7 +15,7 @@ file safety:
15
15
  ".travis.yml":
16
16
  comments:
17
17
  reviewed_by: josh.pencheon
18
- safe_revision: 661b5b8e71572bba28aa92fb95aa218e3f8444f4
18
+ safe_revision: 71979e0a602ca5a0ce415c194f10add9959f0116
19
19
  CODE_OF_CONDUCT.md:
20
20
  comments:
21
21
  reviewed_by: timgentry
@@ -58,8 +58,8 @@ file safety:
58
58
  safe_revision: c158dc783b84cab31380708e76e3812544cc1c2f
59
59
  lib/ndr_import.rb:
60
60
  comments:
61
- reviewed_by: ollie.tulloch
62
- safe_revision: dfc958d44b6c58355445fa395db08a62213ee709
61
+ reviewed_by: josh.pencheon
62
+ safe_revision: 4ab72f84201c2d5f0147b7dfd041f488f6ff0422
63
63
  lib/ndr_import/csv_library.rb:
64
64
  comments:
65
65
  reviewed_by: josh.pencheon
@@ -67,7 +67,7 @@ file safety:
67
67
  lib/ndr_import/file/all.rb:
68
68
  comments:
69
69
  reviewed_by: josh.pencheon
70
- safe_revision: 1b66cfcbb61dfac93c44889ca0ced5836101c20c
70
+ safe_revision: 4ab72f84201c2d5f0147b7dfd041f488f6ff0422
71
71
  lib/ndr_import/file/base.rb:
72
72
  comments:
73
73
  reviewed_by: timgentry
@@ -75,7 +75,7 @@ file safety:
75
75
  lib/ndr_import/file/delimited.rb:
76
76
  comments:
77
77
  reviewed_by: josh.pencheon
78
- safe_revision: 902f5326d85372d9632de9869d6f56fc02b83a10
78
+ safe_revision: 71979e0a602ca5a0ce415c194f10add9959f0116
79
79
  lib/ndr_import/file/docx.rb:
80
80
  comments:
81
81
  reviewed_by: josh.pencheon
@@ -104,6 +104,10 @@ file safety:
104
104
  comments:
105
105
  reviewed_by: timgentry
106
106
  safe_revision: c88000b32401b5ae9ef7f5878a9b630506ab5a94
107
+ lib/ndr_import/file/xml.rb:
108
+ comments:
109
+ reviewed_by: josh.pencheon
110
+ safe_revision: 4ab72f84201c2d5f0147b7dfd041f488f6ff0422
107
111
  lib/ndr_import/file/zip.rb:
108
112
  comments:
109
113
  reviewed_by: timgentry
@@ -139,7 +143,7 @@ file safety:
139
143
  lib/ndr_import/mapper.rb:
140
144
  comments:
141
145
  reviewed_by: josh.pencheon
142
- safe_revision: fbd4e1bfda8acd9ae026601dc4ecdabca5be4bc7
146
+ safe_revision: 4ab72f84201c2d5f0147b7dfd041f488f6ff0422
143
147
  lib/ndr_import/mapping_error.rb:
144
148
  comments:
145
149
  reviewed_by: timgentry
@@ -163,7 +167,7 @@ file safety:
163
167
  lib/ndr_import/non_tabular/table.rb:
164
168
  comments:
165
169
  reviewed_by: josh.pencheon
166
- safe_revision: 337bf56e39f0f08cf7593b03867bb2da48630663
170
+ safe_revision: 71979e0a602ca5a0ce415c194f10add9959f0116
167
171
  lib/ndr_import/non_tabular_file_helper.rb:
168
172
  comments:
169
173
  reviewed_by: josh.pencheon
@@ -175,19 +179,23 @@ file safety:
175
179
  lib/ndr_import/table.rb:
176
180
  comments: uses File.basename
177
181
  reviewed_by: josh.pencheon
178
- safe_revision: 902f5326d85372d9632de9869d6f56fc02b83a10
182
+ safe_revision: 71979e0a602ca5a0ce415c194f10add9959f0116
179
183
  lib/ndr_import/universal_importer_helper.rb:
180
184
  comments:
181
185
  reviewed_by: josh.pencheon
182
- safe_revision: 902f5326d85372d9632de9869d6f56fc02b83a10
186
+ safe_revision: 71979e0a602ca5a0ce415c194f10add9959f0116
183
187
  lib/ndr_import/version.rb:
184
188
  comments: another check?
185
189
  reviewed_by: josh.pencheon
186
- safe_revision: b85d90430543f238706d569bacd750ee50cb5493
190
+ safe_revision: 0b1ab7c810d0fa46d153238d69627c07f56d1efa
191
+ lib/ndr_import/xml/table.rb:
192
+ comments:
193
+ reviewed_by: josh.pencheon
194
+ safe_revision: 4ab72f84201c2d5f0147b7dfd041f488f6ff0422
187
195
  ndr_import.gemspec:
188
196
  comments:
189
197
  reviewed_by: josh.pencheon
190
- safe_revision: 1b66cfcbb61dfac93c44889ca0ced5836101c20c
198
+ safe_revision: 71979e0a602ca5a0ce415c194f10add9959f0116
191
199
  test/file/base_test.rb:
192
200
  comments:
193
201
  reviewed_by: timgentry
@@ -195,7 +203,7 @@ file safety:
195
203
  test/file/delimited_test.rb:
196
204
  comments:
197
205
  reviewed_by: josh.pencheon
198
- safe_revision: 902f5326d85372d9632de9869d6f56fc02b83a10
206
+ safe_revision: ab9f926a53e84f8ffa826abdd967abee455c89df
199
207
  test/file/docx_test.rb:
200
208
  comments:
201
209
  reviewed_by: josh.pencheon
@@ -211,7 +219,7 @@ file safety:
211
219
  test/file/registry_test.rb:
212
220
  comments:
213
221
  reviewed_by: josh.pencheon
214
- safe_revision: 1b66cfcbb61dfac93c44889ca0ced5836101c20c
222
+ safe_revision: 4ab72f84201c2d5f0147b7dfd041f488f6ff0422
215
223
  test/file/text_test.rb:
216
224
  comments:
217
225
  reviewed_by: timgentry
@@ -220,6 +228,10 @@ file safety:
220
228
  comments:
221
229
  reviewed_by: josh.pencheon
222
230
  safe_revision: 1b66cfcbb61dfac93c44889ca0ced5836101c20c
231
+ test/file/xml_test.rb:
232
+ comments:
233
+ reviewed_by: josh.pencheon
234
+ safe_revision: 4ab72f84201c2d5f0147b7dfd041f488f6ff0422
223
235
  test/file/zip_test.rb:
224
236
  comments:
225
237
  reviewed_by: timgentry
@@ -255,7 +267,7 @@ file safety:
255
267
  test/mapper_test.rb:
256
268
  comments: exposes Mapper internals to test them
257
269
  reviewed_by: josh.pencheon
258
- safe_revision: 3cea78767a73fd0d0ae64a4af3f07389e45349e9
270
+ safe_revision: 4ab72f84201c2d5f0147b7dfd041f488f6ff0422
259
271
  test/non_tabular/mapping_test.rb:
260
272
  comments:
261
273
  reviewed_by: timgentry
@@ -340,10 +352,18 @@ file safety:
340
352
  comments:
341
353
  reviewed_by: josh.pencheon
342
354
  safe_revision: 902f5326d85372d9632de9869d6f56fc02b83a10
355
+ test/resources/malformed.csv:
356
+ comments:
357
+ reviewed_by: josh.pencheon
358
+ safe_revision: 71979e0a602ca5a0ce415c194f10add9959f0116
343
359
  test/resources/malformed.xml:
344
360
  comments:
345
361
  reviewed_by: timgentry
346
362
  safe_revision: 137170d443ea6bcc0afb18f62202c285ae6501eb
363
+ test/resources/malformed_pipe.csv:
364
+ comments:
365
+ reviewed_by: josh.pencheon
366
+ safe_revision: 71979e0a602ca5a0ce415c194f10add9959f0116
347
367
  test/resources/normal.csv:
348
368
  comments:
349
369
  reviewed_by: timgentry
@@ -376,6 +396,10 @@ file safety:
376
396
  comments:
377
397
  reviewed_by: josh.pencheon
378
398
  safe_revision: 902f5326d85372d9632de9869d6f56fc02b83a10
399
+ test/resources/sample.xml:
400
+ comments:
401
+ reviewed_by: josh.pencheon
402
+ safe_revision: 4ab72f84201c2d5f0147b7dfd041f488f6ff0422
379
403
  test/resources/sample_xls.xls:
380
404
  comments:
381
405
  reviewed_by: timgentry
@@ -448,3 +472,7 @@ file safety:
448
472
  comments:
449
473
  reviewed_by: josh.pencheon
450
474
  safe_revision: 902f5326d85372d9632de9869d6f56fc02b83a10
475
+ test/xml/table_test.rb:
476
+ comments:
477
+ reviewed_by: josh.pencheon
478
+ safe_revision: 4ab72f84201c2d5f0147b7dfd041f488f6ff0422
data/lib/ndr_import.rb CHANGED
@@ -6,6 +6,7 @@ require 'ndr_import/non_tabular_file_helper'
6
6
  require 'ndr_import/table'
7
7
  require 'ndr_import/non_tabular/table'
8
8
  require 'ndr_import/fixed_width/table'
9
+ require 'ndr_import/xml/table'
9
10
 
10
11
  module NdrImport
11
12
  def self.root
@@ -6,5 +6,5 @@ require_relative 'pdf'
6
6
  require_relative 'text'
7
7
  require_relative 'unregistered_filetype'
8
8
  require_relative 'word'
9
- # # require_relative 'xml'
9
+ require_relative 'xml'
10
10
  require_relative 'zip'
@@ -52,8 +52,9 @@ module NdrImport
52
52
  begin
53
53
  col_sep = @options['col_sep']
54
54
  options = {
55
- :col_sep => (col_sep || ',').force_encoding(delimiter_encoding),
56
- :mode => access_mode
55
+ col_sep: (col_sep || ',').force_encoding(delimiter_encoding),
56
+ mode: access_mode,
57
+ liberal_parsing: @options['liberal_parsing'].presence
57
58
  }
58
59
 
59
60
  row_num = 0
@@ -76,7 +77,7 @@ module NdrImport
76
77
 
77
78
  # We tried them all, and none worked:
78
79
  unless successful_options
79
- fail "None of the encodings #{supported_encodings.values.inspect} were successful!"
80
+ raise "None of the encodings #{supported_encodings.values.inspect} were successful!"
80
81
  end
81
82
 
82
83
  successful_options
@@ -0,0 +1,29 @@
1
+ require 'ndr_support/safe_file'
2
+ require 'ndr_import/helpers/file/xml'
3
+ require_relative 'registry'
4
+
5
+ module NdrImport
6
+ # This is one of a collection of file handlers that deal with individual formats of data.
7
+ # They can be instantiated directly or via the factory method Registry.tables
8
+ module File
9
+ # This class is an XML file handler that returns a single table.
10
+ class Xml < Base
11
+ include NdrImport::Helpers::File::Xml
12
+
13
+ private
14
+
15
+ # Iterate through the file, yielding each 'xml_record_xpath' element in turn.
16
+ def rows(&block)
17
+ return enum_for(:rows) unless block
18
+
19
+ doc = read_xml_file(@filename)
20
+
21
+ doc.xpath(@options['xml_record_xpath']).each(&block)
22
+ rescue StandardError => e
23
+ raise("#{SafeFile.basename(@filename)} [#{e.class}: #{e.message}]")
24
+ end
25
+ end
26
+ # Not all xml files may want to be registered, so 'xml' is not registered by design.
27
+ Registry.register(Xml, 'xml_table')
28
+ end
29
+ end
@@ -231,13 +231,14 @@ module NdrImport::Mapper
231
231
  #
232
232
  # would base64 decode a word document and then 'decode' the word document into plain text
233
233
  def decode_raw_value(raw_value, encoding)
234
+ return raw_value if raw_value.blank?
234
235
  case encoding
235
236
  when :base64
236
237
  Base64.decode64(raw_value)
237
238
  when :word_doc
238
239
  read_word_stream(StringIO.new(raw_value, 'r'))
239
240
  else
240
- fail "Cannot decode: #{encoding}"
241
+ raise "Cannot decode: #{encoding}"
241
242
  end
242
243
  end
243
244
 
@@ -16,12 +16,15 @@ module NdrImport
16
16
 
17
17
  include UTF8Encoding
18
18
 
19
- NON_TABULAR_OPTIONS = %w(capture_end_line capture_start_line start_line_pattern
19
+ TABULAR_ONLY_OPTIONS = %w[delimiter liberal_parsing tablename_pattern
20
+ header_lines footer_lines xml_record_xpath].freeze
21
+
22
+ NON_TABULAR_OPTIONS = %w[capture_end_line capture_start_line start_line_pattern
20
23
  end_line_pattern remove_lines start_in_a_record
21
- end_in_a_record).freeze
24
+ end_in_a_record].freeze
22
25
 
23
26
  def self.all_valid_options
24
- super - %w(delimiter tablename_pattern header_lines footer_lines) + NON_TABULAR_OPTIONS
27
+ super - TABULAR_ONLY_OPTIONS + NON_TABULAR_OPTIONS
25
28
  end
26
29
 
27
30
  attr_reader(*NON_TABULAR_OPTIONS)
@@ -10,8 +10,8 @@ module NdrImport
10
10
  include NdrImport::Mapper
11
11
 
12
12
  def self.all_valid_options
13
- %w[canonical_name delimiter filename_pattern tablename_pattern header_lines footer_lines
14
- format klass columns]
13
+ %w[canonical_name delimiter liberal_parsing filename_pattern tablename_pattern header_lines
14
+ footer_lines format klass columns xml_record_xpath]
15
15
  end
16
16
 
17
17
  def all_valid_options
@@ -32,10 +32,14 @@ module NdrImport
32
32
  # now at the individual file level, can we find the table mapping?
33
33
  table_mapping = get_table_mapping(filename, nil)
34
34
 
35
- tables = NdrImport::File::Registry.tables(filename,
36
- table_mapping.try(:format),
37
- 'unzip_path' => unzip_path,
38
- 'col_sep' => table_mapping.try(:delimiter))
35
+ options = {
36
+ 'unzip_path' => unzip_path,
37
+ 'col_sep' => table_mapping.try(:delimiter),
38
+ 'liberal_parsing' => table_mapping.try(:liberal_parsing),
39
+ 'xml_record_xpath' => table_mapping.try(:xml_record_xpath)
40
+ }
41
+
42
+ tables = NdrImport::File::Registry.tables(filename, table_mapping.try(:format), options)
39
43
  yield_tables_and_their_content(filename, tables, &block)
40
44
  end
41
45
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
  # This stores the current version of the NdrImport gem
3
3
  module NdrImport
4
- VERSION = '6.2.0'.freeze
4
+ VERSION = '6.3.0'.freeze
5
5
  end
@@ -0,0 +1,90 @@
1
+ require 'ndr_import/table'
2
+
3
+ module NdrImport
4
+ module Xml
5
+ # This class maintains the state of an XML table mapping and encapsulates
6
+ # the logic required to transform a table of data into "records". Particular
7
+ # attention has been paid to using enumerables throughout to help with the
8
+ # transformation of large quantities of data.
9
+ class Table < ::NdrImport::Table
10
+ def self.all_valid_options
11
+ super - %w[delimiter header_lines footer_lines]
12
+ end
13
+
14
+ def header_lines
15
+ 0
16
+ end
17
+
18
+ def footer_lines
19
+ 0
20
+ end
21
+
22
+ # This method transforms an incoming line (element) of xml data by applying
23
+ # each of the klass masked mappings to the line and yielding the klass
24
+ # and fields for each mapped klass.
25
+ def transform_line(line, index)
26
+ return enum_for(:transform_line, line, index) unless block_given?
27
+
28
+ raise 'Not an Nokogiri::XML::Element!' unless line.is_a? Nokogiri::XML::Element
29
+
30
+ validate_column_mappings(line)
31
+
32
+ xml_line = column_xpaths.map { |column_xpath| line.xpath(column_xpath).inner_text }
33
+
34
+ masked_mappings.each do |klass, klass_mappings|
35
+ fields = mapped_line(xml_line, klass_mappings)
36
+ next if fields[:skip].to_s == 'true'.freeze
37
+ yield(klass, fields, index)
38
+ end
39
+ end
40
+
41
+ private
42
+
43
+ # Ensure every leaf is accounted for in the column mappings
44
+ def validate_column_mappings(line)
45
+ missing_nodes = mappable_xpaths_from(line) - column_xpaths
46
+ raise "Unmapped data! #{missing_nodes}" unless missing_nodes.empty?
47
+ end
48
+
49
+ def column_name_from(column)
50
+ column[Strings::COLUMN] || column[Strings::STANDARD_MAPPING]
51
+ end
52
+
53
+ def column_xpaths
54
+ @column_xpaths ||= columns.map { |column| build_xpath_from(column) }
55
+ end
56
+
57
+ def mappable_xpaths_from(line)
58
+ xpaths = []
59
+
60
+ line.xpath('.//*[not(child::*)]').each do |node|
61
+ xpath = node.path.sub(line.path + '/', '')
62
+ xpaths << xpath
63
+ node.attributes.each_key { |key| xpaths << "#{xpath}/@#{key}" }
64
+ end
65
+ xpaths
66
+ end
67
+
68
+ def build_xpath_from(column)
69
+ column_name = column_name_from(column)
70
+ column['xml_cell'].presence ? relative_path_from(column, column_name) : column_name
71
+ end
72
+
73
+ def relative_path_from(column, colum_name)
74
+ xml_cell = column['xml_cell']
75
+ relative_path = xml_cell['relative_path'].presence ? xml_cell['relative_path'] : nil
76
+ attribute = xml_cell['attribute'].presence ? '@' + xml_cell['attribute'] : nil
77
+
78
+ if relative_path && attribute
79
+ relative_path + '/' + colum_name + '/' + attribute
80
+ elsif relative_path
81
+ relative_path + '/' + colum_name
82
+ elsif attribute
83
+ colum_name + '/' + attribute
84
+ else
85
+ colum_name
86
+ end
87
+ end
88
+ end
89
+ end
90
+ end
data/ndr_import.gemspec CHANGED
@@ -33,7 +33,7 @@ Gem::Specification.new do |spec|
33
33
  spec.add_dependency 'roo-xls'
34
34
  spec.add_dependency 'spreadsheet', '1.0.3'
35
35
 
36
- spec.required_ruby_version = '>= 2.2'
36
+ spec.required_ruby_version = '>= 2.4'
37
37
 
38
38
  spec.add_development_dependency 'bundler', '~> 1.7'
39
39
  spec.add_development_dependency 'rake', '~> 10.0'
@@ -33,6 +33,21 @@ module NdrImport
33
33
  end
34
34
  end
35
35
 
36
+ test 'should read malformed pipe correctly' do
37
+ file_path = @permanent_test_files.join('malformed_pipe.csv')
38
+ handler = NdrImport::File::Delimited.new(file_path, 'delimited', 'col_sep' => '|',
39
+ 'liberal_parsing' => 'true')
40
+ handler.tables.each do |tablename, sheet|
41
+ assert_nil tablename
42
+ sheet = sheet.to_a
43
+ assert_equal(('A'..'Z').to_a, sheet[0])
44
+ assert_equal ['1'] * 26, sheet[1]
45
+ expected_row = ['2'] * 25
46
+ expected_row << '2"malformed"'
47
+ assert_equal expected_row, sheet[2].sort
48
+ end
49
+ end
50
+
36
51
  test 'should read thorn correctly' do
37
52
  file_path = @permanent_test_files.join('normal_thorn.csv')
38
53
  handler = NdrImport::File::Delimited.new(file_path, 'delimited', 'col_sep' => "\xfe")
@@ -99,6 +114,47 @@ module NdrImport
99
114
  assert_equal ['2'] * 26, rows[2]
100
115
  end
101
116
 
117
+ test 'should read malformed delimited txt' do
118
+ rows = []
119
+ file_path = @permanent_test_files.join('malformed.csv')
120
+ handler = NdrImport::File::Delimited.new(file_path, 'csv', 'col_sep' => nil,
121
+ 'liberal_parsing' => 'true')
122
+ handler.tables.each do |tablename, sheet|
123
+ assert_nil tablename
124
+ assert_instance_of Enumerator, sheet
125
+ sheet.each do |row|
126
+ rows << row
127
+ end
128
+ end
129
+
130
+ assert_equal(('A'..'Z').to_a, rows[0])
131
+ assert_equal ['1'] * 26, rows[1]
132
+ expected_row = ['2'] * 25
133
+ expected_row << '2"malformed"'
134
+ assert_equal expected_row, rows[2].sort
135
+ end
136
+
137
+ test 'should fail to read malformed delimited txt without liberal_parsing' do
138
+ rows_yielded = []
139
+ exception = assert_raises(CSVLibrary::MalformedCSVError) do
140
+ file_path = @permanent_test_files.join('malformed.csv')
141
+ handler = NdrImport::File::Delimited.new(file_path, 'csv')
142
+
143
+ handler.tables.each do |tablename, sheet|
144
+ assert_nil tablename
145
+ assert_instance_of Enumerator, sheet
146
+ sheet.each do |row|
147
+ rows_yielded << row
148
+ end
149
+ end
150
+ end
151
+
152
+ assert rows_yielded.empty?, 'no rows should have been yielded'
153
+
154
+ msg = 'Invalid CSV format on row 3 of malformed.csv. Original: Illegal quoting in line 3.'
155
+ assert_equal msg, exception.message
156
+ end
157
+
102
158
  test 'should read line-by-line' do
103
159
  rows = []
104
160
  file_path = @permanent_test_files.join('normal.csv')
@@ -12,7 +12,7 @@ module NdrImport
12
12
 
13
13
  test 'Registry.handlers' do
14
14
  assert_instance_of Hash, NdrImport::File::Registry.handlers
15
- assert_equal %w[csv delimited doc docx nontabular pdf text txt xls xlsx zip],
15
+ assert_equal %w[csv delimited doc docx nontabular pdf text txt xls xlsx xml_table zip],
16
16
  NdrImport::File::Registry.handlers.keys.sort
17
17
  end
18
18
 
@@ -0,0 +1,21 @@
1
+ require 'test_helper'
2
+ require 'ndr_import/file/xml'
3
+
4
+ module NdrImport
5
+ module File
6
+ # Xml file handler tests
7
+ class XmlTest < ActiveSupport::TestCase
8
+ def setup
9
+ @permanent_test_files = SafePath.new('permanent_test_files')
10
+ end
11
+
12
+ test 'should return enum of xml elements' do
13
+ file_path = @permanent_test_files.join('sample.xml')
14
+ handler = NdrImport::File::Xml.new(file_path, nil, 'xml_record_xpath' => 'root/record')
15
+ rows = handler.send(:rows)
16
+ assert rows.is_a? Enumerator
17
+ assert(rows.all? { |row| row.is_a? Nokogiri::XML::Element })
18
+ end
19
+ end
20
+ end
21
+ end
data/test/mapper_test.rb CHANGED
@@ -635,6 +635,11 @@ class MapperTest < ActiveSupport::TestCase
635
635
  assert_equal 'Hello world, this is a word document', file_content
636
636
  end
637
637
 
638
+ test 'should handle blank values when attempting to decode_raw_value' do
639
+ text_content = TestMapper.new.send(:decode_raw_value, '', :word_doc)
640
+ assert_equal '', text_content
641
+ end
642
+
638
643
  test 'should raise unknown encoding exception' do
639
644
  assert_raise(RuntimeError) do
640
645
  TestMapper.new.mapped_line(['A'], invalid_decode_mapping)
@@ -0,0 +1,3 @@
1
+ A,B,C,D,E,F,G,H,I,J,K,L,M,N,O,P,Q,R,S,T,U,V,W,X,Y,Z
2
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
3
+ 2,2,2,2,2,2,2,2"malformed",2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2
@@ -0,0 +1,3 @@
1
+ A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z
2
+ 1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1
3
+ 2|2|2|2|2|2|2|2|2|2|2|2|2|2|2|2|2"malformed"|2|2|2|2|2|2|2|2|2
@@ -0,0 +1,34 @@
1
+ <root>
2
+ <record>
3
+ <no_relative_path value="A value"/>
4
+ <no_path_or_att>Another value</no_path_or_att>
5
+ <demographics>
6
+ <demographics_1>AAA</demographics_1>
7
+ <address>
8
+ <address_line1>Address</address_line1>
9
+ <address_line1>Address 2</address_line1>
10
+ </address>
11
+ <demographics_2 code="03">Inner text</demographics_2>
12
+ </demographics>
13
+ <pathology>
14
+ <pathology_date_1>2018-01-01</pathology_date_1>
15
+ <pathology_date_2 />
16
+ </pathology>
17
+ </record>
18
+ <record>
19
+ <demographics>
20
+ <address>
21
+ <address_line1>Address</address_line1>
22
+ <address_line1>Address 2</address_line1>
23
+ </address>
24
+ <demographics_2 code="03">Inner text</demographics_2>
25
+ <demographics_1>AAA</demographics_1>
26
+ </demographics>
27
+ <no_path_or_att><![CDATA[Another value]]></no_path_or_att>
28
+ <pathology>
29
+ <pathology_date_1>2018-01-01</pathology_date_1>
30
+ <pathology_date_2 />
31
+ </pathology>
32
+ <no_relative_path value="A value"/>
33
+ </record>
34
+ </root>
@@ -0,0 +1,90 @@
1
+ require 'test_helper'
2
+
3
+ # This tests the NdrImport::Xml::Table mapping class
4
+ module Xml
5
+ class TableTest < ActiveSupport::TestCase
6
+ def setup
7
+ file_path = SafePath.new('permanent_test_files').join('sample.xml')
8
+ handler = NdrImport::File::Xml.new(file_path, nil, 'xml_record_xpath' => 'root/record')
9
+
10
+ @element_lines = handler.send(:rows)
11
+ end
12
+
13
+ def test_should_transform_xml_element_lines
14
+ table = NdrImport::Xml::Table.new(klass: 'SomeTestKlass', columns: xml_column_mapping)
15
+
16
+ expected_data = ['SomeTestKlass', { rawtext: {
17
+ 'no_relative_path' => 'A value',
18
+ 'no_relative_path_inner_text' => '',
19
+ 'no_path_or_att' => 'Another value',
20
+ 'demographics_1' => 'AAA',
21
+ 'demographics_2' => '03',
22
+ 'demographics_2_inner_text' => 'Inner text',
23
+ 'address1' => 'Address',
24
+ 'address2' => 'Address 2',
25
+ 'pathology_date_1' => '2018-01-01',
26
+ 'pathology_date_2' => '',
27
+ 'should_be_blank' => ''
28
+ } }, 1]
29
+
30
+ transformed_data = table.transform(@element_lines)
31
+ assert_equal 2, transformed_data.count
32
+
33
+ transformed_data.each do |klass, fields, _index|
34
+ assert_equal expected_data[0], klass
35
+ assert_equal expected_data[1], fields
36
+ end
37
+ end
38
+
39
+ def test_should_fail_with_unmappped_nodes
40
+ table = NdrImport::Xml::Table.new(klass: 'SomeTestKlass', columns: partial_xml_column_mapping)
41
+
42
+ exception = assert_raises(RuntimeError) { table.transform(@element_lines).to_a }
43
+ assert exception.message.starts_with? 'sample.xml [RuntimeError: Unmapped data!'
44
+ end
45
+
46
+ private
47
+
48
+ def xml_column_mapping
49
+ [
50
+ { 'column' => 'no_relative_path',
51
+ 'xml_cell' => { 'relative_path' => '', 'attribute' => 'value' } },
52
+ { 'column' => 'no_relative_path', 'rawtext_name' => 'no_relative_path_inner_text',
53
+ 'xml_cell' => { 'relative_path' => '' } },
54
+ { 'column' => 'no_path_or_att',
55
+ 'xml_cell' => { 'relative_path' => '', 'attribute' => '' } },
56
+ { 'column' => 'demographics_1',
57
+ 'xml_cell' => { 'relative_path' => 'demographics' } },
58
+ { 'column' => 'demographics_2',
59
+ 'xml_cell' => { 'relative_path' => 'demographics', 'attribute' => 'code' } },
60
+ { 'column' => 'demographics_2', 'rawtext_name' => 'demographics_2_inner_text',
61
+ 'xml_cell' => { 'relative_path' => 'demographics' } },
62
+ { 'column' => 'address_line1[1]', 'rawtext_name' => 'address1',
63
+ 'xml_cell' => { 'relative_path' => 'demographics/address' } },
64
+ { 'column' => 'address_line1[2]', 'rawtext_name' => 'address2',
65
+ 'xml_cell' => { 'relative_path' => 'demographics/address' } },
66
+ { 'column' => 'pathology_date_1',
67
+ 'xml_cell' => { 'relative_path' => 'pathology' } },
68
+ { 'column' => 'pathology_date_2',
69
+ 'xml_cell' => { 'relative_path' => 'pathology' } },
70
+ { 'column' => 'should_be_blank',
71
+ 'xml_cell' => { 'relative_path' => 'not_present' } }
72
+ ]
73
+ end
74
+
75
+ def partial_xml_column_mapping
76
+ [
77
+ { 'column' => 'no_relative_path',
78
+ 'xml_cell' => { 'relative_path' => '', 'attribute' => 'value' } },
79
+ { 'column' => 'no_path_or_att',
80
+ 'xml_cell' => { 'relative_path' => '', 'attribute' => '' } },
81
+ { 'column' => 'demographics_1',
82
+ 'xml_cell' => { 'relative_path' => 'demographics' } },
83
+ { 'column' => 'demographics_2',
84
+ 'xml_cell' => { 'relative_path' => 'demographics', 'attribute' => 'code' } },
85
+ { 'column' => 'address_line1',
86
+ 'xml_cell' => { 'relative_path' => 'demographics/address' } }
87
+ ]
88
+ end
89
+ end
90
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ndr_import
3
3
  version: !ruby/object:Gem::Version
4
- version: 6.2.0
4
+ version: 6.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - NCRS Development Team
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-09-24 00:00:00.000000000 Z
11
+ date: 2018-10-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -352,6 +352,7 @@ files:
352
352
  - lib/ndr_import/file/text.rb
353
353
  - lib/ndr_import/file/unregistered_filetype.rb
354
354
  - lib/ndr_import/file/word.rb
355
+ - lib/ndr_import/file/xml.rb
355
356
  - lib/ndr_import/file/zip.rb
356
357
  - lib/ndr_import/fixed_width/table.rb
357
358
  - lib/ndr_import/helpers/file/delimited.rb
@@ -372,6 +373,7 @@ files:
372
373
  - lib/ndr_import/table.rb
373
374
  - lib/ndr_import/universal_importer_helper.rb
374
375
  - lib/ndr_import/version.rb
376
+ - lib/ndr_import/xml/table.rb
375
377
  - ndr_import.gemspec
376
378
  - test/file/base_test.rb
377
379
  - test/file/delimited_test.rb
@@ -381,6 +383,7 @@ files:
381
383
  - test/file/registry_test.rb
382
384
  - test/file/text_test.rb
383
385
  - test/file/word_test.rb
386
+ - test/file/xml_test.rb
384
387
  - test/file/zip_test.rb
385
388
  - test/fixed_width/table_test.rb
386
389
  - test/helpers/file/delimited_test.rb
@@ -411,7 +414,9 @@ files:
411
414
  - test/resources/hello_world.txt
412
415
  - test/resources/high_ascii_delimited.txt
413
416
  - test/resources/high_ascii_delimited_example_two.txt
417
+ - test/resources/malformed.csv
414
418
  - test/resources/malformed.xml
419
+ - test/resources/malformed_pipe.csv
415
420
  - test/resources/normal.csv
416
421
  - test/resources/normal.csv.zip
417
422
  - test/resources/normal_pipe.csv
@@ -420,6 +425,7 @@ files:
420
425
  - test/resources/not_a_word_file.doc
421
426
  - test/resources/not_a_word_file.docx
422
427
  - test/resources/not_sign_delimited.txt
428
+ - test/resources/sample.xml
423
429
  - test/resources/sample_xls.xls
424
430
  - test/resources/sample_xlsx.xlsx
425
431
  - test/resources/sheet_streaming.xls
@@ -438,6 +444,7 @@ files:
438
444
  - test/table_test.rb
439
445
  - test/test_helper.rb
440
446
  - test/universal_importer_helper_test.rb
447
+ - test/xml/table_test.rb
441
448
  homepage: https://github.com/PublicHealthEngland/ndr_import
442
449
  licenses:
443
450
  - MIT
@@ -450,7 +457,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
450
457
  requirements:
451
458
  - - ">="
452
459
  - !ruby/object:Gem::Version
453
- version: '2.2'
460
+ version: '2.4'
454
461
  required_rubygems_version: !ruby/object:Gem::Requirement
455
462
  requirements:
456
463
  - - ">="
@@ -471,6 +478,7 @@ test_files:
471
478
  - test/file/registry_test.rb
472
479
  - test/file/text_test.rb
473
480
  - test/file/word_test.rb
481
+ - test/file/xml_test.rb
474
482
  - test/file/zip_test.rb
475
483
  - test/fixed_width/table_test.rb
476
484
  - test/helpers/file/delimited_test.rb
@@ -501,7 +509,9 @@ test_files:
501
509
  - test/resources/hello_world.txt
502
510
  - test/resources/high_ascii_delimited.txt
503
511
  - test/resources/high_ascii_delimited_example_two.txt
512
+ - test/resources/malformed.csv
504
513
  - test/resources/malformed.xml
514
+ - test/resources/malformed_pipe.csv
505
515
  - test/resources/normal.csv
506
516
  - test/resources/normal.csv.zip
507
517
  - test/resources/normal_pipe.csv
@@ -510,6 +520,7 @@ test_files:
510
520
  - test/resources/not_a_word_file.doc
511
521
  - test/resources/not_a_word_file.docx
512
522
  - test/resources/not_sign_delimited.txt
523
+ - test/resources/sample.xml
513
524
  - test/resources/sample_xls.xls
514
525
  - test/resources/sample_xlsx.xlsx
515
526
  - test/resources/sheet_streaming.xls
@@ -528,3 +539,4 @@ test_files:
528
539
  - test/table_test.rb
529
540
  - test/test_helper.rb
530
541
  - test/universal_importer_helper_test.rb
542
+ - test/xml/table_test.rb