ndr_import 6.2.0 → 6.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 33380c2e5d1abc43cb56a040c8b6255ec6315bb9860506c173dcd573df777f11
4
- data.tar.gz: fe9a51d153f9f64cdeaf872198f06887ba0c0becf9877e9a0f6cbb511160ae10
3
+ metadata.gz: 9ae1ff625cdd7352f4b3306f9a49905b2563557b487762611dfd0391735177ee
4
+ data.tar.gz: a7bdc413db721c0f813b9fd7ef862beff3cf6a9a76f042eef15d141b2526f5a7
5
5
  SHA512:
6
- metadata.gz: e11257edc7d89f30f05943c9eb992f7c3660d66a15100092639840768843456e15a8b3babe16194f04387bd890f399fab58238842276e1a8c40209cfc127ddf4
7
- data.tar.gz: 27f159c69d12780b967caa9c5223f8ab91a33fa6cd21b62ebaa3811ab7a8d115d388fc084a8e2501663cbb24d81a97ae4cf090bd377ae4ccd1bbdb617e0ea9ab
6
+ metadata.gz: 3d572905a49329c295a9ab701de99a4f82a9165a399b05ca6f80e48218e2c603fddca14768d4aad2e9139ac07cdbaa6c5ba1f1851527cc50f7f6f4b45ed2392e
7
+ data.tar.gz: 3414e9efa2b16b60e9d532075fca1df095bdcab20516738243fc9ed9ac5b30540b3ad1f9139feaedfd3f5c3182c049e2deb5f5cd878a67360bdefaeffd9c6452
data/code_safety.yml CHANGED
@@ -15,7 +15,7 @@ file safety:
15
15
  ".travis.yml":
16
16
  comments:
17
17
  reviewed_by: josh.pencheon
18
- safe_revision: 661b5b8e71572bba28aa92fb95aa218e3f8444f4
18
+ safe_revision: 71979e0a602ca5a0ce415c194f10add9959f0116
19
19
  CODE_OF_CONDUCT.md:
20
20
  comments:
21
21
  reviewed_by: timgentry
@@ -58,8 +58,8 @@ file safety:
58
58
  safe_revision: c158dc783b84cab31380708e76e3812544cc1c2f
59
59
  lib/ndr_import.rb:
60
60
  comments:
61
- reviewed_by: ollie.tulloch
62
- safe_revision: dfc958d44b6c58355445fa395db08a62213ee709
61
+ reviewed_by: josh.pencheon
62
+ safe_revision: 4ab72f84201c2d5f0147b7dfd041f488f6ff0422
63
63
  lib/ndr_import/csv_library.rb:
64
64
  comments:
65
65
  reviewed_by: josh.pencheon
@@ -67,7 +67,7 @@ file safety:
67
67
  lib/ndr_import/file/all.rb:
68
68
  comments:
69
69
  reviewed_by: josh.pencheon
70
- safe_revision: 1b66cfcbb61dfac93c44889ca0ced5836101c20c
70
+ safe_revision: 4ab72f84201c2d5f0147b7dfd041f488f6ff0422
71
71
  lib/ndr_import/file/base.rb:
72
72
  comments:
73
73
  reviewed_by: timgentry
@@ -75,7 +75,7 @@ file safety:
75
75
  lib/ndr_import/file/delimited.rb:
76
76
  comments:
77
77
  reviewed_by: josh.pencheon
78
- safe_revision: 902f5326d85372d9632de9869d6f56fc02b83a10
78
+ safe_revision: 71979e0a602ca5a0ce415c194f10add9959f0116
79
79
  lib/ndr_import/file/docx.rb:
80
80
  comments:
81
81
  reviewed_by: josh.pencheon
@@ -104,6 +104,10 @@ file safety:
104
104
  comments:
105
105
  reviewed_by: timgentry
106
106
  safe_revision: c88000b32401b5ae9ef7f5878a9b630506ab5a94
107
+ lib/ndr_import/file/xml.rb:
108
+ comments:
109
+ reviewed_by: josh.pencheon
110
+ safe_revision: 4ab72f84201c2d5f0147b7dfd041f488f6ff0422
107
111
  lib/ndr_import/file/zip.rb:
108
112
  comments:
109
113
  reviewed_by: timgentry
@@ -139,7 +143,7 @@ file safety:
139
143
  lib/ndr_import/mapper.rb:
140
144
  comments:
141
145
  reviewed_by: josh.pencheon
142
- safe_revision: fbd4e1bfda8acd9ae026601dc4ecdabca5be4bc7
146
+ safe_revision: 4ab72f84201c2d5f0147b7dfd041f488f6ff0422
143
147
  lib/ndr_import/mapping_error.rb:
144
148
  comments:
145
149
  reviewed_by: timgentry
@@ -163,7 +167,7 @@ file safety:
163
167
  lib/ndr_import/non_tabular/table.rb:
164
168
  comments:
165
169
  reviewed_by: josh.pencheon
166
- safe_revision: 337bf56e39f0f08cf7593b03867bb2da48630663
170
+ safe_revision: 71979e0a602ca5a0ce415c194f10add9959f0116
167
171
  lib/ndr_import/non_tabular_file_helper.rb:
168
172
  comments:
169
173
  reviewed_by: josh.pencheon
@@ -175,19 +179,23 @@ file safety:
175
179
  lib/ndr_import/table.rb:
176
180
  comments: uses File.basename
177
181
  reviewed_by: josh.pencheon
178
- safe_revision: 902f5326d85372d9632de9869d6f56fc02b83a10
182
+ safe_revision: 71979e0a602ca5a0ce415c194f10add9959f0116
179
183
  lib/ndr_import/universal_importer_helper.rb:
180
184
  comments:
181
185
  reviewed_by: josh.pencheon
182
- safe_revision: 902f5326d85372d9632de9869d6f56fc02b83a10
186
+ safe_revision: 71979e0a602ca5a0ce415c194f10add9959f0116
183
187
  lib/ndr_import/version.rb:
184
188
  comments: another check?
185
189
  reviewed_by: josh.pencheon
186
- safe_revision: b85d90430543f238706d569bacd750ee50cb5493
190
+ safe_revision: 0b1ab7c810d0fa46d153238d69627c07f56d1efa
191
+ lib/ndr_import/xml/table.rb:
192
+ comments:
193
+ reviewed_by: josh.pencheon
194
+ safe_revision: 4ab72f84201c2d5f0147b7dfd041f488f6ff0422
187
195
  ndr_import.gemspec:
188
196
  comments:
189
197
  reviewed_by: josh.pencheon
190
- safe_revision: 1b66cfcbb61dfac93c44889ca0ced5836101c20c
198
+ safe_revision: 71979e0a602ca5a0ce415c194f10add9959f0116
191
199
  test/file/base_test.rb:
192
200
  comments:
193
201
  reviewed_by: timgentry
@@ -195,7 +203,7 @@ file safety:
195
203
  test/file/delimited_test.rb:
196
204
  comments:
197
205
  reviewed_by: josh.pencheon
198
- safe_revision: 902f5326d85372d9632de9869d6f56fc02b83a10
206
+ safe_revision: ab9f926a53e84f8ffa826abdd967abee455c89df
199
207
  test/file/docx_test.rb:
200
208
  comments:
201
209
  reviewed_by: josh.pencheon
@@ -211,7 +219,7 @@ file safety:
211
219
  test/file/registry_test.rb:
212
220
  comments:
213
221
  reviewed_by: josh.pencheon
214
- safe_revision: 1b66cfcbb61dfac93c44889ca0ced5836101c20c
222
+ safe_revision: 4ab72f84201c2d5f0147b7dfd041f488f6ff0422
215
223
  test/file/text_test.rb:
216
224
  comments:
217
225
  reviewed_by: timgentry
@@ -220,6 +228,10 @@ file safety:
220
228
  comments:
221
229
  reviewed_by: josh.pencheon
222
230
  safe_revision: 1b66cfcbb61dfac93c44889ca0ced5836101c20c
231
+ test/file/xml_test.rb:
232
+ comments:
233
+ reviewed_by: josh.pencheon
234
+ safe_revision: 4ab72f84201c2d5f0147b7dfd041f488f6ff0422
223
235
  test/file/zip_test.rb:
224
236
  comments:
225
237
  reviewed_by: timgentry
@@ -255,7 +267,7 @@ file safety:
255
267
  test/mapper_test.rb:
256
268
  comments: exposes Mapper internals to test them
257
269
  reviewed_by: josh.pencheon
258
- safe_revision: 3cea78767a73fd0d0ae64a4af3f07389e45349e9
270
+ safe_revision: 4ab72f84201c2d5f0147b7dfd041f488f6ff0422
259
271
  test/non_tabular/mapping_test.rb:
260
272
  comments:
261
273
  reviewed_by: timgentry
@@ -340,10 +352,18 @@ file safety:
340
352
  comments:
341
353
  reviewed_by: josh.pencheon
342
354
  safe_revision: 902f5326d85372d9632de9869d6f56fc02b83a10
355
+ test/resources/malformed.csv:
356
+ comments:
357
+ reviewed_by: josh.pencheon
358
+ safe_revision: 71979e0a602ca5a0ce415c194f10add9959f0116
343
359
  test/resources/malformed.xml:
344
360
  comments:
345
361
  reviewed_by: timgentry
346
362
  safe_revision: 137170d443ea6bcc0afb18f62202c285ae6501eb
363
+ test/resources/malformed_pipe.csv:
364
+ comments:
365
+ reviewed_by: josh.pencheon
366
+ safe_revision: 71979e0a602ca5a0ce415c194f10add9959f0116
347
367
  test/resources/normal.csv:
348
368
  comments:
349
369
  reviewed_by: timgentry
@@ -376,6 +396,10 @@ file safety:
376
396
  comments:
377
397
  reviewed_by: josh.pencheon
378
398
  safe_revision: 902f5326d85372d9632de9869d6f56fc02b83a10
399
+ test/resources/sample.xml:
400
+ comments:
401
+ reviewed_by: josh.pencheon
402
+ safe_revision: 4ab72f84201c2d5f0147b7dfd041f488f6ff0422
379
403
  test/resources/sample_xls.xls:
380
404
  comments:
381
405
  reviewed_by: timgentry
@@ -448,3 +472,7 @@ file safety:
448
472
  comments:
449
473
  reviewed_by: josh.pencheon
450
474
  safe_revision: 902f5326d85372d9632de9869d6f56fc02b83a10
475
+ test/xml/table_test.rb:
476
+ comments:
477
+ reviewed_by: josh.pencheon
478
+ safe_revision: 4ab72f84201c2d5f0147b7dfd041f488f6ff0422
data/lib/ndr_import.rb CHANGED
@@ -6,6 +6,7 @@ require 'ndr_import/non_tabular_file_helper'
6
6
  require 'ndr_import/table'
7
7
  require 'ndr_import/non_tabular/table'
8
8
  require 'ndr_import/fixed_width/table'
9
+ require 'ndr_import/xml/table'
9
10
 
10
11
  module NdrImport
11
12
  def self.root
@@ -6,5 +6,5 @@ require_relative 'pdf'
6
6
  require_relative 'text'
7
7
  require_relative 'unregistered_filetype'
8
8
  require_relative 'word'
9
- # # require_relative 'xml'
9
+ require_relative 'xml'
10
10
  require_relative 'zip'
@@ -52,8 +52,9 @@ module NdrImport
52
52
  begin
53
53
  col_sep = @options['col_sep']
54
54
  options = {
55
- :col_sep => (col_sep || ',').force_encoding(delimiter_encoding),
56
- :mode => access_mode
55
+ col_sep: (col_sep || ',').force_encoding(delimiter_encoding),
56
+ mode: access_mode,
57
+ liberal_parsing: @options['liberal_parsing'].presence
57
58
  }
58
59
 
59
60
  row_num = 0
@@ -76,7 +77,7 @@ module NdrImport
76
77
 
77
78
  # We tried them all, and none worked:
78
79
  unless successful_options
79
- fail "None of the encodings #{supported_encodings.values.inspect} were successful!"
80
+ raise "None of the encodings #{supported_encodings.values.inspect} were successful!"
80
81
  end
81
82
 
82
83
  successful_options
@@ -0,0 +1,29 @@
1
+ require 'ndr_support/safe_file'
2
+ require 'ndr_import/helpers/file/xml'
3
+ require_relative 'registry'
4
+
5
+ module NdrImport
6
+ # This is one of a collection of file handlers that deal with individual formats of data.
7
+ # They can be instantiated directly or via the factory method Registry.tables
8
+ module File
9
+ # This class is an XML file handler that returns a single table.
10
+ class Xml < Base
11
+ include NdrImport::Helpers::File::Xml
12
+
13
+ private
14
+
15
+ # Iterate through the file, yielding each 'xml_record_xpath' element in turn.
16
+ def rows(&block)
17
+ return enum_for(:rows) unless block
18
+
19
+ doc = read_xml_file(@filename)
20
+
21
+ doc.xpath(@options['xml_record_xpath']).each(&block)
22
+ rescue StandardError => e
23
+ raise("#{SafeFile.basename(@filename)} [#{e.class}: #{e.message}]")
24
+ end
25
+ end
26
+ # Not every XML file should be handled by this class, so it is registered as 'xml_table'; plain 'xml' is deliberately left unregistered.
27
+ Registry.register(Xml, 'xml_table')
28
+ end
29
+ end
@@ -231,13 +231,14 @@ module NdrImport::Mapper
231
231
  #
232
232
  # would base64 decode a word document and then 'decode' the word document into plain text
233
233
  def decode_raw_value(raw_value, encoding)
234
+ return raw_value if raw_value.blank?
234
235
  case encoding
235
236
  when :base64
236
237
  Base64.decode64(raw_value)
237
238
  when :word_doc
238
239
  read_word_stream(StringIO.new(raw_value, 'r'))
239
240
  else
240
- fail "Cannot decode: #{encoding}"
241
+ raise "Cannot decode: #{encoding}"
241
242
  end
242
243
  end
243
244
 
@@ -16,12 +16,15 @@ module NdrImport
16
16
 
17
17
  include UTF8Encoding
18
18
 
19
- NON_TABULAR_OPTIONS = %w(capture_end_line capture_start_line start_line_pattern
19
+ TABULAR_ONLY_OPTIONS = %w[delimiter liberal_parsing tablename_pattern
20
+ header_lines footer_lines xml_record_xpath].freeze
21
+
22
+ NON_TABULAR_OPTIONS = %w[capture_end_line capture_start_line start_line_pattern
20
23
  end_line_pattern remove_lines start_in_a_record
21
- end_in_a_record).freeze
24
+ end_in_a_record].freeze
22
25
 
23
26
  def self.all_valid_options
24
- super - %w(delimiter tablename_pattern header_lines footer_lines) + NON_TABULAR_OPTIONS
27
+ super - TABULAR_ONLY_OPTIONS + NON_TABULAR_OPTIONS
25
28
  end
26
29
 
27
30
  attr_reader(*NON_TABULAR_OPTIONS)
@@ -10,8 +10,8 @@ module NdrImport
10
10
  include NdrImport::Mapper
11
11
 
12
12
  def self.all_valid_options
13
- %w[canonical_name delimiter filename_pattern tablename_pattern header_lines footer_lines
14
- format klass columns]
13
+ %w[canonical_name delimiter liberal_parsing filename_pattern tablename_pattern header_lines
14
+ footer_lines format klass columns xml_record_xpath]
15
15
  end
16
16
 
17
17
  def all_valid_options
@@ -32,10 +32,14 @@ module NdrImport
32
32
  # now at the individual file level, can we find the table mapping?
33
33
  table_mapping = get_table_mapping(filename, nil)
34
34
 
35
- tables = NdrImport::File::Registry.tables(filename,
36
- table_mapping.try(:format),
37
- 'unzip_path' => unzip_path,
38
- 'col_sep' => table_mapping.try(:delimiter))
35
+ options = {
36
+ 'unzip_path' => unzip_path,
37
+ 'col_sep' => table_mapping.try(:delimiter),
38
+ 'liberal_parsing' => table_mapping.try(:liberal_parsing),
39
+ 'xml_record_xpath' => table_mapping.try(:xml_record_xpath)
40
+ }
41
+
42
+ tables = NdrImport::File::Registry.tables(filename, table_mapping.try(:format), options)
39
43
  yield_tables_and_their_content(filename, tables, &block)
40
44
  end
41
45
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
  # This stores the current version of the NdrImport gem
3
3
  module NdrImport
4
- VERSION = '6.2.0'.freeze
4
+ VERSION = '6.3.0'.freeze
5
5
  end
@@ -0,0 +1,90 @@
1
+ require 'ndr_import/table'
2
+
3
+ module NdrImport
4
+ module Xml
5
+ # This class maintains the state of an XML table mapping and encapsulates
6
+ # the logic required to transform a table of data into "records". Particular
7
+ # attention has been paid to using enumerables throughout to help with the
8
+ # transformation of large quantities of data.
9
+ class Table < ::NdrImport::Table
10
+ def self.all_valid_options
11
+ super - %w[delimiter header_lines footer_lines]
12
+ end
13
+
14
+ def header_lines
15
+ 0
16
+ end
17
+
18
+ def footer_lines
19
+ 0
20
+ end
21
+
22
+ # This method transforms an incoming line (element) of xml data by applying
23
+ # each of the klass masked mappings to the line and yielding the klass
24
+ # and fields for each mapped klass.
25
+ def transform_line(line, index)
26
+ return enum_for(:transform_line, line, index) unless block_given?
27
+
28
+ raise 'Not an Nokogiri::XML::Element!' unless line.is_a? Nokogiri::XML::Element
29
+
30
+ validate_column_mappings(line)
31
+
32
+ xml_line = column_xpaths.map { |column_xpath| line.xpath(column_xpath).inner_text }
33
+
34
+ masked_mappings.each do |klass, klass_mappings|
35
+ fields = mapped_line(xml_line, klass_mappings)
36
+ next if fields[:skip].to_s == 'true'.freeze
37
+ yield(klass, fields, index)
38
+ end
39
+ end
40
+
41
+ private
42
+
43
+ # Ensure every leaf is accounted for in the column mappings
44
+ def validate_column_mappings(line)
45
+ missing_nodes = mappable_xpaths_from(line) - column_xpaths
46
+ raise "Unmapped data! #{missing_nodes}" unless missing_nodes.empty?
47
+ end
48
+
49
+ def column_name_from(column)
50
+ column[Strings::COLUMN] || column[Strings::STANDARD_MAPPING]
51
+ end
52
+
53
+ def column_xpaths
54
+ @column_xpaths ||= columns.map { |column| build_xpath_from(column) }
55
+ end
56
+
57
+ def mappable_xpaths_from(line)
58
+ xpaths = []
59
+
60
+ line.xpath('.//*[not(child::*)]').each do |node|
61
+ xpath = node.path.sub(line.path + '/', '')
62
+ xpaths << xpath
63
+ node.attributes.each_key { |key| xpaths << "#{xpath}/@#{key}" }
64
+ end
65
+ xpaths
66
+ end
67
+
68
+ def build_xpath_from(column)
69
+ column_name = column_name_from(column)
70
+ column['xml_cell'].presence ? relative_path_from(column, column_name) : column_name
71
+ end
72
+
73
+ def relative_path_from(column, colum_name)
74
+ xml_cell = column['xml_cell']
75
+ relative_path = xml_cell['relative_path'].presence ? xml_cell['relative_path'] : nil
76
+ attribute = xml_cell['attribute'].presence ? '@' + xml_cell['attribute'] : nil
77
+
78
+ if relative_path && attribute
79
+ relative_path + '/' + colum_name + '/' + attribute
80
+ elsif relative_path
81
+ relative_path + '/' + colum_name
82
+ elsif attribute
83
+ colum_name + '/' + attribute
84
+ else
85
+ colum_name
86
+ end
87
+ end
88
+ end
89
+ end
90
+ end
data/ndr_import.gemspec CHANGED
@@ -33,7 +33,7 @@ Gem::Specification.new do |spec|
33
33
  spec.add_dependency 'roo-xls'
34
34
  spec.add_dependency 'spreadsheet', '1.0.3'
35
35
 
36
- spec.required_ruby_version = '>= 2.2'
36
+ spec.required_ruby_version = '>= 2.4'
37
37
 
38
38
  spec.add_development_dependency 'bundler', '~> 1.7'
39
39
  spec.add_development_dependency 'rake', '~> 10.0'
@@ -33,6 +33,21 @@ module NdrImport
33
33
  end
34
34
  end
35
35
 
36
+ test 'should read malformed pipe correctly' do
37
+ file_path = @permanent_test_files.join('malformed_pipe.csv')
38
+ handler = NdrImport::File::Delimited.new(file_path, 'delimited', 'col_sep' => '|',
39
+ 'liberal_parsing' => 'true')
40
+ handler.tables.each do |tablename, sheet|
41
+ assert_nil tablename
42
+ sheet = sheet.to_a
43
+ assert_equal(('A'..'Z').to_a, sheet[0])
44
+ assert_equal ['1'] * 26, sheet[1]
45
+ expected_row = ['2'] * 25
46
+ expected_row << '2"malformed"'
47
+ assert_equal expected_row, sheet[2].sort
48
+ end
49
+ end
50
+
36
51
  test 'should read thorn correctly' do
37
52
  file_path = @permanent_test_files.join('normal_thorn.csv')
38
53
  handler = NdrImport::File::Delimited.new(file_path, 'delimited', 'col_sep' => "\xfe")
@@ -99,6 +114,47 @@ module NdrImport
99
114
  assert_equal ['2'] * 26, rows[2]
100
115
  end
101
116
 
117
+ test 'should read malformed delimited txt' do
118
+ rows = []
119
+ file_path = @permanent_test_files.join('malformed.csv')
120
+ handler = NdrImport::File::Delimited.new(file_path, 'csv', 'col_sep' => nil,
121
+ 'liberal_parsing' => 'true')
122
+ handler.tables.each do |tablename, sheet|
123
+ assert_nil tablename
124
+ assert_instance_of Enumerator, sheet
125
+ sheet.each do |row|
126
+ rows << row
127
+ end
128
+ end
129
+
130
+ assert_equal(('A'..'Z').to_a, rows[0])
131
+ assert_equal ['1'] * 26, rows[1]
132
+ expected_row = ['2'] * 25
133
+ expected_row << '2"malformed"'
134
+ assert_equal expected_row, rows[2].sort
135
+ end
136
+
137
+ test 'should fail to read malformed delimited txt without liberal_parsing' do
138
+ rows_yielded = []
139
+ exception = assert_raises(CSVLibrary::MalformedCSVError) do
140
+ file_path = @permanent_test_files.join('malformed.csv')
141
+ handler = NdrImport::File::Delimited.new(file_path, 'csv')
142
+
143
+ handler.tables.each do |tablename, sheet|
144
+ assert_nil tablename
145
+ assert_instance_of Enumerator, sheet
146
+ sheet.each do |row|
147
+ rows_yielded << row
148
+ end
149
+ end
150
+ end
151
+
152
+ assert rows_yielded.empty?, 'no rows should have been yielded'
153
+
154
+ msg = 'Invalid CSV format on row 3 of malformed.csv. Original: Illegal quoting in line 3.'
155
+ assert_equal msg, exception.message
156
+ end
157
+
102
158
  test 'should read line-by-line' do
103
159
  rows = []
104
160
  file_path = @permanent_test_files.join('normal.csv')
@@ -12,7 +12,7 @@ module NdrImport
12
12
 
13
13
  test 'Registry.handlers' do
14
14
  assert_instance_of Hash, NdrImport::File::Registry.handlers
15
- assert_equal %w[csv delimited doc docx nontabular pdf text txt xls xlsx zip],
15
+ assert_equal %w[csv delimited doc docx nontabular pdf text txt xls xlsx xml_table zip],
16
16
  NdrImport::File::Registry.handlers.keys.sort
17
17
  end
18
18
 
@@ -0,0 +1,21 @@
1
+ require 'test_helper'
2
+ require 'ndr_import/file/xml'
3
+
4
+ module NdrImport
5
+ module File
6
+ # Xml file handler tests
7
+ class XmlTest < ActiveSupport::TestCase
8
+ def setup
9
+ @permanent_test_files = SafePath.new('permanent_test_files')
10
+ end
11
+
12
+ test 'should return enum of xml elements' do
13
+ file_path = @permanent_test_files.join('sample.xml')
14
+ handler = NdrImport::File::Xml.new(file_path, nil, 'xml_record_xpath' => 'root/record')
15
+ rows = handler.send(:rows)
16
+ assert rows.is_a? Enumerator
17
+ assert(rows.all? { |row| row.is_a? Nokogiri::XML::Element })
18
+ end
19
+ end
20
+ end
21
+ end
data/test/mapper_test.rb CHANGED
@@ -635,6 +635,11 @@ class MapperTest < ActiveSupport::TestCase
635
635
  assert_equal 'Hello world, this is a word document', file_content
636
636
  end
637
637
 
638
+ test 'should handle blank values when attempting to decode_raw_value' do
639
+ text_content = TestMapper.new.send(:decode_raw_value, '', :word_doc)
640
+ assert_equal '', text_content
641
+ end
642
+
638
643
  test 'should raise unknown encoding exception' do
639
644
  assert_raise(RuntimeError) do
640
645
  TestMapper.new.mapped_line(['A'], invalid_decode_mapping)
@@ -0,0 +1,3 @@
1
+ A,B,C,D,E,F,G,H,I,J,K,L,M,N,O,P,Q,R,S,T,U,V,W,X,Y,Z
2
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
3
+ 2,2,2,2,2,2,2,2"malformed",2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2
@@ -0,0 +1,3 @@
1
+ A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z
2
+ 1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1
3
+ 2|2|2|2|2|2|2|2|2|2|2|2|2|2|2|2|2"malformed"|2|2|2|2|2|2|2|2|2
@@ -0,0 +1,34 @@
1
+ <root>
2
+ <record>
3
+ <no_relative_path value="A value"/>
4
+ <no_path_or_att>Another value</no_path_or_att>
5
+ <demographics>
6
+ <demographics_1>AAA</demographics_1>
7
+ <address>
8
+ <address_line1>Address</address_line1>
9
+ <address_line1>Address 2</address_line1>
10
+ </address>
11
+ <demographics_2 code="03">Inner text</demographics_2>
12
+ </demographics>
13
+ <pathology>
14
+ <pathology_date_1>2018-01-01</pathology_date_1>
15
+ <pathology_date_2 />
16
+ </pathology>
17
+ </record>
18
+ <record>
19
+ <demographics>
20
+ <address>
21
+ <address_line1>Address</address_line1>
22
+ <address_line1>Address 2</address_line1>
23
+ </address>
24
+ <demographics_2 code="03">Inner text</demographics_2>
25
+ <demographics_1>AAA</demographics_1>
26
+ </demographics>
27
+ <no_path_or_att><![CDATA[Another value]]></no_path_or_att>
28
+ <pathology>
29
+ <pathology_date_1>2018-01-01</pathology_date_1>
30
+ <pathology_date_2 />
31
+ </pathology>
32
+ <no_relative_path value="A value"/>
33
+ </record>
34
+ </root>
@@ -0,0 +1,90 @@
1
+ require 'test_helper'
2
+
3
+ # This tests the NdrImport::Xml::Table mapping class
4
+ module Xml
5
+ class TableTest < ActiveSupport::TestCase
6
+ def setup
7
+ file_path = SafePath.new('permanent_test_files').join('sample.xml')
8
+ handler = NdrImport::File::Xml.new(file_path, nil, 'xml_record_xpath' => 'root/record')
9
+
10
+ @element_lines = handler.send(:rows)
11
+ end
12
+
13
+ def test_should_transform_xml_element_lines
14
+ table = NdrImport::Xml::Table.new(klass: 'SomeTestKlass', columns: xml_column_mapping)
15
+
16
+ expected_data = ['SomeTestKlass', { rawtext: {
17
+ 'no_relative_path' => 'A value',
18
+ 'no_relative_path_inner_text' => '',
19
+ 'no_path_or_att' => 'Another value',
20
+ 'demographics_1' => 'AAA',
21
+ 'demographics_2' => '03',
22
+ 'demographics_2_inner_text' => 'Inner text',
23
+ 'address1' => 'Address',
24
+ 'address2' => 'Address 2',
25
+ 'pathology_date_1' => '2018-01-01',
26
+ 'pathology_date_2' => '',
27
+ 'should_be_blank' => ''
28
+ } }, 1]
29
+
30
+ transformed_data = table.transform(@element_lines)
31
+ assert_equal 2, transformed_data.count
32
+
33
+ transformed_data.each do |klass, fields, _index|
34
+ assert_equal expected_data[0], klass
35
+ assert_equal expected_data[1], fields
36
+ end
37
+ end
38
+
39
+ def test_should_fail_with_unmappped_nodes
40
+ table = NdrImport::Xml::Table.new(klass: 'SomeTestKlass', columns: partial_xml_column_mapping)
41
+
42
+ exception = assert_raises(RuntimeError) { table.transform(@element_lines).to_a }
43
+ assert exception.message.starts_with? 'sample.xml [RuntimeError: Unmapped data!'
44
+ end
45
+
46
+ private
47
+
48
+ def xml_column_mapping
49
+ [
50
+ { 'column' => 'no_relative_path',
51
+ 'xml_cell' => { 'relative_path' => '', 'attribute' => 'value' } },
52
+ { 'column' => 'no_relative_path', 'rawtext_name' => 'no_relative_path_inner_text',
53
+ 'xml_cell' => { 'relative_path' => '' } },
54
+ { 'column' => 'no_path_or_att',
55
+ 'xml_cell' => { 'relative_path' => '', 'attribute' => '' } },
56
+ { 'column' => 'demographics_1',
57
+ 'xml_cell' => { 'relative_path' => 'demographics' } },
58
+ { 'column' => 'demographics_2',
59
+ 'xml_cell' => { 'relative_path' => 'demographics', 'attribute' => 'code' } },
60
+ { 'column' => 'demographics_2', 'rawtext_name' => 'demographics_2_inner_text',
61
+ 'xml_cell' => { 'relative_path' => 'demographics' } },
62
+ { 'column' => 'address_line1[1]', 'rawtext_name' => 'address1',
63
+ 'xml_cell' => { 'relative_path' => 'demographics/address' } },
64
+ { 'column' => 'address_line1[2]', 'rawtext_name' => 'address2',
65
+ 'xml_cell' => { 'relative_path' => 'demographics/address' } },
66
+ { 'column' => 'pathology_date_1',
67
+ 'xml_cell' => { 'relative_path' => 'pathology' } },
68
+ { 'column' => 'pathology_date_2',
69
+ 'xml_cell' => { 'relative_path' => 'pathology' } },
70
+ { 'column' => 'should_be_blank',
71
+ 'xml_cell' => { 'relative_path' => 'not_present' } }
72
+ ]
73
+ end
74
+
75
+ def partial_xml_column_mapping
76
+ [
77
+ { 'column' => 'no_relative_path',
78
+ 'xml_cell' => { 'relative_path' => '', 'attribute' => 'value' } },
79
+ { 'column' => 'no_path_or_att',
80
+ 'xml_cell' => { 'relative_path' => '', 'attribute' => '' } },
81
+ { 'column' => 'demographics_1',
82
+ 'xml_cell' => { 'relative_path' => 'demographics' } },
83
+ { 'column' => 'demographics_2',
84
+ 'xml_cell' => { 'relative_path' => 'demographics', 'attribute' => 'code' } },
85
+ { 'column' => 'address_line1',
86
+ 'xml_cell' => { 'relative_path' => 'demographics/address' } }
87
+ ]
88
+ end
89
+ end
90
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ndr_import
3
3
  version: !ruby/object:Gem::Version
4
- version: 6.2.0
4
+ version: 6.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - NCRS Development Team
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-09-24 00:00:00.000000000 Z
11
+ date: 2018-10-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -352,6 +352,7 @@ files:
352
352
  - lib/ndr_import/file/text.rb
353
353
  - lib/ndr_import/file/unregistered_filetype.rb
354
354
  - lib/ndr_import/file/word.rb
355
+ - lib/ndr_import/file/xml.rb
355
356
  - lib/ndr_import/file/zip.rb
356
357
  - lib/ndr_import/fixed_width/table.rb
357
358
  - lib/ndr_import/helpers/file/delimited.rb
@@ -372,6 +373,7 @@ files:
372
373
  - lib/ndr_import/table.rb
373
374
  - lib/ndr_import/universal_importer_helper.rb
374
375
  - lib/ndr_import/version.rb
376
+ - lib/ndr_import/xml/table.rb
375
377
  - ndr_import.gemspec
376
378
  - test/file/base_test.rb
377
379
  - test/file/delimited_test.rb
@@ -381,6 +383,7 @@ files:
381
383
  - test/file/registry_test.rb
382
384
  - test/file/text_test.rb
383
385
  - test/file/word_test.rb
386
+ - test/file/xml_test.rb
384
387
  - test/file/zip_test.rb
385
388
  - test/fixed_width/table_test.rb
386
389
  - test/helpers/file/delimited_test.rb
@@ -411,7 +414,9 @@ files:
411
414
  - test/resources/hello_world.txt
412
415
  - test/resources/high_ascii_delimited.txt
413
416
  - test/resources/high_ascii_delimited_example_two.txt
417
+ - test/resources/malformed.csv
414
418
  - test/resources/malformed.xml
419
+ - test/resources/malformed_pipe.csv
415
420
  - test/resources/normal.csv
416
421
  - test/resources/normal.csv.zip
417
422
  - test/resources/normal_pipe.csv
@@ -420,6 +425,7 @@ files:
420
425
  - test/resources/not_a_word_file.doc
421
426
  - test/resources/not_a_word_file.docx
422
427
  - test/resources/not_sign_delimited.txt
428
+ - test/resources/sample.xml
423
429
  - test/resources/sample_xls.xls
424
430
  - test/resources/sample_xlsx.xlsx
425
431
  - test/resources/sheet_streaming.xls
@@ -438,6 +444,7 @@ files:
438
444
  - test/table_test.rb
439
445
  - test/test_helper.rb
440
446
  - test/universal_importer_helper_test.rb
447
+ - test/xml/table_test.rb
441
448
  homepage: https://github.com/PublicHealthEngland/ndr_import
442
449
  licenses:
443
450
  - MIT
@@ -450,7 +457,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
450
457
  requirements:
451
458
  - - ">="
452
459
  - !ruby/object:Gem::Version
453
- version: '2.2'
460
+ version: '2.4'
454
461
  required_rubygems_version: !ruby/object:Gem::Requirement
455
462
  requirements:
456
463
  - - ">="
@@ -471,6 +478,7 @@ test_files:
471
478
  - test/file/registry_test.rb
472
479
  - test/file/text_test.rb
473
480
  - test/file/word_test.rb
481
+ - test/file/xml_test.rb
474
482
  - test/file/zip_test.rb
475
483
  - test/fixed_width/table_test.rb
476
484
  - test/helpers/file/delimited_test.rb
@@ -501,7 +509,9 @@ test_files:
501
509
  - test/resources/hello_world.txt
502
510
  - test/resources/high_ascii_delimited.txt
503
511
  - test/resources/high_ascii_delimited_example_two.txt
512
+ - test/resources/malformed.csv
504
513
  - test/resources/malformed.xml
514
+ - test/resources/malformed_pipe.csv
505
515
  - test/resources/normal.csv
506
516
  - test/resources/normal.csv.zip
507
517
  - test/resources/normal_pipe.csv
@@ -510,6 +520,7 @@ test_files:
510
520
  - test/resources/not_a_word_file.doc
511
521
  - test/resources/not_a_word_file.docx
512
522
  - test/resources/not_sign_delimited.txt
523
+ - test/resources/sample.xml
513
524
  - test/resources/sample_xls.xls
514
525
  - test/resources/sample_xlsx.xlsx
515
526
  - test/resources/sheet_streaming.xls
@@ -528,3 +539,4 @@ test_files:
528
539
  - test/table_test.rb
529
540
  - test/test_helper.rb
530
541
  - test/universal_importer_helper_test.rb
542
+ - test/xml/table_test.rb