ndr_import 6.2.0 → 6.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/code_safety.yml +42 -14
- data/lib/ndr_import.rb +1 -0
- data/lib/ndr_import/file/all.rb +1 -1
- data/lib/ndr_import/file/delimited.rb +4 -3
- data/lib/ndr_import/file/xml.rb +29 -0
- data/lib/ndr_import/mapper.rb +2 -1
- data/lib/ndr_import/non_tabular/table.rb +6 -3
- data/lib/ndr_import/table.rb +2 -2
- data/lib/ndr_import/universal_importer_helper.rb +8 -4
- data/lib/ndr_import/version.rb +1 -1
- data/lib/ndr_import/xml/table.rb +90 -0
- data/ndr_import.gemspec +1 -1
- data/test/file/delimited_test.rb +56 -0
- data/test/file/registry_test.rb +1 -1
- data/test/file/xml_test.rb +21 -0
- data/test/mapper_test.rb +5 -0
- data/test/resources/malformed.csv +3 -0
- data/test/resources/malformed_pipe.csv +3 -0
- data/test/resources/sample.xml +34 -0
- data/test/xml/table_test.rb +90 -0
- metadata +15 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9ae1ff625cdd7352f4b3306f9a49905b2563557b487762611dfd0391735177ee
|
4
|
+
data.tar.gz: a7bdc413db721c0f813b9fd7ef862beff3cf6a9a76f042eef15d141b2526f5a7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3d572905a49329c295a9ab701de99a4f82a9165a399b05ca6f80e48218e2c603fddca14768d4aad2e9139ac07cdbaa6c5ba1f1851527cc50f7f6f4b45ed2392e
|
7
|
+
data.tar.gz: 3414e9efa2b16b60e9d532075fca1df095bdcab20516738243fc9ed9ac5b30540b3ad1f9139feaedfd3f5c3182c049e2deb5f5cd878a67360bdefaeffd9c6452
|
data/code_safety.yml
CHANGED
@@ -15,7 +15,7 @@ file safety:
|
|
15
15
|
".travis.yml":
|
16
16
|
comments:
|
17
17
|
reviewed_by: josh.pencheon
|
18
|
-
safe_revision:
|
18
|
+
safe_revision: 71979e0a602ca5a0ce415c194f10add9959f0116
|
19
19
|
CODE_OF_CONDUCT.md:
|
20
20
|
comments:
|
21
21
|
reviewed_by: timgentry
|
@@ -58,8 +58,8 @@ file safety:
|
|
58
58
|
safe_revision: c158dc783b84cab31380708e76e3812544cc1c2f
|
59
59
|
lib/ndr_import.rb:
|
60
60
|
comments:
|
61
|
-
reviewed_by:
|
62
|
-
safe_revision:
|
61
|
+
reviewed_by: josh.pencheon
|
62
|
+
safe_revision: 4ab72f84201c2d5f0147b7dfd041f488f6ff0422
|
63
63
|
lib/ndr_import/csv_library.rb:
|
64
64
|
comments:
|
65
65
|
reviewed_by: josh.pencheon
|
@@ -67,7 +67,7 @@ file safety:
|
|
67
67
|
lib/ndr_import/file/all.rb:
|
68
68
|
comments:
|
69
69
|
reviewed_by: josh.pencheon
|
70
|
-
safe_revision:
|
70
|
+
safe_revision: 4ab72f84201c2d5f0147b7dfd041f488f6ff0422
|
71
71
|
lib/ndr_import/file/base.rb:
|
72
72
|
comments:
|
73
73
|
reviewed_by: timgentry
|
@@ -75,7 +75,7 @@ file safety:
|
|
75
75
|
lib/ndr_import/file/delimited.rb:
|
76
76
|
comments:
|
77
77
|
reviewed_by: josh.pencheon
|
78
|
-
safe_revision:
|
78
|
+
safe_revision: 71979e0a602ca5a0ce415c194f10add9959f0116
|
79
79
|
lib/ndr_import/file/docx.rb:
|
80
80
|
comments:
|
81
81
|
reviewed_by: josh.pencheon
|
@@ -104,6 +104,10 @@ file safety:
|
|
104
104
|
comments:
|
105
105
|
reviewed_by: timgentry
|
106
106
|
safe_revision: c88000b32401b5ae9ef7f5878a9b630506ab5a94
|
107
|
+
lib/ndr_import/file/xml.rb:
|
108
|
+
comments:
|
109
|
+
reviewed_by: josh.pencheon
|
110
|
+
safe_revision: 4ab72f84201c2d5f0147b7dfd041f488f6ff0422
|
107
111
|
lib/ndr_import/file/zip.rb:
|
108
112
|
comments:
|
109
113
|
reviewed_by: timgentry
|
@@ -139,7 +143,7 @@ file safety:
|
|
139
143
|
lib/ndr_import/mapper.rb:
|
140
144
|
comments:
|
141
145
|
reviewed_by: josh.pencheon
|
142
|
-
safe_revision:
|
146
|
+
safe_revision: 4ab72f84201c2d5f0147b7dfd041f488f6ff0422
|
143
147
|
lib/ndr_import/mapping_error.rb:
|
144
148
|
comments:
|
145
149
|
reviewed_by: timgentry
|
@@ -163,7 +167,7 @@ file safety:
|
|
163
167
|
lib/ndr_import/non_tabular/table.rb:
|
164
168
|
comments:
|
165
169
|
reviewed_by: josh.pencheon
|
166
|
-
safe_revision:
|
170
|
+
safe_revision: 71979e0a602ca5a0ce415c194f10add9959f0116
|
167
171
|
lib/ndr_import/non_tabular_file_helper.rb:
|
168
172
|
comments:
|
169
173
|
reviewed_by: josh.pencheon
|
@@ -175,19 +179,23 @@ file safety:
|
|
175
179
|
lib/ndr_import/table.rb:
|
176
180
|
comments: uses File.basename
|
177
181
|
reviewed_by: josh.pencheon
|
178
|
-
safe_revision:
|
182
|
+
safe_revision: 71979e0a602ca5a0ce415c194f10add9959f0116
|
179
183
|
lib/ndr_import/universal_importer_helper.rb:
|
180
184
|
comments:
|
181
185
|
reviewed_by: josh.pencheon
|
182
|
-
safe_revision:
|
186
|
+
safe_revision: 71979e0a602ca5a0ce415c194f10add9959f0116
|
183
187
|
lib/ndr_import/version.rb:
|
184
188
|
comments: another check?
|
185
189
|
reviewed_by: josh.pencheon
|
186
|
-
safe_revision:
|
190
|
+
safe_revision: 0b1ab7c810d0fa46d153238d69627c07f56d1efa
|
191
|
+
lib/ndr_import/xml/table.rb:
|
192
|
+
comments:
|
193
|
+
reviewed_by: josh.pencheon
|
194
|
+
safe_revision: 4ab72f84201c2d5f0147b7dfd041f488f6ff0422
|
187
195
|
ndr_import.gemspec:
|
188
196
|
comments:
|
189
197
|
reviewed_by: josh.pencheon
|
190
|
-
safe_revision:
|
198
|
+
safe_revision: 71979e0a602ca5a0ce415c194f10add9959f0116
|
191
199
|
test/file/base_test.rb:
|
192
200
|
comments:
|
193
201
|
reviewed_by: timgentry
|
@@ -195,7 +203,7 @@ file safety:
|
|
195
203
|
test/file/delimited_test.rb:
|
196
204
|
comments:
|
197
205
|
reviewed_by: josh.pencheon
|
198
|
-
safe_revision:
|
206
|
+
safe_revision: ab9f926a53e84f8ffa826abdd967abee455c89df
|
199
207
|
test/file/docx_test.rb:
|
200
208
|
comments:
|
201
209
|
reviewed_by: josh.pencheon
|
@@ -211,7 +219,7 @@ file safety:
|
|
211
219
|
test/file/registry_test.rb:
|
212
220
|
comments:
|
213
221
|
reviewed_by: josh.pencheon
|
214
|
-
safe_revision:
|
222
|
+
safe_revision: 4ab72f84201c2d5f0147b7dfd041f488f6ff0422
|
215
223
|
test/file/text_test.rb:
|
216
224
|
comments:
|
217
225
|
reviewed_by: timgentry
|
@@ -220,6 +228,10 @@ file safety:
|
|
220
228
|
comments:
|
221
229
|
reviewed_by: josh.pencheon
|
222
230
|
safe_revision: 1b66cfcbb61dfac93c44889ca0ced5836101c20c
|
231
|
+
test/file/xml_test.rb:
|
232
|
+
comments:
|
233
|
+
reviewed_by: josh.pencheon
|
234
|
+
safe_revision: 4ab72f84201c2d5f0147b7dfd041f488f6ff0422
|
223
235
|
test/file/zip_test.rb:
|
224
236
|
comments:
|
225
237
|
reviewed_by: timgentry
|
@@ -255,7 +267,7 @@ file safety:
|
|
255
267
|
test/mapper_test.rb:
|
256
268
|
comments: exposes Mapper internals to test them
|
257
269
|
reviewed_by: josh.pencheon
|
258
|
-
safe_revision:
|
270
|
+
safe_revision: 4ab72f84201c2d5f0147b7dfd041f488f6ff0422
|
259
271
|
test/non_tabular/mapping_test.rb:
|
260
272
|
comments:
|
261
273
|
reviewed_by: timgentry
|
@@ -340,10 +352,18 @@ file safety:
|
|
340
352
|
comments:
|
341
353
|
reviewed_by: josh.pencheon
|
342
354
|
safe_revision: 902f5326d85372d9632de9869d6f56fc02b83a10
|
355
|
+
test/resources/malformed.csv:
|
356
|
+
comments:
|
357
|
+
reviewed_by: josh.pencheon
|
358
|
+
safe_revision: 71979e0a602ca5a0ce415c194f10add9959f0116
|
343
359
|
test/resources/malformed.xml:
|
344
360
|
comments:
|
345
361
|
reviewed_by: timgentry
|
346
362
|
safe_revision: 137170d443ea6bcc0afb18f62202c285ae6501eb
|
363
|
+
test/resources/malformed_pipe.csv:
|
364
|
+
comments:
|
365
|
+
reviewed_by: josh.pencheon
|
366
|
+
safe_revision: 71979e0a602ca5a0ce415c194f10add9959f0116
|
347
367
|
test/resources/normal.csv:
|
348
368
|
comments:
|
349
369
|
reviewed_by: timgentry
|
@@ -376,6 +396,10 @@ file safety:
|
|
376
396
|
comments:
|
377
397
|
reviewed_by: josh.pencheon
|
378
398
|
safe_revision: 902f5326d85372d9632de9869d6f56fc02b83a10
|
399
|
+
test/resources/sample.xml:
|
400
|
+
comments:
|
401
|
+
reviewed_by: josh.pencheon
|
402
|
+
safe_revision: 4ab72f84201c2d5f0147b7dfd041f488f6ff0422
|
379
403
|
test/resources/sample_xls.xls:
|
380
404
|
comments:
|
381
405
|
reviewed_by: timgentry
|
@@ -448,3 +472,7 @@ file safety:
|
|
448
472
|
comments:
|
449
473
|
reviewed_by: josh.pencheon
|
450
474
|
safe_revision: 902f5326d85372d9632de9869d6f56fc02b83a10
|
475
|
+
test/xml/table_test.rb:
|
476
|
+
comments:
|
477
|
+
reviewed_by: josh.pencheon
|
478
|
+
safe_revision: 4ab72f84201c2d5f0147b7dfd041f488f6ff0422
|
data/lib/ndr_import.rb
CHANGED
data/lib/ndr_import/file/all.rb
CHANGED
@@ -52,8 +52,9 @@ module NdrImport
|
|
52
52
|
begin
|
53
53
|
col_sep = @options['col_sep']
|
54
54
|
options = {
|
55
|
-
:
|
56
|
-
:
|
55
|
+
col_sep: (col_sep || ',').force_encoding(delimiter_encoding),
|
56
|
+
mode: access_mode,
|
57
|
+
liberal_parsing: @options['liberal_parsing'].presence
|
57
58
|
}
|
58
59
|
|
59
60
|
row_num = 0
|
@@ -76,7 +77,7 @@ module NdrImport
|
|
76
77
|
|
77
78
|
# We tried them all, and none worked:
|
78
79
|
unless successful_options
|
79
|
-
|
80
|
+
raise "None of the encodings #{supported_encodings.values.inspect} were successful!"
|
80
81
|
end
|
81
82
|
|
82
83
|
successful_options
|
@@ -0,0 +1,29 @@
|
|
1
|
+
require 'ndr_support/safe_file'
|
2
|
+
require 'ndr_import/helpers/file/xml'
|
3
|
+
require_relative 'registry'
|
4
|
+
|
5
|
+
module NdrImport
|
6
|
+
# This is one of a collection of file handlers that deal with individual formats of data.
|
7
|
+
# They can be instantiated directly or via the factory method Registry.tables
|
8
|
+
module File
|
9
|
+
# This class is a xml file handler that returns a single table.
|
10
|
+
class Xml < Base
|
11
|
+
include NdrImport::Helpers::File::Xml
|
12
|
+
|
13
|
+
private
|
14
|
+
|
15
|
+
# Iterate through the file, yielding each 'xml_record_xpath' element in turn.
|
16
|
+
def rows(&block)
|
17
|
+
return enum_for(:rows) unless block
|
18
|
+
|
19
|
+
doc = read_xml_file(@filename)
|
20
|
+
|
21
|
+
doc.xpath(@options['xml_record_xpath']).each(&block)
|
22
|
+
rescue StandardError => e
|
23
|
+
raise("#{SafeFile.basename(@filename)} [#{e.class}: #{e.message}]")
|
24
|
+
end
|
25
|
+
end
|
26
|
+
# Not all xml files may want to be registered, so 'xml' is not registered by design.
|
27
|
+
Registry.register(Xml, 'xml_table')
|
28
|
+
end
|
29
|
+
end
|
data/lib/ndr_import/mapper.rb
CHANGED
@@ -231,13 +231,14 @@ module NdrImport::Mapper
|
|
231
231
|
#
|
232
232
|
# would base64 decode a word document and then 'decode' the word document into plain text
|
233
233
|
def decode_raw_value(raw_value, encoding)
|
234
|
+
return raw_value if raw_value.blank?
|
234
235
|
case encoding
|
235
236
|
when :base64
|
236
237
|
Base64.decode64(raw_value)
|
237
238
|
when :word_doc
|
238
239
|
read_word_stream(StringIO.new(raw_value, 'r'))
|
239
240
|
else
|
240
|
-
|
241
|
+
raise "Cannot decode: #{encoding}"
|
241
242
|
end
|
242
243
|
end
|
243
244
|
|
@@ -16,12 +16,15 @@ module NdrImport
|
|
16
16
|
|
17
17
|
include UTF8Encoding
|
18
18
|
|
19
|
-
|
19
|
+
TABULAR_ONLY_OPTIONS = %w[delimiter liberal_parsing tablename_pattern
|
20
|
+
header_lines footer_lines xml_record_xpath].freeze
|
21
|
+
|
22
|
+
NON_TABULAR_OPTIONS = %w[capture_end_line capture_start_line start_line_pattern
|
20
23
|
end_line_pattern remove_lines start_in_a_record
|
21
|
-
end_in_a_record
|
24
|
+
end_in_a_record].freeze
|
22
25
|
|
23
26
|
def self.all_valid_options
|
24
|
-
super -
|
27
|
+
super - TABULAR_ONLY_OPTIONS + NON_TABULAR_OPTIONS
|
25
28
|
end
|
26
29
|
|
27
30
|
attr_reader(*NON_TABULAR_OPTIONS)
|
data/lib/ndr_import/table.rb
CHANGED
@@ -10,8 +10,8 @@ module NdrImport
|
|
10
10
|
include NdrImport::Mapper
|
11
11
|
|
12
12
|
def self.all_valid_options
|
13
|
-
%w[canonical_name delimiter filename_pattern tablename_pattern header_lines
|
14
|
-
format klass columns]
|
13
|
+
%w[canonical_name delimiter liberal_parsing filename_pattern tablename_pattern header_lines
|
14
|
+
footer_lines format klass columns xml_record_xpath]
|
15
15
|
end
|
16
16
|
|
17
17
|
def all_valid_options
|
@@ -32,10 +32,14 @@ module NdrImport
|
|
32
32
|
# now at the individual file level, can we find the table mapping?
|
33
33
|
table_mapping = get_table_mapping(filename, nil)
|
34
34
|
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
35
|
+
options = {
|
36
|
+
'unzip_path' => unzip_path,
|
37
|
+
'col_sep' => table_mapping.try(:delimiter),
|
38
|
+
'liberal_parsing' => table_mapping.try(:liberal_parsing),
|
39
|
+
'xml_record_xpath' => table_mapping.try(:xml_record_xpath)
|
40
|
+
}
|
41
|
+
|
42
|
+
tables = NdrImport::File::Registry.tables(filename, table_mapping.try(:format), options)
|
39
43
|
yield_tables_and_their_content(filename, tables, &block)
|
40
44
|
end
|
41
45
|
end
|
data/lib/ndr_import/version.rb
CHANGED
@@ -0,0 +1,90 @@
|
|
1
|
+
require 'ndr_import/table'
|
2
|
+
|
3
|
+
module NdrImport
|
4
|
+
module Xml
|
5
|
+
# This class maintains the state of a xml table mapping and encapsulates
|
6
|
+
# the logic required to transform a table of data into "records". Particular
|
7
|
+
# attention has been made to use enumerables throughout to help with the
|
8
|
+
# transformation of large quantities of data.
|
9
|
+
class Table < ::NdrImport::Table
|
10
|
+
def self.all_valid_options
|
11
|
+
super - %w[delimiter header_lines footer_lines]
|
12
|
+
end
|
13
|
+
|
14
|
+
def header_lines
|
15
|
+
0
|
16
|
+
end
|
17
|
+
|
18
|
+
def footer_lines
|
19
|
+
0
|
20
|
+
end
|
21
|
+
|
22
|
+
# This method transforms an incoming line (element) of xml data by applying
|
23
|
+
# each of the klass masked mappings to the line and yielding the klass
|
24
|
+
# and fields for each mapped klass.
|
25
|
+
def transform_line(line, index)
|
26
|
+
return enum_for(:transform_line, line, index) unless block_given?
|
27
|
+
|
28
|
+
raise 'Not an Nokogiri::XML::Element!' unless line.is_a? Nokogiri::XML::Element
|
29
|
+
|
30
|
+
validate_column_mappings(line)
|
31
|
+
|
32
|
+
xml_line = column_xpaths.map { |column_xpath| line.xpath(column_xpath).inner_text }
|
33
|
+
|
34
|
+
masked_mappings.each do |klass, klass_mappings|
|
35
|
+
fields = mapped_line(xml_line, klass_mappings)
|
36
|
+
next if fields[:skip].to_s == 'true'.freeze
|
37
|
+
yield(klass, fields, index)
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
private
|
42
|
+
|
43
|
+
# Ensure every leaf is accounted for in the column mappings
|
44
|
+
def validate_column_mappings(line)
|
45
|
+
missing_nodes = mappable_xpaths_from(line) - column_xpaths
|
46
|
+
raise "Unmapped data! #{missing_nodes}" unless missing_nodes.empty?
|
47
|
+
end
|
48
|
+
|
49
|
+
def column_name_from(column)
|
50
|
+
column[Strings::COLUMN] || column[Strings::STANDARD_MAPPING]
|
51
|
+
end
|
52
|
+
|
53
|
+
def column_xpaths
|
54
|
+
@column_xpaths ||= columns.map { |column| build_xpath_from(column) }
|
55
|
+
end
|
56
|
+
|
57
|
+
def mappable_xpaths_from(line)
|
58
|
+
xpaths = []
|
59
|
+
|
60
|
+
line.xpath('.//*[not(child::*)]').each do |node|
|
61
|
+
xpath = node.path.sub(line.path + '/', '')
|
62
|
+
xpaths << xpath
|
63
|
+
node.attributes.each_key { |key| xpaths << "#{xpath}/@#{key}" }
|
64
|
+
end
|
65
|
+
xpaths
|
66
|
+
end
|
67
|
+
|
68
|
+
def build_xpath_from(column)
|
69
|
+
column_name = column_name_from(column)
|
70
|
+
column['xml_cell'].presence ? relative_path_from(column, column_name) : column_name
|
71
|
+
end
|
72
|
+
|
73
|
+
def relative_path_from(column, colum_name)
|
74
|
+
xml_cell = column['xml_cell']
|
75
|
+
relative_path = xml_cell['relative_path'].presence ? xml_cell['relative_path'] : nil
|
76
|
+
attribute = xml_cell['attribute'].presence ? '@' + xml_cell['attribute'] : nil
|
77
|
+
|
78
|
+
if relative_path && attribute
|
79
|
+
relative_path + '/' + colum_name + '/' + attribute
|
80
|
+
elsif relative_path
|
81
|
+
relative_path + '/' + colum_name
|
82
|
+
elsif attribute
|
83
|
+
colum_name + '/' + attribute
|
84
|
+
else
|
85
|
+
colum_name
|
86
|
+
end
|
87
|
+
end
|
88
|
+
end
|
89
|
+
end
|
90
|
+
end
|
data/ndr_import.gemspec
CHANGED
@@ -33,7 +33,7 @@ Gem::Specification.new do |spec|
|
|
33
33
|
spec.add_dependency 'roo-xls'
|
34
34
|
spec.add_dependency 'spreadsheet', '1.0.3'
|
35
35
|
|
36
|
-
spec.required_ruby_version = '>= 2.
|
36
|
+
spec.required_ruby_version = '>= 2.4'
|
37
37
|
|
38
38
|
spec.add_development_dependency 'bundler', '~> 1.7'
|
39
39
|
spec.add_development_dependency 'rake', '~> 10.0'
|
data/test/file/delimited_test.rb
CHANGED
@@ -33,6 +33,21 @@ module NdrImport
|
|
33
33
|
end
|
34
34
|
end
|
35
35
|
|
36
|
+
test 'should read malformed pipe correctly' do
|
37
|
+
file_path = @permanent_test_files.join('malformed_pipe.csv')
|
38
|
+
handler = NdrImport::File::Delimited.new(file_path, 'delimited', 'col_sep' => '|',
|
39
|
+
'liberal_parsing' => 'true')
|
40
|
+
handler.tables.each do |tablename, sheet|
|
41
|
+
assert_nil tablename
|
42
|
+
sheet = sheet.to_a
|
43
|
+
assert_equal(('A'..'Z').to_a, sheet[0])
|
44
|
+
assert_equal ['1'] * 26, sheet[1]
|
45
|
+
expected_row = ['2'] * 25
|
46
|
+
expected_row << '2"malformed"'
|
47
|
+
assert_equal expected_row, sheet[2].sort
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
36
51
|
test 'should read thorn correctly' do
|
37
52
|
file_path = @permanent_test_files.join('normal_thorn.csv')
|
38
53
|
handler = NdrImport::File::Delimited.new(file_path, 'delimited', 'col_sep' => "\xfe")
|
@@ -99,6 +114,47 @@ module NdrImport
|
|
99
114
|
assert_equal ['2'] * 26, rows[2]
|
100
115
|
end
|
101
116
|
|
117
|
+
test 'should read malformed delimited txt' do
|
118
|
+
rows = []
|
119
|
+
file_path = @permanent_test_files.join('malformed.csv')
|
120
|
+
handler = NdrImport::File::Delimited.new(file_path, 'csv', 'col_sep' => nil,
|
121
|
+
'liberal_parsing' => 'true')
|
122
|
+
handler.tables.each do |tablename, sheet|
|
123
|
+
assert_nil tablename
|
124
|
+
assert_instance_of Enumerator, sheet
|
125
|
+
sheet.each do |row|
|
126
|
+
rows << row
|
127
|
+
end
|
128
|
+
end
|
129
|
+
|
130
|
+
assert_equal(('A'..'Z').to_a, rows[0])
|
131
|
+
assert_equal ['1'] * 26, rows[1]
|
132
|
+
expected_row = ['2'] * 25
|
133
|
+
expected_row << '2"malformed"'
|
134
|
+
assert_equal expected_row, rows[2].sort
|
135
|
+
end
|
136
|
+
|
137
|
+
test 'should fail to read malformed delimited txt without liberal_parsing' do
|
138
|
+
rows_yielded = []
|
139
|
+
exception = assert_raises(CSVLibrary::MalformedCSVError) do
|
140
|
+
file_path = @permanent_test_files.join('malformed.csv')
|
141
|
+
handler = NdrImport::File::Delimited.new(file_path, 'csv')
|
142
|
+
|
143
|
+
handler.tables.each do |tablename, sheet|
|
144
|
+
assert_nil tablename
|
145
|
+
assert_instance_of Enumerator, sheet
|
146
|
+
sheet.each do |row|
|
147
|
+
rows_yielded << row
|
148
|
+
end
|
149
|
+
end
|
150
|
+
end
|
151
|
+
|
152
|
+
assert rows_yielded.empty?, 'no rows should have been yielded'
|
153
|
+
|
154
|
+
msg = 'Invalid CSV format on row 3 of malformed.csv. Original: Illegal quoting in line 3.'
|
155
|
+
assert_equal msg, exception.message
|
156
|
+
end
|
157
|
+
|
102
158
|
test 'should read line-by-line' do
|
103
159
|
rows = []
|
104
160
|
file_path = @permanent_test_files.join('normal.csv')
|
data/test/file/registry_test.rb
CHANGED
@@ -12,7 +12,7 @@ module NdrImport
|
|
12
12
|
|
13
13
|
test 'Registry.handlers' do
|
14
14
|
assert_instance_of Hash, NdrImport::File::Registry.handlers
|
15
|
-
assert_equal %w[csv delimited doc docx nontabular pdf text txt xls xlsx zip],
|
15
|
+
assert_equal %w[csv delimited doc docx nontabular pdf text txt xls xlsx xml_table zip],
|
16
16
|
NdrImport::File::Registry.handlers.keys.sort
|
17
17
|
end
|
18
18
|
|
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
require 'ndr_import/file/xml'
|
3
|
+
|
4
|
+
module NdrImport
|
5
|
+
module File
|
6
|
+
# Xml file handler tests
|
7
|
+
class XmlTest < ActiveSupport::TestCase
|
8
|
+
def setup
|
9
|
+
@permanent_test_files = SafePath.new('permanent_test_files')
|
10
|
+
end
|
11
|
+
|
12
|
+
test 'should return enum of xml elements' do
|
13
|
+
file_path = @permanent_test_files.join('sample.xml')
|
14
|
+
handler = NdrImport::File::Xml.new(file_path, nil, 'xml_record_xpath' => 'root/record')
|
15
|
+
rows = handler.send(:rows)
|
16
|
+
assert rows.is_a? Enumerator
|
17
|
+
assert(rows.all? { |row| row.is_a? Nokogiri::XML::Element })
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
data/test/mapper_test.rb
CHANGED
@@ -635,6 +635,11 @@ class MapperTest < ActiveSupport::TestCase
|
|
635
635
|
assert_equal 'Hello world, this is a word document', file_content
|
636
636
|
end
|
637
637
|
|
638
|
+
test 'should handle blank values when attempting to decode_raw_value' do
|
639
|
+
text_content = TestMapper.new.send(:decode_raw_value, '', :word_doc)
|
640
|
+
assert_equal '', text_content
|
641
|
+
end
|
642
|
+
|
638
643
|
test 'should raise unknown encoding exception' do
|
639
644
|
assert_raise(RuntimeError) do
|
640
645
|
TestMapper.new.mapped_line(['A'], invalid_decode_mapping)
|
@@ -0,0 +1,34 @@
|
|
1
|
+
<root>
|
2
|
+
<record>
|
3
|
+
<no_relative_path value="A value"/>
|
4
|
+
<no_path_or_att>Another value</no_path_or_att>
|
5
|
+
<demographics>
|
6
|
+
<demographics_1>AAA</demographics_1>
|
7
|
+
<address>
|
8
|
+
<address_line1>Address</address_line1>
|
9
|
+
<address_line1>Address 2</address_line1>
|
10
|
+
</address>
|
11
|
+
<demographics_2 code="03">Inner text</demographics_2>
|
12
|
+
</demographics>
|
13
|
+
<pathology>
|
14
|
+
<pathology_date_1>2018-01-01</pathology_date_1>
|
15
|
+
<pathology_date_2 />
|
16
|
+
</pathology>
|
17
|
+
</record>
|
18
|
+
<record>
|
19
|
+
<demographics>
|
20
|
+
<address>
|
21
|
+
<address_line1>Address</address_line1>
|
22
|
+
<address_line1>Address 2</address_line1>
|
23
|
+
</address>
|
24
|
+
<demographics_2 code="03">Inner text</demographics_2>
|
25
|
+
<demographics_1>AAA</demographics_1>
|
26
|
+
</demographics>
|
27
|
+
<no_path_or_att><![CDATA[Another value]]></no_path_or_att>
|
28
|
+
<pathology>
|
29
|
+
<pathology_date_1>2018-01-01</pathology_date_1>
|
30
|
+
<pathology_date_2 />
|
31
|
+
</pathology>
|
32
|
+
<no_relative_path value="A value"/>
|
33
|
+
</record>
|
34
|
+
</root>
|
@@ -0,0 +1,90 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
# This tests the NdrImport::Xml::Table mapping class
|
4
|
+
module Xml
|
5
|
+
class TableTest < ActiveSupport::TestCase
|
6
|
+
def setup
|
7
|
+
file_path = SafePath.new('permanent_test_files').join('sample.xml')
|
8
|
+
handler = NdrImport::File::Xml.new(file_path, nil, 'xml_record_xpath' => 'root/record')
|
9
|
+
|
10
|
+
@element_lines = handler.send(:rows)
|
11
|
+
end
|
12
|
+
|
13
|
+
def test_should_transform_xml_element_lines
|
14
|
+
table = NdrImport::Xml::Table.new(klass: 'SomeTestKlass', columns: xml_column_mapping)
|
15
|
+
|
16
|
+
expected_data = ['SomeTestKlass', { rawtext: {
|
17
|
+
'no_relative_path' => 'A value',
|
18
|
+
'no_relative_path_inner_text' => '',
|
19
|
+
'no_path_or_att' => 'Another value',
|
20
|
+
'demographics_1' => 'AAA',
|
21
|
+
'demographics_2' => '03',
|
22
|
+
'demographics_2_inner_text' => 'Inner text',
|
23
|
+
'address1' => 'Address',
|
24
|
+
'address2' => 'Address 2',
|
25
|
+
'pathology_date_1' => '2018-01-01',
|
26
|
+
'pathology_date_2' => '',
|
27
|
+
'should_be_blank' => ''
|
28
|
+
} }, 1]
|
29
|
+
|
30
|
+
transformed_data = table.transform(@element_lines)
|
31
|
+
assert_equal 2, transformed_data.count
|
32
|
+
|
33
|
+
transformed_data.each do |klass, fields, _index|
|
34
|
+
assert_equal expected_data[0], klass
|
35
|
+
assert_equal expected_data[1], fields
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
def test_should_fail_with_unmappped_nodes
|
40
|
+
table = NdrImport::Xml::Table.new(klass: 'SomeTestKlass', columns: partial_xml_column_mapping)
|
41
|
+
|
42
|
+
exception = assert_raises(RuntimeError) { table.transform(@element_lines).to_a }
|
43
|
+
assert exception.message.starts_with? 'sample.xml [RuntimeError: Unmapped data!'
|
44
|
+
end
|
45
|
+
|
46
|
+
private
|
47
|
+
|
48
|
+
def xml_column_mapping
|
49
|
+
[
|
50
|
+
{ 'column' => 'no_relative_path',
|
51
|
+
'xml_cell' => { 'relative_path' => '', 'attribute' => 'value' } },
|
52
|
+
{ 'column' => 'no_relative_path', 'rawtext_name' => 'no_relative_path_inner_text',
|
53
|
+
'xml_cell' => { 'relative_path' => '' } },
|
54
|
+
{ 'column' => 'no_path_or_att',
|
55
|
+
'xml_cell' => { 'relative_path' => '', 'attribute' => '' } },
|
56
|
+
{ 'column' => 'demographics_1',
|
57
|
+
'xml_cell' => { 'relative_path' => 'demographics' } },
|
58
|
+
{ 'column' => 'demographics_2',
|
59
|
+
'xml_cell' => { 'relative_path' => 'demographics', 'attribute' => 'code' } },
|
60
|
+
{ 'column' => 'demographics_2', 'rawtext_name' => 'demographics_2_inner_text',
|
61
|
+
'xml_cell' => { 'relative_path' => 'demographics' } },
|
62
|
+
{ 'column' => 'address_line1[1]', 'rawtext_name' => 'address1',
|
63
|
+
'xml_cell' => { 'relative_path' => 'demographics/address' } },
|
64
|
+
{ 'column' => 'address_line1[2]', 'rawtext_name' => 'address2',
|
65
|
+
'xml_cell' => { 'relative_path' => 'demographics/address' } },
|
66
|
+
{ 'column' => 'pathology_date_1',
|
67
|
+
'xml_cell' => { 'relative_path' => 'pathology' } },
|
68
|
+
{ 'column' => 'pathology_date_2',
|
69
|
+
'xml_cell' => { 'relative_path' => 'pathology' } },
|
70
|
+
{ 'column' => 'should_be_blank',
|
71
|
+
'xml_cell' => { 'relative_path' => 'not_present' } }
|
72
|
+
]
|
73
|
+
end
|
74
|
+
|
75
|
+
def partial_xml_column_mapping
|
76
|
+
[
|
77
|
+
{ 'column' => 'no_relative_path',
|
78
|
+
'xml_cell' => { 'relative_path' => '', 'attribute' => 'value' } },
|
79
|
+
{ 'column' => 'no_path_or_att',
|
80
|
+
'xml_cell' => { 'relative_path' => '', 'attribute' => '' } },
|
81
|
+
{ 'column' => 'demographics_1',
|
82
|
+
'xml_cell' => { 'relative_path' => 'demographics' } },
|
83
|
+
{ 'column' => 'demographics_2',
|
84
|
+
'xml_cell' => { 'relative_path' => 'demographics', 'attribute' => 'code' } },
|
85
|
+
{ 'column' => 'address_line1',
|
86
|
+
'xml_cell' => { 'relative_path' => 'demographics/address' } }
|
87
|
+
]
|
88
|
+
end
|
89
|
+
end
|
90
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ndr_import
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 6.2.0
|
4
|
+
version: 6.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- NCRS Development Team
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-10-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -352,6 +352,7 @@ files:
|
|
352
352
|
- lib/ndr_import/file/text.rb
|
353
353
|
- lib/ndr_import/file/unregistered_filetype.rb
|
354
354
|
- lib/ndr_import/file/word.rb
|
355
|
+
- lib/ndr_import/file/xml.rb
|
355
356
|
- lib/ndr_import/file/zip.rb
|
356
357
|
- lib/ndr_import/fixed_width/table.rb
|
357
358
|
- lib/ndr_import/helpers/file/delimited.rb
|
@@ -372,6 +373,7 @@ files:
|
|
372
373
|
- lib/ndr_import/table.rb
|
373
374
|
- lib/ndr_import/universal_importer_helper.rb
|
374
375
|
- lib/ndr_import/version.rb
|
376
|
+
- lib/ndr_import/xml/table.rb
|
375
377
|
- ndr_import.gemspec
|
376
378
|
- test/file/base_test.rb
|
377
379
|
- test/file/delimited_test.rb
|
@@ -381,6 +383,7 @@ files:
|
|
381
383
|
- test/file/registry_test.rb
|
382
384
|
- test/file/text_test.rb
|
383
385
|
- test/file/word_test.rb
|
386
|
+
- test/file/xml_test.rb
|
384
387
|
- test/file/zip_test.rb
|
385
388
|
- test/fixed_width/table_test.rb
|
386
389
|
- test/helpers/file/delimited_test.rb
|
@@ -411,7 +414,9 @@ files:
|
|
411
414
|
- test/resources/hello_world.txt
|
412
415
|
- test/resources/high_ascii_delimited.txt
|
413
416
|
- test/resources/high_ascii_delimited_example_two.txt
|
417
|
+
- test/resources/malformed.csv
|
414
418
|
- test/resources/malformed.xml
|
419
|
+
- test/resources/malformed_pipe.csv
|
415
420
|
- test/resources/normal.csv
|
416
421
|
- test/resources/normal.csv.zip
|
417
422
|
- test/resources/normal_pipe.csv
|
@@ -420,6 +425,7 @@ files:
|
|
420
425
|
- test/resources/not_a_word_file.doc
|
421
426
|
- test/resources/not_a_word_file.docx
|
422
427
|
- test/resources/not_sign_delimited.txt
|
428
|
+
- test/resources/sample.xml
|
423
429
|
- test/resources/sample_xls.xls
|
424
430
|
- test/resources/sample_xlsx.xlsx
|
425
431
|
- test/resources/sheet_streaming.xls
|
@@ -438,6 +444,7 @@ files:
|
|
438
444
|
- test/table_test.rb
|
439
445
|
- test/test_helper.rb
|
440
446
|
- test/universal_importer_helper_test.rb
|
447
|
+
- test/xml/table_test.rb
|
441
448
|
homepage: https://github.com/PublicHealthEngland/ndr_import
|
442
449
|
licenses:
|
443
450
|
- MIT
|
@@ -450,7 +457,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
450
457
|
requirements:
|
451
458
|
- - ">="
|
452
459
|
- !ruby/object:Gem::Version
|
453
|
-
version: '2.
|
460
|
+
version: '2.4'
|
454
461
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
455
462
|
requirements:
|
456
463
|
- - ">="
|
@@ -471,6 +478,7 @@ test_files:
|
|
471
478
|
- test/file/registry_test.rb
|
472
479
|
- test/file/text_test.rb
|
473
480
|
- test/file/word_test.rb
|
481
|
+
- test/file/xml_test.rb
|
474
482
|
- test/file/zip_test.rb
|
475
483
|
- test/fixed_width/table_test.rb
|
476
484
|
- test/helpers/file/delimited_test.rb
|
@@ -501,7 +509,9 @@ test_files:
|
|
501
509
|
- test/resources/hello_world.txt
|
502
510
|
- test/resources/high_ascii_delimited.txt
|
503
511
|
- test/resources/high_ascii_delimited_example_two.txt
|
512
|
+
- test/resources/malformed.csv
|
504
513
|
- test/resources/malformed.xml
|
514
|
+
- test/resources/malformed_pipe.csv
|
505
515
|
- test/resources/normal.csv
|
506
516
|
- test/resources/normal.csv.zip
|
507
517
|
- test/resources/normal_pipe.csv
|
@@ -510,6 +520,7 @@ test_files:
|
|
510
520
|
- test/resources/not_a_word_file.doc
|
511
521
|
- test/resources/not_a_word_file.docx
|
512
522
|
- test/resources/not_sign_delimited.txt
|
523
|
+
- test/resources/sample.xml
|
513
524
|
- test/resources/sample_xls.xls
|
514
525
|
- test/resources/sample_xlsx.xlsx
|
515
526
|
- test/resources/sheet_streaming.xls
|
@@ -528,3 +539,4 @@ test_files:
|
|
528
539
|
- test/table_test.rb
|
529
540
|
- test/test_helper.rb
|
530
541
|
- test/universal_importer_helper_test.rb
|
542
|
+
- test/xml/table_test.rb
|