ndr_import 6.2.0 → 6.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/code_safety.yml +42 -14
- data/lib/ndr_import.rb +1 -0
- data/lib/ndr_import/file/all.rb +1 -1
- data/lib/ndr_import/file/delimited.rb +4 -3
- data/lib/ndr_import/file/xml.rb +29 -0
- data/lib/ndr_import/mapper.rb +2 -1
- data/lib/ndr_import/non_tabular/table.rb +6 -3
- data/lib/ndr_import/table.rb +2 -2
- data/lib/ndr_import/universal_importer_helper.rb +8 -4
- data/lib/ndr_import/version.rb +1 -1
- data/lib/ndr_import/xml/table.rb +90 -0
- data/ndr_import.gemspec +1 -1
- data/test/file/delimited_test.rb +56 -0
- data/test/file/registry_test.rb +1 -1
- data/test/file/xml_test.rb +21 -0
- data/test/mapper_test.rb +5 -0
- data/test/resources/malformed.csv +3 -0
- data/test/resources/malformed_pipe.csv +3 -0
- data/test/resources/sample.xml +34 -0
- data/test/xml/table_test.rb +90 -0
- metadata +15 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 9ae1ff625cdd7352f4b3306f9a49905b2563557b487762611dfd0391735177ee
|
|
4
|
+
data.tar.gz: a7bdc413db721c0f813b9fd7ef862beff3cf6a9a76f042eef15d141b2526f5a7
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 3d572905a49329c295a9ab701de99a4f82a9165a399b05ca6f80e48218e2c603fddca14768d4aad2e9139ac07cdbaa6c5ba1f1851527cc50f7f6f4b45ed2392e
|
|
7
|
+
data.tar.gz: 3414e9efa2b16b60e9d532075fca1df095bdcab20516738243fc9ed9ac5b30540b3ad1f9139feaedfd3f5c3182c049e2deb5f5cd878a67360bdefaeffd9c6452
|
data/code_safety.yml
CHANGED
|
@@ -15,7 +15,7 @@ file safety:
|
|
|
15
15
|
".travis.yml":
|
|
16
16
|
comments:
|
|
17
17
|
reviewed_by: josh.pencheon
|
|
18
|
-
safe_revision:
|
|
18
|
+
safe_revision: 71979e0a602ca5a0ce415c194f10add9959f0116
|
|
19
19
|
CODE_OF_CONDUCT.md:
|
|
20
20
|
comments:
|
|
21
21
|
reviewed_by: timgentry
|
|
@@ -58,8 +58,8 @@ file safety:
|
|
|
58
58
|
safe_revision: c158dc783b84cab31380708e76e3812544cc1c2f
|
|
59
59
|
lib/ndr_import.rb:
|
|
60
60
|
comments:
|
|
61
|
-
reviewed_by:
|
|
62
|
-
safe_revision:
|
|
61
|
+
reviewed_by: josh.pencheon
|
|
62
|
+
safe_revision: 4ab72f84201c2d5f0147b7dfd041f488f6ff0422
|
|
63
63
|
lib/ndr_import/csv_library.rb:
|
|
64
64
|
comments:
|
|
65
65
|
reviewed_by: josh.pencheon
|
|
@@ -67,7 +67,7 @@ file safety:
|
|
|
67
67
|
lib/ndr_import/file/all.rb:
|
|
68
68
|
comments:
|
|
69
69
|
reviewed_by: josh.pencheon
|
|
70
|
-
safe_revision:
|
|
70
|
+
safe_revision: 4ab72f84201c2d5f0147b7dfd041f488f6ff0422
|
|
71
71
|
lib/ndr_import/file/base.rb:
|
|
72
72
|
comments:
|
|
73
73
|
reviewed_by: timgentry
|
|
@@ -75,7 +75,7 @@ file safety:
|
|
|
75
75
|
lib/ndr_import/file/delimited.rb:
|
|
76
76
|
comments:
|
|
77
77
|
reviewed_by: josh.pencheon
|
|
78
|
-
safe_revision:
|
|
78
|
+
safe_revision: 71979e0a602ca5a0ce415c194f10add9959f0116
|
|
79
79
|
lib/ndr_import/file/docx.rb:
|
|
80
80
|
comments:
|
|
81
81
|
reviewed_by: josh.pencheon
|
|
@@ -104,6 +104,10 @@ file safety:
|
|
|
104
104
|
comments:
|
|
105
105
|
reviewed_by: timgentry
|
|
106
106
|
safe_revision: c88000b32401b5ae9ef7f5878a9b630506ab5a94
|
|
107
|
+
lib/ndr_import/file/xml.rb:
|
|
108
|
+
comments:
|
|
109
|
+
reviewed_by: josh.pencheon
|
|
110
|
+
safe_revision: 4ab72f84201c2d5f0147b7dfd041f488f6ff0422
|
|
107
111
|
lib/ndr_import/file/zip.rb:
|
|
108
112
|
comments:
|
|
109
113
|
reviewed_by: timgentry
|
|
@@ -139,7 +143,7 @@ file safety:
|
|
|
139
143
|
lib/ndr_import/mapper.rb:
|
|
140
144
|
comments:
|
|
141
145
|
reviewed_by: josh.pencheon
|
|
142
|
-
safe_revision:
|
|
146
|
+
safe_revision: 4ab72f84201c2d5f0147b7dfd041f488f6ff0422
|
|
143
147
|
lib/ndr_import/mapping_error.rb:
|
|
144
148
|
comments:
|
|
145
149
|
reviewed_by: timgentry
|
|
@@ -163,7 +167,7 @@ file safety:
|
|
|
163
167
|
lib/ndr_import/non_tabular/table.rb:
|
|
164
168
|
comments:
|
|
165
169
|
reviewed_by: josh.pencheon
|
|
166
|
-
safe_revision:
|
|
170
|
+
safe_revision: 71979e0a602ca5a0ce415c194f10add9959f0116
|
|
167
171
|
lib/ndr_import/non_tabular_file_helper.rb:
|
|
168
172
|
comments:
|
|
169
173
|
reviewed_by: josh.pencheon
|
|
@@ -175,19 +179,23 @@ file safety:
|
|
|
175
179
|
lib/ndr_import/table.rb:
|
|
176
180
|
comments: uses File.basename
|
|
177
181
|
reviewed_by: josh.pencheon
|
|
178
|
-
safe_revision:
|
|
182
|
+
safe_revision: 71979e0a602ca5a0ce415c194f10add9959f0116
|
|
179
183
|
lib/ndr_import/universal_importer_helper.rb:
|
|
180
184
|
comments:
|
|
181
185
|
reviewed_by: josh.pencheon
|
|
182
|
-
safe_revision:
|
|
186
|
+
safe_revision: 71979e0a602ca5a0ce415c194f10add9959f0116
|
|
183
187
|
lib/ndr_import/version.rb:
|
|
184
188
|
comments: another check?
|
|
185
189
|
reviewed_by: josh.pencheon
|
|
186
|
-
safe_revision:
|
|
190
|
+
safe_revision: 0b1ab7c810d0fa46d153238d69627c07f56d1efa
|
|
191
|
+
lib/ndr_import/xml/table.rb:
|
|
192
|
+
comments:
|
|
193
|
+
reviewed_by: josh.pencheon
|
|
194
|
+
safe_revision: 4ab72f84201c2d5f0147b7dfd041f488f6ff0422
|
|
187
195
|
ndr_import.gemspec:
|
|
188
196
|
comments:
|
|
189
197
|
reviewed_by: josh.pencheon
|
|
190
|
-
safe_revision:
|
|
198
|
+
safe_revision: 71979e0a602ca5a0ce415c194f10add9959f0116
|
|
191
199
|
test/file/base_test.rb:
|
|
192
200
|
comments:
|
|
193
201
|
reviewed_by: timgentry
|
|
@@ -195,7 +203,7 @@ file safety:
|
|
|
195
203
|
test/file/delimited_test.rb:
|
|
196
204
|
comments:
|
|
197
205
|
reviewed_by: josh.pencheon
|
|
198
|
-
safe_revision:
|
|
206
|
+
safe_revision: ab9f926a53e84f8ffa826abdd967abee455c89df
|
|
199
207
|
test/file/docx_test.rb:
|
|
200
208
|
comments:
|
|
201
209
|
reviewed_by: josh.pencheon
|
|
@@ -211,7 +219,7 @@ file safety:
|
|
|
211
219
|
test/file/registry_test.rb:
|
|
212
220
|
comments:
|
|
213
221
|
reviewed_by: josh.pencheon
|
|
214
|
-
safe_revision:
|
|
222
|
+
safe_revision: 4ab72f84201c2d5f0147b7dfd041f488f6ff0422
|
|
215
223
|
test/file/text_test.rb:
|
|
216
224
|
comments:
|
|
217
225
|
reviewed_by: timgentry
|
|
@@ -220,6 +228,10 @@ file safety:
|
|
|
220
228
|
comments:
|
|
221
229
|
reviewed_by: josh.pencheon
|
|
222
230
|
safe_revision: 1b66cfcbb61dfac93c44889ca0ced5836101c20c
|
|
231
|
+
test/file/xml_test.rb:
|
|
232
|
+
comments:
|
|
233
|
+
reviewed_by: josh.pencheon
|
|
234
|
+
safe_revision: 4ab72f84201c2d5f0147b7dfd041f488f6ff0422
|
|
223
235
|
test/file/zip_test.rb:
|
|
224
236
|
comments:
|
|
225
237
|
reviewed_by: timgentry
|
|
@@ -255,7 +267,7 @@ file safety:
|
|
|
255
267
|
test/mapper_test.rb:
|
|
256
268
|
comments: exposes Mapper internals to test them
|
|
257
269
|
reviewed_by: josh.pencheon
|
|
258
|
-
safe_revision:
|
|
270
|
+
safe_revision: 4ab72f84201c2d5f0147b7dfd041f488f6ff0422
|
|
259
271
|
test/non_tabular/mapping_test.rb:
|
|
260
272
|
comments:
|
|
261
273
|
reviewed_by: timgentry
|
|
@@ -340,10 +352,18 @@ file safety:
|
|
|
340
352
|
comments:
|
|
341
353
|
reviewed_by: josh.pencheon
|
|
342
354
|
safe_revision: 902f5326d85372d9632de9869d6f56fc02b83a10
|
|
355
|
+
test/resources/malformed.csv:
|
|
356
|
+
comments:
|
|
357
|
+
reviewed_by: josh.pencheon
|
|
358
|
+
safe_revision: 71979e0a602ca5a0ce415c194f10add9959f0116
|
|
343
359
|
test/resources/malformed.xml:
|
|
344
360
|
comments:
|
|
345
361
|
reviewed_by: timgentry
|
|
346
362
|
safe_revision: 137170d443ea6bcc0afb18f62202c285ae6501eb
|
|
363
|
+
test/resources/malformed_pipe.csv:
|
|
364
|
+
comments:
|
|
365
|
+
reviewed_by: josh.pencheon
|
|
366
|
+
safe_revision: 71979e0a602ca5a0ce415c194f10add9959f0116
|
|
347
367
|
test/resources/normal.csv:
|
|
348
368
|
comments:
|
|
349
369
|
reviewed_by: timgentry
|
|
@@ -376,6 +396,10 @@ file safety:
|
|
|
376
396
|
comments:
|
|
377
397
|
reviewed_by: josh.pencheon
|
|
378
398
|
safe_revision: 902f5326d85372d9632de9869d6f56fc02b83a10
|
|
399
|
+
test/resources/sample.xml:
|
|
400
|
+
comments:
|
|
401
|
+
reviewed_by: josh.pencheon
|
|
402
|
+
safe_revision: 4ab72f84201c2d5f0147b7dfd041f488f6ff0422
|
|
379
403
|
test/resources/sample_xls.xls:
|
|
380
404
|
comments:
|
|
381
405
|
reviewed_by: timgentry
|
|
@@ -448,3 +472,7 @@ file safety:
|
|
|
448
472
|
comments:
|
|
449
473
|
reviewed_by: josh.pencheon
|
|
450
474
|
safe_revision: 902f5326d85372d9632de9869d6f56fc02b83a10
|
|
475
|
+
test/xml/table_test.rb:
|
|
476
|
+
comments:
|
|
477
|
+
reviewed_by: josh.pencheon
|
|
478
|
+
safe_revision: 4ab72f84201c2d5f0147b7dfd041f488f6ff0422
|
data/lib/ndr_import.rb
CHANGED
data/lib/ndr_import/file/all.rb
CHANGED
|
@@ -52,8 +52,9 @@ module NdrImport
|
|
|
52
52
|
begin
|
|
53
53
|
col_sep = @options['col_sep']
|
|
54
54
|
options = {
|
|
55
|
-
:
|
|
56
|
-
:
|
|
55
|
+
col_sep: (col_sep || ',').force_encoding(delimiter_encoding),
|
|
56
|
+
mode: access_mode,
|
|
57
|
+
liberal_parsing: @options['liberal_parsing'].presence
|
|
57
58
|
}
|
|
58
59
|
|
|
59
60
|
row_num = 0
|
|
@@ -76,7 +77,7 @@ module NdrImport
|
|
|
76
77
|
|
|
77
78
|
# We tried them all, and none worked:
|
|
78
79
|
unless successful_options
|
|
79
|
-
|
|
80
|
+
raise "None of the encodings #{supported_encodings.values.inspect} were successful!"
|
|
80
81
|
end
|
|
81
82
|
|
|
82
83
|
successful_options
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
require 'ndr_support/safe_file'
|
|
2
|
+
require 'ndr_import/helpers/file/xml'
|
|
3
|
+
require_relative 'registry'
|
|
4
|
+
|
|
5
|
+
module NdrImport
|
|
6
|
+
# This is one of a collection of file handlers that deal with individual formats of data.
|
|
7
|
+
# They can be instantiated directly or via the factory method Registry.tables
|
|
8
|
+
module File
|
|
9
|
+
# This class is a xml file handler that returns a single table.
|
|
10
|
+
class Xml < Base
|
|
11
|
+
include NdrImport::Helpers::File::Xml
|
|
12
|
+
|
|
13
|
+
private
|
|
14
|
+
|
|
15
|
+
# Iterate through the file, yielding each 'xml_record_xpath' element in turn.
|
|
16
|
+
def rows(&block)
|
|
17
|
+
return enum_for(:rows) unless block
|
|
18
|
+
|
|
19
|
+
doc = read_xml_file(@filename)
|
|
20
|
+
|
|
21
|
+
doc.xpath(@options['xml_record_xpath']).each(&block)
|
|
22
|
+
rescue StandardError => e
|
|
23
|
+
raise("#{SafeFile.basename(@filename)} [#{e.class}: #{e.message}]")
|
|
24
|
+
end
|
|
25
|
+
end
|
|
26
|
+
# Not all xml files may want to be registered, so 'xml' is not registered by design.
|
|
27
|
+
Registry.register(Xml, 'xml_table')
|
|
28
|
+
end
|
|
29
|
+
end
|
data/lib/ndr_import/mapper.rb
CHANGED
|
@@ -231,13 +231,14 @@ module NdrImport::Mapper
|
|
|
231
231
|
#
|
|
232
232
|
# would base64 decode a word document and then 'decode' the word document into plain text
|
|
233
233
|
def decode_raw_value(raw_value, encoding)
|
|
234
|
+
return raw_value if raw_value.blank?
|
|
234
235
|
case encoding
|
|
235
236
|
when :base64
|
|
236
237
|
Base64.decode64(raw_value)
|
|
237
238
|
when :word_doc
|
|
238
239
|
read_word_stream(StringIO.new(raw_value, 'r'))
|
|
239
240
|
else
|
|
240
|
-
|
|
241
|
+
raise "Cannot decode: #{encoding}"
|
|
241
242
|
end
|
|
242
243
|
end
|
|
243
244
|
|
|
@@ -16,12 +16,15 @@ module NdrImport
|
|
|
16
16
|
|
|
17
17
|
include UTF8Encoding
|
|
18
18
|
|
|
19
|
-
|
|
19
|
+
TABULAR_ONLY_OPTIONS = %w[delimiter liberal_parsing tablename_pattern
|
|
20
|
+
header_lines footer_lines xml_record_xpath].freeze
|
|
21
|
+
|
|
22
|
+
NON_TABULAR_OPTIONS = %w[capture_end_line capture_start_line start_line_pattern
|
|
20
23
|
end_line_pattern remove_lines start_in_a_record
|
|
21
|
-
end_in_a_record
|
|
24
|
+
end_in_a_record].freeze
|
|
22
25
|
|
|
23
26
|
def self.all_valid_options
|
|
24
|
-
super -
|
|
27
|
+
super - TABULAR_ONLY_OPTIONS + NON_TABULAR_OPTIONS
|
|
25
28
|
end
|
|
26
29
|
|
|
27
30
|
attr_reader(*NON_TABULAR_OPTIONS)
|
data/lib/ndr_import/table.rb
CHANGED
|
@@ -10,8 +10,8 @@ module NdrImport
|
|
|
10
10
|
include NdrImport::Mapper
|
|
11
11
|
|
|
12
12
|
def self.all_valid_options
|
|
13
|
-
%w[canonical_name delimiter filename_pattern tablename_pattern header_lines
|
|
14
|
-
format klass columns]
|
|
13
|
+
%w[canonical_name delimiter liberal_parsing filename_pattern tablename_pattern header_lines
|
|
14
|
+
footer_lines format klass columns xml_record_xpath]
|
|
15
15
|
end
|
|
16
16
|
|
|
17
17
|
def all_valid_options
|
|
@@ -32,10 +32,14 @@ module NdrImport
|
|
|
32
32
|
# now at the individual file level, can we find the table mapping?
|
|
33
33
|
table_mapping = get_table_mapping(filename, nil)
|
|
34
34
|
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
35
|
+
options = {
|
|
36
|
+
'unzip_path' => unzip_path,
|
|
37
|
+
'col_sep' => table_mapping.try(:delimiter),
|
|
38
|
+
'liberal_parsing' => table_mapping.try(:liberal_parsing),
|
|
39
|
+
'xml_record_xpath' => table_mapping.try(:xml_record_xpath)
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
tables = NdrImport::File::Registry.tables(filename, table_mapping.try(:format), options)
|
|
39
43
|
yield_tables_and_their_content(filename, tables, &block)
|
|
40
44
|
end
|
|
41
45
|
end
|
data/lib/ndr_import/version.rb
CHANGED
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
require 'ndr_import/table'
|
|
2
|
+
|
|
3
|
+
module NdrImport
|
|
4
|
+
module Xml
|
|
5
|
+
# This class maintains the state of a xml table mapping and encapsulates
|
|
6
|
+
# the logic required to transform a table of data into "records". Particular
|
|
7
|
+
# attention has been made to use enumerables throughout to help with the
|
|
8
|
+
# transformation of large quantities of data.
|
|
9
|
+
class Table < ::NdrImport::Table
|
|
10
|
+
def self.all_valid_options
|
|
11
|
+
super - %w[delimiter header_lines footer_lines]
|
|
12
|
+
end
|
|
13
|
+
|
|
14
|
+
def header_lines
|
|
15
|
+
0
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
def footer_lines
|
|
19
|
+
0
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
# This method transforms an incoming line (element) of xml data by applying
|
|
23
|
+
# each of the klass masked mappings to the line and yielding the klass
|
|
24
|
+
# and fields for each mapped klass.
|
|
25
|
+
def transform_line(line, index)
|
|
26
|
+
return enum_for(:transform_line, line, index) unless block_given?
|
|
27
|
+
|
|
28
|
+
raise 'Not an Nokogiri::XML::Element!' unless line.is_a? Nokogiri::XML::Element
|
|
29
|
+
|
|
30
|
+
validate_column_mappings(line)
|
|
31
|
+
|
|
32
|
+
xml_line = column_xpaths.map { |column_xpath| line.xpath(column_xpath).inner_text }
|
|
33
|
+
|
|
34
|
+
masked_mappings.each do |klass, klass_mappings|
|
|
35
|
+
fields = mapped_line(xml_line, klass_mappings)
|
|
36
|
+
next if fields[:skip].to_s == 'true'.freeze
|
|
37
|
+
yield(klass, fields, index)
|
|
38
|
+
end
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
private
|
|
42
|
+
|
|
43
|
+
# Ensure every leaf is accounted for in the column mappings
|
|
44
|
+
def validate_column_mappings(line)
|
|
45
|
+
missing_nodes = mappable_xpaths_from(line) - column_xpaths
|
|
46
|
+
raise "Unmapped data! #{missing_nodes}" unless missing_nodes.empty?
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
def column_name_from(column)
|
|
50
|
+
column[Strings::COLUMN] || column[Strings::STANDARD_MAPPING]
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
def column_xpaths
|
|
54
|
+
@column_xpaths ||= columns.map { |column| build_xpath_from(column) }
|
|
55
|
+
end
|
|
56
|
+
|
|
57
|
+
def mappable_xpaths_from(line)
|
|
58
|
+
xpaths = []
|
|
59
|
+
|
|
60
|
+
line.xpath('.//*[not(child::*)]').each do |node|
|
|
61
|
+
xpath = node.path.sub(line.path + '/', '')
|
|
62
|
+
xpaths << xpath
|
|
63
|
+
node.attributes.each_key { |key| xpaths << "#{xpath}/@#{key}" }
|
|
64
|
+
end
|
|
65
|
+
xpaths
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
def build_xpath_from(column)
|
|
69
|
+
column_name = column_name_from(column)
|
|
70
|
+
column['xml_cell'].presence ? relative_path_from(column, column_name) : column_name
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
def relative_path_from(column, colum_name)
|
|
74
|
+
xml_cell = column['xml_cell']
|
|
75
|
+
relative_path = xml_cell['relative_path'].presence ? xml_cell['relative_path'] : nil
|
|
76
|
+
attribute = xml_cell['attribute'].presence ? '@' + xml_cell['attribute'] : nil
|
|
77
|
+
|
|
78
|
+
if relative_path && attribute
|
|
79
|
+
relative_path + '/' + colum_name + '/' + attribute
|
|
80
|
+
elsif relative_path
|
|
81
|
+
relative_path + '/' + colum_name
|
|
82
|
+
elsif attribute
|
|
83
|
+
colum_name + '/' + attribute
|
|
84
|
+
else
|
|
85
|
+
colum_name
|
|
86
|
+
end
|
|
87
|
+
end
|
|
88
|
+
end
|
|
89
|
+
end
|
|
90
|
+
end
|
data/ndr_import.gemspec
CHANGED
|
@@ -33,7 +33,7 @@ Gem::Specification.new do |spec|
|
|
|
33
33
|
spec.add_dependency 'roo-xls'
|
|
34
34
|
spec.add_dependency 'spreadsheet', '1.0.3'
|
|
35
35
|
|
|
36
|
-
spec.required_ruby_version = '>= 2.
|
|
36
|
+
spec.required_ruby_version = '>= 2.4'
|
|
37
37
|
|
|
38
38
|
spec.add_development_dependency 'bundler', '~> 1.7'
|
|
39
39
|
spec.add_development_dependency 'rake', '~> 10.0'
|
data/test/file/delimited_test.rb
CHANGED
|
@@ -33,6 +33,21 @@ module NdrImport
|
|
|
33
33
|
end
|
|
34
34
|
end
|
|
35
35
|
|
|
36
|
+
test 'should read malformed pipe correctly' do
|
|
37
|
+
file_path = @permanent_test_files.join('malformed_pipe.csv')
|
|
38
|
+
handler = NdrImport::File::Delimited.new(file_path, 'delimited', 'col_sep' => '|',
|
|
39
|
+
'liberal_parsing' => 'true')
|
|
40
|
+
handler.tables.each do |tablename, sheet|
|
|
41
|
+
assert_nil tablename
|
|
42
|
+
sheet = sheet.to_a
|
|
43
|
+
assert_equal(('A'..'Z').to_a, sheet[0])
|
|
44
|
+
assert_equal ['1'] * 26, sheet[1]
|
|
45
|
+
expected_row = ['2'] * 25
|
|
46
|
+
expected_row << '2"malformed"'
|
|
47
|
+
assert_equal expected_row, sheet[2].sort
|
|
48
|
+
end
|
|
49
|
+
end
|
|
50
|
+
|
|
36
51
|
test 'should read thorn correctly' do
|
|
37
52
|
file_path = @permanent_test_files.join('normal_thorn.csv')
|
|
38
53
|
handler = NdrImport::File::Delimited.new(file_path, 'delimited', 'col_sep' => "\xfe")
|
|
@@ -99,6 +114,47 @@ module NdrImport
|
|
|
99
114
|
assert_equal ['2'] * 26, rows[2]
|
|
100
115
|
end
|
|
101
116
|
|
|
117
|
+
test 'should read malformed delimited txt' do
|
|
118
|
+
rows = []
|
|
119
|
+
file_path = @permanent_test_files.join('malformed.csv')
|
|
120
|
+
handler = NdrImport::File::Delimited.new(file_path, 'csv', 'col_sep' => nil,
|
|
121
|
+
'liberal_parsing' => 'true')
|
|
122
|
+
handler.tables.each do |tablename, sheet|
|
|
123
|
+
assert_nil tablename
|
|
124
|
+
assert_instance_of Enumerator, sheet
|
|
125
|
+
sheet.each do |row|
|
|
126
|
+
rows << row
|
|
127
|
+
end
|
|
128
|
+
end
|
|
129
|
+
|
|
130
|
+
assert_equal(('A'..'Z').to_a, rows[0])
|
|
131
|
+
assert_equal ['1'] * 26, rows[1]
|
|
132
|
+
expected_row = ['2'] * 25
|
|
133
|
+
expected_row << '2"malformed"'
|
|
134
|
+
assert_equal expected_row, rows[2].sort
|
|
135
|
+
end
|
|
136
|
+
|
|
137
|
+
test 'should fail to read malformed delimited txt without liberal_parsing' do
|
|
138
|
+
rows_yielded = []
|
|
139
|
+
exception = assert_raises(CSVLibrary::MalformedCSVError) do
|
|
140
|
+
file_path = @permanent_test_files.join('malformed.csv')
|
|
141
|
+
handler = NdrImport::File::Delimited.new(file_path, 'csv')
|
|
142
|
+
|
|
143
|
+
handler.tables.each do |tablename, sheet|
|
|
144
|
+
assert_nil tablename
|
|
145
|
+
assert_instance_of Enumerator, sheet
|
|
146
|
+
sheet.each do |row|
|
|
147
|
+
rows_yielded << row
|
|
148
|
+
end
|
|
149
|
+
end
|
|
150
|
+
end
|
|
151
|
+
|
|
152
|
+
assert rows_yielded.empty?, 'no rows should have been yielded'
|
|
153
|
+
|
|
154
|
+
msg = 'Invalid CSV format on row 3 of malformed.csv. Original: Illegal quoting in line 3.'
|
|
155
|
+
assert_equal msg, exception.message
|
|
156
|
+
end
|
|
157
|
+
|
|
102
158
|
test 'should read line-by-line' do
|
|
103
159
|
rows = []
|
|
104
160
|
file_path = @permanent_test_files.join('normal.csv')
|
data/test/file/registry_test.rb
CHANGED
|
@@ -12,7 +12,7 @@ module NdrImport
|
|
|
12
12
|
|
|
13
13
|
test 'Registry.handlers' do
|
|
14
14
|
assert_instance_of Hash, NdrImport::File::Registry.handlers
|
|
15
|
-
assert_equal %w[csv delimited doc docx nontabular pdf text txt xls xlsx zip],
|
|
15
|
+
assert_equal %w[csv delimited doc docx nontabular pdf text txt xls xlsx xml_table zip],
|
|
16
16
|
NdrImport::File::Registry.handlers.keys.sort
|
|
17
17
|
end
|
|
18
18
|
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
require 'test_helper'
|
|
2
|
+
require 'ndr_import/file/xml'
|
|
3
|
+
|
|
4
|
+
module NdrImport
|
|
5
|
+
module File
|
|
6
|
+
# Xml file handler tests
|
|
7
|
+
class XmlTest < ActiveSupport::TestCase
|
|
8
|
+
def setup
|
|
9
|
+
@permanent_test_files = SafePath.new('permanent_test_files')
|
|
10
|
+
end
|
|
11
|
+
|
|
12
|
+
test 'should return enum of xml elements' do
|
|
13
|
+
file_path = @permanent_test_files.join('sample.xml')
|
|
14
|
+
handler = NdrImport::File::Xml.new(file_path, nil, 'xml_record_xpath' => 'root/record')
|
|
15
|
+
rows = handler.send(:rows)
|
|
16
|
+
assert rows.is_a? Enumerator
|
|
17
|
+
assert(rows.all? { |row| row.is_a? Nokogiri::XML::Element })
|
|
18
|
+
end
|
|
19
|
+
end
|
|
20
|
+
end
|
|
21
|
+
end
|
data/test/mapper_test.rb
CHANGED
|
@@ -635,6 +635,11 @@ class MapperTest < ActiveSupport::TestCase
|
|
|
635
635
|
assert_equal 'Hello world, this is a word document', file_content
|
|
636
636
|
end
|
|
637
637
|
|
|
638
|
+
test 'should handle blank values when attempting to decode_raw_value' do
|
|
639
|
+
text_content = TestMapper.new.send(:decode_raw_value, '', :word_doc)
|
|
640
|
+
assert_equal '', text_content
|
|
641
|
+
end
|
|
642
|
+
|
|
638
643
|
test 'should raise unknown encoding exception' do
|
|
639
644
|
assert_raise(RuntimeError) do
|
|
640
645
|
TestMapper.new.mapped_line(['A'], invalid_decode_mapping)
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
<root>
|
|
2
|
+
<record>
|
|
3
|
+
<no_relative_path value="A value"/>
|
|
4
|
+
<no_path_or_att>Another value</no_path_or_att>
|
|
5
|
+
<demographics>
|
|
6
|
+
<demographics_1>AAA</demographics_1>
|
|
7
|
+
<address>
|
|
8
|
+
<address_line1>Address</address_line1>
|
|
9
|
+
<address_line1>Address 2</address_line1>
|
|
10
|
+
</address>
|
|
11
|
+
<demographics_2 code="03">Inner text</demographics_2>
|
|
12
|
+
</demographics>
|
|
13
|
+
<pathology>
|
|
14
|
+
<pathology_date_1>2018-01-01</pathology_date_1>
|
|
15
|
+
<pathology_date_2 />
|
|
16
|
+
</pathology>
|
|
17
|
+
</record>
|
|
18
|
+
<record>
|
|
19
|
+
<demographics>
|
|
20
|
+
<address>
|
|
21
|
+
<address_line1>Address</address_line1>
|
|
22
|
+
<address_line1>Address 2</address_line1>
|
|
23
|
+
</address>
|
|
24
|
+
<demographics_2 code="03">Inner text</demographics_2>
|
|
25
|
+
<demographics_1>AAA</demographics_1>
|
|
26
|
+
</demographics>
|
|
27
|
+
<no_path_or_att><![CDATA[Another value]]></no_path_or_att>
|
|
28
|
+
<pathology>
|
|
29
|
+
<pathology_date_1>2018-01-01</pathology_date_1>
|
|
30
|
+
<pathology_date_2 />
|
|
31
|
+
</pathology>
|
|
32
|
+
<no_relative_path value="A value"/>
|
|
33
|
+
</record>
|
|
34
|
+
</root>
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
require 'test_helper'
|
|
2
|
+
|
|
3
|
+
# This tests the NdrImport::Xml::Table mapping class
|
|
4
|
+
module Xml
|
|
5
|
+
class TableTest < ActiveSupport::TestCase
|
|
6
|
+
def setup
|
|
7
|
+
file_path = SafePath.new('permanent_test_files').join('sample.xml')
|
|
8
|
+
handler = NdrImport::File::Xml.new(file_path, nil, 'xml_record_xpath' => 'root/record')
|
|
9
|
+
|
|
10
|
+
@element_lines = handler.send(:rows)
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
def test_should_transform_xml_element_lines
|
|
14
|
+
table = NdrImport::Xml::Table.new(klass: 'SomeTestKlass', columns: xml_column_mapping)
|
|
15
|
+
|
|
16
|
+
expected_data = ['SomeTestKlass', { rawtext: {
|
|
17
|
+
'no_relative_path' => 'A value',
|
|
18
|
+
'no_relative_path_inner_text' => '',
|
|
19
|
+
'no_path_or_att' => 'Another value',
|
|
20
|
+
'demographics_1' => 'AAA',
|
|
21
|
+
'demographics_2' => '03',
|
|
22
|
+
'demographics_2_inner_text' => 'Inner text',
|
|
23
|
+
'address1' => 'Address',
|
|
24
|
+
'address2' => 'Address 2',
|
|
25
|
+
'pathology_date_1' => '2018-01-01',
|
|
26
|
+
'pathology_date_2' => '',
|
|
27
|
+
'should_be_blank' => ''
|
|
28
|
+
} }, 1]
|
|
29
|
+
|
|
30
|
+
transformed_data = table.transform(@element_lines)
|
|
31
|
+
assert_equal 2, transformed_data.count
|
|
32
|
+
|
|
33
|
+
transformed_data.each do |klass, fields, _index|
|
|
34
|
+
assert_equal expected_data[0], klass
|
|
35
|
+
assert_equal expected_data[1], fields
|
|
36
|
+
end
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
def test_should_fail_with_unmappped_nodes
|
|
40
|
+
table = NdrImport::Xml::Table.new(klass: 'SomeTestKlass', columns: partial_xml_column_mapping)
|
|
41
|
+
|
|
42
|
+
exception = assert_raises(RuntimeError) { table.transform(@element_lines).to_a }
|
|
43
|
+
assert exception.message.starts_with? 'sample.xml [RuntimeError: Unmapped data!'
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
private
|
|
47
|
+
|
|
48
|
+
def xml_column_mapping
|
|
49
|
+
[
|
|
50
|
+
{ 'column' => 'no_relative_path',
|
|
51
|
+
'xml_cell' => { 'relative_path' => '', 'attribute' => 'value' } },
|
|
52
|
+
{ 'column' => 'no_relative_path', 'rawtext_name' => 'no_relative_path_inner_text',
|
|
53
|
+
'xml_cell' => { 'relative_path' => '' } },
|
|
54
|
+
{ 'column' => 'no_path_or_att',
|
|
55
|
+
'xml_cell' => { 'relative_path' => '', 'attribute' => '' } },
|
|
56
|
+
{ 'column' => 'demographics_1',
|
|
57
|
+
'xml_cell' => { 'relative_path' => 'demographics' } },
|
|
58
|
+
{ 'column' => 'demographics_2',
|
|
59
|
+
'xml_cell' => { 'relative_path' => 'demographics', 'attribute' => 'code' } },
|
|
60
|
+
{ 'column' => 'demographics_2', 'rawtext_name' => 'demographics_2_inner_text',
|
|
61
|
+
'xml_cell' => { 'relative_path' => 'demographics' } },
|
|
62
|
+
{ 'column' => 'address_line1[1]', 'rawtext_name' => 'address1',
|
|
63
|
+
'xml_cell' => { 'relative_path' => 'demographics/address' } },
|
|
64
|
+
{ 'column' => 'address_line1[2]', 'rawtext_name' => 'address2',
|
|
65
|
+
'xml_cell' => { 'relative_path' => 'demographics/address' } },
|
|
66
|
+
{ 'column' => 'pathology_date_1',
|
|
67
|
+
'xml_cell' => { 'relative_path' => 'pathology' } },
|
|
68
|
+
{ 'column' => 'pathology_date_2',
|
|
69
|
+
'xml_cell' => { 'relative_path' => 'pathology' } },
|
|
70
|
+
{ 'column' => 'should_be_blank',
|
|
71
|
+
'xml_cell' => { 'relative_path' => 'not_present' } }
|
|
72
|
+
]
|
|
73
|
+
end
|
|
74
|
+
|
|
75
|
+
def partial_xml_column_mapping
|
|
76
|
+
[
|
|
77
|
+
{ 'column' => 'no_relative_path',
|
|
78
|
+
'xml_cell' => { 'relative_path' => '', 'attribute' => 'value' } },
|
|
79
|
+
{ 'column' => 'no_path_or_att',
|
|
80
|
+
'xml_cell' => { 'relative_path' => '', 'attribute' => '' } },
|
|
81
|
+
{ 'column' => 'demographics_1',
|
|
82
|
+
'xml_cell' => { 'relative_path' => 'demographics' } },
|
|
83
|
+
{ 'column' => 'demographics_2',
|
|
84
|
+
'xml_cell' => { 'relative_path' => 'demographics', 'attribute' => 'code' } },
|
|
85
|
+
{ 'column' => 'address_line1',
|
|
86
|
+
'xml_cell' => { 'relative_path' => 'demographics/address' } }
|
|
87
|
+
]
|
|
88
|
+
end
|
|
89
|
+
end
|
|
90
|
+
end
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: ndr_import
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 6.
|
|
4
|
+
version: 6.3.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- NCRS Development Team
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2018-
|
|
11
|
+
date: 2018-10-12 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: activesupport
|
|
@@ -352,6 +352,7 @@ files:
|
|
|
352
352
|
- lib/ndr_import/file/text.rb
|
|
353
353
|
- lib/ndr_import/file/unregistered_filetype.rb
|
|
354
354
|
- lib/ndr_import/file/word.rb
|
|
355
|
+
- lib/ndr_import/file/xml.rb
|
|
355
356
|
- lib/ndr_import/file/zip.rb
|
|
356
357
|
- lib/ndr_import/fixed_width/table.rb
|
|
357
358
|
- lib/ndr_import/helpers/file/delimited.rb
|
|
@@ -372,6 +373,7 @@ files:
|
|
|
372
373
|
- lib/ndr_import/table.rb
|
|
373
374
|
- lib/ndr_import/universal_importer_helper.rb
|
|
374
375
|
- lib/ndr_import/version.rb
|
|
376
|
+
- lib/ndr_import/xml/table.rb
|
|
375
377
|
- ndr_import.gemspec
|
|
376
378
|
- test/file/base_test.rb
|
|
377
379
|
- test/file/delimited_test.rb
|
|
@@ -381,6 +383,7 @@ files:
|
|
|
381
383
|
- test/file/registry_test.rb
|
|
382
384
|
- test/file/text_test.rb
|
|
383
385
|
- test/file/word_test.rb
|
|
386
|
+
- test/file/xml_test.rb
|
|
384
387
|
- test/file/zip_test.rb
|
|
385
388
|
- test/fixed_width/table_test.rb
|
|
386
389
|
- test/helpers/file/delimited_test.rb
|
|
@@ -411,7 +414,9 @@ files:
|
|
|
411
414
|
- test/resources/hello_world.txt
|
|
412
415
|
- test/resources/high_ascii_delimited.txt
|
|
413
416
|
- test/resources/high_ascii_delimited_example_two.txt
|
|
417
|
+
- test/resources/malformed.csv
|
|
414
418
|
- test/resources/malformed.xml
|
|
419
|
+
- test/resources/malformed_pipe.csv
|
|
415
420
|
- test/resources/normal.csv
|
|
416
421
|
- test/resources/normal.csv.zip
|
|
417
422
|
- test/resources/normal_pipe.csv
|
|
@@ -420,6 +425,7 @@ files:
|
|
|
420
425
|
- test/resources/not_a_word_file.doc
|
|
421
426
|
- test/resources/not_a_word_file.docx
|
|
422
427
|
- test/resources/not_sign_delimited.txt
|
|
428
|
+
- test/resources/sample.xml
|
|
423
429
|
- test/resources/sample_xls.xls
|
|
424
430
|
- test/resources/sample_xlsx.xlsx
|
|
425
431
|
- test/resources/sheet_streaming.xls
|
|
@@ -438,6 +444,7 @@ files:
|
|
|
438
444
|
- test/table_test.rb
|
|
439
445
|
- test/test_helper.rb
|
|
440
446
|
- test/universal_importer_helper_test.rb
|
|
447
|
+
- test/xml/table_test.rb
|
|
441
448
|
homepage: https://github.com/PublicHealthEngland/ndr_import
|
|
442
449
|
licenses:
|
|
443
450
|
- MIT
|
|
@@ -450,7 +457,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
450
457
|
requirements:
|
|
451
458
|
- - ">="
|
|
452
459
|
- !ruby/object:Gem::Version
|
|
453
|
-
version: '2.
|
|
460
|
+
version: '2.4'
|
|
454
461
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
455
462
|
requirements:
|
|
456
463
|
- - ">="
|
|
@@ -471,6 +478,7 @@ test_files:
|
|
|
471
478
|
- test/file/registry_test.rb
|
|
472
479
|
- test/file/text_test.rb
|
|
473
480
|
- test/file/word_test.rb
|
|
481
|
+
- test/file/xml_test.rb
|
|
474
482
|
- test/file/zip_test.rb
|
|
475
483
|
- test/fixed_width/table_test.rb
|
|
476
484
|
- test/helpers/file/delimited_test.rb
|
|
@@ -501,7 +509,9 @@ test_files:
|
|
|
501
509
|
- test/resources/hello_world.txt
|
|
502
510
|
- test/resources/high_ascii_delimited.txt
|
|
503
511
|
- test/resources/high_ascii_delimited_example_two.txt
|
|
512
|
+
- test/resources/malformed.csv
|
|
504
513
|
- test/resources/malformed.xml
|
|
514
|
+
- test/resources/malformed_pipe.csv
|
|
505
515
|
- test/resources/normal.csv
|
|
506
516
|
- test/resources/normal.csv.zip
|
|
507
517
|
- test/resources/normal_pipe.csv
|
|
@@ -510,6 +520,7 @@ test_files:
|
|
|
510
520
|
- test/resources/not_a_word_file.doc
|
|
511
521
|
- test/resources/not_a_word_file.docx
|
|
512
522
|
- test/resources/not_sign_delimited.txt
|
|
523
|
+
- test/resources/sample.xml
|
|
513
524
|
- test/resources/sample_xls.xls
|
|
514
525
|
- test/resources/sample_xlsx.xlsx
|
|
515
526
|
- test/resources/sheet_streaming.xls
|
|
@@ -528,3 +539,4 @@ test_files:
|
|
|
528
539
|
- test/table_test.rb
|
|
529
540
|
- test/test_helper.rb
|
|
530
541
|
- test/universal_importer_helper_test.rb
|
|
542
|
+
- test/xml/table_test.rb
|