ndr_import 7.0.0 → 8.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +28 -0
- data/README.md +2 -2
- data/code_safety.yml +32 -12
- data/lib/ndr_import.rb +1 -0
- data/lib/ndr_import/file/all.rb +1 -0
- data/lib/ndr_import/file/seven_zip.rb +70 -0
- data/lib/ndr_import/mapper.rb +27 -5
- data/lib/ndr_import/missing_field_error.rb +14 -0
- data/lib/ndr_import/non_tabular/column_mapping.rb +3 -2
- data/lib/ndr_import/table.rb +10 -0
- data/lib/ndr_import/version.rb +1 -1
- data/ndr_import.gemspec +2 -0
- data/test/file/registry_test.rb +1 -1
- data/test/file/seven_zip_test.rb +59 -0
- data/test/mapper_test.rb +18 -0
- data/test/non_tabular/table_test.rb +44 -0
- data/test/resources/normal.7z +0 -0
- data/test/table_test.rb +71 -1
- metadata +37 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f74d60e93cf1f1d72baeebcbe931fc4225c3e74c11aaa13c324578b261561008
|
4
|
+
data.tar.gz: 70657ad1f113089bd2f719de5b59317310212c730ba34e762d99f9395da8fff2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 91bced100d286a44df6fa193cad58a2ad85f6733d9b8cbbf1970e438ed8a3a7d9785a6d0e8b16396713511d045edd0a6aba00090a903c217e4c71b8e2fde7174
|
7
|
+
data.tar.gz: 4e2e968084f78f25fffebbc793806402fefd515605afc0ac9c81a5d535f0cb7f27a50af3c4dcb733ff5a9a8bc17311084c65593a2fb971ac35c5a4937bbfe777
|
data/CHANGELOG.md
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
## [Unreleased]
|
2
|
+
*no unreleased changes*
|
3
|
+
|
4
|
+
## 8.0.0 / 2018-11-26
|
5
|
+
### Changed
|
6
|
+
* Strip non tabular captured values by default (#28)
|
7
|
+
|
8
|
+
### Added
|
9
|
+
* Add validations to field mappings (#27)
|
10
|
+
* control `ndr_table` serialise order with `encode_with` method (#30)
|
11
|
+
|
12
|
+
## 7.0.0 / 2018-11-09
|
13
|
+
### Changed
|
14
|
+
* Breaking refactor of universal importer mixin (#25)
|
15
|
+
* Update `pdf-reader` version to support recent Rubies (#16)
|
16
|
+
|
17
|
+
## 6.4.1 / 2018-10-18
|
18
|
+
### Fixed
|
19
|
+
* bump nokogiri re: CVE-2018-14404
|
20
|
+
|
21
|
+
## 6.4.0 / 2018-10-17
|
22
|
+
### Added
|
23
|
+
* Allow `decode: :word_doc` to read a .DOCX file (#26)
|
24
|
+
|
25
|
+
## 6.3.0 / 2018-10-12
|
26
|
+
### Added
|
27
|
+
* Add XML file support (#22)
|
28
|
+
* Add liberal CSV parsing option (#24)
|
data/README.md
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
This is the Public Health England (PHE) National Disease Registers (NDR) Import ETL ruby gem, providing:
|
4
4
|
|
5
|
-
1. file import handlers for *extracting* data from delimited files (csv, pipe, tab, thorn), .xls(x) spreadsheets, .doc(x) word documents, PDF, XML and Zip files.
|
5
|
+
1. file import handlers for *extracting* data from delimited files (csv, pipe, tab, thorn), .xls(x) spreadsheets, .doc(x) word documents, PDF, XML, 7-Zip and Zip files.
|
6
6
|
2. table mappers for *transforming* tabular and non-tabular data into key value pairs grouped by a common "klass".
|
7
7
|
|
8
8
|
## Installation
|
@@ -48,7 +48,7 @@ files.each do |filename|
|
|
48
48
|
end
|
49
49
|
```
|
50
50
|
|
51
|
-
See test/readme_test.rb for a more complete working example.
|
51
|
+
See `test/readme_test.rb` for a more complete working example.
|
52
52
|
|
53
53
|
More information on the workings of the mapper are available in the [wiki](https://github.com/PublicHealthEngland/ndr_import/wiki).
|
54
54
|
|
data/code_safety.yml
CHANGED
@@ -16,6 +16,10 @@ file safety:
|
|
16
16
|
comments:
|
17
17
|
reviewed_by: josh.pencheon
|
18
18
|
safe_revision: 71979e0a602ca5a0ce415c194f10add9959f0116
|
19
|
+
CHANGELOG.md:
|
20
|
+
comments:
|
21
|
+
reviewed_by: josh.pencheon
|
22
|
+
safe_revision: 952e45599240ab96b22d7ce223a7551853e50ae5
|
19
23
|
CODE_OF_CONDUCT.md:
|
20
24
|
comments:
|
21
25
|
reviewed_by: timgentry
|
@@ -35,7 +39,7 @@ file safety:
|
|
35
39
|
README.md:
|
36
40
|
comments:
|
37
41
|
reviewed_by: josh.pencheon
|
38
|
-
safe_revision:
|
42
|
+
safe_revision: f2feb0c430947839183d7223e60e7c29b2c0f846
|
39
43
|
Rakefile:
|
40
44
|
comments:
|
41
45
|
reviewed_by: josh.pencheon
|
@@ -59,7 +63,7 @@ file safety:
|
|
59
63
|
lib/ndr_import.rb:
|
60
64
|
comments:
|
61
65
|
reviewed_by: josh.pencheon
|
62
|
-
safe_revision:
|
66
|
+
safe_revision: eca44583e9989159b45e90021dd1c65228447180
|
63
67
|
lib/ndr_import/csv_library.rb:
|
64
68
|
comments:
|
65
69
|
reviewed_by: josh.pencheon
|
@@ -67,7 +71,7 @@ file safety:
|
|
67
71
|
lib/ndr_import/file/all.rb:
|
68
72
|
comments:
|
69
73
|
reviewed_by: josh.pencheon
|
70
|
-
safe_revision:
|
74
|
+
safe_revision: f2feb0c430947839183d7223e60e7c29b2c0f846
|
71
75
|
lib/ndr_import/file/base.rb:
|
72
76
|
comments:
|
73
77
|
reviewed_by: timgentry
|
@@ -92,6 +96,10 @@ file safety:
|
|
92
96
|
comments:
|
93
97
|
reviewed_by: josh.pencheon
|
94
98
|
safe_revision: 2104514689a1a1286195fff18144a8cecb93048b
|
99
|
+
lib/ndr_import/file/seven_zip.rb:
|
100
|
+
comments:
|
101
|
+
reviewed_by: josh.pencheon
|
102
|
+
safe_revision: f2feb0c430947839183d7223e60e7c29b2c0f846
|
95
103
|
lib/ndr_import/file/text.rb:
|
96
104
|
comments:
|
97
105
|
reviewed_by: josh.pencheon
|
@@ -143,15 +151,19 @@ file safety:
|
|
143
151
|
lib/ndr_import/mapper.rb:
|
144
152
|
comments: Writes to a Tempfile, but cleans up. Ruby will respect TMP_DIR
|
145
153
|
reviewed_by: josh.pencheon
|
146
|
-
safe_revision:
|
154
|
+
safe_revision: eca44583e9989159b45e90021dd1c65228447180
|
147
155
|
lib/ndr_import/mapping_error.rb:
|
148
156
|
comments:
|
149
157
|
reviewed_by: timgentry
|
150
158
|
safe_revision: 96ccc535bb9f933081dfc73bf0442b32b6bdce1d
|
159
|
+
lib/ndr_import/missing_field_error.rb:
|
160
|
+
comments:
|
161
|
+
reviewed_by: josh.pencheon
|
162
|
+
safe_revision: eca44583e9989159b45e90021dd1c65228447180
|
151
163
|
lib/ndr_import/non_tabular/column_mapping.rb:
|
152
164
|
comments:
|
153
165
|
reviewed_by: josh.pencheon
|
154
|
-
safe_revision:
|
166
|
+
safe_revision: f216687d0bba7f2940f74a3353a32be3c900c194
|
155
167
|
lib/ndr_import/non_tabular/line.rb:
|
156
168
|
comments:
|
157
169
|
reviewed_by: timgentry
|
@@ -179,7 +191,7 @@ file safety:
|
|
179
191
|
lib/ndr_import/table.rb:
|
180
192
|
comments: uses File.basename
|
181
193
|
reviewed_by: josh.pencheon
|
182
|
-
safe_revision:
|
194
|
+
safe_revision: 2baccd482bcd875dc273f6a34a0e28823790e8d5
|
183
195
|
lib/ndr_import/universal_importer_helper.rb:
|
184
196
|
comments:
|
185
197
|
reviewed_by: josh.pencheon
|
@@ -187,7 +199,7 @@ file safety:
|
|
187
199
|
lib/ndr_import/version.rb:
|
188
200
|
comments: another check?
|
189
201
|
reviewed_by: josh.pencheon
|
190
|
-
safe_revision:
|
202
|
+
safe_revision: 952e45599240ab96b22d7ce223a7551853e50ae5
|
191
203
|
lib/ndr_import/xml/table.rb:
|
192
204
|
comments:
|
193
205
|
reviewed_by: josh.pencheon
|
@@ -195,7 +207,7 @@ file safety:
|
|
195
207
|
ndr_import.gemspec:
|
196
208
|
comments:
|
197
209
|
reviewed_by: josh.pencheon
|
198
|
-
safe_revision:
|
210
|
+
safe_revision: eca44583e9989159b45e90021dd1c65228447180
|
199
211
|
test/file/base_test.rb:
|
200
212
|
comments:
|
201
213
|
reviewed_by: timgentry
|
@@ -219,7 +231,11 @@ file safety:
|
|
219
231
|
test/file/registry_test.rb:
|
220
232
|
comments:
|
221
233
|
reviewed_by: josh.pencheon
|
222
|
-
safe_revision:
|
234
|
+
safe_revision: f2feb0c430947839183d7223e60e7c29b2c0f846
|
235
|
+
test/file/seven_zip_test.rb:
|
236
|
+
comments:
|
237
|
+
reviewed_by: josh.pencheon
|
238
|
+
safe_revision: f2feb0c430947839183d7223e60e7c29b2c0f846
|
223
239
|
test/file/text_test.rb:
|
224
240
|
comments:
|
225
241
|
reviewed_by: timgentry
|
@@ -267,7 +283,7 @@ file safety:
|
|
267
283
|
test/mapper_test.rb:
|
268
284
|
comments: exposes Mapper internals to test them
|
269
285
|
reviewed_by: josh.pencheon
|
270
|
-
safe_revision:
|
286
|
+
safe_revision: eca44583e9989159b45e90021dd1c65228447180
|
271
287
|
test/non_tabular/mapping_test.rb:
|
272
288
|
comments:
|
273
289
|
reviewed_by: timgentry
|
@@ -275,7 +291,7 @@ file safety:
|
|
275
291
|
test/non_tabular/table_test.rb:
|
276
292
|
comments:
|
277
293
|
reviewed_by: josh.pencheon
|
278
|
-
safe_revision:
|
294
|
+
safe_revision: f216687d0bba7f2940f74a3353a32be3c900c194
|
279
295
|
test/non_tabular_file_helper_test.rb:
|
280
296
|
comments:
|
281
297
|
reviewed_by: timgentry
|
@@ -364,6 +380,10 @@ file safety:
|
|
364
380
|
comments:
|
365
381
|
reviewed_by: josh.pencheon
|
366
382
|
safe_revision: 71979e0a602ca5a0ce415c194f10add9959f0116
|
383
|
+
test/resources/normal.7z:
|
384
|
+
comments:
|
385
|
+
reviewed_by: josh.pencheon
|
386
|
+
safe_revision: f2feb0c430947839183d7223e60e7c29b2c0f846
|
367
387
|
test/resources/normal.csv:
|
368
388
|
comments:
|
369
389
|
reviewed_by: timgentry
|
@@ -463,7 +483,7 @@ file safety:
|
|
463
483
|
test/table_test.rb:
|
464
484
|
comments:
|
465
485
|
reviewed_by: josh.pencheon
|
466
|
-
safe_revision:
|
486
|
+
safe_revision: 2baccd482bcd875dc273f6a34a0e28823790e8d5
|
467
487
|
test/test_helper.rb:
|
468
488
|
comments:
|
469
489
|
reviewed_by: josh.pencheon
|
data/lib/ndr_import.rb
CHANGED
data/lib/ndr_import/file/all.rb
CHANGED
@@ -0,0 +1,70 @@
|
|
1
|
+
require 'seven_zip_ruby'
|
2
|
+
require 'ndr_support/safe_file'
|
3
|
+
require_relative 'registry'
|
4
|
+
|
5
|
+
module NdrImport
|
6
|
+
# This is one of a collection of file handlers that deal with individual formats of data.
|
7
|
+
# They can be instantiated directly or via the factory method Registry.tables
|
8
|
+
module File
|
9
|
+
# This class is a 7zip file handler that returns tables from the extracted files.
|
10
|
+
class SevenZip < Base
|
11
|
+
def initialize(filename, format, options = {})
|
12
|
+
super
|
13
|
+
@pattern = options['pattern'] || //
|
14
|
+
@unzip_path = options['unzip_path']
|
15
|
+
@password = options['password']
|
16
|
+
|
17
|
+
validate_unzip_path_is_safe!
|
18
|
+
end
|
19
|
+
|
20
|
+
def files(&block)
|
21
|
+
raise 'Not allowed in external environment' if defined?(::Rails) && ::Rails.env.external?
|
22
|
+
|
23
|
+
return enum_for(:files) unless block
|
24
|
+
|
25
|
+
destination = @unzip_path.join(Time.current.strftime('%H%M%S%L'))
|
26
|
+
FileUtils.mkdir_p(SafeFile.safepath_to_string(destination))
|
27
|
+
|
28
|
+
::File.open(SafeFile.safepath_to_string(@filename), 'rb') do |zipfile|
|
29
|
+
unzip_entries(zipfile, destination, &block)
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
# 7zip files produce files, never tables.
|
34
|
+
def tables
|
35
|
+
raise 'SevenZip#tables should never be called'
|
36
|
+
end
|
37
|
+
|
38
|
+
private
|
39
|
+
|
40
|
+
# Unzip the 7zip file entry and enumerate over it
|
41
|
+
def unzip_entries(zipfile, destination, &block)
|
42
|
+
SevenZipRuby::Reader.open(zipfile, password: @password) do |szr|
|
43
|
+
szr.entries.each do |entry|
|
44
|
+
# SECURE: TPG 2018-11-21: The path is stripped from the zipfile entry when extracted
|
45
|
+
basename = ::File.basename(entry.path)
|
46
|
+
next unless entry.file? && basename.match(@pattern)
|
47
|
+
|
48
|
+
unzipped_filename = destination.join(basename)
|
49
|
+
szr.extract([entry], unzipped_filename)
|
50
|
+
|
51
|
+
unzipped_files(unzipped_filename, &block)
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
# Enumerate over an unzipped file like any other
|
57
|
+
def unzipped_files(unzipped_filename)
|
58
|
+
Registry.files(unzipped_filename, @options).each do |filename|
|
59
|
+
yield(filename)
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
def validate_unzip_path_is_safe!
|
64
|
+
SafeFile.safepath_to_string(@unzip_path)
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
Registry.register(SevenZip, '7z')
|
69
|
+
end
|
70
|
+
end
|
data/lib/ndr_import/mapper.rb
CHANGED
@@ -23,11 +23,13 @@ module NdrImport::Mapper
|
|
23
23
|
MAPPINGS = 'mappings'.freeze
|
24
24
|
MATCH = 'match'.freeze
|
25
25
|
ORDER = 'order'.freeze
|
26
|
+
PRESENCE = 'presence'.freeze
|
26
27
|
PRIORITY = 'priority'.freeze
|
27
28
|
RAWTEXT_NAME = 'rawtext_name'.freeze
|
28
29
|
REPLACE = 'replace'.freeze
|
29
30
|
STANDARD_MAPPING = 'standard_mapping'.freeze
|
30
31
|
UNPACK_PATTERN = 'unpack_pattern'.freeze
|
32
|
+
VALIDATES = 'validates'.freeze
|
31
33
|
end
|
32
34
|
|
33
35
|
private
|
@@ -89,8 +91,8 @@ module NdrImport::Mapper
|
|
89
91
|
line.each_with_index do |raw_value, col|
|
90
92
|
column_mapping = line_mappings[col]
|
91
93
|
if column_mapping.nil?
|
92
|
-
|
93
|
-
|
94
|
+
raise ArgumentError,
|
95
|
+
"Line has too many columns (expected #{line_mappings.size} but got #{line.size})"
|
94
96
|
end
|
95
97
|
|
96
98
|
next if column_mapping[Strings::DO_NOT_CAPTURE]
|
@@ -100,7 +102,8 @@ module NdrImport::Mapper
|
|
100
102
|
end
|
101
103
|
|
102
104
|
# Establish the rawtext column name we are to use for this column
|
103
|
-
rawtext_column_name = (column_mapping[Strings::RAWTEXT_NAME] ||
|
105
|
+
rawtext_column_name = (column_mapping[Strings::RAWTEXT_NAME] ||
|
106
|
+
column_mapping[Strings::COLUMN]).downcase
|
104
107
|
|
105
108
|
# Replace raw_value with decoded raw_value
|
106
109
|
Array(column_mapping[Strings::DECODE]).each do |encoding|
|
@@ -123,6 +126,9 @@ module NdrImport::Mapper
|
|
123
126
|
replace_before_mapping(original_value, field_mapping)
|
124
127
|
value = mapped_value(original_value, field_mapping)
|
125
128
|
|
129
|
+
validations = field_mapping[Strings::VALIDATES].presence
|
130
|
+
apply_validations_on(field_mapping[Strings::FIELD], value, validations) if validations
|
131
|
+
|
126
132
|
# We don't care about blank values, unless we're mapping a :join
|
127
133
|
# field (in which case, :compact may or may not be being used).
|
128
134
|
next if value.blank? && !field_mapping[Strings::JOIN]
|
@@ -135,7 +141,9 @@ module NdrImport::Mapper
|
|
135
141
|
|
136
142
|
if field_mapping[Strings::ORDER]
|
137
143
|
data[field][:join] ||= field_mapping[Strings::JOIN]
|
138
|
-
|
144
|
+
if field_mapping.key?(Strings::COMPACT)
|
145
|
+
data[field][:compact] = field_mapping[Strings::COMPACT]
|
146
|
+
end
|
139
147
|
|
140
148
|
data[field][:values][field_mapping[Strings::ORDER] - 1] = value
|
141
149
|
elsif field_mapping[Strings::PRIORITY]
|
@@ -156,7 +164,7 @@ module NdrImport::Mapper
|
|
156
164
|
attributes[field] =
|
157
165
|
if field_data.key?(:join)
|
158
166
|
# Map "blank" values to nil:
|
159
|
-
values = values.map
|
167
|
+
values = values.map(&:presence)
|
160
168
|
values.compact! if field_data[:compact]
|
161
169
|
values.join(field_data[:join])
|
162
170
|
else
|
@@ -222,6 +230,20 @@ module NdrImport::Mapper
|
|
222
230
|
true
|
223
231
|
end
|
224
232
|
|
233
|
+
# Apply ActiveRecord-like validations specified in field mappings, e.g.
|
234
|
+
# - column: column_one
|
235
|
+
# mappings:
|
236
|
+
# - field: field_one
|
237
|
+
# validates:
|
238
|
+
# presence: true
|
239
|
+
def apply_validations_on(field, value, validations)
|
240
|
+
presence_validation_on(field, value) if validations[Strings::PRESENCE]
|
241
|
+
end
|
242
|
+
|
243
|
+
def presence_validation_on(field, value)
|
244
|
+
raise NdrImport::MissingFieldError, field if value.blank?
|
245
|
+
end
|
246
|
+
|
225
247
|
# Decode raw_value using specified encoding
|
226
248
|
# E.g. adding decode to a column:
|
227
249
|
#
|
@@ -0,0 +1,14 @@
|
|
1
|
+
require 'active_model' # Source I18n translations
|
2
|
+
|
3
|
+
module NdrImport
|
4
|
+
# Raised if a mandatory field is blank.
|
5
|
+
class MissingFieldError < StandardError
|
6
|
+
attr_reader :field
|
7
|
+
|
8
|
+
def initialize(field)
|
9
|
+
@field = field
|
10
|
+
message = "#{field} #{I18n.t('errors.messages.blank')}"
|
11
|
+
super(message)
|
12
|
+
end
|
13
|
+
end
|
14
|
+
end
|
@@ -32,7 +32,8 @@ module NdrImport
|
|
32
32
|
|
33
33
|
# capture the required part of the line by replacing (recusively) the line,
|
34
34
|
# with the first captured regular expression group. This is hardcoded in an attempt
|
35
|
-
# to preserve the rawtext as much as possible
|
35
|
+
# to preserve the rawtext as much as possible.
|
36
|
+
# The captured value is `String#strip`ed by default.
|
36
37
|
def capture_value(line)
|
37
38
|
value = line.dup
|
38
39
|
[@cell_mapping['capture']].flatten.each do |pattern|
|
@@ -42,7 +43,7 @@ module NdrImport
|
|
42
43
|
value = nil
|
43
44
|
end
|
44
45
|
end
|
45
|
-
value
|
46
|
+
value.nil? ? value : value.strip
|
46
47
|
end
|
47
48
|
|
48
49
|
def validate_cell_mapping
|
data/lib/ndr_import/table.rb
CHANGED
@@ -92,6 +92,16 @@ module NdrImport
|
|
92
92
|
@header_valid == true
|
93
93
|
end
|
94
94
|
|
95
|
+
# For readability, we should serialise the columns last
|
96
|
+
def encode_with(coder)
|
97
|
+
options = self.class.all_valid_options - ['columns']
|
98
|
+
options.each do |option|
|
99
|
+
value = send(option)
|
100
|
+
coder[option] = value if value
|
101
|
+
end
|
102
|
+
coder['columns'] = @columns
|
103
|
+
end
|
104
|
+
|
95
105
|
private
|
96
106
|
|
97
107
|
# This method uses a buffer to not yield the last <buffer_size> iterations of an enumerable.
|
data/lib/ndr_import/version.rb
CHANGED
data/ndr_import.gemspec
CHANGED
@@ -20,6 +20,7 @@ Gem::Specification.new do |spec|
|
|
20
20
|
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
21
21
|
spec.require_paths = ['lib']
|
22
22
|
|
23
|
+
spec.add_dependency 'activemodel'
|
23
24
|
spec.add_dependency 'activesupport', '>= 3.2.18', '< 5.3'
|
24
25
|
spec.add_dependency 'ndr_support', '>= 5.3.2', '< 6'
|
25
26
|
|
@@ -31,6 +32,7 @@ Gem::Specification.new do |spec|
|
|
31
32
|
spec.add_dependency 'nokogiri', '~> 1.8', '>= 1.8.5'
|
32
33
|
spec.add_dependency 'pdf-reader', '~> 2.1'
|
33
34
|
spec.add_dependency 'roo-xls'
|
35
|
+
spec.add_dependency 'seven_zip_ruby', '~> 1.2'
|
34
36
|
spec.add_dependency 'spreadsheet', '1.0.3'
|
35
37
|
|
36
38
|
spec.required_ruby_version = '>= 2.4'
|
data/test/file/registry_test.rb
CHANGED
@@ -12,7 +12,7 @@ module NdrImport
|
|
12
12
|
|
13
13
|
test 'Registry.handlers' do
|
14
14
|
assert_instance_of Hash, NdrImport::File::Registry.handlers
|
15
|
-
assert_equal %w[csv delimited doc docx nontabular pdf text txt xls xlsx xml_table zip],
|
15
|
+
assert_equal %w[7z csv delimited doc docx nontabular pdf text txt xls xlsx xml_table zip],
|
16
16
|
NdrImport::File::Registry.handlers.keys.sort
|
17
17
|
end
|
18
18
|
|
@@ -0,0 +1,59 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
require 'ndr_import/file/seven_zip'
|
3
|
+
|
4
|
+
module NdrImport
|
5
|
+
module File
|
6
|
+
# 7zip file handler tests
|
7
|
+
class SevenZipTestTest < ActiveSupport::TestCase
|
8
|
+
def setup
|
9
|
+
@home = SafePath.new('test_space_rw')
|
10
|
+
@permanent_test_files = SafePath.new('permanent_test_files')
|
11
|
+
end
|
12
|
+
|
13
|
+
test 'should reject non SafePath arguments' do
|
14
|
+
file_path = @home.join('imaginary.7z')
|
15
|
+
|
16
|
+
assert_raises ArgumentError do
|
17
|
+
NdrImport::File::SevenZip.new(file_path.to_s, nil, 'unzip_path' => @home.to_s)
|
18
|
+
end
|
19
|
+
|
20
|
+
assert_raises ArgumentError do
|
21
|
+
NdrImport::File::SevenZip.new(file_path.to_s, nil, 'unzip_path' => @home)
|
22
|
+
end
|
23
|
+
|
24
|
+
assert_raises ArgumentError do
|
25
|
+
NdrImport::File::SevenZip.new(file_path, nil, 'unzip_path' => @home.to_s)
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
test 'should read 7zip file with correct password' do
|
30
|
+
options = { 'password' => 'FortuneCookie', 'unzip_path' => @home }
|
31
|
+
file_path = @permanent_test_files.join('normal.7z')
|
32
|
+
|
33
|
+
handler = NdrImport::File::SevenZip.new(file_path, nil, options)
|
34
|
+
handler.files.all? do |filename|
|
35
|
+
assert_instance_of SafePath, filename
|
36
|
+
end
|
37
|
+
files = handler.files.to_a
|
38
|
+
assert_equal 'normal_pipe.csv', ::File.basename(files[0])
|
39
|
+
assert_equal 'normal_thorn.csv', ::File.basename(files[1])
|
40
|
+
|
41
|
+
exception = assert_raises RuntimeError do
|
42
|
+
handler.tables
|
43
|
+
end
|
44
|
+
assert_equal 'SevenZip#tables should never be called', exception.message
|
45
|
+
end
|
46
|
+
|
47
|
+
test 'should not read 7zip file with incorrect password' do
|
48
|
+
options = { 'password' => 'WrongPassword', 'unzip_path' => @home }
|
49
|
+
file_path = @permanent_test_files.join('normal.7z')
|
50
|
+
|
51
|
+
handler = NdrImport::File::SevenZip.new(file_path, nil, options)
|
52
|
+
|
53
|
+
assert_raises SevenZipRuby::InvalidArchive do
|
54
|
+
handler.files.to_a
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end
|
58
|
+
end
|
59
|
+
end
|
data/test/mapper_test.rb
CHANGED
@@ -293,6 +293,17 @@ class MapperTest < ActiveSupport::TestCase
|
|
293
293
|
: 'RGT01'
|
294
294
|
YML
|
295
295
|
|
296
|
+
validates_presence_mapping = YAML.safe_load <<-YML
|
297
|
+
- column: column_one
|
298
|
+
mappings:
|
299
|
+
- field: field_one
|
300
|
+
validates:
|
301
|
+
presence: true
|
302
|
+
- column: column_two
|
303
|
+
mappings:
|
304
|
+
- field: field_two
|
305
|
+
YML
|
306
|
+
|
296
307
|
test 'map should return a number' do
|
297
308
|
assert_equal '1', TestMapper.new.mapped_value('A', map_mapping)
|
298
309
|
end
|
@@ -365,6 +376,13 @@ class MapperTest < ActiveSupport::TestCase
|
|
365
376
|
assert_equal %w(RGT01 RGT01), mapped_value['hospital']
|
366
377
|
end
|
367
378
|
|
379
|
+
test 'should raise an error on blank mandatory field' do
|
380
|
+
exception = assert_raise(NdrImport::MissingFieldError) do
|
381
|
+
TestMapper.new.mapped_line(['', 'RGT01'], validates_presence_mapping)
|
382
|
+
end
|
383
|
+
assert_equal "field_one can't be blank", exception.message
|
384
|
+
end
|
385
|
+
|
368
386
|
test 'should return correct date format for date fields with daysafter' do
|
369
387
|
assert_equal Date.new(2012, 5, 18), TestMapper.new.mapped_value(2, daysafter_mapping)
|
370
388
|
assert_equal Date.new(2012, 5, 18), TestMapper.new.mapped_value('2', daysafter_mapping)
|
@@ -542,4 +542,48 @@ STR
|
|
542
542
|
table.transform(junk).to_a
|
543
543
|
end
|
544
544
|
end
|
545
|
+
|
546
|
+
def test_should_strip_captured_rawtext
|
547
|
+
unwanted_white_space = <<-STR.each_line
|
548
|
+
111
|
549
|
+
Trailing whitespace end_of_line
|
550
|
+
------
|
551
|
+
111
|
552
|
+
Leading whitespaceend_of_line
|
553
|
+
------
|
554
|
+
111
|
555
|
+
Leading and trailing whitespace end_of_line
|
556
|
+
------
|
557
|
+
111
|
558
|
+
Should not match this
|
559
|
+
------
|
560
|
+
STR
|
561
|
+
|
562
|
+
table = YAML.load <<-YML.strip_heredoc
|
563
|
+
--- !ruby/object:NdrImport::NonTabular::Table
|
564
|
+
start_line_pattern: !ruby/regexp /^111$/
|
565
|
+
end_in_a_record: true
|
566
|
+
klass: SomeTestKlass
|
567
|
+
columns:
|
568
|
+
- column: one
|
569
|
+
non_tabular_cell:
|
570
|
+
lines: 0
|
571
|
+
capture: !ruby/regexp /^(.*)end_of_line$/i
|
572
|
+
trim_rawtext: left
|
573
|
+
YML
|
574
|
+
|
575
|
+
enum = table.transform(unwanted_white_space)
|
576
|
+
assert_instance_of Enumerator, enum
|
577
|
+
|
578
|
+
output = []
|
579
|
+
enum.each do |klass, fields, index|
|
580
|
+
output << [klass, fields, index]
|
581
|
+
end
|
582
|
+
|
583
|
+
expected_rawtext_ouput = [{ 'one' => 'Trailing whitespace' },
|
584
|
+
{ 'one' => 'Leading whitespace' },
|
585
|
+
{ 'one' => 'Leading and trailing whitespace' },
|
586
|
+
{ 'one' => '' }]
|
587
|
+
assert_equal expected_rawtext_ouput, (output.map { |row| row[1][:rawtext] })
|
588
|
+
end
|
545
589
|
end
|
Binary file
|
data/test/table_test.rb
CHANGED
@@ -1,5 +1,9 @@
|
|
1
1
|
require 'test_helper'
|
2
2
|
|
3
|
+
class TestNoCoderTable < NdrImport::NonTabular::Table
|
4
|
+
undef_method :encode_with
|
5
|
+
end
|
6
|
+
|
3
7
|
# This tests the NdrImport::Table mapping class
|
4
8
|
class TableTest < ActiveSupport::TestCase
|
5
9
|
def test_deserialize_table
|
@@ -137,7 +141,67 @@ class TableTest < ActiveSupport::TestCase
|
|
137
141
|
end
|
138
142
|
|
139
143
|
def test_encode_with
|
140
|
-
|
144
|
+
table = NdrImport::Table.new
|
145
|
+
assert table.instance_variables.include?(:@row_index)
|
146
|
+
refute table.class.all_valid_options.include?('row_index')
|
147
|
+
assert_nil table.columns
|
148
|
+
|
149
|
+
coder = {}
|
150
|
+
table.encode_with(coder)
|
151
|
+
assert coder.key?('columns')
|
152
|
+
|
153
|
+
yaml_output = table.to_yaml
|
154
|
+
assert yaml_output.include?('columns')
|
155
|
+
refute yaml_output.include?('row_index')
|
156
|
+
assert YAML.load(yaml_output).is_a?(NdrImport::Table)
|
157
|
+
end
|
158
|
+
|
159
|
+
def test_encode_with_compare
|
160
|
+
table_options = {
|
161
|
+
columns: %w[a b],
|
162
|
+
klass: 'SomeKlass',
|
163
|
+
start_line_pattern: 'TODO',
|
164
|
+
end_line_pattern: 'TODO'
|
165
|
+
}
|
166
|
+
no_coder_table = TestNoCoderTable.new(table_options)
|
167
|
+
ndr_table = NdrImport::NonTabular::Table.new(table_options)
|
168
|
+
|
169
|
+
assert no_coder_table.is_a?(NdrImport::Table)
|
170
|
+
assert ndr_table.is_a?(NdrImport::Table)
|
171
|
+
assert no_coder_table.is_a?(NdrImport::NonTabular::Table)
|
172
|
+
assert ndr_table.is_a?(NdrImport::NonTabular::Table)
|
173
|
+
|
174
|
+
refute no_coder_table.respond_to?(:encode_with)
|
175
|
+
assert ndr_table.respond_to?(:encode_with)
|
176
|
+
|
177
|
+
no_coder_table_yaml_order = get_yaml_mapping_order(no_coder_table.to_yaml)
|
178
|
+
ndr_table_yaml_order = get_yaml_mapping_order(ndr_table.to_yaml)
|
179
|
+
|
180
|
+
# no_coder_table_yaml_order => ["klass", "columns", "start_line_pattern", "end_line_pattern", "row_index"]
|
181
|
+
# ndr_table_yaml_order => ["klass", "start_line_pattern", "end_line_pattern", "columns"]
|
182
|
+
|
183
|
+
assert no_coder_table_yaml_order.include?('row_index')
|
184
|
+
refute ndr_table_yaml_order.include?('row_index')
|
185
|
+
|
186
|
+
refute no_coder_table_yaml_order.last == 'columns'
|
187
|
+
assert ndr_table_yaml_order.last == 'columns'
|
188
|
+
|
189
|
+
# test objects deserialized from yaml mappings
|
190
|
+
deserialized_no_coder_table_yaml = YAML.load(no_coder_table.to_yaml)
|
191
|
+
deserialized_ndr_table_yaml = YAML.load(ndr_table.to_yaml)
|
192
|
+
|
193
|
+
assert deserialized_no_coder_table_yaml.is_a?(NdrImport::NonTabular::Table)
|
194
|
+
assert deserialized_ndr_table_yaml.is_a?(NdrImport::NonTabular::Table)
|
195
|
+
|
196
|
+
assert_nil deserialized_no_coder_table_yaml.filename_pattern
|
197
|
+
assert_equal deserialized_no_coder_table_yaml.klass, no_coder_table.klass
|
198
|
+
assert_equal deserialized_no_coder_table_yaml.start_line_pattern, no_coder_table.start_line_pattern
|
199
|
+
assert_equal deserialized_no_coder_table_yaml.columns, no_coder_table.columns
|
200
|
+
|
201
|
+
assert_nil deserialized_ndr_table_yaml.filename_pattern
|
202
|
+
assert_equal deserialized_ndr_table_yaml.klass, ndr_table.klass
|
203
|
+
assert_equal deserialized_ndr_table_yaml.start_line_pattern, ndr_table.start_line_pattern
|
204
|
+
assert_equal deserialized_ndr_table_yaml.columns, ndr_table.columns
|
141
205
|
end
|
142
206
|
|
143
207
|
def test_skip_footer_lines
|
@@ -407,4 +471,10 @@ YML
|
|
407
471
|
]
|
408
472
|
}
|
409
473
|
end
|
474
|
+
|
475
|
+
def get_yaml_mapping_order(yaml_mapping)
|
476
|
+
yaml_mapping.split("\n").
|
477
|
+
delete_if { |line| /-+/.match(line) }.
|
478
|
+
map { |line| /(.*):/.match(line)[1].to_s }
|
479
|
+
end
|
410
480
|
end
|
metadata
CHANGED
@@ -1,15 +1,29 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ndr_import
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 8.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- NCRS Development Team
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-11-
|
11
|
+
date: 2018-11-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: activemodel
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
13
27
|
- !ruby/object:Gem::Dependency
|
14
28
|
name: activesupport
|
15
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -160,6 +174,20 @@ dependencies:
|
|
160
174
|
- - ">="
|
161
175
|
- !ruby/object:Gem::Version
|
162
176
|
version: '0'
|
177
|
+
- !ruby/object:Gem::Dependency
|
178
|
+
name: seven_zip_ruby
|
179
|
+
requirement: !ruby/object:Gem::Requirement
|
180
|
+
requirements:
|
181
|
+
- - "~>"
|
182
|
+
- !ruby/object:Gem::Version
|
183
|
+
version: '1.2'
|
184
|
+
type: :runtime
|
185
|
+
prerelease: false
|
186
|
+
version_requirements: !ruby/object:Gem::Requirement
|
187
|
+
requirements:
|
188
|
+
- - "~>"
|
189
|
+
- !ruby/object:Gem::Version
|
190
|
+
version: '1.2'
|
163
191
|
- !ruby/object:Gem::Dependency
|
164
192
|
name: spreadsheet
|
165
193
|
requirement: !ruby/object:Gem::Requirement
|
@@ -329,6 +357,7 @@ files:
|
|
329
357
|
- ".gitignore"
|
330
358
|
- ".hound.yml"
|
331
359
|
- ".rubocop.yml"
|
360
|
+
- CHANGELOG.md
|
332
361
|
- CODE_OF_CONDUCT.md
|
333
362
|
- Gemfile
|
334
363
|
- Guardfile
|
@@ -349,6 +378,7 @@ files:
|
|
349
378
|
- lib/ndr_import/file/excel.rb
|
350
379
|
- lib/ndr_import/file/pdf.rb
|
351
380
|
- lib/ndr_import/file/registry.rb
|
381
|
+
- lib/ndr_import/file/seven_zip.rb
|
352
382
|
- lib/ndr_import/file/text.rb
|
353
383
|
- lib/ndr_import/file/unregistered_filetype.rb
|
354
384
|
- lib/ndr_import/file/word.rb
|
@@ -363,6 +393,7 @@ files:
|
|
363
393
|
- lib/ndr_import/helpers/file/zip.rb
|
364
394
|
- lib/ndr_import/mapper.rb
|
365
395
|
- lib/ndr_import/mapping_error.rb
|
396
|
+
- lib/ndr_import/missing_field_error.rb
|
366
397
|
- lib/ndr_import/non_tabular/column_mapping.rb
|
367
398
|
- lib/ndr_import/non_tabular/line.rb
|
368
399
|
- lib/ndr_import/non_tabular/mapping.rb
|
@@ -381,6 +412,7 @@ files:
|
|
381
412
|
- test/file/excel_test.rb
|
382
413
|
- test/file/pdf_test.rb
|
383
414
|
- test/file/registry_test.rb
|
415
|
+
- test/file/seven_zip_test.rb
|
384
416
|
- test/file/text_test.rb
|
385
417
|
- test/file/word_test.rb
|
386
418
|
- test/file/xml_test.rb
|
@@ -417,6 +449,7 @@ files:
|
|
417
449
|
- test/resources/malformed.csv
|
418
450
|
- test/resources/malformed.xml
|
419
451
|
- test/resources/malformed_pipe.csv
|
452
|
+
- test/resources/normal.7z
|
420
453
|
- test/resources/normal.csv
|
421
454
|
- test/resources/normal.csv.zip
|
422
455
|
- test/resources/normal_pipe.csv
|
@@ -476,6 +509,7 @@ test_files:
|
|
476
509
|
- test/file/excel_test.rb
|
477
510
|
- test/file/pdf_test.rb
|
478
511
|
- test/file/registry_test.rb
|
512
|
+
- test/file/seven_zip_test.rb
|
479
513
|
- test/file/text_test.rb
|
480
514
|
- test/file/word_test.rb
|
481
515
|
- test/file/xml_test.rb
|
@@ -512,6 +546,7 @@ test_files:
|
|
512
546
|
- test/resources/malformed.csv
|
513
547
|
- test/resources/malformed.xml
|
514
548
|
- test/resources/malformed_pipe.csv
|
549
|
+
- test/resources/normal.7z
|
515
550
|
- test/resources/normal.csv
|
516
551
|
- test/resources/normal.csv.zip
|
517
552
|
- test/resources/normal_pipe.csv
|