ndr_import 7.0.0 → 8.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +28 -0
- data/README.md +2 -2
- data/code_safety.yml +32 -12
- data/lib/ndr_import.rb +1 -0
- data/lib/ndr_import/file/all.rb +1 -0
- data/lib/ndr_import/file/seven_zip.rb +70 -0
- data/lib/ndr_import/mapper.rb +27 -5
- data/lib/ndr_import/missing_field_error.rb +14 -0
- data/lib/ndr_import/non_tabular/column_mapping.rb +3 -2
- data/lib/ndr_import/table.rb +10 -0
- data/lib/ndr_import/version.rb +1 -1
- data/ndr_import.gemspec +2 -0
- data/test/file/registry_test.rb +1 -1
- data/test/file/seven_zip_test.rb +59 -0
- data/test/mapper_test.rb +18 -0
- data/test/non_tabular/table_test.rb +44 -0
- data/test/resources/normal.7z +0 -0
- data/test/table_test.rb +71 -1
- metadata +37 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: f74d60e93cf1f1d72baeebcbe931fc4225c3e74c11aaa13c324578b261561008
|
|
4
|
+
data.tar.gz: 70657ad1f113089bd2f719de5b59317310212c730ba34e762d99f9395da8fff2
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 91bced100d286a44df6fa193cad58a2ad85f6733d9b8cbbf1970e438ed8a3a7d9785a6d0e8b16396713511d045edd0a6aba00090a903c217e4c71b8e2fde7174
|
|
7
|
+
data.tar.gz: 4e2e968084f78f25fffebbc793806402fefd515605afc0ac9c81a5d535f0cb7f27a50af3c4dcb733ff5a9a8bc17311084c65593a2fb971ac35c5a4937bbfe777
|
data/CHANGELOG.md
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
## [Unreleased]
|
|
2
|
+
*no unreleased changes*
|
|
3
|
+
|
|
4
|
+
## 8.0.0 / 2018-11-26
|
|
5
|
+
### Changed
|
|
6
|
+
* Strip non tabular captured values by default (#28)
|
|
7
|
+
|
|
8
|
+
### Added
|
|
9
|
+
* Add validations to field mappings (#27)
|
|
10
|
+
* control `ndr_table` serialise order with `encode_with` method (#30)
|
|
11
|
+
|
|
12
|
+
## 7.0.0 / 2018-11-09
|
|
13
|
+
### Changed
|
|
14
|
+
* Breaking refactor of universal importer mixin (#25)
|
|
15
|
+
* Update `pdf-reader` version to support recent Rubies (#16)
|
|
16
|
+
|
|
17
|
+
## 6.4.1 / 2018-10-18
|
|
18
|
+
### Fixed
|
|
19
|
+
* bump nokogiri re: CVE-2018-14404
|
|
20
|
+
|
|
21
|
+
## 6.4.0 / 2018-10-17
|
|
22
|
+
### Added
|
|
23
|
+
* Allow `decode: :word_doc` to read a .DOCX file (#26)
|
|
24
|
+
|
|
25
|
+
## 6.3.0 / 2018-10-12
|
|
26
|
+
### Added
|
|
27
|
+
* Add XML file support (#22)
|
|
28
|
+
* Add liberal CSV parsing option (#24)
|
data/README.md
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
This is the Public Health England (PHE) National Disease Registers (NDR) Import ETL ruby gem, providing:
|
|
4
4
|
|
|
5
|
-
1. file import handlers for *extracting* data from delimited files (csv, pipe, tab, thorn), .xls(x) spreadsheets, .doc(x) word documents, PDF, XML and Zip files.
|
|
5
|
+
1. file import handlers for *extracting* data from delimited files (csv, pipe, tab, thorn), .xls(x) spreadsheets, .doc(x) word documents, PDF, XML, 7-Zip and Zip files.
|
|
6
6
|
2. table mappers for *transforming* tabular and non-tabular data into key value pairs grouped by a common "klass".
|
|
7
7
|
|
|
8
8
|
## Installation
|
|
@@ -48,7 +48,7 @@ files.each do |filename|
|
|
|
48
48
|
end
|
|
49
49
|
```
|
|
50
50
|
|
|
51
|
-
See test/readme_test.rb for a more complete working example.
|
|
51
|
+
See `test/readme_test.rb` for a more complete working example.
|
|
52
52
|
|
|
53
53
|
More information on the workings of the mapper are available in the [wiki](https://github.com/PublicHealthEngland/ndr_import/wiki).
|
|
54
54
|
|
data/code_safety.yml
CHANGED
|
@@ -16,6 +16,10 @@ file safety:
|
|
|
16
16
|
comments:
|
|
17
17
|
reviewed_by: josh.pencheon
|
|
18
18
|
safe_revision: 71979e0a602ca5a0ce415c194f10add9959f0116
|
|
19
|
+
CHANGELOG.md:
|
|
20
|
+
comments:
|
|
21
|
+
reviewed_by: josh.pencheon
|
|
22
|
+
safe_revision: 952e45599240ab96b22d7ce223a7551853e50ae5
|
|
19
23
|
CODE_OF_CONDUCT.md:
|
|
20
24
|
comments:
|
|
21
25
|
reviewed_by: timgentry
|
|
@@ -35,7 +39,7 @@ file safety:
|
|
|
35
39
|
README.md:
|
|
36
40
|
comments:
|
|
37
41
|
reviewed_by: josh.pencheon
|
|
38
|
-
safe_revision:
|
|
42
|
+
safe_revision: f2feb0c430947839183d7223e60e7c29b2c0f846
|
|
39
43
|
Rakefile:
|
|
40
44
|
comments:
|
|
41
45
|
reviewed_by: josh.pencheon
|
|
@@ -59,7 +63,7 @@ file safety:
|
|
|
59
63
|
lib/ndr_import.rb:
|
|
60
64
|
comments:
|
|
61
65
|
reviewed_by: josh.pencheon
|
|
62
|
-
safe_revision:
|
|
66
|
+
safe_revision: eca44583e9989159b45e90021dd1c65228447180
|
|
63
67
|
lib/ndr_import/csv_library.rb:
|
|
64
68
|
comments:
|
|
65
69
|
reviewed_by: josh.pencheon
|
|
@@ -67,7 +71,7 @@ file safety:
|
|
|
67
71
|
lib/ndr_import/file/all.rb:
|
|
68
72
|
comments:
|
|
69
73
|
reviewed_by: josh.pencheon
|
|
70
|
-
safe_revision:
|
|
74
|
+
safe_revision: f2feb0c430947839183d7223e60e7c29b2c0f846
|
|
71
75
|
lib/ndr_import/file/base.rb:
|
|
72
76
|
comments:
|
|
73
77
|
reviewed_by: timgentry
|
|
@@ -92,6 +96,10 @@ file safety:
|
|
|
92
96
|
comments:
|
|
93
97
|
reviewed_by: josh.pencheon
|
|
94
98
|
safe_revision: 2104514689a1a1286195fff18144a8cecb93048b
|
|
99
|
+
lib/ndr_import/file/seven_zip.rb:
|
|
100
|
+
comments:
|
|
101
|
+
reviewed_by: josh.pencheon
|
|
102
|
+
safe_revision: f2feb0c430947839183d7223e60e7c29b2c0f846
|
|
95
103
|
lib/ndr_import/file/text.rb:
|
|
96
104
|
comments:
|
|
97
105
|
reviewed_by: josh.pencheon
|
|
@@ -143,15 +151,19 @@ file safety:
|
|
|
143
151
|
lib/ndr_import/mapper.rb:
|
|
144
152
|
comments: Writes to a Tempfile, but cleans up. Ruby will respect TMP_DIR
|
|
145
153
|
reviewed_by: josh.pencheon
|
|
146
|
-
safe_revision:
|
|
154
|
+
safe_revision: eca44583e9989159b45e90021dd1c65228447180
|
|
147
155
|
lib/ndr_import/mapping_error.rb:
|
|
148
156
|
comments:
|
|
149
157
|
reviewed_by: timgentry
|
|
150
158
|
safe_revision: 96ccc535bb9f933081dfc73bf0442b32b6bdce1d
|
|
159
|
+
lib/ndr_import/missing_field_error.rb:
|
|
160
|
+
comments:
|
|
161
|
+
reviewed_by: josh.pencheon
|
|
162
|
+
safe_revision: eca44583e9989159b45e90021dd1c65228447180
|
|
151
163
|
lib/ndr_import/non_tabular/column_mapping.rb:
|
|
152
164
|
comments:
|
|
153
165
|
reviewed_by: josh.pencheon
|
|
154
|
-
safe_revision:
|
|
166
|
+
safe_revision: f216687d0bba7f2940f74a3353a32be3c900c194
|
|
155
167
|
lib/ndr_import/non_tabular/line.rb:
|
|
156
168
|
comments:
|
|
157
169
|
reviewed_by: timgentry
|
|
@@ -179,7 +191,7 @@ file safety:
|
|
|
179
191
|
lib/ndr_import/table.rb:
|
|
180
192
|
comments: uses File.basename
|
|
181
193
|
reviewed_by: josh.pencheon
|
|
182
|
-
safe_revision:
|
|
194
|
+
safe_revision: 2baccd482bcd875dc273f6a34a0e28823790e8d5
|
|
183
195
|
lib/ndr_import/universal_importer_helper.rb:
|
|
184
196
|
comments:
|
|
185
197
|
reviewed_by: josh.pencheon
|
|
@@ -187,7 +199,7 @@ file safety:
|
|
|
187
199
|
lib/ndr_import/version.rb:
|
|
188
200
|
comments: another check?
|
|
189
201
|
reviewed_by: josh.pencheon
|
|
190
|
-
safe_revision:
|
|
202
|
+
safe_revision: 952e45599240ab96b22d7ce223a7551853e50ae5
|
|
191
203
|
lib/ndr_import/xml/table.rb:
|
|
192
204
|
comments:
|
|
193
205
|
reviewed_by: josh.pencheon
|
|
@@ -195,7 +207,7 @@ file safety:
|
|
|
195
207
|
ndr_import.gemspec:
|
|
196
208
|
comments:
|
|
197
209
|
reviewed_by: josh.pencheon
|
|
198
|
-
safe_revision:
|
|
210
|
+
safe_revision: eca44583e9989159b45e90021dd1c65228447180
|
|
199
211
|
test/file/base_test.rb:
|
|
200
212
|
comments:
|
|
201
213
|
reviewed_by: timgentry
|
|
@@ -219,7 +231,11 @@ file safety:
|
|
|
219
231
|
test/file/registry_test.rb:
|
|
220
232
|
comments:
|
|
221
233
|
reviewed_by: josh.pencheon
|
|
222
|
-
safe_revision:
|
|
234
|
+
safe_revision: f2feb0c430947839183d7223e60e7c29b2c0f846
|
|
235
|
+
test/file/seven_zip_test.rb:
|
|
236
|
+
comments:
|
|
237
|
+
reviewed_by: josh.pencheon
|
|
238
|
+
safe_revision: f2feb0c430947839183d7223e60e7c29b2c0f846
|
|
223
239
|
test/file/text_test.rb:
|
|
224
240
|
comments:
|
|
225
241
|
reviewed_by: timgentry
|
|
@@ -267,7 +283,7 @@ file safety:
|
|
|
267
283
|
test/mapper_test.rb:
|
|
268
284
|
comments: exposes Mapper internals to test them
|
|
269
285
|
reviewed_by: josh.pencheon
|
|
270
|
-
safe_revision:
|
|
286
|
+
safe_revision: eca44583e9989159b45e90021dd1c65228447180
|
|
271
287
|
test/non_tabular/mapping_test.rb:
|
|
272
288
|
comments:
|
|
273
289
|
reviewed_by: timgentry
|
|
@@ -275,7 +291,7 @@ file safety:
|
|
|
275
291
|
test/non_tabular/table_test.rb:
|
|
276
292
|
comments:
|
|
277
293
|
reviewed_by: josh.pencheon
|
|
278
|
-
safe_revision:
|
|
294
|
+
safe_revision: f216687d0bba7f2940f74a3353a32be3c900c194
|
|
279
295
|
test/non_tabular_file_helper_test.rb:
|
|
280
296
|
comments:
|
|
281
297
|
reviewed_by: timgentry
|
|
@@ -364,6 +380,10 @@ file safety:
|
|
|
364
380
|
comments:
|
|
365
381
|
reviewed_by: josh.pencheon
|
|
366
382
|
safe_revision: 71979e0a602ca5a0ce415c194f10add9959f0116
|
|
383
|
+
test/resources/normal.7z:
|
|
384
|
+
comments:
|
|
385
|
+
reviewed_by: josh.pencheon
|
|
386
|
+
safe_revision: f2feb0c430947839183d7223e60e7c29b2c0f846
|
|
367
387
|
test/resources/normal.csv:
|
|
368
388
|
comments:
|
|
369
389
|
reviewed_by: timgentry
|
|
@@ -463,7 +483,7 @@ file safety:
|
|
|
463
483
|
test/table_test.rb:
|
|
464
484
|
comments:
|
|
465
485
|
reviewed_by: josh.pencheon
|
|
466
|
-
safe_revision:
|
|
486
|
+
safe_revision: 2baccd482bcd875dc273f6a34a0e28823790e8d5
|
|
467
487
|
test/test_helper.rb:
|
|
468
488
|
comments:
|
|
469
489
|
reviewed_by: josh.pencheon
|
data/lib/ndr_import.rb
CHANGED
data/lib/ndr_import/file/all.rb
CHANGED
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
require 'seven_zip_ruby'
|
|
2
|
+
require 'ndr_support/safe_file'
|
|
3
|
+
require_relative 'registry'
|
|
4
|
+
|
|
5
|
+
module NdrImport
|
|
6
|
+
# This is one of a collection of file handlers that deal with individual formats of data.
|
|
7
|
+
# They can be instantiated directly or via the factory method Registry.tables
|
|
8
|
+
module File
|
|
9
|
+
# This class is a 7zip file handler that returns tables from the extracted files.
|
|
10
|
+
class SevenZip < Base
|
|
11
|
+
def initialize(filename, format, options = {})
|
|
12
|
+
super
|
|
13
|
+
@pattern = options['pattern'] || //
|
|
14
|
+
@unzip_path = options['unzip_path']
|
|
15
|
+
@password = options['password']
|
|
16
|
+
|
|
17
|
+
validate_unzip_path_is_safe!
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
def files(&block)
|
|
21
|
+
raise 'Not allowed in external environment' if defined?(::Rails) && ::Rails.env.external?
|
|
22
|
+
|
|
23
|
+
return enum_for(:files) unless block
|
|
24
|
+
|
|
25
|
+
destination = @unzip_path.join(Time.current.strftime('%H%M%S%L'))
|
|
26
|
+
FileUtils.mkdir_p(SafeFile.safepath_to_string(destination))
|
|
27
|
+
|
|
28
|
+
::File.open(SafeFile.safepath_to_string(@filename), 'rb') do |zipfile|
|
|
29
|
+
unzip_entries(zipfile, destination, &block)
|
|
30
|
+
end
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
# 7zip files produce files, never tables.
|
|
34
|
+
def tables
|
|
35
|
+
raise 'SevenZip#tables should never be called'
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
private
|
|
39
|
+
|
|
40
|
+
# Unzip the 7zip file entry and enumerate over it
|
|
41
|
+
def unzip_entries(zipfile, destination, &block)
|
|
42
|
+
SevenZipRuby::Reader.open(zipfile, password: @password) do |szr|
|
|
43
|
+
szr.entries.each do |entry|
|
|
44
|
+
# SECURE: TPG 2018-11-21: The path is stripped from the zipfile entry when extracted
|
|
45
|
+
basename = ::File.basename(entry.path)
|
|
46
|
+
next unless entry.file? && basename.match(@pattern)
|
|
47
|
+
|
|
48
|
+
unzipped_filename = destination.join(basename)
|
|
49
|
+
szr.extract([entry], unzipped_filename)
|
|
50
|
+
|
|
51
|
+
unzipped_files(unzipped_filename, &block)
|
|
52
|
+
end
|
|
53
|
+
end
|
|
54
|
+
end
|
|
55
|
+
|
|
56
|
+
# Enumerate over an unzipped file like any other
|
|
57
|
+
def unzipped_files(unzipped_filename)
|
|
58
|
+
Registry.files(unzipped_filename, @options).each do |filename|
|
|
59
|
+
yield(filename)
|
|
60
|
+
end
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
def validate_unzip_path_is_safe!
|
|
64
|
+
SafeFile.safepath_to_string(@unzip_path)
|
|
65
|
+
end
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
Registry.register(SevenZip, '7z')
|
|
69
|
+
end
|
|
70
|
+
end
|
data/lib/ndr_import/mapper.rb
CHANGED
|
@@ -23,11 +23,13 @@ module NdrImport::Mapper
|
|
|
23
23
|
MAPPINGS = 'mappings'.freeze
|
|
24
24
|
MATCH = 'match'.freeze
|
|
25
25
|
ORDER = 'order'.freeze
|
|
26
|
+
PRESENCE = 'presence'.freeze
|
|
26
27
|
PRIORITY = 'priority'.freeze
|
|
27
28
|
RAWTEXT_NAME = 'rawtext_name'.freeze
|
|
28
29
|
REPLACE = 'replace'.freeze
|
|
29
30
|
STANDARD_MAPPING = 'standard_mapping'.freeze
|
|
30
31
|
UNPACK_PATTERN = 'unpack_pattern'.freeze
|
|
32
|
+
VALIDATES = 'validates'.freeze
|
|
31
33
|
end
|
|
32
34
|
|
|
33
35
|
private
|
|
@@ -89,8 +91,8 @@ module NdrImport::Mapper
|
|
|
89
91
|
line.each_with_index do |raw_value, col|
|
|
90
92
|
column_mapping = line_mappings[col]
|
|
91
93
|
if column_mapping.nil?
|
|
92
|
-
|
|
93
|
-
|
|
94
|
+
raise ArgumentError,
|
|
95
|
+
"Line has too many columns (expected #{line_mappings.size} but got #{line.size})"
|
|
94
96
|
end
|
|
95
97
|
|
|
96
98
|
next if column_mapping[Strings::DO_NOT_CAPTURE]
|
|
@@ -100,7 +102,8 @@ module NdrImport::Mapper
|
|
|
100
102
|
end
|
|
101
103
|
|
|
102
104
|
# Establish the rawtext column name we are to use for this column
|
|
103
|
-
rawtext_column_name = (column_mapping[Strings::RAWTEXT_NAME] ||
|
|
105
|
+
rawtext_column_name = (column_mapping[Strings::RAWTEXT_NAME] ||
|
|
106
|
+
column_mapping[Strings::COLUMN]).downcase
|
|
104
107
|
|
|
105
108
|
# Replace raw_value with decoded raw_value
|
|
106
109
|
Array(column_mapping[Strings::DECODE]).each do |encoding|
|
|
@@ -123,6 +126,9 @@ module NdrImport::Mapper
|
|
|
123
126
|
replace_before_mapping(original_value, field_mapping)
|
|
124
127
|
value = mapped_value(original_value, field_mapping)
|
|
125
128
|
|
|
129
|
+
validations = field_mapping[Strings::VALIDATES].presence
|
|
130
|
+
apply_validations_on(field_mapping[Strings::FIELD], value, validations) if validations
|
|
131
|
+
|
|
126
132
|
# We don't care about blank values, unless we're mapping a :join
|
|
127
133
|
# field (in which case, :compact may or may not be being used).
|
|
128
134
|
next if value.blank? && !field_mapping[Strings::JOIN]
|
|
@@ -135,7 +141,9 @@ module NdrImport::Mapper
|
|
|
135
141
|
|
|
136
142
|
if field_mapping[Strings::ORDER]
|
|
137
143
|
data[field][:join] ||= field_mapping[Strings::JOIN]
|
|
138
|
-
|
|
144
|
+
if field_mapping.key?(Strings::COMPACT)
|
|
145
|
+
data[field][:compact] = field_mapping[Strings::COMPACT]
|
|
146
|
+
end
|
|
139
147
|
|
|
140
148
|
data[field][:values][field_mapping[Strings::ORDER] - 1] = value
|
|
141
149
|
elsif field_mapping[Strings::PRIORITY]
|
|
@@ -156,7 +164,7 @@ module NdrImport::Mapper
|
|
|
156
164
|
attributes[field] =
|
|
157
165
|
if field_data.key?(:join)
|
|
158
166
|
# Map "blank" values to nil:
|
|
159
|
-
values = values.map
|
|
167
|
+
values = values.map(&:presence)
|
|
160
168
|
values.compact! if field_data[:compact]
|
|
161
169
|
values.join(field_data[:join])
|
|
162
170
|
else
|
|
@@ -222,6 +230,20 @@ module NdrImport::Mapper
|
|
|
222
230
|
true
|
|
223
231
|
end
|
|
224
232
|
|
|
233
|
+
# Apply ActiveRecord-like validations specified in field mappings, e.g.
|
|
234
|
+
# - column: column_one
|
|
235
|
+
# mappings:
|
|
236
|
+
# - field: field_one
|
|
237
|
+
# validates:
|
|
238
|
+
# presence: true
|
|
239
|
+
def apply_validations_on(field, value, validations)
|
|
240
|
+
presence_validation_on(field, value) if validations[Strings::PRESENCE]
|
|
241
|
+
end
|
|
242
|
+
|
|
243
|
+
def presence_validation_on(field, value)
|
|
244
|
+
raise NdrImport::MissingFieldError, field if value.blank?
|
|
245
|
+
end
|
|
246
|
+
|
|
225
247
|
# Decode raw_value using specified encoding
|
|
226
248
|
# E.g. adding decode to a column:
|
|
227
249
|
#
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
require 'active_model' # Source I18n translations
|
|
2
|
+
|
|
3
|
+
module NdrImport
|
|
4
|
+
# Raised if a mandatory field is blank.
|
|
5
|
+
class MissingFieldError < StandardError
|
|
6
|
+
attr_reader :field
|
|
7
|
+
|
|
8
|
+
def initialize(field)
|
|
9
|
+
@field = field
|
|
10
|
+
message = "#{field} #{I18n.t('errors.messages.blank')}"
|
|
11
|
+
super(message)
|
|
12
|
+
end
|
|
13
|
+
end
|
|
14
|
+
end
|
|
@@ -32,7 +32,8 @@ module NdrImport
|
|
|
32
32
|
|
|
33
33
|
# capture the required part of the line by replacing (recursively) the line,
|
|
34
34
|
# with the first captured regular expression group. This is hardcoded in an attempt
|
|
35
|
-
# to preserve the rawtext as much as possible
|
|
35
|
+
# to preserve the rawtext as much as possible.
|
|
36
|
+
# The captured value is `String#strip`ed by default.
|
|
36
37
|
def capture_value(line)
|
|
37
38
|
value = line.dup
|
|
38
39
|
[@cell_mapping['capture']].flatten.each do |pattern|
|
|
@@ -42,7 +43,7 @@ module NdrImport
|
|
|
42
43
|
value = nil
|
|
43
44
|
end
|
|
44
45
|
end
|
|
45
|
-
value
|
|
46
|
+
value.nil? ? value : value.strip
|
|
46
47
|
end
|
|
47
48
|
|
|
48
49
|
def validate_cell_mapping
|
data/lib/ndr_import/table.rb
CHANGED
|
@@ -92,6 +92,16 @@ module NdrImport
|
|
|
92
92
|
@header_valid == true
|
|
93
93
|
end
|
|
94
94
|
|
|
95
|
+
# For readability, we should serialise the columns last
|
|
96
|
+
def encode_with(coder)
|
|
97
|
+
options = self.class.all_valid_options - ['columns']
|
|
98
|
+
options.each do |option|
|
|
99
|
+
value = send(option)
|
|
100
|
+
coder[option] = value if value
|
|
101
|
+
end
|
|
102
|
+
coder['columns'] = @columns
|
|
103
|
+
end
|
|
104
|
+
|
|
95
105
|
private
|
|
96
106
|
|
|
97
107
|
# This method uses a buffer to not yield the last <buffer_size> iterations of an enumerable.
|
data/lib/ndr_import/version.rb
CHANGED
data/ndr_import.gemspec
CHANGED
|
@@ -20,6 +20,7 @@ Gem::Specification.new do |spec|
|
|
|
20
20
|
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
|
21
21
|
spec.require_paths = ['lib']
|
|
22
22
|
|
|
23
|
+
spec.add_dependency 'activemodel'
|
|
23
24
|
spec.add_dependency 'activesupport', '>= 3.2.18', '< 5.3'
|
|
24
25
|
spec.add_dependency 'ndr_support', '>= 5.3.2', '< 6'
|
|
25
26
|
|
|
@@ -31,6 +32,7 @@ Gem::Specification.new do |spec|
|
|
|
31
32
|
spec.add_dependency 'nokogiri', '~> 1.8', '>= 1.8.5'
|
|
32
33
|
spec.add_dependency 'pdf-reader', '~> 2.1'
|
|
33
34
|
spec.add_dependency 'roo-xls'
|
|
35
|
+
spec.add_dependency 'seven_zip_ruby', '~> 1.2'
|
|
34
36
|
spec.add_dependency 'spreadsheet', '1.0.3'
|
|
35
37
|
|
|
36
38
|
spec.required_ruby_version = '>= 2.4'
|
data/test/file/registry_test.rb
CHANGED
|
@@ -12,7 +12,7 @@ module NdrImport
|
|
|
12
12
|
|
|
13
13
|
test 'Registry.handlers' do
|
|
14
14
|
assert_instance_of Hash, NdrImport::File::Registry.handlers
|
|
15
|
-
assert_equal %w[csv delimited doc docx nontabular pdf text txt xls xlsx xml_table zip],
|
|
15
|
+
assert_equal %w[7z csv delimited doc docx nontabular pdf text txt xls xlsx xml_table zip],
|
|
16
16
|
NdrImport::File::Registry.handlers.keys.sort
|
|
17
17
|
end
|
|
18
18
|
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
require 'test_helper'
|
|
2
|
+
require 'ndr_import/file/seven_zip'
|
|
3
|
+
|
|
4
|
+
module NdrImport
|
|
5
|
+
module File
|
|
6
|
+
# 7zip file handler tests
|
|
7
|
+
class SevenZipTestTest < ActiveSupport::TestCase
|
|
8
|
+
def setup
|
|
9
|
+
@home = SafePath.new('test_space_rw')
|
|
10
|
+
@permanent_test_files = SafePath.new('permanent_test_files')
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
test 'should reject non SafePath arguments' do
|
|
14
|
+
file_path = @home.join('imaginary.7z')
|
|
15
|
+
|
|
16
|
+
assert_raises ArgumentError do
|
|
17
|
+
NdrImport::File::SevenZip.new(file_path.to_s, nil, 'unzip_path' => @home.to_s)
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
assert_raises ArgumentError do
|
|
21
|
+
NdrImport::File::SevenZip.new(file_path.to_s, nil, 'unzip_path' => @home)
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
assert_raises ArgumentError do
|
|
25
|
+
NdrImport::File::SevenZip.new(file_path, nil, 'unzip_path' => @home.to_s)
|
|
26
|
+
end
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
test 'should read 7zip file with correct password' do
|
|
30
|
+
options = { 'password' => 'FortuneCookie', 'unzip_path' => @home }
|
|
31
|
+
file_path = @permanent_test_files.join('normal.7z')
|
|
32
|
+
|
|
33
|
+
handler = NdrImport::File::SevenZip.new(file_path, nil, options)
|
|
34
|
+
handler.files.all? do |filename|
|
|
35
|
+
assert_instance_of SafePath, filename
|
|
36
|
+
end
|
|
37
|
+
files = handler.files.to_a
|
|
38
|
+
assert_equal 'normal_pipe.csv', ::File.basename(files[0])
|
|
39
|
+
assert_equal 'normal_thorn.csv', ::File.basename(files[1])
|
|
40
|
+
|
|
41
|
+
exception = assert_raises RuntimeError do
|
|
42
|
+
handler.tables
|
|
43
|
+
end
|
|
44
|
+
assert_equal 'SevenZip#tables should never be called', exception.message
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
test 'should not read 7zip file with incorrect password' do
|
|
48
|
+
options = { 'password' => 'WrongPassword', 'unzip_path' => @home }
|
|
49
|
+
file_path = @permanent_test_files.join('normal.7z')
|
|
50
|
+
|
|
51
|
+
handler = NdrImport::File::SevenZip.new(file_path, nil, options)
|
|
52
|
+
|
|
53
|
+
assert_raises SevenZipRuby::InvalidArchive do
|
|
54
|
+
handler.files.to_a
|
|
55
|
+
end
|
|
56
|
+
end
|
|
57
|
+
end
|
|
58
|
+
end
|
|
59
|
+
end
|
data/test/mapper_test.rb
CHANGED
|
@@ -293,6 +293,17 @@ class MapperTest < ActiveSupport::TestCase
|
|
|
293
293
|
: 'RGT01'
|
|
294
294
|
YML
|
|
295
295
|
|
|
296
|
+
validates_presence_mapping = YAML.safe_load <<-YML
|
|
297
|
+
- column: column_one
|
|
298
|
+
mappings:
|
|
299
|
+
- field: field_one
|
|
300
|
+
validates:
|
|
301
|
+
presence: true
|
|
302
|
+
- column: column_two
|
|
303
|
+
mappings:
|
|
304
|
+
- field: field_two
|
|
305
|
+
YML
|
|
306
|
+
|
|
296
307
|
test 'map should return a number' do
|
|
297
308
|
assert_equal '1', TestMapper.new.mapped_value('A', map_mapping)
|
|
298
309
|
end
|
|
@@ -365,6 +376,13 @@ class MapperTest < ActiveSupport::TestCase
|
|
|
365
376
|
assert_equal %w(RGT01 RGT01), mapped_value['hospital']
|
|
366
377
|
end
|
|
367
378
|
|
|
379
|
+
test 'should raise an error on blank mandatory field' do
|
|
380
|
+
exception = assert_raise(NdrImport::MissingFieldError) do
|
|
381
|
+
TestMapper.new.mapped_line(['', 'RGT01'], validates_presence_mapping)
|
|
382
|
+
end
|
|
383
|
+
assert_equal "field_one can't be blank", exception.message
|
|
384
|
+
end
|
|
385
|
+
|
|
368
386
|
test 'should return correct date format for date fields with daysafter' do
|
|
369
387
|
assert_equal Date.new(2012, 5, 18), TestMapper.new.mapped_value(2, daysafter_mapping)
|
|
370
388
|
assert_equal Date.new(2012, 5, 18), TestMapper.new.mapped_value('2', daysafter_mapping)
|
|
@@ -542,4 +542,48 @@ STR
|
|
|
542
542
|
table.transform(junk).to_a
|
|
543
543
|
end
|
|
544
544
|
end
|
|
545
|
+
|
|
546
|
+
def test_should_strip_captured_rawtext
|
|
547
|
+
unwanted_white_space = <<-STR.each_line
|
|
548
|
+
111
|
|
549
|
+
Trailing whitespace end_of_line
|
|
550
|
+
------
|
|
551
|
+
111
|
|
552
|
+
Leading whitespaceend_of_line
|
|
553
|
+
------
|
|
554
|
+
111
|
|
555
|
+
Leading and trailing whitespace end_of_line
|
|
556
|
+
------
|
|
557
|
+
111
|
|
558
|
+
Should not match this
|
|
559
|
+
------
|
|
560
|
+
STR
|
|
561
|
+
|
|
562
|
+
table = YAML.load <<-YML.strip_heredoc
|
|
563
|
+
--- !ruby/object:NdrImport::NonTabular::Table
|
|
564
|
+
start_line_pattern: !ruby/regexp /^111$/
|
|
565
|
+
end_in_a_record: true
|
|
566
|
+
klass: SomeTestKlass
|
|
567
|
+
columns:
|
|
568
|
+
- column: one
|
|
569
|
+
non_tabular_cell:
|
|
570
|
+
lines: 0
|
|
571
|
+
capture: !ruby/regexp /^(.*)end_of_line$/i
|
|
572
|
+
trim_rawtext: left
|
|
573
|
+
YML
|
|
574
|
+
|
|
575
|
+
enum = table.transform(unwanted_white_space)
|
|
576
|
+
assert_instance_of Enumerator, enum
|
|
577
|
+
|
|
578
|
+
output = []
|
|
579
|
+
enum.each do |klass, fields, index|
|
|
580
|
+
output << [klass, fields, index]
|
|
581
|
+
end
|
|
582
|
+
|
|
583
|
+
expected_rawtext_ouput = [{ 'one' => 'Trailing whitespace' },
|
|
584
|
+
{ 'one' => 'Leading whitespace' },
|
|
585
|
+
{ 'one' => 'Leading and trailing whitespace' },
|
|
586
|
+
{ 'one' => '' }]
|
|
587
|
+
assert_equal expected_rawtext_ouput, (output.map { |row| row[1][:rawtext] })
|
|
588
|
+
end
|
|
545
589
|
end
|
|
Binary file
|
data/test/table_test.rb
CHANGED
|
@@ -1,5 +1,9 @@
|
|
|
1
1
|
require 'test_helper'
|
|
2
2
|
|
|
3
|
+
class TestNoCoderTable < NdrImport::NonTabular::Table
|
|
4
|
+
undef_method :encode_with
|
|
5
|
+
end
|
|
6
|
+
|
|
3
7
|
# This tests the NdrImport::Table mapping class
|
|
4
8
|
class TableTest < ActiveSupport::TestCase
|
|
5
9
|
def test_deserialize_table
|
|
@@ -137,7 +141,67 @@ class TableTest < ActiveSupport::TestCase
|
|
|
137
141
|
end
|
|
138
142
|
|
|
139
143
|
def test_encode_with
|
|
140
|
-
|
|
144
|
+
table = NdrImport::Table.new
|
|
145
|
+
assert table.instance_variables.include?(:@row_index)
|
|
146
|
+
refute table.class.all_valid_options.include?('row_index')
|
|
147
|
+
assert_nil table.columns
|
|
148
|
+
|
|
149
|
+
coder = {}
|
|
150
|
+
table.encode_with(coder)
|
|
151
|
+
assert coder.key?('columns')
|
|
152
|
+
|
|
153
|
+
yaml_output = table.to_yaml
|
|
154
|
+
assert yaml_output.include?('columns')
|
|
155
|
+
refute yaml_output.include?('row_index')
|
|
156
|
+
assert YAML.load(yaml_output).is_a?(NdrImport::Table)
|
|
157
|
+
end
|
|
158
|
+
|
|
159
|
+
def test_encode_with_compare
|
|
160
|
+
table_options = {
|
|
161
|
+
columns: %w[a b],
|
|
162
|
+
klass: 'SomeKlass',
|
|
163
|
+
start_line_pattern: 'TODO',
|
|
164
|
+
end_line_pattern: 'TODO'
|
|
165
|
+
}
|
|
166
|
+
no_coder_table = TestNoCoderTable.new(table_options)
|
|
167
|
+
ndr_table = NdrImport::NonTabular::Table.new(table_options)
|
|
168
|
+
|
|
169
|
+
assert no_coder_table.is_a?(NdrImport::Table)
|
|
170
|
+
assert ndr_table.is_a?(NdrImport::Table)
|
|
171
|
+
assert no_coder_table.is_a?(NdrImport::NonTabular::Table)
|
|
172
|
+
assert ndr_table.is_a?(NdrImport::NonTabular::Table)
|
|
173
|
+
|
|
174
|
+
refute no_coder_table.respond_to?(:encode_with)
|
|
175
|
+
assert ndr_table.respond_to?(:encode_with)
|
|
176
|
+
|
|
177
|
+
no_coder_table_yaml_order = get_yaml_mapping_order(no_coder_table.to_yaml)
|
|
178
|
+
ndr_table_yaml_order = get_yaml_mapping_order(ndr_table.to_yaml)
|
|
179
|
+
|
|
180
|
+
# no_coder_table_yaml_order => ["klass", "columns", "start_line_pattern", "end_line_pattern", "row_index"]
|
|
181
|
+
# ndr_table_yaml_order => ["klass", "start_line_pattern", "end_line_pattern", "columns"]
|
|
182
|
+
|
|
183
|
+
assert no_coder_table_yaml_order.include?('row_index')
|
|
184
|
+
refute ndr_table_yaml_order.include?('row_index')
|
|
185
|
+
|
|
186
|
+
refute no_coder_table_yaml_order.last == 'columns'
|
|
187
|
+
assert ndr_table_yaml_order.last == 'columns'
|
|
188
|
+
|
|
189
|
+
# test objects deserialized from yaml mappings
|
|
190
|
+
deserialized_no_coder_table_yaml = YAML.load(no_coder_table.to_yaml)
|
|
191
|
+
deserialized_ndr_table_yaml = YAML.load(ndr_table.to_yaml)
|
|
192
|
+
|
|
193
|
+
assert deserialized_no_coder_table_yaml.is_a?(NdrImport::NonTabular::Table)
|
|
194
|
+
assert deserialized_ndr_table_yaml.is_a?(NdrImport::NonTabular::Table)
|
|
195
|
+
|
|
196
|
+
assert_nil deserialized_no_coder_table_yaml.filename_pattern
|
|
197
|
+
assert_equal deserialized_no_coder_table_yaml.klass, no_coder_table.klass
|
|
198
|
+
assert_equal deserialized_no_coder_table_yaml.start_line_pattern, no_coder_table.start_line_pattern
|
|
199
|
+
assert_equal deserialized_no_coder_table_yaml.columns, no_coder_table.columns
|
|
200
|
+
|
|
201
|
+
assert_nil deserialized_ndr_table_yaml.filename_pattern
|
|
202
|
+
assert_equal deserialized_ndr_table_yaml.klass, ndr_table.klass
|
|
203
|
+
assert_equal deserialized_ndr_table_yaml.start_line_pattern, ndr_table.start_line_pattern
|
|
204
|
+
assert_equal deserialized_ndr_table_yaml.columns, ndr_table.columns
|
|
141
205
|
end
|
|
142
206
|
|
|
143
207
|
def test_skip_footer_lines
|
|
@@ -407,4 +471,10 @@ YML
|
|
|
407
471
|
]
|
|
408
472
|
}
|
|
409
473
|
end
|
|
474
|
+
|
|
475
|
+
def get_yaml_mapping_order(yaml_mapping)
|
|
476
|
+
yaml_mapping.split("\n").
|
|
477
|
+
delete_if { |line| /-+/.match(line) }.
|
|
478
|
+
map { |line| /(.*):/.match(line)[1].to_s }
|
|
479
|
+
end
|
|
410
480
|
end
|
metadata
CHANGED
|
@@ -1,15 +1,29 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: ndr_import
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version:
|
|
4
|
+
version: 8.0.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- NCRS Development Team
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2018-11-
|
|
11
|
+
date: 2018-11-26 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
|
+
- !ruby/object:Gem::Dependency
|
|
14
|
+
name: activemodel
|
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
|
16
|
+
requirements:
|
|
17
|
+
- - ">="
|
|
18
|
+
- !ruby/object:Gem::Version
|
|
19
|
+
version: '0'
|
|
20
|
+
type: :runtime
|
|
21
|
+
prerelease: false
|
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
23
|
+
requirements:
|
|
24
|
+
- - ">="
|
|
25
|
+
- !ruby/object:Gem::Version
|
|
26
|
+
version: '0'
|
|
13
27
|
- !ruby/object:Gem::Dependency
|
|
14
28
|
name: activesupport
|
|
15
29
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -160,6 +174,20 @@ dependencies:
|
|
|
160
174
|
- - ">="
|
|
161
175
|
- !ruby/object:Gem::Version
|
|
162
176
|
version: '0'
|
|
177
|
+
- !ruby/object:Gem::Dependency
|
|
178
|
+
name: seven_zip_ruby
|
|
179
|
+
requirement: !ruby/object:Gem::Requirement
|
|
180
|
+
requirements:
|
|
181
|
+
- - "~>"
|
|
182
|
+
- !ruby/object:Gem::Version
|
|
183
|
+
version: '1.2'
|
|
184
|
+
type: :runtime
|
|
185
|
+
prerelease: false
|
|
186
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
187
|
+
requirements:
|
|
188
|
+
- - "~>"
|
|
189
|
+
- !ruby/object:Gem::Version
|
|
190
|
+
version: '1.2'
|
|
163
191
|
- !ruby/object:Gem::Dependency
|
|
164
192
|
name: spreadsheet
|
|
165
193
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -329,6 +357,7 @@ files:
|
|
|
329
357
|
- ".gitignore"
|
|
330
358
|
- ".hound.yml"
|
|
331
359
|
- ".rubocop.yml"
|
|
360
|
+
- CHANGELOG.md
|
|
332
361
|
- CODE_OF_CONDUCT.md
|
|
333
362
|
- Gemfile
|
|
334
363
|
- Guardfile
|
|
@@ -349,6 +378,7 @@ files:
|
|
|
349
378
|
- lib/ndr_import/file/excel.rb
|
|
350
379
|
- lib/ndr_import/file/pdf.rb
|
|
351
380
|
- lib/ndr_import/file/registry.rb
|
|
381
|
+
- lib/ndr_import/file/seven_zip.rb
|
|
352
382
|
- lib/ndr_import/file/text.rb
|
|
353
383
|
- lib/ndr_import/file/unregistered_filetype.rb
|
|
354
384
|
- lib/ndr_import/file/word.rb
|
|
@@ -363,6 +393,7 @@ files:
|
|
|
363
393
|
- lib/ndr_import/helpers/file/zip.rb
|
|
364
394
|
- lib/ndr_import/mapper.rb
|
|
365
395
|
- lib/ndr_import/mapping_error.rb
|
|
396
|
+
- lib/ndr_import/missing_field_error.rb
|
|
366
397
|
- lib/ndr_import/non_tabular/column_mapping.rb
|
|
367
398
|
- lib/ndr_import/non_tabular/line.rb
|
|
368
399
|
- lib/ndr_import/non_tabular/mapping.rb
|
|
@@ -381,6 +412,7 @@ files:
|
|
|
381
412
|
- test/file/excel_test.rb
|
|
382
413
|
- test/file/pdf_test.rb
|
|
383
414
|
- test/file/registry_test.rb
|
|
415
|
+
- test/file/seven_zip_test.rb
|
|
384
416
|
- test/file/text_test.rb
|
|
385
417
|
- test/file/word_test.rb
|
|
386
418
|
- test/file/xml_test.rb
|
|
@@ -417,6 +449,7 @@ files:
|
|
|
417
449
|
- test/resources/malformed.csv
|
|
418
450
|
- test/resources/malformed.xml
|
|
419
451
|
- test/resources/malformed_pipe.csv
|
|
452
|
+
- test/resources/normal.7z
|
|
420
453
|
- test/resources/normal.csv
|
|
421
454
|
- test/resources/normal.csv.zip
|
|
422
455
|
- test/resources/normal_pipe.csv
|
|
@@ -476,6 +509,7 @@ test_files:
|
|
|
476
509
|
- test/file/excel_test.rb
|
|
477
510
|
- test/file/pdf_test.rb
|
|
478
511
|
- test/file/registry_test.rb
|
|
512
|
+
- test/file/seven_zip_test.rb
|
|
479
513
|
- test/file/text_test.rb
|
|
480
514
|
- test/file/word_test.rb
|
|
481
515
|
- test/file/xml_test.rb
|
|
@@ -512,6 +546,7 @@ test_files:
|
|
|
512
546
|
- test/resources/malformed.csv
|
|
513
547
|
- test/resources/malformed.xml
|
|
514
548
|
- test/resources/malformed_pipe.csv
|
|
549
|
+
- test/resources/normal.7z
|
|
515
550
|
- test/resources/normal.csv
|
|
516
551
|
- test/resources/normal.csv.zip
|
|
517
552
|
- test/resources/normal_pipe.csv
|