ndr_import 11.2.1 → 11.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -1
- data/lib/ndr_import/file/vcf.rb +26 -1
- data/lib/ndr_import/helpers/file/delimited.rb +2 -2
- data/lib/ndr_import/mapper.rb +27 -2
- data/lib/ndr_import/table.rb +11 -1
- data/lib/ndr_import/universal_importer_helper.rb +13 -10
- data/lib/ndr_import/vcf/table.rb +1 -1
- data/lib/ndr_import/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3dd3ec74b568c0429492ded5083edf0eaf31a0a6c7849945f7fa69dbe0e43a49
|
4
|
+
data.tar.gz: 0f845a90d21901350122a6d41437c8ebe53e1eaa49694af76b38811210280804
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 26668128f4fa8a7165d50ee39e729f5d5a63b704c3db41a5cab1be8df02ea38bdeb7cd69494ef62656c22c7b8bd4aea2c94ee839019accff59a28f2b1452065c
|
7
|
+
data.tar.gz: 6b09bbb5ae408a1ca9d79e74e6c1de3f2b7a1d3597e67a02207e20dc83e18a24bdd8e6f2117f56d781cff855dd14495a686ecfb4589dc5af4388ffc13a8f3d03
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,15 @@
|
|
1
1
|
## [Unreleased]
|
2
|
-
* no unreleased changes
|
2
|
+
* no unreleased changes
|
3
|
+
|
4
|
+
## 11.3.0 / 2025-02-11
|
5
|
+
### Fixed
|
6
|
+
* Fix CSV parsing bug
|
7
|
+
|
8
|
+
### Added
|
9
|
+
* Column zipping functionality *
|
10
|
+
* Capturing Column name *
|
11
|
+
* Regular expression column names *
|
12
|
+
* VCF file/table metadata storage *
|
3
13
|
|
4
14
|
## 11.2.1 / 2024-11-18
|
5
15
|
### Fixed
|
data/lib/ndr_import/file/vcf.rb
CHANGED
@@ -8,13 +8,38 @@ module NdrImport
|
|
8
8
|
module File
|
9
9
|
# This class is a vcf file handler that returns a single table.
|
10
10
|
class Vcf < Base
|
11
|
+
attr_accessor :vcf_file_metadata
|
12
|
+
|
13
|
+
def initialize(*)
|
14
|
+
super
|
15
|
+
|
16
|
+
@vcf_file_metadata = @options['vcf_file_metadata']
|
17
|
+
assign_file_metadata
|
18
|
+
end
|
19
|
+
|
11
20
|
private
|
12
21
|
|
22
|
+
def assign_file_metadata
|
23
|
+
return unless vcf_file_metadata.is_a?(Hash)
|
24
|
+
|
25
|
+
file_metadata_hash = {}
|
26
|
+
|
27
|
+
::File.read(@filename).each_line do |line|
|
28
|
+
next unless line.match?(/^##/)
|
29
|
+
|
30
|
+
vcf_file_metadata.each do |attribute, pattern|
|
31
|
+
file_metadata_hash[attribute] = line.match(pattern)[1].presence if line.match? pattern
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
self.file_metadata = file_metadata_hash
|
36
|
+
end
|
37
|
+
|
13
38
|
def rows(&block)
|
14
39
|
return enum_for(:rows) unless block
|
15
40
|
|
16
41
|
::File.read(@filename).each_line do |line|
|
17
|
-
next if line
|
42
|
+
next if line.match?(/^##/)
|
18
43
|
|
19
44
|
yield BioVcf::VcfLine.parse(line)
|
20
45
|
end
|
@@ -28,7 +28,7 @@ module NdrImport
|
|
28
28
|
end
|
29
29
|
|
30
30
|
# Iterate through the file line by line, yielding each one in turn.
|
31
|
-
def delimited_rows(path, col_sep = nil, liberal = false)
|
31
|
+
def delimited_rows(path, col_sep = nil, liberal = false) # rubocop:disable Style/OptionalBooleanParameter
|
32
32
|
return enum_for(:delimited_rows, path, col_sep, liberal) unless block_given?
|
33
33
|
|
34
34
|
safe_path = SafeFile.safepath_to_string(path)
|
@@ -36,7 +36,7 @@ module NdrImport
|
|
36
36
|
|
37
37
|
# By now, we know `options` should let us read the whole
|
38
38
|
# file successfully; if there are problems, we should crash.
|
39
|
-
CSV.foreach(safe_path, options
|
39
|
+
CSV.foreach(safe_path, options[:mode], **options.except(:mode)) do |line|
|
40
40
|
yield line.map(&:to_s)
|
41
41
|
end
|
42
42
|
end
|
data/lib/ndr_import/mapper.rb
CHANGED
@@ -30,6 +30,9 @@ module NdrImport::Mapper
|
|
30
30
|
STANDARD_MAPPING = 'standard_mapping'.freeze
|
31
31
|
UNPACK_PATTERN = 'unpack_pattern'.freeze
|
32
32
|
VALIDATES = 'validates'.freeze
|
33
|
+
ZIP_ORDER = 'zip_order'.freeze
|
34
|
+
SPLIT_CHAR = 'split_char'.freeze
|
35
|
+
MAP_COLUMNAME_TO = 'map_columname_to'.freeze
|
33
36
|
end
|
34
37
|
|
35
38
|
private
|
@@ -118,10 +121,18 @@ module NdrImport::Mapper
|
|
118
121
|
# Store the raw column value
|
119
122
|
rawtext[rawtext_column_name] = raw_value
|
120
123
|
|
124
|
+
# If configured, store the column name in the given field
|
125
|
+
if column_mapping[Strings::MAP_COLUMNAME_TO].present?
|
126
|
+
data[column_mapping[Strings::MAP_COLUMNAME_TO]] ||= {}
|
127
|
+
data[column_mapping[Strings::MAP_COLUMNAME_TO]][:values] = [column_mapping['column']]
|
128
|
+
rawtext[column_mapping[Strings::MAP_COLUMNAME_TO]] = column_mapping['column']
|
129
|
+
end
|
130
|
+
|
121
131
|
next unless column_mapping.key?(Strings::MAPPINGS)
|
132
|
+
|
122
133
|
column_mapping[Strings::MAPPINGS].each do |field_mapping|
|
123
134
|
# create a duplicate of the raw value we can manipulate
|
124
|
-
original_value = raw_value
|
135
|
+
original_value = raw_value&.dup
|
125
136
|
|
126
137
|
replace_before_mapping(original_value, field_mapping)
|
127
138
|
value = mapped_value(original_value, field_mapping)
|
@@ -137,7 +148,8 @@ module NdrImport::Mapper
|
|
137
148
|
|
138
149
|
data[field] ||= {}
|
139
150
|
data[field][:values] ||= [] # "better" values come earlier
|
140
|
-
data[field][:
|
151
|
+
data[field][:zipped_values] ||= []
|
152
|
+
data[field][:compact] = true unless data[field].key?(:compact)
|
141
153
|
|
142
154
|
if field_mapping[Strings::ORDER]
|
143
155
|
data[field][:join] ||= field_mapping[Strings::JOIN]
|
@@ -148,6 +160,9 @@ module NdrImport::Mapper
|
|
148
160
|
data[field][:values][field_mapping[Strings::ORDER] - 1] = value
|
149
161
|
elsif field_mapping[Strings::PRIORITY]
|
150
162
|
data[field][:values][field_mapping[Strings::PRIORITY]] = value
|
163
|
+
elsif field_zippable?(field_mapping, data[field])
|
164
|
+
data[field][:split_char] ||= field_mapping[Strings::SPLIT_CHAR]
|
165
|
+
data[field][:zipped_values][field_mapping[Strings::ZIP_ORDER] - 1] = value
|
151
166
|
else
|
152
167
|
data[field][:values].unshift(value) # new "best" value
|
153
168
|
end
|
@@ -160,6 +175,7 @@ module NdrImport::Mapper
|
|
160
175
|
# and one to many, for cross-populating
|
161
176
|
data.each do |field, field_data|
|
162
177
|
values = field_data[:values]
|
178
|
+
zipped_values = field_data[:zipped_values]
|
163
179
|
|
164
180
|
attributes[field] =
|
165
181
|
if field_data.key?(:join)
|
@@ -167,6 +183,9 @@ module NdrImport::Mapper
|
|
167
183
|
values = values.map(&:presence)
|
168
184
|
values.compact! if field_data[:compact]
|
169
185
|
values.join(field_data[:join])
|
186
|
+
elsif zipped_values.present?
|
187
|
+
values = zipped_values.map { |value| value.split(field_data[:split_char]) }
|
188
|
+
values.first.zip(*values[1..])
|
170
189
|
else
|
171
190
|
values.detect(&:present?)
|
172
191
|
end
|
@@ -176,6 +195,12 @@ module NdrImport::Mapper
|
|
176
195
|
attributes
|
177
196
|
end
|
178
197
|
|
198
|
+
def field_zippable?(field_mapping, data_field)
|
199
|
+
return false if field_mapping[Strings::ZIP_ORDER].blank?
|
200
|
+
|
201
|
+
data_field[:split_char].present? || field_mapping[Strings::SPLIT_CHAR].present?
|
202
|
+
end
|
203
|
+
|
179
204
|
def mapped_value(original_value, field_mapping)
|
180
205
|
if field_mapping.include?(Strings::FORMAT)
|
181
206
|
begin
|
data/lib/ndr_import/table.rb
CHANGED
@@ -66,6 +66,7 @@ module NdrImport
|
|
66
66
|
return enum_for(:process_line, line) unless block
|
67
67
|
|
68
68
|
if @row_index < header_lines
|
69
|
+
mutate_regexp_columns(line)
|
69
70
|
consume_header_line(line, @columns)
|
70
71
|
else
|
71
72
|
transform_line(line, @row_index, &block)
|
@@ -79,6 +80,15 @@ module NdrImport
|
|
79
80
|
@notifier.try(:processed, @row_index)
|
80
81
|
end
|
81
82
|
|
83
|
+
# Update 'column' values expressed as a regular expression
|
84
|
+
def mutate_regexp_columns(line)
|
85
|
+
@columns.each_with_index do |column, index|
|
86
|
+
next unless column['column'].is_a? Regexp
|
87
|
+
|
88
|
+
column['column'] = line[index] if line[index].match? column['column']
|
89
|
+
end
|
90
|
+
end
|
91
|
+
|
82
92
|
# This method transforms an incoming line of data by applying each of the klass masked
|
83
93
|
# mappings to the line and yielding the klass and fields for each mapped klass.
|
84
94
|
def transform_line(line, index)
|
@@ -227,7 +237,7 @@ module NdrImport
|
|
227
237
|
|
228
238
|
# returns the column names as we expect to receive them
|
229
239
|
def column_names(column_mappings)
|
230
|
-
column_mappings.map { |c| (c['column'] || c['standard_mapping']).downcase }
|
240
|
+
column_mappings.map { |c| (c['column'] || c['standard_mapping']).try(:downcase) }
|
231
241
|
end
|
232
242
|
|
233
243
|
# If specified in the mapping, stop transforming data at a given index (column)
|
@@ -51,22 +51,25 @@ module NdrImport
|
|
51
51
|
NdrImport::File::Registry.files(source_file, 'unzip_path' => unzip_path).each do |filename|
|
52
52
|
# now at the individual file level, can we find the table mapping?
|
53
53
|
table_mapping = get_table_mapping(filename, nil)
|
54
|
-
|
55
|
-
options = { 'unzip_path' => unzip_path,
|
56
|
-
'col_sep' => table_mapping.try(:delimiter),
|
57
|
-
'file_password' => table_mapping.try(:file_password),
|
58
|
-
'liberal_parsing' => table_mapping.try(:liberal_parsing),
|
59
|
-
'xml_record_xpath' => table_mapping.try(:xml_record_xpath),
|
60
|
-
'slurp' => table_mapping.try(:slurp),
|
61
|
-
'yield_xml_record' => table_mapping.try(:yield_xml_record),
|
62
|
-
'pattern_match_record_xpath' => table_mapping.try(:pattern_match_record_xpath),
|
63
|
-
'xml_file_metadata' => table_mapping.try(:xml_file_metadata) }
|
54
|
+
options = table_options_from(table_mapping).merge('unzip_path' => unzip_path)
|
64
55
|
|
65
56
|
tables = NdrImport::File::Registry.tables(filename, table_mapping.try(:format), options)
|
66
57
|
yield_tables_and_their_content(filename, tables, &block)
|
67
58
|
end
|
68
59
|
end
|
69
60
|
|
61
|
+
def table_options_from(table_mapping)
|
62
|
+
{ 'col_sep' => table_mapping.try(:delimiter),
|
63
|
+
'file_password' => table_mapping.try(:file_password),
|
64
|
+
'liberal_parsing' => table_mapping.try(:liberal_parsing),
|
65
|
+
'xml_record_xpath' => table_mapping.try(:xml_record_xpath),
|
66
|
+
'slurp' => table_mapping.try(:slurp),
|
67
|
+
'yield_xml_record' => table_mapping.try(:yield_xml_record),
|
68
|
+
'pattern_match_record_xpath' => table_mapping.try(:pattern_match_record_xpath),
|
69
|
+
'xml_file_metadata' => table_mapping.try(:xml_file_metadata),
|
70
|
+
'vcf_file_metadata' => table_mapping.try(:vcf_file_metadata) }
|
71
|
+
end
|
72
|
+
|
70
73
|
# This method does the table row yielding for the extract method, setting the notifier
|
71
74
|
# so that we can monitor progress
|
72
75
|
def yield_tables_and_their_content(filename, tables, &block)
|
data/lib/ndr_import/vcf/table.rb
CHANGED
@@ -6,7 +6,7 @@ module NdrImport
|
|
6
6
|
# All other Table logic is inherited from `NdrImport::Table`
|
7
7
|
class Table < ::NdrImport::Table
|
8
8
|
def self.all_valid_options
|
9
|
-
super - %w[delimiter header_lines footer_lines]
|
9
|
+
super - %w[delimiter header_lines footer_lines] + %w[vcf_file_metadata]
|
10
10
|
end
|
11
11
|
|
12
12
|
def header_lines
|
data/lib/ndr_import/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ndr_import
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 11.
|
4
|
+
version: 11.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- NCRS Development Team
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2025-02-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activemodel
|
@@ -479,7 +479,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
479
479
|
- !ruby/object:Gem::Version
|
480
480
|
version: '0'
|
481
481
|
requirements: []
|
482
|
-
rubygems_version: 3.3
|
482
|
+
rubygems_version: 3.2.3
|
483
483
|
signing_key:
|
484
484
|
specification_version: 4
|
485
485
|
summary: NDR Import
|