ndr_import 11.2.1 → 11.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 77f2f2adb4de01a7dca5f2e004976a0f96e8d486f4bf573e24543c1a27b894dd
4
- data.tar.gz: eb83c8b95408ee9761b513c9e9309a6b565fd937e7cfba585088bd3d7e7b2573
3
+ metadata.gz: 3dd3ec74b568c0429492ded5083edf0eaf31a0a6c7849945f7fa69dbe0e43a49
4
+ data.tar.gz: 0f845a90d21901350122a6d41437c8ebe53e1eaa49694af76b38811210280804
5
5
  SHA512:
6
- metadata.gz: 6dcfa45678b041cbe5c298b43c935aa2dacc47da9421cfbc6d6f8e646049f89326fb7a7a41e6e5ea283a1114ce367e7cff92d77dd4b83db56775e1d69a93fc6e
7
- data.tar.gz: 2443a1c7b4ab1f1c8fcc105cf7c3d29b1885f996e41ad55b65f0ea74d64fbe43042d50a2bb3943a5563374aa8bdc0dd3d47e066226f010922f1fd7e69c167641
6
+ metadata.gz: 26668128f4fa8a7165d50ee39e729f5d5a63b704c3db41a5cab1be8df02ea38bdeb7cd69494ef62656c22c7b8bd4aea2c94ee839019accff59a28f2b1452065c
7
+ data.tar.gz: 6b09bbb5ae408a1ca9d79e74e6c1de3f2b7a1d3597e67a02207e20dc83e18a24bdd8e6f2117f56d781cff855dd14495a686ecfb4589dc5af4388ffc13a8f3d03
data/CHANGELOG.md CHANGED
@@ -1,5 +1,15 @@
1
1
  ## [Unreleased]
2
- * no unreleased changes *
2
+ * no unreleased changes
3
+
4
+ ## 11.3.0 / 2025-02-11
5
+ ### Fixed
6
+ * Fix CSV parsing bug
7
+
8
+ ### Added
9
+ * Column zipping functionality
10
+ * Capturing Column name
11
+ * Regular expression column names
12
+ * VCF file/table metadata storage
3
13
 
4
14
  ## 11.2.1 / 2024-11-18
5
15
  ### Fixed
@@ -8,13 +8,38 @@ module NdrImport
8
8
  module File
9
9
  # This class is a vcf file handler that returns a single table.
10
10
  class Vcf < Base
11
+ attr_accessor :vcf_file_metadata
12
+
13
+ def initialize(*)
14
+ super
15
+
16
+ @vcf_file_metadata = @options['vcf_file_metadata']
17
+ assign_file_metadata
18
+ end
19
+
11
20
  private
12
21
 
22
+ def assign_file_metadata
23
+ return unless vcf_file_metadata.is_a?(Hash)
24
+
25
+ file_metadata_hash = {}
26
+
27
+ ::File.read(@filename).each_line do |line|
28
+ next unless line.match?(/^##/)
29
+
30
+ vcf_file_metadata.each do |attribute, pattern|
31
+ file_metadata_hash[attribute] = line.match(pattern)[1].presence if line.match? pattern
32
+ end
33
+ end
34
+
35
+ self.file_metadata = file_metadata_hash
36
+ end
37
+
13
38
  def rows(&block)
14
39
  return enum_for(:rows) unless block
15
40
 
16
41
  ::File.read(@filename).each_line do |line|
17
- next if line =~ /^##/
42
+ next if line.match?(/^##/)
18
43
 
19
44
  yield BioVcf::VcfLine.parse(line)
20
45
  end
@@ -28,7 +28,7 @@ module NdrImport
28
28
  end
29
29
 
30
30
  # Iterate through the file line by line, yielding each one in turn.
31
- def delimited_rows(path, col_sep = nil, liberal = false)
31
+ def delimited_rows(path, col_sep = nil, liberal = false) # rubocop:disable Style/OptionalBooleanParameter
32
32
  return enum_for(:delimited_rows, path, col_sep, liberal) unless block_given?
33
33
 
34
34
  safe_path = SafeFile.safepath_to_string(path)
@@ -36,7 +36,7 @@ module NdrImport
36
36
 
37
37
  # By now, we know `options` should let us read the whole
38
38
  # file successfully; if there are problems, we should crash.
39
- CSV.foreach(safe_path, options.delete(:mode), **options) do |line|
39
+ CSV.foreach(safe_path, options[:mode], **options.except(:mode)) do |line|
40
40
  yield line.map(&:to_s)
41
41
  end
42
42
  end
@@ -30,6 +30,9 @@ module NdrImport::Mapper
30
30
  STANDARD_MAPPING = 'standard_mapping'.freeze
31
31
  UNPACK_PATTERN = 'unpack_pattern'.freeze
32
32
  VALIDATES = 'validates'.freeze
33
+ ZIP_ORDER = 'zip_order'.freeze
34
+ SPLIT_CHAR = 'split_char'.freeze
35
+ MAP_COLUMNAME_TO = 'map_columname_to'.freeze
33
36
  end
34
37
 
35
38
  private
@@ -118,10 +121,18 @@ module NdrImport::Mapper
118
121
  # Store the raw column value
119
122
  rawtext[rawtext_column_name] = raw_value
120
123
 
124
+ # If configured, store the column name in the given field
125
+ if column_mapping[Strings::MAP_COLUMNAME_TO].present?
126
+ data[column_mapping[Strings::MAP_COLUMNAME_TO]] ||= {}
127
+ data[column_mapping[Strings::MAP_COLUMNAME_TO]][:values] = [column_mapping['column']]
128
+ rawtext[column_mapping[Strings::MAP_COLUMNAME_TO]] = column_mapping['column']
129
+ end
130
+
121
131
  next unless column_mapping.key?(Strings::MAPPINGS)
132
+
122
133
  column_mapping[Strings::MAPPINGS].each do |field_mapping|
123
134
  # create a duplicate of the raw value we can manipulate
124
- original_value = raw_value ? raw_value.dup : nil
135
+ original_value = raw_value&.dup
125
136
 
126
137
  replace_before_mapping(original_value, field_mapping)
127
138
  value = mapped_value(original_value, field_mapping)
@@ -137,7 +148,8 @@ module NdrImport::Mapper
137
148
 
138
149
  data[field] ||= {}
139
150
  data[field][:values] ||= [] # "better" values come earlier
140
- data[field][:compact] = true unless data[field].key?(:compact)
151
+ data[field][:zipped_values] ||= []
152
+ data[field][:compact] = true unless data[field].key?(:compact)
141
153
 
142
154
  if field_mapping[Strings::ORDER]
143
155
  data[field][:join] ||= field_mapping[Strings::JOIN]
@@ -148,6 +160,9 @@ module NdrImport::Mapper
148
160
  data[field][:values][field_mapping[Strings::ORDER] - 1] = value
149
161
  elsif field_mapping[Strings::PRIORITY]
150
162
  data[field][:values][field_mapping[Strings::PRIORITY]] = value
163
+ elsif field_zippable?(field_mapping, data[field])
164
+ data[field][:split_char] ||= field_mapping[Strings::SPLIT_CHAR]
165
+ data[field][:zipped_values][field_mapping[Strings::ZIP_ORDER] - 1] = value
151
166
  else
152
167
  data[field][:values].unshift(value) # new "best" value
153
168
  end
@@ -160,6 +175,7 @@ module NdrImport::Mapper
160
175
  # and one to many, for cross-populating
161
176
  data.each do |field, field_data|
162
177
  values = field_data[:values]
178
+ zipped_values = field_data[:zipped_values]
163
179
 
164
180
  attributes[field] =
165
181
  if field_data.key?(:join)
@@ -167,6 +183,9 @@ module NdrImport::Mapper
167
183
  values = values.map(&:presence)
168
184
  values.compact! if field_data[:compact]
169
185
  values.join(field_data[:join])
186
+ elsif zipped_values.present?
187
+ values = zipped_values.map { |value| value.split(field_data[:split_char]) }
188
+ values.first.zip(*values[1..])
170
189
  else
171
190
  values.detect(&:present?)
172
191
  end
@@ -176,6 +195,12 @@ module NdrImport::Mapper
176
195
  attributes
177
196
  end
178
197
 
198
+ def field_zippable?(field_mapping, data_field)
199
+ return false if field_mapping[Strings::ZIP_ORDER].blank?
200
+
201
+ data_field[:split_char].present? || field_mapping[Strings::SPLIT_CHAR].present?
202
+ end
203
+
179
204
  def mapped_value(original_value, field_mapping)
180
205
  if field_mapping.include?(Strings::FORMAT)
181
206
  begin
@@ -66,6 +66,7 @@ module NdrImport
66
66
  return enum_for(:process_line, line) unless block
67
67
 
68
68
  if @row_index < header_lines
69
+ mutate_regexp_columns(line)
69
70
  consume_header_line(line, @columns)
70
71
  else
71
72
  transform_line(line, @row_index, &block)
@@ -79,6 +80,15 @@ module NdrImport
79
80
  @notifier.try(:processed, @row_index)
80
81
  end
81
82
 
83
+ # Update 'column' values expressed as a regular expression
84
+ def mutate_regexp_columns(line)
85
+ @columns.each_with_index do |column, index|
86
+ next unless column['column'].is_a? Regexp
87
+
88
+ column['column'] = line[index] if line[index].match? column['column']
89
+ end
90
+ end
91
+
82
92
  # This method transforms an incoming line of data by applying each of the klass masked
83
93
  # mappings to the line and yielding the klass and fields for each mapped klass.
84
94
  def transform_line(line, index)
@@ -227,7 +237,7 @@ module NdrImport
227
237
 
228
238
  # returns the column names as we expect to receive them
229
239
  def column_names(column_mappings)
230
- column_mappings.map { |c| (c['column'] || c['standard_mapping']).downcase }
240
+ column_mappings.map { |c| (c['column'] || c['standard_mapping']).try(:downcase) }
231
241
  end
232
242
 
233
243
  # If specified in the mapping, stop transforming data at a given index (column)
@@ -51,22 +51,25 @@ module NdrImport
51
51
  NdrImport::File::Registry.files(source_file, 'unzip_path' => unzip_path).each do |filename|
52
52
  # now at the individual file level, can we find the table mapping?
53
53
  table_mapping = get_table_mapping(filename, nil)
54
-
55
- options = { 'unzip_path' => unzip_path,
56
- 'col_sep' => table_mapping.try(:delimiter),
57
- 'file_password' => table_mapping.try(:file_password),
58
- 'liberal_parsing' => table_mapping.try(:liberal_parsing),
59
- 'xml_record_xpath' => table_mapping.try(:xml_record_xpath),
60
- 'slurp' => table_mapping.try(:slurp),
61
- 'yield_xml_record' => table_mapping.try(:yield_xml_record),
62
- 'pattern_match_record_xpath' => table_mapping.try(:pattern_match_record_xpath),
63
- 'xml_file_metadata' => table_mapping.try(:xml_file_metadata) }
54
+ options = table_options_from(table_mapping).merge('unzip_path' => unzip_path)
64
55
 
65
56
  tables = NdrImport::File::Registry.tables(filename, table_mapping.try(:format), options)
66
57
  yield_tables_and_their_content(filename, tables, &block)
67
58
  end
68
59
  end
69
60
 
61
+ def table_options_from(table_mapping)
62
+ { 'col_sep' => table_mapping.try(:delimiter),
63
+ 'file_password' => table_mapping.try(:file_password),
64
+ 'liberal_parsing' => table_mapping.try(:liberal_parsing),
65
+ 'xml_record_xpath' => table_mapping.try(:xml_record_xpath),
66
+ 'slurp' => table_mapping.try(:slurp),
67
+ 'yield_xml_record' => table_mapping.try(:yield_xml_record),
68
+ 'pattern_match_record_xpath' => table_mapping.try(:pattern_match_record_xpath),
69
+ 'xml_file_metadata' => table_mapping.try(:xml_file_metadata),
70
+ 'vcf_file_metadata' => table_mapping.try(:vcf_file_metadata) }
71
+ end
72
+
70
73
  # This method does the table row yielding for the extract method, setting the notifier
71
74
  # so that we can monitor progress
72
75
  def yield_tables_and_their_content(filename, tables, &block)
@@ -6,7 +6,7 @@ module NdrImport
6
6
  # All other Table logic is inherited from `NdrImport::Table`
7
7
  class Table < ::NdrImport::Table
8
8
  def self.all_valid_options
9
- super - %w[delimiter header_lines footer_lines]
9
+ super - %w[delimiter header_lines footer_lines] + %w[vcf_file_metadata]
10
10
  end
11
11
 
12
12
  def header_lines
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
  # This stores the current version of the NdrImport gem
3
3
  module NdrImport
4
- VERSION = '11.2.1'
4
+ VERSION = '11.3.0'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ndr_import
3
3
  version: !ruby/object:Gem::Version
4
- version: 11.2.1
4
+ version: 11.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - NCRS Development Team
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-11-18 00:00:00.000000000 Z
11
+ date: 2025-02-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activemodel
@@ -479,7 +479,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
479
479
  - !ruby/object:Gem::Version
480
480
  version: '0'
481
481
  requirements: []
482
- rubygems_version: 3.3.27
482
+ rubygems_version: 3.2.3
483
483
  signing_key:
484
484
  specification_version: 4
485
485
  summary: NDR Import