ndr_import 10.1.3 → 10.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: adf4468c5093398277008f08e3b4ff01619b0b39152dd7a604cccc863e7229f2
4
- data.tar.gz: aa3225ff8384c7bc42f33251c2943060382b9c27dae6243fbe70c18796597fbf
3
+ metadata.gz: aaca9cf96e7433b7889c004f769997288ea1cec77322754e965b97a545ffb0ab
4
+ data.tar.gz: 4a1e456f8766f2ea3422b0b712b2ca4c420873248e66fc77b1db8559470a1247
5
5
  SHA512:
6
- metadata.gz: d437ffcdf6733d4d62859f3450f47a7067bc7155c93404c14b883b5b2cb45dd661d555146c6317d87be640ad7033b7d214c5be6842cd32009e8462a398105249
7
- data.tar.gz: f7d10bf3ba7c1e16c75cb05585ff70d88dd56b00305b389752b5d5dec329d88f313454494307025b53cb02e1164c10a4328286b8c55220cdf506f5cf7bf4e094
6
+ metadata.gz: '081ce6d8bce5dde04dca97a68057897a093bc90cca23f7336415330d92dcc599b96b606e9626358731c6638f0c3c1e41b7e763b2c03488cb76eeaa5c5c7a2cd8'
7
+ data.tar.gz: 802ba8017a16cc843196004854c3a82d161e76bdd4c2f61a5f93e34018dae9090212dd67c4e503a46c5ee78d6ed2da2d7e6f17192e62e4abefbd2382389d1cce
data/CHANGELOG.md CHANGED
@@ -1,5 +1,9 @@
1
1
  ## [Unreleased]
2
- * No unreleased changes
2
+ *no unreleased changes*
3
+
4
+ ## 10.2.0 / 2023-05-16
5
+ * avro file support
6
+ * allow storage of `significant_mapped_fields` in `Table`
3
7
 
4
8
  ## 10.1.3 / 2022-12-08
5
9
  ### Added
data/README.md CHANGED
@@ -1,7 +1,7 @@
1
1
  # NdrImport [![Build Status](https://github.com/NHSDigital/ndr_import/workflows/Test/badge.svg)](https://github.com/NHSDigital/ndr_import/actions?query=workflow%3Atest) [![Gem Version](https://badge.fury.io/rb/ndr_import.svg)](https://rubygems.org/gems/ndr_import) [![Documentation](https://img.shields.io/badge/ndr_import-docs-blue.svg)](https://www.rubydoc.info/gems/ndr_import)
2
2
  This is the NHS Digital (NHSD) National Disease Registers (NDR) Import ETL ruby gem, providing:
3
3
 
4
- 1. file import handlers for *extracting* data from delimited files (csv, pipe, tab, thorn), JSON Lines, .xls(x) spreadsheets, .doc(x) word documents, PDF, PDF AcroForms, XML, 7-Zip and Zip files.
4
+ 1. file import handlers for *extracting* data from delimited files (csv, pipe, tab, thorn), JSON Lines, .xls(x) spreadsheets, .doc(x) word documents, PDF, PDF AcroForms, XML, 7-Zip, Zip and Avro files.
5
5
  2. table mappers for *transforming* tabular and non-tabular data into key value pairs grouped by a common "klass".
6
6
 
7
7
  ## Installation
@@ -0,0 +1,49 @@
1
+ require 'ndr_import/table'
2
+
3
+ module NdrImport
4
+ module Avro
5
+ # Syntactic sugar to ensure `header_lines` and `footer_lines` are 1 and 0 respectively.
6
+ # All other Table logic is inherited from `NdrImport::Table`
7
+ class Table < ::NdrImport::Table
8
+ # Scaffold an `NdrImport::Avro::Table` instance from avro schema file
9
+ def self.from_schema(safe_path)
10
+ raise SecurityError, "#{safe_path} is not a SafePath" unless safe_path.is_a? SafePath
11
+
12
+ table_columns = columns_from(::Avro::Schema.parse(::File.open(safe_path)))
13
+ file_name = SafeFile.basename(safe_path).sub(/\.avsc\z/, '.avro')
14
+
15
+ new(filename_pattern: "/#{file_name}\\z/",
16
+ klass: 'ExampleKlass',
17
+ columns: table_columns)
18
+ end
19
+
20
+ def self.all_valid_options
21
+ super - %w[delimiter header_lines footer_lines]
22
+ end
23
+
24
+ def header_lines
25
+ 1
26
+ end
27
+
28
+ def footer_lines
29
+ 0
30
+ end
31
+
32
+ def self.columns_from(schema)
33
+ schema.fields.map do |field|
34
+ column = { column: field.name }
35
+ column[:mappings] = { field: field.name, daysafter: '1970-01-01' } if date_field?(field)
36
+
37
+ column
38
+ end
39
+ end
40
+
41
+ def self.date_field?(field)
42
+ field.type.schemas.any? { |schema| schema.logical_type == 'date' }
43
+ end
44
+
45
+ private_class_method :columns_from
46
+ private_class_method :date_field?
47
+ end
48
+ end
49
+ end
@@ -1,5 +1,6 @@
1
1
  require_relative 'base'
2
2
  require_relative 'acro_form'
3
+ require_relative 'avro'
3
4
  require_relative 'delimited'
4
5
  require_relative 'docx'
5
6
  require_relative 'excel'
@@ -0,0 +1,34 @@
1
+ require 'avro'
2
+ require 'ndr_support/safe_file'
3
+ require_relative 'registry'
4
+
5
+ module NdrImport
6
+ # This is one of a collection of file handlers that deal with individual formats of data.
7
+ # They can be instantiated directly or via the factory method Registry.tables
8
+ module File
9
+ # This class is an avro file handler that returns a single table.
10
+ class Avro < Base
11
+ private
12
+
13
+ def rows(&block)
14
+ return enum_for(:rows) unless block
15
+
16
+ # Create an instance of DatumReader
17
+ reader = ::Avro::IO::DatumReader.new
18
+ # Open @filename in read mode
19
+ file = ::File.open(@filename, 'rb')
20
+ # Equivalent to DataFileReader instance creation in Java
21
+ dr = ::Avro::DataFile::Reader.new(file, reader)
22
+
23
+ dr.each_with_index do |avro_row, i|
24
+ # Ensure the first row is always the "header"
25
+ yield(avro_row.keys) if i.zero?
26
+ yield(avro_row.values.map(&:to_s))
27
+ end
28
+ rescue StandardError => e
29
+ raise("#{SafeFile.basename(@filename)} [#{e.class}: #{e.message}]")
30
+ end
31
+ end
32
+ Registry.register(Avro, 'avro')
33
+ end
34
+ end
@@ -12,7 +12,7 @@ module NdrImport
12
12
  def self.all_valid_options
13
13
  %w[canonical_name delimiter liberal_parsing filename_pattern file_password last_data_column
14
14
  tablename_pattern header_lines footer_lines format klass columns xml_record_xpath slurp
15
- row_identifier]
15
+ row_identifier significant_mapped_fields]
16
16
  end
17
17
 
18
18
  def all_valid_options
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
  # This stores the current version of the NdrImport gem
3
3
  module NdrImport
4
- VERSION = '10.1.3'
4
+ VERSION = '10.2.0'
5
5
  end
data/lib/ndr_import.rb CHANGED
@@ -9,6 +9,7 @@ require 'ndr_import/non_tabular/table'
9
9
  require 'ndr_import/fixed_width/table'
10
10
  require 'ndr_import/xml/table'
11
11
  require 'ndr_import/pdf_form/table'
12
+ require 'ndr_import/avro/table'
12
13
  require 'ndr_import/unmapped_data_error'
13
14
  require 'ndr_import/acroform_reader'
14
15
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ndr_import
3
3
  version: !ruby/object:Gem::Version
4
- version: 10.1.3
4
+ version: 10.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - NCRS Development Team
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-12-08 00:00:00.000000000 Z
11
+ date: 2023-05-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activemodel
@@ -92,6 +92,20 @@ dependencies:
92
92
  - - "~>"
93
93
  - !ruby/object:Gem::Version
94
94
  version: '2.0'
95
+ - !ruby/object:Gem::Dependency
96
+ name: avro
97
+ requirement: !ruby/object:Gem::Requirement
98
+ requirements:
99
+ - - "~>"
100
+ - !ruby/object:Gem::Version
101
+ version: 1.11.0
102
+ type: :runtime
103
+ prerelease: false
104
+ version_requirements: !ruby/object:Gem::Requirement
105
+ requirements:
106
+ - - "~>"
107
+ - !ruby/object:Gem::Version
108
+ version: 1.11.0
95
109
  - !ruby/object:Gem::Dependency
96
110
  name: docx
97
111
  requirement: !ruby/object:Gem::Requirement
@@ -369,9 +383,11 @@ files:
369
383
  - exe/word_to_text
370
384
  - lib/ndr_import.rb
371
385
  - lib/ndr_import/acroform_reader.rb
386
+ - lib/ndr_import/avro/table.rb
372
387
  - lib/ndr_import/csv_library.rb
373
388
  - lib/ndr_import/file/acro_form.rb
374
389
  - lib/ndr_import/file/all.rb
390
+ - lib/ndr_import/file/avro.rb
375
391
  - lib/ndr_import/file/base.rb
376
392
  - lib/ndr_import/file/delimited.rb
377
393
  - lib/ndr_import/file/docx.rb
@@ -430,7 +446,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
430
446
  - !ruby/object:Gem::Version
431
447
  version: '0'
432
448
  requirements: []
433
- rubygems_version: 3.3.26
449
+ rubygems_version: 3.2.33
434
450
  signing_key:
435
451
  specification_version: 4
436
452
  summary: NDR Import