ndr_import 8.5.0 → 8.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/Gemfile +0 -3
- data/README.md +6 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/code_safety.yml +27 -11
- data/exe/pdf_acro_form_to_yaml +23 -0
- data/exe/pdf_to_text +28 -0
- data/exe/word_to_text +26 -0
- data/gemfiles/Gemfile.rails52 +0 -3
- data/gemfiles/Gemfile.rails60 +5 -0
- data/lib/ndr_import/version.rb +1 -1
- data/ndr_import.gemspec +9 -7
- metadata +23 -164
- data/gemfiles/Gemfile.rails50 +0 -8
- data/gemfiles/Gemfile.rails51 +0 -9
- data/test/file/acro_form_test.rb +0 -39
- data/test/file/base_test.rb +0 -54
- data/test/file/delimited_test.rb +0 -233
- data/test/file/docx_test.rb +0 -53
- data/test/file/excel_test.rb +0 -124
- data/test/file/pdf_test.rb +0 -36
- data/test/file/registry_test.rb +0 -62
- data/test/file/seven_zip_test.rb +0 -59
- data/test/file/text_test.rb +0 -92
- data/test/file/word_test.rb +0 -35
- data/test/file/xml_test.rb +0 -21
- data/test/file/zip_test.rb +0 -47
- data/test/fixed_width/table_test.rb +0 -35
- data/test/helpers/file/delimited_test.rb +0 -105
- data/test/helpers/file/excel_test.rb +0 -82
- data/test/helpers/file/pdf_test.rb +0 -27
- data/test/helpers/file/word_test.rb +0 -26
- data/test/helpers/file/xml_test.rb +0 -131
- data/test/helpers/file/zip_test.rb +0 -75
- data/test/mapper_test.rb +0 -676
- data/test/non_tabular/mapping_test.rb +0 -36
- data/test/non_tabular/table_test.rb +0 -590
- data/test/non_tabular_file_helper_test.rb +0 -501
- data/test/pdf_form/table_test.rb +0 -119
- data/test/readme_test.rb +0 -53
- data/test/resources/acro_form.pdf +0 -0
- data/test/resources/blank_tab_test.xlsx +0 -0
- data/test/resources/bomd.csv +0 -3
- data/test/resources/broken.csv +0 -3
- data/test/resources/filesystem_paths.yml +0 -26
- data/test/resources/flat_file.pdf +0 -0
- data/test/resources/flat_file.txt +0 -27
- data/test/resources/flat_file.yml +0 -20
- data/test/resources/hello_utf16be.txt +0 -0
- data/test/resources/hello_utf16le.txt +0 -0
- data/test/resources/hello_utf8.txt +0 -2
- data/test/resources/hello_windows.txt +0 -2
- data/test/resources/hello_world.doc +0 -0
- data/test/resources/hello_world.docx +0 -0
- data/test/resources/hello_world.pdf +0 -0
- data/test/resources/hello_world.txt +0 -2
- data/test/resources/high_ascii_delimited.txt +0 -2
- data/test/resources/high_ascii_delimited_example_two.txt +0 -3
- data/test/resources/malformed.csv +0 -3
- data/test/resources/malformed.xml +0 -6
- data/test/resources/malformed_pipe.csv +0 -3
- data/test/resources/normal.7z +0 -0
- data/test/resources/normal.csv +0 -3
- data/test/resources/normal.csv.zip +0 -0
- data/test/resources/normal_pipe.csv +0 -3
- data/test/resources/normal_thorn.csv +0 -3
- data/test/resources/not_a_pdf.pdf +0 -0
- data/test/resources/not_a_word_file.doc +0 -0
- data/test/resources/not_a_word_file.docx +0 -0
- data/test/resources/not_sign_delimited.txt +0 -3
- data/test/resources/password_protected_hello_world.docx +0 -0
- data/test/resources/password_protected_sample_xlsx.xlsx +0 -0
- data/test/resources/sample.xml +0 -34
- data/test/resources/sample_xls.xls +0 -0
- data/test/resources/sample_xlsx.xlsx +0 -0
- data/test/resources/sheet_streaming.xls +0 -0
- data/test/resources/sheet_streaming.xlsx +0 -0
- data/test/resources/standard_mappings.yml +0 -39
- data/test/resources/txt_file_xls_extension.xls +0 -1
- data/test/resources/txt_file_xlsx_extension.xlsx +0 -1
- data/test/resources/utf-16be_xml.xml +0 -0
- data/test/resources/utf-16be_xml_with_declaration.xml +0 -0
- data/test/resources/utf-16le_xml.xml +0 -0
- data/test/resources/utf-8_xml.xml +0 -9
- data/test/resources/windows-1252_xml.xml +0 -9
- data/test/resources/windows.csv +0 -5
- data/test/resources/xlsx_file_xls_extension.xls +0 -0
- data/test/standard_mappings_test.rb +0 -22
- data/test/table_test.rb +0 -545
- data/test/test_helper.rb +0 -35
- data/test/universal_importer_helper_test.rb +0 -86
- data/test/xml/table_test.rb +0 -90
data/test/test_helper.rb
DELETED
@@ -1,35 +0,0 @@
|
|
1
|
-
require 'simplecov'
|
2
|
-
SimpleCov.start
|
3
|
-
|
4
|
-
require 'active_support/test_case'
|
5
|
-
require 'active_support/core_ext/string'
|
6
|
-
require 'ndr_support/safe_path'
|
7
|
-
require 'ndr_import'
|
8
|
-
require 'yaml'
|
9
|
-
|
10
|
-
begin
|
11
|
-
# Shim for Test::Unit vs. Minitest:
|
12
|
-
require 'active_support/testing/autorun'
|
13
|
-
rescue LoadError
|
14
|
-
# Rails 4+ only
|
15
|
-
end
|
16
|
-
|
17
|
-
ActiveSupport.test_order = :random if ActiveSupport.respond_to?(:test_order=)
|
18
|
-
|
19
|
-
# The default changes to UTC in Rails 4.
|
20
|
-
# TODO: ndr_support should cope...
|
21
|
-
ActiveRecord::Base.default_timezone = :local
|
22
|
-
|
23
|
-
SafePath.configure! File.dirname(__FILE__) + '/resources/filesystem_paths.yml'
|
24
|
-
NdrImport::StandardMappings.mappings = YAML.load_file(
|
25
|
-
File.expand_path(File.dirname(__FILE__) + '/resources/standard_mappings.yml')
|
26
|
-
)
|
27
|
-
|
28
|
-
# Different Rubies report this differently:
|
29
|
-
CORRUPTED_QUOTES_MESSAGE_PATTERN = /(
|
30
|
-
Missing\sor\sstray\squote|
|
31
|
-
col_sep_split|
|
32
|
-
value\safter\squoted\sfield\sisn't\sallowed
|
33
|
-
)/x
|
34
|
-
|
35
|
-
require 'mocha/minitest'
|
data/test/universal_importer_helper_test.rb
DELETED
@@ -1,86 +0,0 @@
|
|
1
|
-
# encoding: UTF-8
|
2
|
-
require 'test_helper'
|
3
|
-
require 'ndr_import/universal_importer_helper'
|
4
|
-
|
5
|
-
# This tests the UniversalImporterHelper mixin
|
6
|
-
class UniversalImporterHelperTest < ActiveSupport::TestCase
|
7
|
-
# This is a test importer class to test the excel file helper mixin
|
8
|
-
class TestImporter
|
9
|
-
include NdrImport::UniversalImporterHelper
|
10
|
-
|
11
|
-
def initialize
|
12
|
-
@table_mappings = [
|
13
|
-
NdrImport::Table.new(:filename_pattern => /\.xls\z/i,
|
14
|
-
:tablename_pattern => /\Asheet1\z/i)
|
15
|
-
]
|
16
|
-
end
|
17
|
-
|
18
|
-
def get_notifier(_)
|
19
|
-
end
|
20
|
-
|
21
|
-
def unzip_path
|
22
|
-
SafePath.new('test_space_rw')
|
23
|
-
end
|
24
|
-
end
|
25
|
-
|
26
|
-
def setup
|
27
|
-
@permanent_test_files = SafePath.new('permanent_test_files')
|
28
|
-
@test_importer = TestImporter.new
|
29
|
-
end
|
30
|
-
|
31
|
-
test 'extract with matching mapping' do
|
32
|
-
source_file = @permanent_test_files.join('sample_xls.xls')
|
33
|
-
enumerator_ran = false
|
34
|
-
@test_importer.extract(source_file) do |table, rows|
|
35
|
-
assert_instance_of NdrImport::Table, table
|
36
|
-
assert_instance_of Enumerator, rows
|
37
|
-
enumerator_ran = true
|
38
|
-
end
|
39
|
-
assert enumerator_ran
|
40
|
-
end
|
41
|
-
|
42
|
-
test 'extract without matching mapping' do
|
43
|
-
source_file = @permanent_test_files.join('sample_xlsx.xlsx')
|
44
|
-
enumerator_ran = false
|
45
|
-
@test_importer.extract(source_file) do |_table, _rows|
|
46
|
-
enumerator_ran = true
|
47
|
-
end
|
48
|
-
refute enumerator_ran
|
49
|
-
end
|
50
|
-
|
51
|
-
test 'extract and transform with bespoke delimiter' do
|
52
|
-
table_mappings = [
|
53
|
-
NdrImport::Table.new(filename_pattern: /\.txt\z/i,
|
54
|
-
format: 'delimited',
|
55
|
-
delimiter: '¬',
|
56
|
-
header_lines: 1,
|
57
|
-
footer_lines: 0,
|
58
|
-
klass: 'SomeTestClass',
|
59
|
-
columns: [{ 'column' => 'one' },
|
60
|
-
{ 'column' => 'two' },
|
61
|
-
{ 'column' => 'three' }])
|
62
|
-
]
|
63
|
-
source_file = @permanent_test_files.join('not_sign_delimited.txt')
|
64
|
-
@test_importer.stubs(:get_table_mapping).returns(table_mappings.first)
|
65
|
-
@test_importer.extract(source_file) do |table, rows|
|
66
|
-
mapped_rows = table.transform(rows)
|
67
|
-
|
68
|
-
assert_instance_of NdrImport::Table, table
|
69
|
-
assert_instance_of Enumerator, rows
|
70
|
-
expected_mapped_data = [{ rawtext: { 'one' => 'some', 'two' => 'data', 'three' => 'here' } },
|
71
|
-
{ rawtext: { 'one' => 'more', 'two' => 'data', 'three' => 'here' } }]
|
72
|
-
|
73
|
-
assert_equal expected_mapped_data, (mapped_rows.to_a.map { |_klass, fields| fields })
|
74
|
-
end
|
75
|
-
end
|
76
|
-
|
77
|
-
test 'get_notifier' do
|
78
|
-
class TestImporterWithoutNotifier
|
79
|
-
include NdrImport::UniversalImporterHelper
|
80
|
-
end
|
81
|
-
|
82
|
-
assert_raise(NotImplementedError) do
|
83
|
-
TestImporterWithoutNotifier.new.get_notifier(10_000)
|
84
|
-
end
|
85
|
-
end
|
86
|
-
end
|
data/test/xml/table_test.rb
DELETED
@@ -1,90 +0,0 @@
|
|
1
|
-
require 'test_helper'
|
2
|
-
|
3
|
-
# This tests the NdrImport::Xml::Table mapping class
|
4
|
-
module Xml
|
5
|
-
class TableTest < ActiveSupport::TestCase
|
6
|
-
def setup
|
7
|
-
file_path = SafePath.new('permanent_test_files').join('sample.xml')
|
8
|
-
handler = NdrImport::File::Xml.new(file_path, nil, 'xml_record_xpath' => 'root/record')
|
9
|
-
|
10
|
-
@element_lines = handler.send(:rows)
|
11
|
-
end
|
12
|
-
|
13
|
-
def test_should_transform_xml_element_lines
|
14
|
-
table = NdrImport::Xml::Table.new(klass: 'SomeTestKlass', columns: xml_column_mapping)
|
15
|
-
|
16
|
-
expected_data = ['SomeTestKlass', { rawtext: {
|
17
|
-
'no_relative_path' => 'A value',
|
18
|
-
'no_relative_path_inner_text' => '',
|
19
|
-
'no_path_or_att' => 'Another value',
|
20
|
-
'demographics_1' => 'AAA',
|
21
|
-
'demographics_2' => '03',
|
22
|
-
'demographics_2_inner_text' => 'Inner text',
|
23
|
-
'address1' => 'Address',
|
24
|
-
'address2' => 'Address 2',
|
25
|
-
'pathology_date_1' => '2018-01-01',
|
26
|
-
'pathology_date_2' => '',
|
27
|
-
'should_be_blank' => ''
|
28
|
-
} }, 1]
|
29
|
-
|
30
|
-
transformed_data = table.transform(@element_lines)
|
31
|
-
assert_equal 2, transformed_data.count
|
32
|
-
|
33
|
-
transformed_data.each do |klass, fields, _index|
|
34
|
-
assert_equal expected_data[0], klass
|
35
|
-
assert_equal expected_data[1], fields
|
36
|
-
end
|
37
|
-
end
|
38
|
-
|
39
|
-
def test_should_fail_with_unmappped_nodes
|
40
|
-
table = NdrImport::Xml::Table.new(klass: 'SomeTestKlass', columns: partial_xml_column_mapping)
|
41
|
-
|
42
|
-
exception = assert_raises(RuntimeError) { table.transform(@element_lines).to_a }
|
43
|
-
assert exception.message.starts_with? 'sample.xml [RuntimeError: Unmapped data!'
|
44
|
-
end
|
45
|
-
|
46
|
-
private
|
47
|
-
|
48
|
-
def xml_column_mapping
|
49
|
-
[
|
50
|
-
{ 'column' => 'no_relative_path',
|
51
|
-
'xml_cell' => { 'relative_path' => '', 'attribute' => 'value' } },
|
52
|
-
{ 'column' => 'no_relative_path', 'rawtext_name' => 'no_relative_path_inner_text',
|
53
|
-
'xml_cell' => { 'relative_path' => '' } },
|
54
|
-
{ 'column' => 'no_path_or_att',
|
55
|
-
'xml_cell' => { 'relative_path' => '', 'attribute' => '' } },
|
56
|
-
{ 'column' => 'demographics_1',
|
57
|
-
'xml_cell' => { 'relative_path' => 'demographics' } },
|
58
|
-
{ 'column' => 'demographics_2',
|
59
|
-
'xml_cell' => { 'relative_path' => 'demographics', 'attribute' => 'code' } },
|
60
|
-
{ 'column' => 'demographics_2', 'rawtext_name' => 'demographics_2_inner_text',
|
61
|
-
'xml_cell' => { 'relative_path' => 'demographics' } },
|
62
|
-
{ 'column' => 'address_line1[1]', 'rawtext_name' => 'address1',
|
63
|
-
'xml_cell' => { 'relative_path' => 'demographics/address' } },
|
64
|
-
{ 'column' => 'address_line1[2]', 'rawtext_name' => 'address2',
|
65
|
-
'xml_cell' => { 'relative_path' => 'demographics/address' } },
|
66
|
-
{ 'column' => 'pathology_date_1',
|
67
|
-
'xml_cell' => { 'relative_path' => 'pathology' } },
|
68
|
-
{ 'column' => 'pathology_date_2',
|
69
|
-
'xml_cell' => { 'relative_path' => 'pathology' } },
|
70
|
-
{ 'column' => 'should_be_blank',
|
71
|
-
'xml_cell' => { 'relative_path' => 'not_present' } }
|
72
|
-
]
|
73
|
-
end
|
74
|
-
|
75
|
-
def partial_xml_column_mapping
|
76
|
-
[
|
77
|
-
{ 'column' => 'no_relative_path',
|
78
|
-
'xml_cell' => { 'relative_path' => '', 'attribute' => 'value' } },
|
79
|
-
{ 'column' => 'no_path_or_att',
|
80
|
-
'xml_cell' => { 'relative_path' => '', 'attribute' => '' } },
|
81
|
-
{ 'column' => 'demographics_1',
|
82
|
-
'xml_cell' => { 'relative_path' => 'demographics' } },
|
83
|
-
{ 'column' => 'demographics_2',
|
84
|
-
'xml_cell' => { 'relative_path' => 'demographics', 'attribute' => 'code' } },
|
85
|
-
{ 'column' => 'address_line1',
|
86
|
-
'xml_cell' => { 'relative_path' => 'demographics/address' } }
|
87
|
-
]
|
88
|
-
end
|
89
|
-
end
|
90
|
-
end
|