ndr_import 8.5.0 → 8.5.1

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (94)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +7 -0
  3. data/Gemfile +0 -3
  4. data/README.md +6 -0
  5. data/bin/console +14 -0
  6. data/bin/setup +8 -0
  7. data/code_safety.yml +27 -11
  8. data/exe/pdf_acro_form_to_yaml +23 -0
  9. data/exe/pdf_to_text +28 -0
  10. data/exe/word_to_text +26 -0
  11. data/gemfiles/Gemfile.rails52 +0 -3
  12. data/gemfiles/Gemfile.rails60 +5 -0
  13. data/lib/ndr_import/version.rb +1 -1
  14. data/ndr_import.gemspec +9 -7
  15. metadata +23 -164
  16. data/gemfiles/Gemfile.rails50 +0 -8
  17. data/gemfiles/Gemfile.rails51 +0 -9
  18. data/test/file/acro_form_test.rb +0 -39
  19. data/test/file/base_test.rb +0 -54
  20. data/test/file/delimited_test.rb +0 -233
  21. data/test/file/docx_test.rb +0 -53
  22. data/test/file/excel_test.rb +0 -124
  23. data/test/file/pdf_test.rb +0 -36
  24. data/test/file/registry_test.rb +0 -62
  25. data/test/file/seven_zip_test.rb +0 -59
  26. data/test/file/text_test.rb +0 -92
  27. data/test/file/word_test.rb +0 -35
  28. data/test/file/xml_test.rb +0 -21
  29. data/test/file/zip_test.rb +0 -47
  30. data/test/fixed_width/table_test.rb +0 -35
  31. data/test/helpers/file/delimited_test.rb +0 -105
  32. data/test/helpers/file/excel_test.rb +0 -82
  33. data/test/helpers/file/pdf_test.rb +0 -27
  34. data/test/helpers/file/word_test.rb +0 -26
  35. data/test/helpers/file/xml_test.rb +0 -131
  36. data/test/helpers/file/zip_test.rb +0 -75
  37. data/test/mapper_test.rb +0 -676
  38. data/test/non_tabular/mapping_test.rb +0 -36
  39. data/test/non_tabular/table_test.rb +0 -590
  40. data/test/non_tabular_file_helper_test.rb +0 -501
  41. data/test/pdf_form/table_test.rb +0 -119
  42. data/test/readme_test.rb +0 -53
  43. data/test/resources/acro_form.pdf +0 -0
  44. data/test/resources/blank_tab_test.xlsx +0 -0
  45. data/test/resources/bomd.csv +0 -3
  46. data/test/resources/broken.csv +0 -3
  47. data/test/resources/filesystem_paths.yml +0 -26
  48. data/test/resources/flat_file.pdf +0 -0
  49. data/test/resources/flat_file.txt +0 -27
  50. data/test/resources/flat_file.yml +0 -20
  51. data/test/resources/hello_utf16be.txt +0 -0
  52. data/test/resources/hello_utf16le.txt +0 -0
  53. data/test/resources/hello_utf8.txt +0 -2
  54. data/test/resources/hello_windows.txt +0 -2
  55. data/test/resources/hello_world.doc +0 -0
  56. data/test/resources/hello_world.docx +0 -0
  57. data/test/resources/hello_world.pdf +0 -0
  58. data/test/resources/hello_world.txt +0 -2
  59. data/test/resources/high_ascii_delimited.txt +0 -2
  60. data/test/resources/high_ascii_delimited_example_two.txt +0 -3
  61. data/test/resources/malformed.csv +0 -3
  62. data/test/resources/malformed.xml +0 -6
  63. data/test/resources/malformed_pipe.csv +0 -3
  64. data/test/resources/normal.7z +0 -0
  65. data/test/resources/normal.csv +0 -3
  66. data/test/resources/normal.csv.zip +0 -0
  67. data/test/resources/normal_pipe.csv +0 -3
  68. data/test/resources/normal_thorn.csv +0 -3
  69. data/test/resources/not_a_pdf.pdf +0 -0
  70. data/test/resources/not_a_word_file.doc +0 -0
  71. data/test/resources/not_a_word_file.docx +0 -0
  72. data/test/resources/not_sign_delimited.txt +0 -3
  73. data/test/resources/password_protected_hello_world.docx +0 -0
  74. data/test/resources/password_protected_sample_xlsx.xlsx +0 -0
  75. data/test/resources/sample.xml +0 -34
  76. data/test/resources/sample_xls.xls +0 -0
  77. data/test/resources/sample_xlsx.xlsx +0 -0
  78. data/test/resources/sheet_streaming.xls +0 -0
  79. data/test/resources/sheet_streaming.xlsx +0 -0
  80. data/test/resources/standard_mappings.yml +0 -39
  81. data/test/resources/txt_file_xls_extension.xls +0 -1
  82. data/test/resources/txt_file_xlsx_extension.xlsx +0 -1
  83. data/test/resources/utf-16be_xml.xml +0 -0
  84. data/test/resources/utf-16be_xml_with_declaration.xml +0 -0
  85. data/test/resources/utf-16le_xml.xml +0 -0
  86. data/test/resources/utf-8_xml.xml +0 -9
  87. data/test/resources/windows-1252_xml.xml +0 -9
  88. data/test/resources/windows.csv +0 -5
  89. data/test/resources/xlsx_file_xls_extension.xls +0 -0
  90. data/test/standard_mappings_test.rb +0 -22
  91. data/test/table_test.rb +0 -545
  92. data/test/test_helper.rb +0 -35
  93. data/test/universal_importer_helper_test.rb +0 -86
  94. data/test/xml/table_test.rb +0 -90
@@ -1,36 +0,0 @@
1
- require 'test_helper'
2
- require 'ndr_import/file/pdf'
3
-
4
- module NdrImport
5
- module File
6
- # PDF file handler tests
7
- class PdfTest < ActiveSupport::TestCase
8
- def setup
9
- @permanent_test_files = SafePath.new('permanent_test_files')
10
- end
11
-
12
- test 'should read pdf correctly' do
13
- file_path = @permanent_test_files.join('hello_world.pdf')
14
- handler = NdrImport::File::Pdf.new(file_path, nil)
15
- handler.tables.each do |tablename, sheet|
16
- assert_nil tablename
17
- assert_instance_of Enumerator, sheet
18
- assert_equal ['Hello World', '',
19
- 'Goodbye Universe'], sheet.to_a
20
- end
21
- end
22
-
23
- test 'should raise exception on invalid pdf file' do
24
- assert_raises RuntimeError do
25
- file_path = @permanent_test_files.join('not_a_pdf.pdf')
26
- handler = NdrImport::File::Pdf.new(file_path, nil)
27
- handler.tables.each do |tablename, sheet|
28
- assert_nil tablename
29
- assert_instance_of Enumerator, sheet
30
- sheet.to_a
31
- end
32
- end
33
- end
34
- end
35
- end
36
- end
@@ -1,62 +0,0 @@
1
- require 'test_helper'
2
- require 'ndr_import/file/registry'
3
-
4
- module NdrImport
5
- module File
6
- # Registry file handler tests
7
- class RegistryTest < ActiveSupport::TestCase
8
- def setup
9
- @home = SafePath.new('test_space_rw')
10
- @permanent_test_files = SafePath.new('permanent_test_files')
11
- end
12
-
13
- test 'Registry.handlers' do
14
- assert_instance_of Hash, NdrImport::File::Registry.handlers
15
- assert_equal %w[7z acroform csv delimited doc docx nontabular
16
- pdf text txt xls xlsx xml_table zip],
17
- NdrImport::File::Registry.handlers.keys.sort
18
- end
19
-
20
- test 'should fail to enumerate unknown format' do
21
- exception = assert_raises(RuntimeError) do
22
- file_path = @permanent_test_files.join('normal.csv')
23
- tables = NdrImport::File::Registry.tables(file_path, 'mp3')
24
- tables.each do |tablename, sheet|
25
- assert_nil tablename
26
- sheet.to_a
27
- end
28
- end
29
-
30
- assert_equal 'Error: Unknown file format "mp3"', exception.message
31
- end
32
-
33
- test 'should enumerate pdf file table' do
34
- file_path = @permanent_test_files.join('hello_world.pdf')
35
- tables = NdrImport::File::Registry.tables(file_path, nil)
36
- tables.each do |tablename, sheet|
37
- assert_nil tablename
38
- assert_instance_of Enumerator, sheet
39
- assert_equal ['Hello World', '',
40
- 'Goodbye Universe'], sheet.to_a
41
- end
42
- end
43
-
44
- test 'should enumerate zip file tables' do
45
- options = { 'unzip_path' => @home }
46
- file_path = @permanent_test_files.join('normal.csv.zip')
47
- files = NdrImport::File::Registry.files(file_path, options)
48
- files.each do |filename|
49
- tables = NdrImport::File::Registry.tables(filename, nil, options)
50
-
51
- tables.each do |tablename, sheet|
52
- assert_nil tablename
53
- sheet = sheet.to_a
54
- assert_equal(('A'..'Z').to_a, sheet[0])
55
- assert_equal ['1'] * 26, sheet[1]
56
- assert_equal ['2'] * 26, sheet[2]
57
- end
58
- end
59
- end
60
- end
61
- end
62
- end
@@ -1,59 +0,0 @@
1
- require 'test_helper'
2
- require 'ndr_import/file/seven_zip'
3
-
4
- module NdrImport
5
- module File
6
- # 7zip file handler tests
7
- class SevenZipTestTest < ActiveSupport::TestCase
8
- def setup
9
- @home = SafePath.new('test_space_rw')
10
- @permanent_test_files = SafePath.new('permanent_test_files')
11
- end
12
-
13
- test 'should reject non SafePath arguments' do
14
- file_path = @home.join('imaginary.7z')
15
-
16
- assert_raises ArgumentError do
17
- NdrImport::File::SevenZip.new(file_path.to_s, nil, 'unzip_path' => @home.to_s)
18
- end
19
-
20
- assert_raises ArgumentError do
21
- NdrImport::File::SevenZip.new(file_path.to_s, nil, 'unzip_path' => @home)
22
- end
23
-
24
- assert_raises ArgumentError do
25
- NdrImport::File::SevenZip.new(file_path, nil, 'unzip_path' => @home.to_s)
26
- end
27
- end
28
-
29
- test 'should read 7zip file with correct password' do
30
- options = { 'password' => 'FortuneCookie', 'unzip_path' => @home }
31
- file_path = @permanent_test_files.join('normal.7z')
32
-
33
- handler = NdrImport::File::SevenZip.new(file_path, nil, options)
34
- handler.files.all? do |filename|
35
- assert_instance_of SafePath, filename
36
- end
37
- files = handler.files.to_a
38
- assert_equal 'normal_pipe.csv', ::File.basename(files[0])
39
- assert_equal 'normal_thorn.csv', ::File.basename(files[1])
40
-
41
- exception = assert_raises RuntimeError do
42
- handler.tables
43
- end
44
- assert_equal 'SevenZip#tables should never be called', exception.message
45
- end
46
-
47
- test 'should not read 7zip file with incorrect password' do
48
- options = { 'password' => 'WrongPassword', 'unzip_path' => @home }
49
- file_path = @permanent_test_files.join('normal.7z')
50
-
51
- handler = NdrImport::File::SevenZip.new(file_path, nil, options)
52
-
53
- assert_raises SevenZipRuby::InvalidArchive do
54
- handler.files.to_a
55
- end
56
- end
57
- end
58
- end
59
- end
@@ -1,92 +0,0 @@
1
- # encoding: UTF-8
2
- require 'test_helper'
3
- require 'ndr_import/file/text'
4
-
5
- module NdrImport
6
- module File
7
- # Text file handler tests
8
- class TextTest < ActiveSupport::TestCase
9
- def setup
10
- @permanent_test_files = SafePath.new('permanent_test_files')
11
- end
12
-
13
- test 'should read text file' do
14
- file_path = @permanent_test_files.join('hello_world.txt')
15
- handler = NdrImport::File::Text.new(file_path, nil)
16
- handler.tables.each do |tablename, sheet|
17
- assert_nil tablename
18
- assert_instance_of Enumerator, sheet
19
- assert_equal ['Hello world,', 'this is a text document'], sheet.to_a
20
- end
21
- end
22
-
23
- test 'should read text file with UTF-8 encoding' do
24
- file_path = @permanent_test_files.join('hello_utf8.txt')
25
- handler = NdrImport::File::Text.new(file_path, nil)
26
- handler.tables.each do |tablename, sheet|
27
- assert_nil tablename
28
- assert_instance_of Enumerator, sheet
29
-
30
- lines = sheet.to_a
31
-
32
- assert_equal ['Hello world', 'This is a thorny þ issue!'], lines
33
- assert lines.all? { |line| line.encoding.name == 'UTF-8' && line.valid_encoding? }
34
- end
35
- end
36
-
37
- test 'should read text file with UTF-16 [BE] encoding' do
38
- file_path = @permanent_test_files.join('hello_utf16be.txt')
39
- handler = NdrImport::File::Text.new(file_path, nil)
40
- handler.tables.each do |tablename, sheet|
41
- assert_nil tablename
42
- assert_instance_of Enumerator, sheet
43
-
44
- lines = sheet.to_a
45
-
46
- assert_equal ['Hello world', 'This is a thorny þ issue!'], lines
47
- assert lines.all? { |line| line.encoding.name == 'UTF-8' && line.valid_encoding? }
48
- end
49
- end
50
-
51
- test 'should read text file with UTF-16 [LE] encoding' do
52
- file_path = @permanent_test_files.join('hello_utf16le.txt')
53
- handler = NdrImport::File::Text.new(file_path, nil)
54
- handler.tables.each do |tablename, sheet|
55
- assert_nil tablename
56
- assert_instance_of Enumerator, sheet
57
-
58
- lines = sheet.to_a
59
-
60
- assert_equal ['Hello world', 'This is a thorny þ issue!'], lines
61
- assert lines.all? { |line| line.encoding.name == 'UTF-8' && line.valid_encoding? }
62
- end
63
- end
64
-
65
- test 'should read text file with Windows-1252 encoding' do
66
- file_path = @permanent_test_files.join('hello_windows.txt')
67
- handler = NdrImport::File::Text.new(file_path, nil)
68
- handler.tables.each do |tablename, sheet|
69
- assert_nil tablename
70
- assert_instance_of Enumerator, sheet
71
-
72
- lines = sheet.to_a
73
-
74
- assert_equal ['Hello windows world', 'This is a thorny þ issue!'], lines
75
- assert lines.all? { |line| line.encoding.name == 'UTF-8' && line.valid_encoding? }
76
- end
77
- end
78
-
79
- test 'should raise exception on invalid text file' do
80
- assert_raises RuntimeError do
81
- file_path = @permanent_test_files.join('hello_world.pdf')
82
- handler = NdrImport::File::Text.new(file_path, nil)
83
- handler.tables.each do |tablename, sheet|
84
- assert_nil tablename
85
- assert_instance_of Enumerator, sheet
86
- sheet.to_a
87
- end
88
- end
89
- end
90
- end
91
- end
92
- end
@@ -1,35 +0,0 @@
1
- require 'test_helper'
2
- require 'ndr_import/file/word'
3
-
4
- module NdrImport
5
- module File
6
- # Word document file handler tests
7
- class WordTest < ActiveSupport::TestCase
8
- def setup
9
- @permanent_test_files = SafePath.new('permanent_test_files')
10
- end
11
-
12
- test 'should read word file' do
13
- file_path = @permanent_test_files.join('hello_world.doc')
14
- handler = NdrImport::File::Word.new(file_path, nil)
15
- handler.tables.each do |tablename, sheet|
16
- assert_nil tablename
17
- assert_instance_of Enumerator, sheet
18
- assert_equal ['Hello world, this is a word document'], sheet.to_a
19
- end
20
- end
21
-
22
- test 'should raise exception on invalid word file' do
23
- assert_raises RuntimeError do
24
- file_path = @permanent_test_files.join('not_a_word_file.doc')
25
- handler = NdrImport::File::Word.new(file_path, nil)
26
- handler.tables.each do |tablename, sheet|
27
- assert_nil tablename
28
- assert_instance_of Enumerator, sheet
29
- sheet.to_a
30
- end
31
- end
32
- end
33
- end
34
- end
35
- end
@@ -1,21 +0,0 @@
1
- require 'test_helper'
2
- require 'ndr_import/file/xml'
3
-
4
- module NdrImport
5
- module File
6
- # Xml file handler tests
7
- class XmlTest < ActiveSupport::TestCase
8
- def setup
9
- @permanent_test_files = SafePath.new('permanent_test_files')
10
- end
11
-
12
- test 'should return enum of xml elements' do
13
- file_path = @permanent_test_files.join('sample.xml')
14
- handler = NdrImport::File::Xml.new(file_path, nil, 'xml_record_xpath' => 'root/record')
15
- rows = handler.send(:rows)
16
- assert rows.is_a? Enumerator
17
- assert(rows.all? { |row| row.is_a? Nokogiri::XML::Element })
18
- end
19
- end
20
- end
21
- end
@@ -1,47 +0,0 @@
1
- require 'test_helper'
2
- require 'ndr_import/file/zip'
3
- require 'zip'
4
-
5
- module NdrImport
6
- module File
7
- # Zip file handler tests
8
- class ZipTest < ActiveSupport::TestCase
9
- def setup
10
- @home = SafePath.new('test_space_rw')
11
- @permanent_test_files = SafePath.new('permanent_test_files')
12
- end
13
-
14
- test 'should reject non SafePath arguments' do
15
- file_path = @home.join('imaginary.zip')
16
-
17
- assert_raises ArgumentError do
18
- NdrImport::File::Zip.new(file_path.to_s, nil, 'unzip_path' => @home.to_s)
19
- end
20
-
21
- assert_raises ArgumentError do
22
- NdrImport::File::Zip.new(file_path.to_s, nil, 'unzip_path' => @home)
23
- end
24
-
25
- assert_raises ArgumentError do
26
- NdrImport::File::Zip.new(file_path, nil, 'unzip_path' => @home.to_s)
27
- end
28
- end
29
-
30
- test 'should read table correctly' do
31
- options = { 'unzip_path' => @home }
32
- file_path = @permanent_test_files.join('normal.csv.zip')
33
-
34
- handler = NdrImport::File::Zip.new(file_path, nil, options)
35
- handler.files.each do |filename|
36
- assert_instance_of SafePath, filename
37
- assert_equal 'normal.csv', ::File.basename(filename)
38
- end
39
-
40
- exception = assert_raises RuntimeError do
41
- handler.tables
42
- end
43
- assert_equal 'Zip#tables should never be called', exception.message
44
- end
45
- end
46
- end
47
- end
@@ -1,35 +0,0 @@
1
- require 'test_helper'
2
-
3
- # This tests the NdrImport::FixedWidth::Table mapping class
4
- module FixedWidth
5
- class TableTest < ActiveSupport::TestCase
6
- def test_transform_fixed_width_line
7
- table = NdrImport::FixedWidth::Table.new(header_lines: 2,
8
- footer_lines: 1,
9
- klass: 'SomeTestKlass',
10
- columns: fixed_width_column_mapping)
11
-
12
- enum = table.transform_line('123 abcdexyz', 2)
13
- assert_instance_of Enumerator, enum
14
- output = []
15
- enum.each do |klass, fields, index|
16
- output << [klass, fields, index]
17
- end
18
-
19
- expected_output = [
20
- ['SomeTestKlass', { rawtext: { 'one' => '123 ', 'two' => 'abcde', 'three' => 'xyz' } }, 2]
21
- ]
22
- assert_equal expected_output.sort, output.sort
23
- end
24
-
25
- private
26
-
27
- def fixed_width_column_mapping
28
- [
29
- { 'column' => 'one', 'unpack_pattern' => 'a5' },
30
- { 'column' => 'two', 'unpack_pattern' => 'a5' },
31
- { 'column' => 'three', 'unpack_pattern' => 'a3' }
32
- ]
33
- end
34
- end
35
- end
@@ -1,105 +0,0 @@
1
- require 'test_helper'
2
- require 'ndr_import/helpers/file/delimited'
3
-
4
- # Delimited file helper tests
5
- class DelimitedTest < ActiveSupport::TestCase
6
- # This is a test importer class to test the Delimited file helper mixin
7
- class TestImporter
8
- include NdrImport::Helpers::File::Delimited
9
- end
10
-
11
- def setup
12
- @permanent_test_files = SafePath.new('permanent_test_files')
13
- @importer = TestImporter.new
14
- end
15
-
16
- test 'should read csv correctly' do
17
- rows = @importer.read_delimited_file(@permanent_test_files.join('normal.csv'), nil)
18
- assert_equal(('A'..'Z').to_a, rows[0])
19
- assert_equal ['1'] * 26, rows[1]
20
- assert_equal ['2'] * 26, rows[2]
21
- end
22
-
23
- test 'should read csv with a BOM' do
24
- rows = @importer.read_delimited_file(@permanent_test_files.join('bomd.csv'), nil)
25
- assert_equal(('A'..'Z').to_a, rows[0])
26
- assert_equal ['1'] * 26, rows[1]
27
- assert_equal ['2'] * 26, rows[2]
28
- end
29
-
30
- test 'should read windows-1252 csv' do
31
- rows = @importer.read_delimited_file(@permanent_test_files.join('windows.csv'), nil)
32
- assert_equal 1, rows.length
33
- end
34
-
35
- test 'should read acsii-delimited csv' do
36
- rows = @importer.read_delimited_file(@permanent_test_files.join('high_ascii_delimited.txt'),
37
- "\xfe")
38
- assert_equal 2, rows.length
39
- end
40
-
41
- test 'should read line-by-line' do
42
- rows = []
43
- @importer.delimited_rows(@permanent_test_files.join('normal.csv')) { |row| rows << row }
44
- assert_equal(('A'..'Z').to_a, rows[0])
45
- assert_equal ['1'] * 26, rows[1]
46
- assert_equal ['2'] * 26, rows[2]
47
- end
48
-
49
- test 'should read line-by-line with custom delimiter' do
50
- count = 0
51
- file = @permanent_test_files.join('high_ascii_delimited.txt')
52
-
53
- @importer.delimited_rows(file, "\xfe") { count += 1 }
54
- assert_equal 2, count
55
- end
56
-
57
- test 'should report addition details upon failure to slurp csv' do
58
- exception = assert_raises(CSVLibrary::MalformedCSVError) do
59
- @importer.read_delimited_file(@permanent_test_files.join('broken.csv'), nil)
60
- end
61
-
62
- assert_match(/Invalid CSV format on row 2 of broken\.csv\./, exception.message)
63
- assert_match(CORRUPTED_QUOTES_MESSAGE_PATTERN, exception.message)
64
- assert_match(/in line 2/, exception.message)
65
- end
66
-
67
- test 'should be able to use liberal parsing to overcome minor CSV errors' do
68
- file_path = @permanent_test_files.join('malformed.csv')
69
- assert_raises(CSVLibrary::MalformedCSVError) do
70
- @importer.read_delimited_file(file_path, nil)
71
- end
72
-
73
- rows = @importer.read_delimited_file(file_path, nil, true)
74
-
75
- expected_row = ['2'] * 25
76
- expected_row << '2"malformed"'
77
- assert_equal expected_row, rows[2].sort
78
- end
79
-
80
- test 'should report addition details upon failure to read csv line-by-line' do
81
- rows_yielded = []
82
- exception = assert_raises(CSVLibrary::MalformedCSVError) do
83
- @importer.delimited_rows(@permanent_test_files.join('broken.csv')) do |row|
84
- rows_yielded << row
85
- end
86
- end
87
-
88
- assert rows_yielded.empty?, 'no rows should have been yielded'
89
-
90
- assert_match(/Invalid CSV format on row 2 of broken\.csv\./, exception.message)
91
- assert_match(CORRUPTED_QUOTES_MESSAGE_PATTERN, exception.message)
92
- assert_match(/in line 2/, exception.message)
93
- end
94
-
95
- test 'delimited_tables should read table correctly' do
96
- table = @importer.send(:delimited_tables, @permanent_test_files.join('normal.csv'))
97
- table.each do |tablename, sheet|
98
- assert_nil tablename
99
- sheet = sheet.to_a
100
- assert_equal(('A'..'Z').to_a, sheet[0])
101
- assert_equal ['1'] * 26, sheet[1]
102
- assert_equal ['2'] * 26, sheet[2]
103
- end
104
- end
105
- end