ndr_import 8.5.0 → 8.5.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (94)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +7 -0
  3. data/Gemfile +0 -3
  4. data/README.md +6 -0
  5. data/bin/console +14 -0
  6. data/bin/setup +8 -0
  7. data/code_safety.yml +27 -11
  8. data/exe/pdf_acro_form_to_yaml +23 -0
  9. data/exe/pdf_to_text +28 -0
  10. data/exe/word_to_text +26 -0
  11. data/gemfiles/Gemfile.rails52 +0 -3
  12. data/gemfiles/Gemfile.rails60 +5 -0
  13. data/lib/ndr_import/version.rb +1 -1
  14. data/ndr_import.gemspec +9 -7
  15. metadata +23 -164
  16. data/gemfiles/Gemfile.rails50 +0 -8
  17. data/gemfiles/Gemfile.rails51 +0 -9
  18. data/test/file/acro_form_test.rb +0 -39
  19. data/test/file/base_test.rb +0 -54
  20. data/test/file/delimited_test.rb +0 -233
  21. data/test/file/docx_test.rb +0 -53
  22. data/test/file/excel_test.rb +0 -124
  23. data/test/file/pdf_test.rb +0 -36
  24. data/test/file/registry_test.rb +0 -62
  25. data/test/file/seven_zip_test.rb +0 -59
  26. data/test/file/text_test.rb +0 -92
  27. data/test/file/word_test.rb +0 -35
  28. data/test/file/xml_test.rb +0 -21
  29. data/test/file/zip_test.rb +0 -47
  30. data/test/fixed_width/table_test.rb +0 -35
  31. data/test/helpers/file/delimited_test.rb +0 -105
  32. data/test/helpers/file/excel_test.rb +0 -82
  33. data/test/helpers/file/pdf_test.rb +0 -27
  34. data/test/helpers/file/word_test.rb +0 -26
  35. data/test/helpers/file/xml_test.rb +0 -131
  36. data/test/helpers/file/zip_test.rb +0 -75
  37. data/test/mapper_test.rb +0 -676
  38. data/test/non_tabular/mapping_test.rb +0 -36
  39. data/test/non_tabular/table_test.rb +0 -590
  40. data/test/non_tabular_file_helper_test.rb +0 -501
  41. data/test/pdf_form/table_test.rb +0 -119
  42. data/test/readme_test.rb +0 -53
  43. data/test/resources/acro_form.pdf +0 -0
  44. data/test/resources/blank_tab_test.xlsx +0 -0
  45. data/test/resources/bomd.csv +0 -3
  46. data/test/resources/broken.csv +0 -3
  47. data/test/resources/filesystem_paths.yml +0 -26
  48. data/test/resources/flat_file.pdf +0 -0
  49. data/test/resources/flat_file.txt +0 -27
  50. data/test/resources/flat_file.yml +0 -20
  51. data/test/resources/hello_utf16be.txt +0 -0
  52. data/test/resources/hello_utf16le.txt +0 -0
  53. data/test/resources/hello_utf8.txt +0 -2
  54. data/test/resources/hello_windows.txt +0 -2
  55. data/test/resources/hello_world.doc +0 -0
  56. data/test/resources/hello_world.docx +0 -0
  57. data/test/resources/hello_world.pdf +0 -0
  58. data/test/resources/hello_world.txt +0 -2
  59. data/test/resources/high_ascii_delimited.txt +0 -2
  60. data/test/resources/high_ascii_delimited_example_two.txt +0 -3
  61. data/test/resources/malformed.csv +0 -3
  62. data/test/resources/malformed.xml +0 -6
  63. data/test/resources/malformed_pipe.csv +0 -3
  64. data/test/resources/normal.7z +0 -0
  65. data/test/resources/normal.csv +0 -3
  66. data/test/resources/normal.csv.zip +0 -0
  67. data/test/resources/normal_pipe.csv +0 -3
  68. data/test/resources/normal_thorn.csv +0 -3
  69. data/test/resources/not_a_pdf.pdf +0 -0
  70. data/test/resources/not_a_word_file.doc +0 -0
  71. data/test/resources/not_a_word_file.docx +0 -0
  72. data/test/resources/not_sign_delimited.txt +0 -3
  73. data/test/resources/password_protected_hello_world.docx +0 -0
  74. data/test/resources/password_protected_sample_xlsx.xlsx +0 -0
  75. data/test/resources/sample.xml +0 -34
  76. data/test/resources/sample_xls.xls +0 -0
  77. data/test/resources/sample_xlsx.xlsx +0 -0
  78. data/test/resources/sheet_streaming.xls +0 -0
  79. data/test/resources/sheet_streaming.xlsx +0 -0
  80. data/test/resources/standard_mappings.yml +0 -39
  81. data/test/resources/txt_file_xls_extension.xls +0 -1
  82. data/test/resources/txt_file_xlsx_extension.xlsx +0 -1
  83. data/test/resources/utf-16be_xml.xml +0 -0
  84. data/test/resources/utf-16be_xml_with_declaration.xml +0 -0
  85. data/test/resources/utf-16le_xml.xml +0 -0
  86. data/test/resources/utf-8_xml.xml +0 -9
  87. data/test/resources/windows-1252_xml.xml +0 -9
  88. data/test/resources/windows.csv +0 -5
  89. data/test/resources/xlsx_file_xls_extension.xls +0 -0
  90. data/test/standard_mappings_test.rb +0 -22
  91. data/test/table_test.rb +0 -545
  92. data/test/test_helper.rb +0 -35
  93. data/test/universal_importer_helper_test.rb +0 -86
  94. data/test/xml/table_test.rb +0 -90
@@ -1,36 +0,0 @@
1
- require 'test_helper'
2
- require 'ndr_import/file/pdf'
3
-
4
- module NdrImport
5
- module File
6
- # PDF file handler tests
7
- class PdfTest < ActiveSupport::TestCase
8
- def setup
9
- @permanent_test_files = SafePath.new('permanent_test_files')
10
- end
11
-
12
- test 'should read pdf correctly' do
13
- file_path = @permanent_test_files.join('hello_world.pdf')
14
- handler = NdrImport::File::Pdf.new(file_path, nil)
15
- handler.tables.each do |tablename, sheet|
16
- assert_nil tablename
17
- assert_instance_of Enumerator, sheet
18
- assert_equal ['Hello World', '',
19
- 'Goodbye Universe'], sheet.to_a
20
- end
21
- end
22
-
23
- test 'should raise exception on invalid pdf file' do
24
- assert_raises RuntimeError do
25
- file_path = @permanent_test_files.join('not_a_pdf.pdf')
26
- handler = NdrImport::File::Pdf.new(file_path, nil)
27
- handler.tables.each do |tablename, sheet|
28
- assert_nil tablename
29
- assert_instance_of Enumerator, sheet
30
- sheet.to_a
31
- end
32
- end
33
- end
34
- end
35
- end
36
- end
@@ -1,62 +0,0 @@
1
- require 'test_helper'
2
- require 'ndr_import/file/registry'
3
-
4
- module NdrImport
5
- module File
6
- # Registry file handler tests
7
- class RegistryTest < ActiveSupport::TestCase
8
- def setup
9
- @home = SafePath.new('test_space_rw')
10
- @permanent_test_files = SafePath.new('permanent_test_files')
11
- end
12
-
13
- test 'Registry.handlers' do
14
- assert_instance_of Hash, NdrImport::File::Registry.handlers
15
- assert_equal %w[7z acroform csv delimited doc docx nontabular
16
- pdf text txt xls xlsx xml_table zip],
17
- NdrImport::File::Registry.handlers.keys.sort
18
- end
19
-
20
- test 'should fail to enumerate unknown format' do
21
- exception = assert_raises(RuntimeError) do
22
- file_path = @permanent_test_files.join('normal.csv')
23
- tables = NdrImport::File::Registry.tables(file_path, 'mp3')
24
- tables.each do |tablename, sheet|
25
- assert_nil tablename
26
- sheet.to_a
27
- end
28
- end
29
-
30
- assert_equal 'Error: Unknown file format "mp3"', exception.message
31
- end
32
-
33
- test 'should enumerate pdf file table' do
34
- file_path = @permanent_test_files.join('hello_world.pdf')
35
- tables = NdrImport::File::Registry.tables(file_path, nil)
36
- tables.each do |tablename, sheet|
37
- assert_nil tablename
38
- assert_instance_of Enumerator, sheet
39
- assert_equal ['Hello World', '',
40
- 'Goodbye Universe'], sheet.to_a
41
- end
42
- end
43
-
44
- test 'should enumerate zip file tables' do
45
- options = { 'unzip_path' => @home }
46
- file_path = @permanent_test_files.join('normal.csv.zip')
47
- files = NdrImport::File::Registry.files(file_path, options)
48
- files.each do |filename|
49
- tables = NdrImport::File::Registry.tables(filename, nil, options)
50
-
51
- tables.each do |tablename, sheet|
52
- assert_nil tablename
53
- sheet = sheet.to_a
54
- assert_equal(('A'..'Z').to_a, sheet[0])
55
- assert_equal ['1'] * 26, sheet[1]
56
- assert_equal ['2'] * 26, sheet[2]
57
- end
58
- end
59
- end
60
- end
61
- end
62
- end
@@ -1,59 +0,0 @@
1
- require 'test_helper'
2
- require 'ndr_import/file/seven_zip'
3
-
4
- module NdrImport
5
- module File
6
- # 7zip file handler tests
7
- class SevenZipTestTest < ActiveSupport::TestCase
8
- def setup
9
- @home = SafePath.new('test_space_rw')
10
- @permanent_test_files = SafePath.new('permanent_test_files')
11
- end
12
-
13
- test 'should reject non SafePath arguments' do
14
- file_path = @home.join('imaginary.7z')
15
-
16
- assert_raises ArgumentError do
17
- NdrImport::File::SevenZip.new(file_path.to_s, nil, 'unzip_path' => @home.to_s)
18
- end
19
-
20
- assert_raises ArgumentError do
21
- NdrImport::File::SevenZip.new(file_path.to_s, nil, 'unzip_path' => @home)
22
- end
23
-
24
- assert_raises ArgumentError do
25
- NdrImport::File::SevenZip.new(file_path, nil, 'unzip_path' => @home.to_s)
26
- end
27
- end
28
-
29
- test 'should read 7zip file with correct password' do
30
- options = { 'password' => 'FortuneCookie', 'unzip_path' => @home }
31
- file_path = @permanent_test_files.join('normal.7z')
32
-
33
- handler = NdrImport::File::SevenZip.new(file_path, nil, options)
34
- handler.files.all? do |filename|
35
- assert_instance_of SafePath, filename
36
- end
37
- files = handler.files.to_a
38
- assert_equal 'normal_pipe.csv', ::File.basename(files[0])
39
- assert_equal 'normal_thorn.csv', ::File.basename(files[1])
40
-
41
- exception = assert_raises RuntimeError do
42
- handler.tables
43
- end
44
- assert_equal 'SevenZip#tables should never be called', exception.message
45
- end
46
-
47
- test 'should not read 7zip file with incorrect password' do
48
- options = { 'password' => 'WrongPassword', 'unzip_path' => @home }
49
- file_path = @permanent_test_files.join('normal.7z')
50
-
51
- handler = NdrImport::File::SevenZip.new(file_path, nil, options)
52
-
53
- assert_raises SevenZipRuby::InvalidArchive do
54
- handler.files.to_a
55
- end
56
- end
57
- end
58
- end
59
- end
@@ -1,92 +0,0 @@
1
- # encoding: UTF-8
2
- require 'test_helper'
3
- require 'ndr_import/file/text'
4
-
5
- module NdrImport
6
- module File
7
- # Text file handler tests
8
- class TextTest < ActiveSupport::TestCase
9
- def setup
10
- @permanent_test_files = SafePath.new('permanent_test_files')
11
- end
12
-
13
- test 'should read text file' do
14
- file_path = @permanent_test_files.join('hello_world.txt')
15
- handler = NdrImport::File::Text.new(file_path, nil)
16
- handler.tables.each do |tablename, sheet|
17
- assert_nil tablename
18
- assert_instance_of Enumerator, sheet
19
- assert_equal ['Hello world,', 'this is a text document'], sheet.to_a
20
- end
21
- end
22
-
23
- test 'should read text file with UTF-8 encoding' do
24
- file_path = @permanent_test_files.join('hello_utf8.txt')
25
- handler = NdrImport::File::Text.new(file_path, nil)
26
- handler.tables.each do |tablename, sheet|
27
- assert_nil tablename
28
- assert_instance_of Enumerator, sheet
29
-
30
- lines = sheet.to_a
31
-
32
- assert_equal ['Hello world', 'This is a thorny þ issue!'], lines
33
- assert lines.all? { |line| line.encoding.name == 'UTF-8' && line.valid_encoding? }
34
- end
35
- end
36
-
37
- test 'should read text file with UTF-16 [BE] encoding' do
38
- file_path = @permanent_test_files.join('hello_utf16be.txt')
39
- handler = NdrImport::File::Text.new(file_path, nil)
40
- handler.tables.each do |tablename, sheet|
41
- assert_nil tablename
42
- assert_instance_of Enumerator, sheet
43
-
44
- lines = sheet.to_a
45
-
46
- assert_equal ['Hello world', 'This is a thorny þ issue!'], lines
47
- assert lines.all? { |line| line.encoding.name == 'UTF-8' && line.valid_encoding? }
48
- end
49
- end
50
-
51
- test 'should read text file with UTF-16 [LE] encoding' do
52
- file_path = @permanent_test_files.join('hello_utf16le.txt')
53
- handler = NdrImport::File::Text.new(file_path, nil)
54
- handler.tables.each do |tablename, sheet|
55
- assert_nil tablename
56
- assert_instance_of Enumerator, sheet
57
-
58
- lines = sheet.to_a
59
-
60
- assert_equal ['Hello world', 'This is a thorny þ issue!'], lines
61
- assert lines.all? { |line| line.encoding.name == 'UTF-8' && line.valid_encoding? }
62
- end
63
- end
64
-
65
- test 'should read text file with Windows-1252 encoding' do
66
- file_path = @permanent_test_files.join('hello_windows.txt')
67
- handler = NdrImport::File::Text.new(file_path, nil)
68
- handler.tables.each do |tablename, sheet|
69
- assert_nil tablename
70
- assert_instance_of Enumerator, sheet
71
-
72
- lines = sheet.to_a
73
-
74
- assert_equal ['Hello windows world', 'This is a thorny þ issue!'], lines
75
- assert lines.all? { |line| line.encoding.name == 'UTF-8' && line.valid_encoding? }
76
- end
77
- end
78
-
79
- test 'should raise exception on invalid text file' do
80
- assert_raises RuntimeError do
81
- file_path = @permanent_test_files.join('hello_world.pdf')
82
- handler = NdrImport::File::Text.new(file_path, nil)
83
- handler.tables.each do |tablename, sheet|
84
- assert_nil tablename
85
- assert_instance_of Enumerator, sheet
86
- sheet.to_a
87
- end
88
- end
89
- end
90
- end
91
- end
92
- end
@@ -1,35 +0,0 @@
1
- require 'test_helper'
2
- require 'ndr_import/file/word'
3
-
4
- module NdrImport
5
- module File
6
- # Word document file handler tests
7
- class WordTest < ActiveSupport::TestCase
8
- def setup
9
- @permanent_test_files = SafePath.new('permanent_test_files')
10
- end
11
-
12
- test 'should read word file' do
13
- file_path = @permanent_test_files.join('hello_world.doc')
14
- handler = NdrImport::File::Word.new(file_path, nil)
15
- handler.tables.each do |tablename, sheet|
16
- assert_nil tablename
17
- assert_instance_of Enumerator, sheet
18
- assert_equal ['Hello world, this is a word document'], sheet.to_a
19
- end
20
- end
21
-
22
- test 'should raise exception on invalid word file' do
23
- assert_raises RuntimeError do
24
- file_path = @permanent_test_files.join('not_a_word_file.doc')
25
- handler = NdrImport::File::Word.new(file_path, nil)
26
- handler.tables.each do |tablename, sheet|
27
- assert_nil tablename
28
- assert_instance_of Enumerator, sheet
29
- sheet.to_a
30
- end
31
- end
32
- end
33
- end
34
- end
35
- end
@@ -1,21 +0,0 @@
1
- require 'test_helper'
2
- require 'ndr_import/file/xml'
3
-
4
- module NdrImport
5
- module File
6
- # Xml file handler tests
7
- class XmlTest < ActiveSupport::TestCase
8
- def setup
9
- @permanent_test_files = SafePath.new('permanent_test_files')
10
- end
11
-
12
- test 'should return enum of xml elements' do
13
- file_path = @permanent_test_files.join('sample.xml')
14
- handler = NdrImport::File::Xml.new(file_path, nil, 'xml_record_xpath' => 'root/record')
15
- rows = handler.send(:rows)
16
- assert rows.is_a? Enumerator
17
- assert(rows.all? { |row| row.is_a? Nokogiri::XML::Element })
18
- end
19
- end
20
- end
21
- end
@@ -1,47 +0,0 @@
1
- require 'test_helper'
2
- require 'ndr_import/file/zip'
3
- require 'zip'
4
-
5
- module NdrImport
6
- module File
7
- # Zip file handler tests
8
- class ZipTest < ActiveSupport::TestCase
9
- def setup
10
- @home = SafePath.new('test_space_rw')
11
- @permanent_test_files = SafePath.new('permanent_test_files')
12
- end
13
-
14
- test 'should reject non SafePath arguments' do
15
- file_path = @home.join('imaginary.zip')
16
-
17
- assert_raises ArgumentError do
18
- NdrImport::File::Zip.new(file_path.to_s, nil, 'unzip_path' => @home.to_s)
19
- end
20
-
21
- assert_raises ArgumentError do
22
- NdrImport::File::Zip.new(file_path.to_s, nil, 'unzip_path' => @home)
23
- end
24
-
25
- assert_raises ArgumentError do
26
- NdrImport::File::Zip.new(file_path, nil, 'unzip_path' => @home.to_s)
27
- end
28
- end
29
-
30
- test 'should read table correctly' do
31
- options = { 'unzip_path' => @home }
32
- file_path = @permanent_test_files.join('normal.csv.zip')
33
-
34
- handler = NdrImport::File::Zip.new(file_path, nil, options)
35
- handler.files.each do |filename|
36
- assert_instance_of SafePath, filename
37
- assert_equal 'normal.csv', ::File.basename(filename)
38
- end
39
-
40
- exception = assert_raises RuntimeError do
41
- handler.tables
42
- end
43
- assert_equal 'Zip#tables should never be called', exception.message
44
- end
45
- end
46
- end
47
- end
@@ -1,35 +0,0 @@
1
- require 'test_helper'
2
-
3
- # This tests the NdrImport::FixedWidth::Table mapping class
4
- module FixedWidth
5
- class TableTest < ActiveSupport::TestCase
6
- def test_transform_fixed_width_line
7
- table = NdrImport::FixedWidth::Table.new(header_lines: 2,
8
- footer_lines: 1,
9
- klass: 'SomeTestKlass',
10
- columns: fixed_width_column_mapping)
11
-
12
- enum = table.transform_line('123 abcdexyz', 2)
13
- assert_instance_of Enumerator, enum
14
- output = []
15
- enum.each do |klass, fields, index|
16
- output << [klass, fields, index]
17
- end
18
-
19
- expected_output = [
20
- ['SomeTestKlass', { rawtext: { 'one' => '123 ', 'two' => 'abcde', 'three' => 'xyz' } }, 2]
21
- ]
22
- assert_equal expected_output.sort, output.sort
23
- end
24
-
25
- private
26
-
27
- def fixed_width_column_mapping
28
- [
29
- { 'column' => 'one', 'unpack_pattern' => 'a5' },
30
- { 'column' => 'two', 'unpack_pattern' => 'a5' },
31
- { 'column' => 'three', 'unpack_pattern' => 'a3' }
32
- ]
33
- end
34
- end
35
- end
@@ -1,105 +0,0 @@
1
- require 'test_helper'
2
- require 'ndr_import/helpers/file/delimited'
3
-
4
- # Delimited file helper tests
5
- class DelimitedTest < ActiveSupport::TestCase
6
- # This is a test importer class to test the Delimited file helper mixin
7
- class TestImporter
8
- include NdrImport::Helpers::File::Delimited
9
- end
10
-
11
- def setup
12
- @permanent_test_files = SafePath.new('permanent_test_files')
13
- @importer = TestImporter.new
14
- end
15
-
16
- test 'should read csv correctly' do
17
- rows = @importer.read_delimited_file(@permanent_test_files.join('normal.csv'), nil)
18
- assert_equal(('A'..'Z').to_a, rows[0])
19
- assert_equal ['1'] * 26, rows[1]
20
- assert_equal ['2'] * 26, rows[2]
21
- end
22
-
23
- test 'should read csv with a BOM' do
24
- rows = @importer.read_delimited_file(@permanent_test_files.join('bomd.csv'), nil)
25
- assert_equal(('A'..'Z').to_a, rows[0])
26
- assert_equal ['1'] * 26, rows[1]
27
- assert_equal ['2'] * 26, rows[2]
28
- end
29
-
30
- test 'should read windows-1252 csv' do
31
- rows = @importer.read_delimited_file(@permanent_test_files.join('windows.csv'), nil)
32
- assert_equal 1, rows.length
33
- end
34
-
35
- test 'should read acsii-delimited csv' do
36
- rows = @importer.read_delimited_file(@permanent_test_files.join('high_ascii_delimited.txt'),
37
- "\xfe")
38
- assert_equal 2, rows.length
39
- end
40
-
41
- test 'should read line-by-line' do
42
- rows = []
43
- @importer.delimited_rows(@permanent_test_files.join('normal.csv')) { |row| rows << row }
44
- assert_equal(('A'..'Z').to_a, rows[0])
45
- assert_equal ['1'] * 26, rows[1]
46
- assert_equal ['2'] * 26, rows[2]
47
- end
48
-
49
- test 'should read line-by-line with custom delimiter' do
50
- count = 0
51
- file = @permanent_test_files.join('high_ascii_delimited.txt')
52
-
53
- @importer.delimited_rows(file, "\xfe") { count += 1 }
54
- assert_equal 2, count
55
- end
56
-
57
- test 'should report addition details upon failure to slurp csv' do
58
- exception = assert_raises(CSVLibrary::MalformedCSVError) do
59
- @importer.read_delimited_file(@permanent_test_files.join('broken.csv'), nil)
60
- end
61
-
62
- assert_match(/Invalid CSV format on row 2 of broken\.csv\./, exception.message)
63
- assert_match(CORRUPTED_QUOTES_MESSAGE_PATTERN, exception.message)
64
- assert_match(/in line 2/, exception.message)
65
- end
66
-
67
- test 'should be able to use liberal parsing to overcome minor CSV errors' do
68
- file_path = @permanent_test_files.join('malformed.csv')
69
- assert_raises(CSVLibrary::MalformedCSVError) do
70
- @importer.read_delimited_file(file_path, nil)
71
- end
72
-
73
- rows = @importer.read_delimited_file(file_path, nil, true)
74
-
75
- expected_row = ['2'] * 25
76
- expected_row << '2"malformed"'
77
- assert_equal expected_row, rows[2].sort
78
- end
79
-
80
- test 'should report addition details upon failure to read csv line-by-line' do
81
- rows_yielded = []
82
- exception = assert_raises(CSVLibrary::MalformedCSVError) do
83
- @importer.delimited_rows(@permanent_test_files.join('broken.csv')) do |row|
84
- rows_yielded << row
85
- end
86
- end
87
-
88
- assert rows_yielded.empty?, 'no rows should have been yielded'
89
-
90
- assert_match(/Invalid CSV format on row 2 of broken\.csv\./, exception.message)
91
- assert_match(CORRUPTED_QUOTES_MESSAGE_PATTERN, exception.message)
92
- assert_match(/in line 2/, exception.message)
93
- end
94
-
95
- test 'delimited_tables should read table correctly' do
96
- table = @importer.send(:delimited_tables, @permanent_test_files.join('normal.csv'))
97
- table.each do |tablename, sheet|
98
- assert_nil tablename
99
- sheet = sheet.to_a
100
- assert_equal(('A'..'Z').to_a, sheet[0])
101
- assert_equal ['1'] * 26, sheet[1]
102
- assert_equal ['2'] * 26, sheet[2]
103
- end
104
- end
105
- end