ndr_import 3.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (103) hide show
  1. checksums.yaml +15 -0
  2. data/.gitignore +14 -0
  3. data/.rubocop.yml +27 -0
  4. data/.ruby-version +1 -0
  5. data/.travis.yml +22 -0
  6. data/CODE_OF_CONDUCT.md +13 -0
  7. data/Gemfile +4 -0
  8. data/Guardfile +16 -0
  9. data/LICENSE.txt +21 -0
  10. data/README.md +69 -0
  11. data/Rakefile +13 -0
  12. data/code_safety.yml +374 -0
  13. data/gemfiles/Gemfile.rails32 +5 -0
  14. data/gemfiles/Gemfile.rails32.lock +142 -0
  15. data/gemfiles/Gemfile.rails41 +5 -0
  16. data/gemfiles/Gemfile.rails41.lock +145 -0
  17. data/gemfiles/Gemfile.rails42 +5 -0
  18. data/gemfiles/Gemfile.rails42.lock +145 -0
  19. data/lib/ndr_import.rb +13 -0
  20. data/lib/ndr_import/csv_library.rb +40 -0
  21. data/lib/ndr_import/file/all.rb +8 -0
  22. data/lib/ndr_import/file/base.rb +76 -0
  23. data/lib/ndr_import/file/delimited.rb +86 -0
  24. data/lib/ndr_import/file/excel.rb +131 -0
  25. data/lib/ndr_import/file/pdf.rb +38 -0
  26. data/lib/ndr_import/file/registry.rb +50 -0
  27. data/lib/ndr_import/file/text.rb +52 -0
  28. data/lib/ndr_import/file/word.rb +30 -0
  29. data/lib/ndr_import/file/zip.rb +67 -0
  30. data/lib/ndr_import/helpers/file/delimited.rb +105 -0
  31. data/lib/ndr_import/helpers/file/excel.rb +181 -0
  32. data/lib/ndr_import/helpers/file/pdf.rb +29 -0
  33. data/lib/ndr_import/helpers/file/word.rb +27 -0
  34. data/lib/ndr_import/helpers/file/xml.rb +45 -0
  35. data/lib/ndr_import/helpers/file/zip.rb +44 -0
  36. data/lib/ndr_import/mapper.rb +220 -0
  37. data/lib/ndr_import/mapping_error.rb +5 -0
  38. data/lib/ndr_import/non_tabular/column_mapping.rb +73 -0
  39. data/lib/ndr_import/non_tabular/line.rb +46 -0
  40. data/lib/ndr_import/non_tabular/mapping.rb +35 -0
  41. data/lib/ndr_import/non_tabular/record.rb +99 -0
  42. data/lib/ndr_import/non_tabular/table.rb +193 -0
  43. data/lib/ndr_import/non_tabular_file_helper.rb +160 -0
  44. data/lib/ndr_import/standard_mappings.rb +23 -0
  45. data/lib/ndr_import/table.rb +179 -0
  46. data/lib/ndr_import/version.rb +4 -0
  47. data/ndr_import.gemspec +44 -0
  48. data/test/file/base_test.rb +54 -0
  49. data/test/file/delimited_test.rb +143 -0
  50. data/test/file/excel_test.rb +85 -0
  51. data/test/file/pdf_test.rb +35 -0
  52. data/test/file/registry_test.rb +60 -0
  53. data/test/file/text_test.rb +92 -0
  54. data/test/file/word_test.rb +35 -0
  55. data/test/file/zip_test.rb +47 -0
  56. data/test/helpers/file/delimited_test.rb +113 -0
  57. data/test/helpers/file/excel_test.rb +97 -0
  58. data/test/helpers/file/pdf_test.rb +26 -0
  59. data/test/helpers/file/word_test.rb +26 -0
  60. data/test/helpers/file/xml_test.rb +131 -0
  61. data/test/helpers/file/zip_test.rb +75 -0
  62. data/test/mapper_test.rb +551 -0
  63. data/test/non_tabular/mapping_test.rb +36 -0
  64. data/test/non_tabular/table_test.rb +510 -0
  65. data/test/non_tabular_file_helper_test.rb +501 -0
  66. data/test/readme_test.rb +53 -0
  67. data/test/resources/bomd.csv +3 -0
  68. data/test/resources/broken.csv +3 -0
  69. data/test/resources/filesystem_paths.yml +26 -0
  70. data/test/resources/flat_file.pdf +0 -0
  71. data/test/resources/flat_file.txt +27 -0
  72. data/test/resources/flat_file.yml +20 -0
  73. data/test/resources/hello_utf16be.txt +0 -0
  74. data/test/resources/hello_utf16le.txt +0 -0
  75. data/test/resources/hello_utf8.txt +2 -0
  76. data/test/resources/hello_windows.txt +2 -0
  77. data/test/resources/hello_world.doc +0 -0
  78. data/test/resources/hello_world.pdf +0 -0
  79. data/test/resources/hello_world.txt +2 -0
  80. data/test/resources/high_ascii_delimited.txt +2 -0
  81. data/test/resources/malformed.xml +6 -0
  82. data/test/resources/normal.csv +3 -0
  83. data/test/resources/normal.csv.zip +0 -0
  84. data/test/resources/normal_pipe.csv +3 -0
  85. data/test/resources/normal_thorn.csv +3 -0
  86. data/test/resources/not_a_pdf.pdf +0 -0
  87. data/test/resources/not_a_word_file.doc +0 -0
  88. data/test/resources/sample_xls.xls +0 -0
  89. data/test/resources/sample_xlsx.xlsx +0 -0
  90. data/test/resources/standard_mappings.yml +39 -0
  91. data/test/resources/txt_file_xls_extension.xls +1 -0
  92. data/test/resources/txt_file_xlsx_extension.xlsx +1 -0
  93. data/test/resources/utf-16be_xml.xml +0 -0
  94. data/test/resources/utf-16be_xml_with_declaration.xml +0 -0
  95. data/test/resources/utf-16le_xml.xml +0 -0
  96. data/test/resources/utf-8_xml.xml +9 -0
  97. data/test/resources/windows-1252_xml.xml +9 -0
  98. data/test/resources/windows.csv +5 -0
  99. data/test/resources/xlsx_file_xls_extension.xls +0 -0
  100. data/test/standard_mappings_test.rb +22 -0
  101. data/test/table_test.rb +288 -0
  102. data/test/test_helper.rb +13 -0
  103. metadata +443 -0
@@ -0,0 +1,36 @@
1
+ require 'test_helper'
2
+
3
# Tests construction of NdrImport::NonTabular::Mapping: the error cases for a missing or
# incomplete 'non_tabular_row' option, and the happy path exposing start_line_pattern.
class MappingTestTest < ActiveSupport::TestCase
  # No 'non_tabular_row' key at all.
  def test_should_raise_error_with_no_non_tabular_row
    assert_raise(NdrImport::MappingError) do
      NdrImport::NonTabular::Mapping.new('columns' => [{ 'column' => 'one' }])
    end
  end

  # 'non_tabular_row' is present but either nil itself, or carries a nil 'start_line_pattern'.
  def test_should_raise_error_with_no_non_tabular_row_start_line_pattern
    [nil, { 'start_line_pattern' => nil }].each do |non_tabular_row|
      assert_raise(NdrImport::MappingError) do
        NdrImport::NonTabular::Mapping.new(
          'non_tabular_row' => non_tabular_row,
          'columns' => [{ 'column' => 'one' }]
        )
      end
    end
  end

  # A regexp supplied as 'start_line_pattern' is readable back from the mapping.
  def test_should_initialize_with_non_tabular_row
    pattern = /\A-*\z/
    mapping = NdrImport::NonTabular::Mapping.new(
      'non_tabular_row' => { 'start_line_pattern' => pattern },
      'columns' => [{ 'column' => 'one' }]
    )

    assert_equal(pattern, mapping.start_line_pattern)
  end
end
@@ -0,0 +1,510 @@
1
+ require 'test_helper'
2
+
3
+ # This tests the NdrImport::NonTabular::Table mapping class
4
+ class TableTest < ActiveSupport::TestCase
5
# Prepares the three line enumerators that the tests in this class feed to `transform`.
# The fixture text is written at column 0 because `<<-` heredocs keep leading whitespace and
# the tests match divider lines with line-anchored patterns.
def setup
  # Four two-line chunks separated by three "------" divider lines.
  simple_divider_text = <<-STR
111
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt.
------
222
Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo.
------
333
Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla.
------
444
Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim.
  STR

  # A single chunk with no divider line at all.
  no_divider_text = <<-STR
111
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt.
  STR

  # Four chunks wrapped in explicit START/END marker lines, with filler text between them.
  start_and_end_divider_text = <<-STR
----- START -----
111
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt.
------ END ------
This is never captured
----- START -----
222
Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo.
------ END ------
This is never captured
----- START -----
333
Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla.
------ END ------
This is never captured
----- START -----
444
This is captured
Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim.
------ END ------
  STR

  # Block-less `map` turns each array of lines into an Enumerator.
  @simple_divider_example = simple_divider_text.split(/\n/).map
  @no_divider_example = no_divider_text.split(/\n/).map
  @simple_start_and_end_divider_example = start_and_end_divider_text.split(/\n/).map
end
48
+
49
# Compares Table.all_valid_options with the expected option names, ignoring order.
def test_all_valid_options
  expected = %w(
    canonical_name capture_start_line columns end_in_a_record end_line_pattern filename_pattern
    format klass remove_lines start_in_a_record start_line_pattern
  ).sort
  actual = NdrImport::NonTabular::Table.all_valid_options.sort

  assert_equal expected, actual
end
57
+
58
# Table.new is expected to raise NdrImport::MappingError when 'start_line_pattern' is
# either not supplied or supplied as nil.
def test_should_raise_error_with_no_start_line_pattern
  error_class = NdrImport::MappingError

  # Option omitted entirely.
  assert_raise(error_class) do
    NdrImport::NonTabular::Table.new('columns' => [{ 'column' => 'one' }])
  end

  # Option present but nil.
  assert_raise(error_class) do
    NdrImport::NonTabular::Table.new(
      'start_line_pattern' => nil,
      'columns' => [{ 'column' => 'one' }]
    )
  end
end
72
+
73
# A regexp supplied as 'start_line_pattern' is readable back from the constructed table.
def test_should_initialize_with_non_tabular_row
  pattern = /\A-*\z/
  table = NdrImport::NonTabular::Table.new(
    'start_line_pattern' => pattern,
    'columns' => [{ 'column' => 'one' }]
  )

  assert_equal(pattern, table.start_line_pattern)
end
80
+
81
# Loads the table mapping from flat_file.yml, runs it over flat_file.txt and checks the
# 'one' raw text of every yielded record: four records, first starting with '1', last
# starting with '4', containing the "captured" marker but neither the "never captured"
# marker nor any "== Page" line.
def test_should_test_flat_file_txt
  table = YAML.load_file(SafePath.new('permanent_test_files').join('flat_file.yml'))
  assert table.is_a?(NdrImport::NonTabular::Table)

  filename = SafePath.new('permanent_test_files').join('flat_file.txt')

  # Block form so the file handle is closed when the test finishes. `transform` hands back
  # an enumerator, so it has to be drained while the file is still open.
  results = File.open(filename) do |file|
    table.transform(file.each).map { |_klass, fields, _index| fields[:rawtext]['one'] }
  end

  assert_equal 4, results.count
  assert results.first.start_with?('1')
  assert results.last.start_with?('4')

  assert results.any? { |result| result =~ /This is captured/ }
  refute results.any? { |result| result =~ /This is never captured/ }
  refute results.any? { |result| result =~ /== Page/ }
end
101
+
102
# A column with no `non_tabular_cell` entry at all: transforming is expected to raise
# NdrImport::MappingError.
def test_should_raise_error_with_no_column_non_tabular_cell
  mapping_yaml = <<-YML.strip_heredoc
    --- !ruby/object:NdrImport::NonTabular::Table
    start_line_pattern: !ruby/regexp /^-{6}$/
    klass: SomeTestKlass
    columns:
    - column: one
  YML
  table = YAML.load(mapping_yaml)

  # `to_a` forces the enumerator so the error surfaces inside the assertion block.
  assert_raise(NdrImport::MappingError) { table.transform(@simple_divider_example).to_a }
end
114
+
115
# Transforming is expected to raise NdrImport::MappingError when a column's
# `non_tabular_cell` is empty, and also when it has a `lines` key with no value.
def test_should_raise_error_with_no_column_non_tabular_cell_lines
  # `non_tabular_cell` has to be indented under its column entry; written flush with
  # `columns:` YAML would read it as a top-level key of the table instead.
  table = YAML.load <<-YML.strip_heredoc
    --- !ruby/object:NdrImport::NonTabular::Table
    start_line_pattern: !ruby/regexp /^-{6}$/
    klass: SomeTestKlass
    columns:
    - column: one
      non_tabular_cell:
  YML
  assert_raise NdrImport::MappingError do
    table.transform(@simple_divider_example).to_a
  end

  # Same mapping, but with an empty `lines` nested inside `non_tabular_cell`.
  table = YAML.load <<-YML.strip_heredoc
    --- !ruby/object:NdrImport::NonTabular::Table
    start_line_pattern: !ruby/regexp /^-{6}$/
    klass: SomeTestKlass
    columns:
    - column: one
      non_tabular_cell:
        lines:
  YML
  assert_raise NdrImport::MappingError do
    table.transform(@simple_divider_example).to_a
  end
end
141
+
142
# Transforming is expected to raise NdrImport::MappingError when a column's
# `non_tabular_cell` defines `lines` but has no `capture`, or has a `capture` key with no value.
def test_should_raise_error_with_no_column_non_tabular_cell_capture
  # The range attributes nest under `lines`, which nests under `non_tabular_cell`, which
  # nests under the column entry; YAML derives that structure from indentation alone.
  table = YAML.load <<-YML.strip_heredoc
    --- !ruby/object:NdrImport::NonTabular::Table
    start_line_pattern: !ruby/regexp /^-{6}$/
    klass: SomeTestKlass
    columns:
    - column: one
      non_tabular_cell:
        lines: !ruby/range
          begin: 0
          end: -1
          excl: false
  YML
  assert_raise NdrImport::MappingError do
    table.transform(@simple_divider_example).to_a
  end

  # Same mapping with an empty `capture` alongside `lines`.
  table = YAML.load <<-YML.strip_heredoc
    --- !ruby/object:NdrImport::NonTabular::Table
    start_line_pattern: !ruby/regexp /^-{6}$/
    klass: SomeTestKlass
    columns:
    - column: one
      non_tabular_cell:
        lines: !ruby/range
          begin: 0
          end: -1
          excl: false
        capture:
  YML
  assert_raise NdrImport::MappingError do
    table.transform(@simple_divider_example).to_a
  end
end
176
+
177
# With neither start_in_a_record nor end_in_a_record set, the divider example is expected
# to yield two records: the ones starting '222' and '333'.
def test_should_only_return_two_results_with_no_start_in_a_record_or_end_in_a_record
  # Nested keys are indented under their parent so the cell definition belongs to the
  # column entry rather than to the table object.
  table = YAML.load <<-YML.strip_heredoc
    --- !ruby/object:NdrImport::NonTabular::Table
    start_line_pattern: !ruby/regexp /^-{6}$/
    klass: SomeTestKlass
    columns:
    - column: one
      non_tabular_cell:
        lines: !ruby/range
          begin: 0
          end: -1
          excl: false
        capture: !ruby/regexp /^(.*)$/i
  YML
  enum = table.transform(@simple_divider_example)
  assert_instance_of Enumerator, enum
  results = enum.map { |_klass, fields, _index| fields[:rawtext]['one'] }

  assert_equal 2, results.count
  assert results.first.start_with?('222')
  assert results.last.start_with?('333')
end
199
+
200
# With start_in_a_record set, the divider example is expected to yield three records,
# from '111' through '333'.
def test_should_return_three_results_with_start_in_a_record
  # Nested keys are indented under their parent so the cell definition belongs to the
  # column entry rather than to the table object.
  table = YAML.load <<-YML.strip_heredoc
    --- !ruby/object:NdrImport::NonTabular::Table
    start_line_pattern: !ruby/regexp /^-{6}$/
    start_in_a_record: true
    klass: SomeTestKlass
    columns:
    - column: one
      non_tabular_cell:
        lines: !ruby/range
          begin: 0
          end: -1
          excl: false
        capture: !ruby/regexp /^(.*)$/i
  YML
  enum = table.transform(@simple_divider_example)
  assert_instance_of Enumerator, enum
  results = enum.map { |_klass, fields, _index| fields[:rawtext]['one'] }

  assert_equal 3, results.count
  assert results.first.start_with?('111')
  assert results.last.start_with?('333')
end
223
+
224
# With end_in_a_record set, the divider example is expected to yield three records,
# from '222' through '444'.
def test_should_return_three_results_with_end_in_a_record
  # Nested keys are indented under their parent so the cell definition belongs to the
  # column entry rather than to the table object.
  table = YAML.load <<-YML.strip_heredoc
    --- !ruby/object:NdrImport::NonTabular::Table
    start_line_pattern: !ruby/regexp /^-{6}$/
    end_in_a_record: true
    klass: SomeTestKlass
    columns:
    - column: one
      non_tabular_cell:
        lines: !ruby/range
          begin: 0
          end: -1
          excl: false
        capture: !ruby/regexp /^(.*)$/i
  YML
  enum = table.transform(@simple_divider_example)
  assert_instance_of Enumerator, enum
  results = enum.map { |_klass, fields, _index| fields[:rawtext]['one'] }

  assert_equal 3, results.count
  assert results.first.start_with?('222')
  assert results.last.start_with?('444')
end
247
+
248
# With both start_in_a_record and end_in_a_record set, the divider example is expected to
# yield all four records, from '111' through '444'.
def test_should_return_four_results_with_start_in_a_record_and_end_in_a_record
  # Nested keys are indented under their parent so the cell definition belongs to the
  # column entry rather than to the table object.
  table = YAML.load <<-YML.strip_heredoc
    --- !ruby/object:NdrImport::NonTabular::Table
    start_line_pattern: !ruby/regexp /^-{6}$/
    start_in_a_record: true
    end_in_a_record: true
    klass: SomeTestKlass
    columns:
    - column: one
      non_tabular_cell:
        lines: !ruby/range
          begin: 0
          end: -1
          excl: false
        capture: !ruby/regexp /^(.*)$/i
  YML
  enum = table.transform(@simple_divider_example)
  assert_instance_of Enumerator, enum
  results = enum.map { |_klass, fields, _index| fields[:rawtext]['one'] }

  assert_equal 4, results.count
  assert results.first.start_with?('111')
  assert results.last.start_with?('444')
end
272
+
273
# With both start_in_a_record and end_in_a_record set, input that contains no divider line
# is expected to yield a single record starting '111'.
def test_should_return_one_results_with_start_in_a_record_and_end_in_a_record
  # Nested keys are indented under their parent so the cell definition belongs to the
  # column entry rather than to the table object.
  table = YAML.load <<-YML.strip_heredoc
    --- !ruby/object:NdrImport::NonTabular::Table
    start_line_pattern: !ruby/regexp /^-{6}$/
    start_in_a_record: true
    end_in_a_record: true
    klass: SomeTestKlass
    columns:
    - column: one
      non_tabular_cell:
        lines: !ruby/range
          begin: 0
          end: -1
          excl: false
        capture: !ruby/regexp /^(.*)$/i
  YML
  enum = table.transform(@no_divider_example)
  assert_instance_of Enumerator, enum
  results = enum.map { |_klass, fields, _index| fields[:rawtext]['one'] }

  assert_equal 1, results.count
  assert results.first.start_with?('111')
end
296
+
297
# With separate start and end line patterns, the START/END example is expected to yield
# four records ('111' through '444') that include the "captured" line but none of the
# "never captured" filler lines.
def test_should_return_four_results_with_start_and_end_dividers
  # Nested keys are indented under their parent so the cell definition belongs to the
  # column entry rather than to the table object.
  table = YAML.load <<-YML.strip_heredoc
    --- !ruby/object:NdrImport::NonTabular::Table
    start_line_pattern: !ruby/regexp /^----- START -----$/
    end_line_pattern: !ruby/regexp /^------ END ------$/
    klass: SomeTestKlass
    columns:
    - column: one
      non_tabular_cell:
        lines: !ruby/range
          begin: 0
          end: -1
          excl: false
        capture: !ruby/regexp /^(.*)$/i
  YML
  enum = table.transform(@simple_start_and_end_divider_example)
  assert_instance_of Enumerator, enum

  results = enum.map { |_klass, fields, _index| fields[:rawtext]['one'] }

  assert_equal 4, results.count
  assert results.first.start_with?('111')
  assert results.last.start_with?('444')

  assert results.any? { |result| result =~ /This is captured/ }
  refute results.any? { |result| result =~ /This is never captured/ }
end
324
+
325
# End-to-end capture test: one record is transformed with six column mappings that select
# lines by index, by integer range and by RegexpRange (inclusive, exclusive and open-ended),
# and the yielded [klass, fields, index] triple is compared with the expected values.
def test_should_capture
  # Each column's `non_tabular_cell` (and the range attributes under `lines`) must be
  # indented beneath its parent; YAML takes the nesting from indentation alone.
  # NOTE: this heredoc interpolates, so `\\d` and `\\n` reach YAML as `\d` and `\n`.
  table = YAML.load <<-YML.strip_heredoc
    --- !ruby/object:NdrImport::NonTabular::Table
    start_line_pattern: !ruby/regexp /^-{6}$/
    klass: SomeTestKlass
    columns:
    - standard_mapping: nhsnumber
      non_tabular_cell:
        lines: 0
        capture: !ruby/regexp /^(\\d*)$/i
    - column: address
      non_tabular_cell:
        lines: !ruby/range
          begin: 1
          end: 5
          excl: false
        capture: !ruby/regexp /^.{50}(.*)$/i
        join: ", "
    - standard_mapping: postcode
      non_tabular_cell:
        lines: 6
        capture: !ruby/regexp /^.{50}(.*)$/i
    - column: capture_inclusive
      non_tabular_cell:
        lines: !ruby/object:RegexpRange
          begin: !ruby/regexp /^CAPTURE INCLUSIVE$/
          end: !ruby/regexp /^Capture me.$/i
          excl: false
        capture: !ruby/regexp /^(.*)$/i
        join: "\\n"
    - column: capture_exclusive
      non_tabular_cell:
        lines: !ruby/object:RegexpRange
          begin: !ruby/regexp /^CAPTURE EXCLUSIVE$/
          end: !ruby/regexp /^Do NOT capture me.$/i
          excl: true
        capture: !ruby/regexp /^(.*)$/i
        join: "\\n"
    - column: capture_to_end
      non_tabular_cell:
        lines: !ruby/object:RegexpRange
          begin: !ruby/regexp /^CAPTURE TO END$/
          end: -1
          excl: false
        capture: !ruby/regexp /^(.*)$/i
        join: "\\n"
  YML

  # The address and postcode captures are /^.{50}(.*)$/, so every line they read must carry
  # 50 leading characters. The first address line gets them from the ruler; the rest are
  # padded with spaces. Building the padding avoids miscounting literal whitespace.
  pad = ' ' * 50
  capture_example = <<-STR
This is never captured
------
1111111111
<----------------- 50 characters ---------------->Unit C, Magog Court
#{pad}Shelford Bottom
#{pad}Hinton Way
#{pad}Cambridge

#{pad}CB22 3AD

CAPTURE INCLUSIVE
Lorem ipsum dolor sit amet,
consectetur adipisicing elit,
Capture me.

CAPTURE EXCLUSIVE
Ut enim ad minim veniam, quis nostrud exercitation.
Do NOT capture me.

CAPTURE TO END
Lorem ipsum dolor sit amet, consectetur adipisicing elit.
Ut enim ad minim veniam, quis nostrud exercitation ullamco.
Duis aute irure dolor in reprehenderit in voluptate velit.
Excepteur sint occaecat cupidatat non proident, sunt in culpa.
------
This is never captured
  STR
  enum = table.transform(capture_example.split(/\n/).map)
  assert_instance_of Enumerator, enum

  output = enum.map { |klass, fields, index| [klass, fields, index] }

  expected_output = [
    [
      'SomeTestKlass', {
        'nhsnumber' => '1111111111',
        'postcode' => 'CB223AD',
        :rawtext => {
          'nhsnumber' => '1111111111',
          'address' => 'Unit C, Magog Court, Shelford Bottom, Hinton Way, Cambridge',
          'postcode' => 'CB22 3AD',
          'capture_inclusive' => "CAPTURE INCLUSIVE\nLorem ipsum dolor sit amet,\n" \
                                 "consectetur adipisicing elit,\nCapture me.",
          'capture_exclusive' => "CAPTURE EXCLUSIVE\n" \
                                 'Ut enim ad minim veniam, quis nostrud exercitation.',
          'capture_to_end' => "CAPTURE TO END\n" \
                              "Lorem ipsum dolor sit amet, consectetur adipisicing elit.\n" \
                              "Ut enim ad minim veniam, quis nostrud exercitation ullamco.\n" \
                              "Duis aute irure dolor in reprehenderit in voluptate velit.\n" \
                              'Excepteur sint occaecat cupidatat non proident, sunt in culpa.'
        }
      },
      0
    ]
  ]
  assert_equal expected_output.sort, output.sort
  # The example has 26 lines, so the last one sits at zero-based position 25.
  assert_equal 25, table.non_tabular_lines.last.absolute_line_number
end
434
+
435
# Feeds three records through `transform`, the middle one containing the raw byte \xBE, and
# checks that no record is lost, that the stray byte comes back as one character occupying
# two bytes, and that every captured value is valid UTF-8.
def test_handles_non_utf8_characters
  # Fixture lines sit at column 0 so that /^111$/ can match the start lines.
  mixed_encoding_example = <<-STR.each_line
111
Lorem ipsum dolor sit amet.
------
111
Lorem ipsum dolor\xBE sit amet.
------
111
Lorem ipsum dolor sit amet.
------
  STR

  # Nested keys are indented under their parent so the cell definition belongs to the
  # column entry rather than to the table object.
  table = YAML.load <<-YML.strip_heredoc
    --- !ruby/object:NdrImport::NonTabular::Table
    start_line_pattern: !ruby/regexp /^111$/
    end_in_a_record: true
    klass: SomeTestKlass
    columns:
    - column: one
      non_tabular_cell:
        lines: !ruby/range
          begin: 0
          end: -1
          excl: true
        capture: !ruby/regexp /^(.*)$/i
  YML

  enum = table.transform(mixed_encoding_example)
  assert_instance_of Enumerator, enum
  results = enum.map { |_klass, fields, _index| fields[:rawtext]['one'] }

  assert_equal 3, results.count, 'records were lost'

  assert_equal [27, 28, 27], results.map { |row| row.chars.to_a.length }
  assert_equal [27, 29, 27], results.map { |row| row.bytes.to_a.length }

  # Each row is a String (it answers chars/bytes above), so validate the whole value.
  # `row.first` would only look at its first character, which is never the coerced byte.
  results.each do |row|
    assert row.valid_encoding?
    assert_equal Encoding.find('UTF-8'), row.encoding
  end
end
477
+
478
# Feeds three records through `transform`, the middle one containing the raw byte \x8D, and
# expects UTF8Encoding::UTF8CoercionError once the enumerator is drained.
def test_should_not_allow_junk_bytes
  # Fixture lines sit at column 0 so that /^111$/ can match the start lines.
  junk = <<-STR.each_line
111
Lorem ipsum dolor sit amet.
------
111
Lorem ipsum dolor\x8D sit amet.
------
111
Lorem ipsum dolor sit amet.
------
  STR

  # Nested keys are indented under their parent so the cell definition belongs to the
  # column entry rather than to the table object.
  table = YAML.load <<-YML.strip_heredoc
    --- !ruby/object:NdrImport::NonTabular::Table
    start_line_pattern: !ruby/regexp /^111$/
    end_in_a_record: true
    klass: SomeTestKlass
    columns:
    - column: one
      non_tabular_cell:
        lines: !ruby/range
          begin: 0
          end: -1
          excl: true
        capture: !ruby/regexp /^(.*)$/i
  YML

  # `to_a` forces the enumerator so the error surfaces inside the assertion block.
  assert_raises(UTF8Encoding::UTF8CoercionError) do
    table.transform(junk).to_a
  end
end
510
+ end