ndr_import 8.5.0 → 8.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94):
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +7 -0
  3. data/Gemfile +0 -3
  4. data/README.md +6 -0
  5. data/bin/console +14 -0
  6. data/bin/setup +8 -0
  7. data/code_safety.yml +27 -11
  8. data/exe/pdf_acro_form_to_yaml +23 -0
  9. data/exe/pdf_to_text +28 -0
  10. data/exe/word_to_text +26 -0
  11. data/gemfiles/Gemfile.rails52 +0 -3
  12. data/gemfiles/Gemfile.rails60 +5 -0
  13. data/lib/ndr_import/version.rb +1 -1
  14. data/ndr_import.gemspec +9 -7
  15. metadata +23 -164
  16. data/gemfiles/Gemfile.rails50 +0 -8
  17. data/gemfiles/Gemfile.rails51 +0 -9
  18. data/test/file/acro_form_test.rb +0 -39
  19. data/test/file/base_test.rb +0 -54
  20. data/test/file/delimited_test.rb +0 -233
  21. data/test/file/docx_test.rb +0 -53
  22. data/test/file/excel_test.rb +0 -124
  23. data/test/file/pdf_test.rb +0 -36
  24. data/test/file/registry_test.rb +0 -62
  25. data/test/file/seven_zip_test.rb +0 -59
  26. data/test/file/text_test.rb +0 -92
  27. data/test/file/word_test.rb +0 -35
  28. data/test/file/xml_test.rb +0 -21
  29. data/test/file/zip_test.rb +0 -47
  30. data/test/fixed_width/table_test.rb +0 -35
  31. data/test/helpers/file/delimited_test.rb +0 -105
  32. data/test/helpers/file/excel_test.rb +0 -82
  33. data/test/helpers/file/pdf_test.rb +0 -27
  34. data/test/helpers/file/word_test.rb +0 -26
  35. data/test/helpers/file/xml_test.rb +0 -131
  36. data/test/helpers/file/zip_test.rb +0 -75
  37. data/test/mapper_test.rb +0 -676
  38. data/test/non_tabular/mapping_test.rb +0 -36
  39. data/test/non_tabular/table_test.rb +0 -590
  40. data/test/non_tabular_file_helper_test.rb +0 -501
  41. data/test/pdf_form/table_test.rb +0 -119
  42. data/test/readme_test.rb +0 -53
  43. data/test/resources/acro_form.pdf +0 -0
  44. data/test/resources/blank_tab_test.xlsx +0 -0
  45. data/test/resources/bomd.csv +0 -3
  46. data/test/resources/broken.csv +0 -3
  47. data/test/resources/filesystem_paths.yml +0 -26
  48. data/test/resources/flat_file.pdf +0 -0
  49. data/test/resources/flat_file.txt +0 -27
  50. data/test/resources/flat_file.yml +0 -20
  51. data/test/resources/hello_utf16be.txt +0 -0
  52. data/test/resources/hello_utf16le.txt +0 -0
  53. data/test/resources/hello_utf8.txt +0 -2
  54. data/test/resources/hello_windows.txt +0 -2
  55. data/test/resources/hello_world.doc +0 -0
  56. data/test/resources/hello_world.docx +0 -0
  57. data/test/resources/hello_world.pdf +0 -0
  58. data/test/resources/hello_world.txt +0 -2
  59. data/test/resources/high_ascii_delimited.txt +0 -2
  60. data/test/resources/high_ascii_delimited_example_two.txt +0 -3
  61. data/test/resources/malformed.csv +0 -3
  62. data/test/resources/malformed.xml +0 -6
  63. data/test/resources/malformed_pipe.csv +0 -3
  64. data/test/resources/normal.7z +0 -0
  65. data/test/resources/normal.csv +0 -3
  66. data/test/resources/normal.csv.zip +0 -0
  67. data/test/resources/normal_pipe.csv +0 -3
  68. data/test/resources/normal_thorn.csv +0 -3
  69. data/test/resources/not_a_pdf.pdf +0 -0
  70. data/test/resources/not_a_word_file.doc +0 -0
  71. data/test/resources/not_a_word_file.docx +0 -0
  72. data/test/resources/not_sign_delimited.txt +0 -3
  73. data/test/resources/password_protected_hello_world.docx +0 -0
  74. data/test/resources/password_protected_sample_xlsx.xlsx +0 -0
  75. data/test/resources/sample.xml +0 -34
  76. data/test/resources/sample_xls.xls +0 -0
  77. data/test/resources/sample_xlsx.xlsx +0 -0
  78. data/test/resources/sheet_streaming.xls +0 -0
  79. data/test/resources/sheet_streaming.xlsx +0 -0
  80. data/test/resources/standard_mappings.yml +0 -39
  81. data/test/resources/txt_file_xls_extension.xls +0 -1
  82. data/test/resources/txt_file_xlsx_extension.xlsx +0 -1
  83. data/test/resources/utf-16be_xml.xml +0 -0
  84. data/test/resources/utf-16be_xml_with_declaration.xml +0 -0
  85. data/test/resources/utf-16le_xml.xml +0 -0
  86. data/test/resources/utf-8_xml.xml +0 -9
  87. data/test/resources/windows-1252_xml.xml +0 -9
  88. data/test/resources/windows.csv +0 -5
  89. data/test/resources/xlsx_file_xls_extension.xls +0 -0
  90. data/test/standard_mappings_test.rb +0 -22
  91. data/test/table_test.rb +0 -545
  92. data/test/test_helper.rb +0 -35
  93. data/test/universal_importer_helper_test.rb +0 -86
  94. data/test/xml/table_test.rb +0 -90
@@ -1,36 +0,0 @@
1
- require 'test_helper'
2
-
3
- # This tests the NdrImport::NonTabular::Mapping mapping class
4
- class MappingTestTest < ActiveSupport::TestCase
5
- def test_should_raise_error_with_no_non_tabular_row
6
- assert_raise NdrImport::MappingError do
7
- NdrImport::NonTabular::Mapping.new(
8
- 'columns' => [{ 'column' => 'one' }]
9
- )
10
- end
11
- end
12
-
13
- def test_should_raise_error_with_no_non_tabular_row_start_line_pattern
14
- assert_raise NdrImport::MappingError do
15
- NdrImport::NonTabular::Mapping.new(
16
- 'non_tabular_row' => nil,
17
- 'columns' => [{ 'column' => 'one' }]
18
- )
19
- end
20
-
21
- assert_raise NdrImport::MappingError do
22
- NdrImport::NonTabular::Mapping.new(
23
- 'non_tabular_row' => { 'start_line_pattern' => nil },
24
- 'columns' => [{ 'column' => 'one' }]
25
- )
26
- end
27
- end
28
-
29
- def test_should_initialize_with_non_tabular_row
30
- mapping = NdrImport::NonTabular::Mapping.new(
31
- 'non_tabular_row' => { 'start_line_pattern' => /\A-*\z/ },
32
- 'columns' => [{ 'column' => 'one' }]
33
- )
34
- assert_equal(/\A-*\z/, mapping.start_line_pattern)
35
- end
36
- end
@@ -1,590 +0,0 @@
1
- require 'test_helper'
2
-
3
- # This tests the NdrImport::NonTabular::Table mapping class
4
- class TableTest < ActiveSupport::TestCase
5
- def setup
6
- @simple_divider_example = <<-STR.split(/\n/).map
7
- 111
8
- Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt.
9
- ------
10
- 222
11
- Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo.
12
- ------
13
- 333
14
- Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla.
15
- ------
16
- 444
17
- Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim.
18
- STR
19
-
20
- @no_divider_example = <<-STR.split(/\n/).map
21
- 111
22
- Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt.
23
- STR
24
-
25
- @simple_start_and_end_divider_example = <<-STR.split(/\n/).map
26
- ----- START -----
27
- 111
28
- Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt.
29
- ------ END ------
30
- This is never captured
31
- ----- START -----
32
- 222
33
- Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo.
34
- ------ END ------
35
- This is never captured
36
- ----- START -----
37
- 333
38
- Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla.
39
- ------ END ------
40
- This is never captured
41
- ----- START -----
42
- 444
43
- This is captured
44
- Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim.
45
- ------ END ------
46
- STR
47
- end
48
-
49
- def test_all_valid_options
50
- valid_options = %w[
51
- canonical_name capture_end_line capture_start_line columns end_in_a_record end_line_pattern
52
- filename_pattern file_password format klass remove_lines row_identifier start_in_a_record
53
- start_line_pattern
54
- ]
55
- assert_equal valid_options.sort,
56
- NdrImport::NonTabular::Table.all_valid_options.sort
57
- end
58
-
59
- def test_should_raise_error_with_no_start_line_pattern
60
- assert_raise NdrImport::MappingError do
61
- NdrImport::NonTabular::Table.new(
62
- 'columns' => [{ 'column' => 'one' }]
63
- )
64
- end
65
-
66
- assert_raise NdrImport::MappingError do
67
- NdrImport::NonTabular::Table.new(
68
- 'start_line_pattern' => nil,
69
- 'columns' => [{ 'column' => 'one' }]
70
- )
71
- end
72
- end
73
-
74
- def test_should_initialize_with_non_tabular_row
75
- table = NdrImport::NonTabular::Table.new(
76
- 'start_line_pattern' => /\A-*\z/,
77
- 'columns' => [{ 'column' => 'one' }]
78
- )
79
- assert_equal(/\A-*\z/, table.start_line_pattern)
80
- end
81
-
82
- def test_should_test_flat_file_txt
83
- table = YAML.load_file(SafePath.new('permanent_test_files').join('flat_file.yml'))
84
- assert table.is_a?(NdrImport::NonTabular::Table)
85
- filename = SafePath.new('permanent_test_files').join('flat_file.txt')
86
- enum = table.transform(File.new(filename).each)
87
- # puts enum.to_a.inspect
88
-
89
- results = []
90
- enum.each do |_klass, fields, _index|
91
- results << fields[:rawtext]['one']
92
- end
93
-
94
- assert_equal 4, results.count
95
- assert results.first.start_with?('1')
96
- assert results.last.start_with?('4')
97
-
98
- assert results.any? { |result| result =~ /This is captured/ }
99
- refute results.any? { |result| result =~ /This is never captured/ }
100
- refute results.any? { |result| result =~ /== Page/ }
101
- end
102
-
103
- def test_should_raise_error_with_no_column_non_tabular_cell
104
- table = YAML.load <<-YML.strip_heredoc
105
- --- !ruby/object:NdrImport::NonTabular::Table
106
- start_line_pattern: !ruby/regexp /^-{6}$/
107
- klass: SomeTestKlass
108
- columns:
109
- - column: one
110
- YML
111
- assert_raise NdrImport::MappingError do
112
- table.transform(@simple_divider_example).to_a
113
- end
114
- end
115
-
116
- def test_should_raise_error_with_no_column_non_tabular_cell_lines
117
- table = YAML.load <<-YML.strip_heredoc
118
- --- !ruby/object:NdrImport::NonTabular::Table
119
- start_line_pattern: !ruby/regexp /^-{6}$/
120
- klass: SomeTestKlass
121
- columns:
122
- - column: one
123
- non_tabular_cell:
124
- YML
125
- assert_raise NdrImport::MappingError do
126
- table.transform(@simple_divider_example).to_a
127
- end
128
-
129
- table = YAML.load <<-YML.strip_heredoc
130
- --- !ruby/object:NdrImport::NonTabular::Table
131
- start_line_pattern: !ruby/regexp /^-{6}$/
132
- klass: SomeTestKlass
133
- columns:
134
- - column: one
135
- non_tabular_cell:
136
- lines:
137
- YML
138
- assert_raise NdrImport::MappingError do
139
- table.transform(@simple_divider_example).to_a
140
- end
141
- end
142
-
143
- def test_should_raise_error_with_no_column_non_tabular_cell_capture
144
- table = YAML.load <<-YML.strip_heredoc
145
- --- !ruby/object:NdrImport::NonTabular::Table
146
- start_line_pattern: !ruby/regexp /^-{6}$/
147
- klass: SomeTestKlass
148
- columns:
149
- - column: one
150
- non_tabular_cell:
151
- lines: !ruby/range
152
- begin: 0
153
- end: -1
154
- excl: false
155
- YML
156
- assert_raise NdrImport::MappingError do
157
- table.transform(@simple_divider_example).to_a
158
- end
159
-
160
- table = YAML.load <<-YML.strip_heredoc
161
- --- !ruby/object:NdrImport::NonTabular::Table
162
- start_line_pattern: !ruby/regexp /^-{6}$/
163
- klass: SomeTestKlass
164
- columns:
165
- - column: one
166
- non_tabular_cell:
167
- lines: !ruby/range
168
- begin: 0
169
- end: -1
170
- excl: false
171
- capture:
172
- YML
173
- assert_raise NdrImport::MappingError do
174
- table.transform(@simple_divider_example).to_a
175
- end
176
- end
177
-
178
- def test_should_only_return_two_results_with_no_start_in_a_record_or_end_in_a_record
179
- table = YAML.load <<-YML.strip_heredoc
180
- --- !ruby/object:NdrImport::NonTabular::Table
181
- start_line_pattern: !ruby/regexp /^-{6}$/
182
- klass: SomeTestKlass
183
- columns:
184
- - column: one
185
- non_tabular_cell:
186
- lines: !ruby/range
187
- begin: 0
188
- end: -1
189
- excl: false
190
- capture: !ruby/regexp /^(.*)$/i
191
- YML
192
- enum = table.transform(@simple_divider_example)
193
- assert_instance_of Enumerator, enum
194
- results = enum.map { |_klass, fields, _index| fields[:rawtext]['one'] }
195
-
196
- assert_equal 2, results.count
197
- assert results.first.start_with?('222')
198
- assert results.last.start_with?('333')
199
- end
200
-
201
- def test_should_return_three_results_with_start_in_a_record
202
- table = YAML.load <<-YML.strip_heredoc
203
- --- !ruby/object:NdrImport::NonTabular::Table
204
- start_line_pattern: !ruby/regexp /^-{6}$/
205
- start_in_a_record: true
206
- klass: SomeTestKlass
207
- columns:
208
- - column: one
209
- non_tabular_cell:
210
- lines: !ruby/range
211
- begin: 0
212
- end: -1
213
- excl: false
214
- capture: !ruby/regexp /^(.*)$/i
215
- YML
216
- enum = table.transform(@simple_divider_example)
217
- assert_instance_of Enumerator, enum
218
- results = enum.map { |_klass, fields, _index| fields[:rawtext]['one'] }
219
-
220
- assert_equal 3, results.count
221
- assert results.first.start_with?('111')
222
- assert results.last.start_with?('333')
223
- end
224
-
225
- def test_should_return_three_results_with_end_in_a_record
226
- table = YAML.load <<-YML.strip_heredoc
227
- --- !ruby/object:NdrImport::NonTabular::Table
228
- start_line_pattern: !ruby/regexp /^-{6}$/
229
- end_in_a_record: true
230
- klass: SomeTestKlass
231
- columns:
232
- - column: one
233
- non_tabular_cell:
234
- lines: !ruby/range
235
- begin: 0
236
- end: -1
237
- excl: false
238
- capture: !ruby/regexp /^(.*)$/i
239
- YML
240
- enum = table.transform(@simple_divider_example)
241
- assert_instance_of Enumerator, enum
242
- results = enum.map { |_klass, fields, _index| fields[:rawtext]['one'] }
243
-
244
- assert_equal 3, results.count
245
- assert results.first.start_with?('222')
246
- assert results.last.start_with?('444')
247
- end
248
-
249
- def test_should_return_four_results_with_start_in_a_record_and_end_in_a_record
250
- table = YAML.load <<-YML.strip_heredoc
251
- --- !ruby/object:NdrImport::NonTabular::Table
252
- start_line_pattern: !ruby/regexp /^-{6}$/
253
- start_in_a_record: true
254
- end_in_a_record: true
255
- klass: SomeTestKlass
256
- columns:
257
- - column: one
258
- non_tabular_cell:
259
- lines: !ruby/range
260
- begin: 0
261
- end: -1
262
- excl: false
263
- capture: !ruby/regexp /^(.*)$/i
264
- YML
265
- enum = table.transform(@simple_divider_example)
266
- assert_instance_of Enumerator, enum
267
- results = enum.map { |_klass, fields, _index| fields[:rawtext]['one'] }
268
-
269
- assert_equal 4, results.count
270
- assert results.first.start_with?('111')
271
- assert results.last.start_with?('444')
272
- end
273
-
274
- def test_should_return_one_results_with_start_in_a_record_and_end_in_a_record
275
- table = YAML.load <<-YML.strip_heredoc
276
- --- !ruby/object:NdrImport::NonTabular::Table
277
- start_line_pattern: !ruby/regexp /^-{6}$/
278
- start_in_a_record: true
279
- end_in_a_record: true
280
- klass: SomeTestKlass
281
- columns:
282
- - column: one
283
- non_tabular_cell:
284
- lines: !ruby/range
285
- begin: 0
286
- end: -1
287
- excl: false
288
- capture: !ruby/regexp /^(.*)$/i
289
- YML
290
- enum = table.transform(@no_divider_example)
291
- assert_instance_of Enumerator, enum
292
- results = enum.map { |_klass, fields, _index| fields[:rawtext]['one'] }
293
-
294
- assert_equal 1, results.count
295
- assert results.first.start_with?('111')
296
- end
297
-
298
- def test_should_return_four_results_with_start_and_end_dividers
299
- table = YAML.load <<-YML.strip_heredoc
300
- --- !ruby/object:NdrImport::NonTabular::Table
301
- start_line_pattern: !ruby/regexp /^----- START -----$/
302
- end_line_pattern: !ruby/regexp /^------ END ------$/
303
- klass: SomeTestKlass
304
- columns:
305
- - column: one
306
- non_tabular_cell:
307
- lines: !ruby/range
308
- begin: 0
309
- end: -1
310
- excl: false
311
- capture: !ruby/regexp /^(.*)$/i
312
- YML
313
- enum = table.transform(@simple_start_and_end_divider_example)
314
- assert_instance_of Enumerator, enum
315
-
316
- results = enum.map { |_klass, fields, _index| fields[:rawtext]['one'] }
317
-
318
- assert_equal 4, results.count
319
- assert results.first.start_with?('111')
320
- assert results.last.start_with?('444')
321
-
322
- assert results.any? { |result| result =~ /This is captured/ }
323
- refute results.any? { |result| result =~ /This is never captured/ }
324
- end
325
-
326
- def test_should_capture_end_line
327
- data = <<~STR.each_line
328
- 111
329
- Lorem ipsum dolor sit amet.
330
- CAPTURE THIS CODE ABC
331
- 111
332
- Lorem ipsum dolor sit amet.
333
- CAPTURE THIS CODE XYZ
334
- 111
335
- Lorem ipsum dolor sit amet.
336
- CAPTURE THIS CODE 123
337
- STR
338
-
339
- table = YAML.load <<-YML.strip_heredoc
340
- --- !ruby/object:NdrImport::NonTabular::Table
341
- start_line_pattern: !ruby/regexp /\\A111\\z/
342
- end_line_pattern: !ruby/regexp /\\ACAPTURE THIS CODE/
343
- capture_start_line: true
344
- capture_end_line: true
345
- klass: SomeTestKlass
346
- columns:
347
- - column: one
348
- non_tabular_cell:
349
- lines: -1
350
- capture: !ruby/regexp /\\A(.*)\\z/i
351
- YML
352
- enum = table.transform(data)
353
- assert_instance_of Enumerator, enum
354
-
355
- results = enum.map { |_klass, fields, _index| fields[:rawtext]['one'] }
356
-
357
- assert_equal 3, results.count
358
- assert_equal 'CAPTURE THIS CODE ABC', results.first
359
- end
360
-
361
- def test_should_capture
362
- table = YAML.load <<-YML.strip_heredoc
363
- --- !ruby/object:NdrImport::NonTabular::Table
364
- start_line_pattern: !ruby/regexp /^-{6}$/
365
- klass: SomeTestKlass
366
- columns:
367
- - standard_mapping: nhsnumber
368
- non_tabular_cell:
369
- lines: 0
370
- capture: !ruby/regexp /^(\\d*)$/i
371
- - column: address
372
- non_tabular_cell:
373
- lines: !ruby/range
374
- begin: 1
375
- end: 5
376
- excl: false
377
- capture: !ruby/regexp /^.{50}(.*)$/i
378
- join: ", "
379
- - standard_mapping: postcode
380
- non_tabular_cell:
381
- lines: 6
382
- capture: !ruby/regexp /^.{50}(.*)$/i
383
- - column: capture_inclusive
384
- non_tabular_cell:
385
- lines: !ruby/object:RegexpRange
386
- begin: !ruby/regexp /^CAPTURE INCLUSIVE$/
387
- end: !ruby/regexp /^Capture me.$/i
388
- excl: false
389
- capture: !ruby/regexp /^(.*)$/i
390
- join: "\\n"
391
- - column: capture_exclusive
392
- non_tabular_cell:
393
- lines: !ruby/object:RegexpRange
394
- begin: !ruby/regexp /^CAPTURE EXCLUSIVE$/
395
- end: !ruby/regexp /^Do NOT capture me.$/i
396
- excl: true
397
- capture: !ruby/regexp /^(.*)$/i
398
- join: "\\n"
399
- - column: capture_to_end
400
- non_tabular_cell:
401
- lines: !ruby/object:RegexpRange
402
- begin: !ruby/regexp /^CAPTURE TO END$/
403
- end: -1
404
- excl: false
405
- capture: !ruby/regexp /^(.*)$/i
406
- join: "\\n"
407
- YML
408
- capture_example = <<-STR
409
- This is never captured
410
- ------
411
- 1111111111
412
- <----------------- 50 characters ---------------->Unit C, Magog Court
413
- Shelford Bottom
414
- Hinton Way
415
- Cambridge
416
-
417
- CB22 3AD
418
-
419
- CAPTURE INCLUSIVE
420
- Lorem ipsum dolor sit amet,
421
- consectetur adipisicing elit,
422
- Capture me.
423
-
424
- CAPTURE EXCLUSIVE
425
- Ut enim ad minim veniam, quis nostrud exercitation.
426
- Do NOT capture me.
427
-
428
- CAPTURE TO END
429
- Lorem ipsum dolor sit amet, consectetur adipisicing elit.
430
- Ut enim ad minim veniam, quis nostrud exercitation ullamco.
431
- Duis aute irure dolor in reprehenderit in voluptate velit.
432
- Excepteur sint occaecat cupidatat non proident, sunt in culpa.
433
- ------
434
- This is never captured
435
- STR
436
- enum = table.transform(capture_example.split(/\n/).map)
437
- assert_instance_of Enumerator, enum
438
-
439
- output = []
440
- enum.each do |klass, fields, index|
441
- output << [klass, fields, index]
442
- end
443
-
444
- expected_output = [
445
- [
446
- 'SomeTestKlass', {
447
- 'nhsnumber' => '1111111111',
448
- 'postcode' => 'CB223AD',
449
- :rawtext => {
450
- 'nhsnumber' => '1111111111',
451
- 'address' => 'Unit C, Magog Court, Shelford Bottom, Hinton Way, Cambridge',
452
- 'postcode' => 'CB22 3AD',
453
- 'capture_inclusive' => "CAPTURE INCLUSIVE\nLorem ipsum dolor sit amet,\n" \
454
- "consectetur adipisicing elit,\nCapture me.",
455
- 'capture_exclusive' => "CAPTURE EXCLUSIVE\n" \
456
- 'Ut enim ad minim veniam, quis nostrud exercitation.',
457
- 'capture_to_end' => "CAPTURE TO END\n" \
458
- "Lorem ipsum dolor sit amet, consectetur adipisicing elit.\n" \
459
- "Ut enim ad minim veniam, quis nostrud exercitation ullamco.\n" \
460
- "Duis aute irure dolor in reprehenderit in voluptate velit.\n" \
461
- 'Excepteur sint occaecat cupidatat non proident, sunt in culpa.'
462
- }
463
- },
464
- 0
465
- ]
466
- ]
467
- assert_equal expected_output.sort, output.sort
468
- assert_equal 25, table.non_tabular_lines.last.absolute_line_number
469
- end
470
-
471
- def test_handles_non_utf8_characters
472
- mixed_encoding_example = <<-STR.each_line
473
- 111
474
- Lorem ipsum dolor sit amet.
475
- ------
476
- 111
477
- Lorem ipsum dolor\xBE sit amet.
478
- ------
479
- 111
480
- Lorem ipsum dolor sit amet.
481
- ------
482
- STR
483
-
484
- table = YAML.load <<-YML.strip_heredoc
485
- --- !ruby/object:NdrImport::NonTabular::Table
486
- start_line_pattern: !ruby/regexp /^111$/
487
- end_in_a_record: true
488
- klass: SomeTestKlass
489
- columns:
490
- - column: one
491
- non_tabular_cell:
492
- lines: !ruby/range
493
- begin: 0
494
- end: -1
495
- excl: true
496
- capture: !ruby/regexp /^(.*)$/i
497
- YML
498
-
499
- enum = table.transform(mixed_encoding_example)
500
- assert_instance_of Enumerator, enum
501
- results = enum.map { |_klass, fields, _index| fields[:rawtext]['one'] }
502
-
503
- assert_equal 3, results.count, 'records were lost'
504
-
505
- assert_equal [27, 28, 27], results.map { |row| row.chars.to_a.length }
506
- assert_equal [27, 29, 27], results.map { |row| row.bytes.to_a.length }
507
-
508
- results.each do |row|
509
- assert row.first.valid_encoding?
510
- assert_equal Encoding.find('UTF-8'), row.first.encoding
511
- end
512
- end
513
-
514
- def test_should_not_allow_junk_bytes
515
- junk = <<-STR.each_line
516
- 111
517
- Lorem ipsum dolor sit amet.
518
- ------
519
- 111
520
- Lorem ipsum dolor\x8D sit amet.
521
- ------
522
- 111
523
- Lorem ipsum dolor sit amet.
524
- ------
525
- STR
526
-
527
- table = YAML.load <<-YML.strip_heredoc
528
- --- !ruby/object:NdrImport::NonTabular::Table
529
- start_line_pattern: !ruby/regexp /^111$/
530
- end_in_a_record: true
531
- klass: SomeTestKlass
532
- columns:
533
- - column: one
534
- non_tabular_cell:
535
- lines: !ruby/range
536
- begin: 0
537
- end: -1
538
- excl: true
539
- capture: !ruby/regexp /^(.*)$/i
540
- YML
541
-
542
- assert_raises(UTF8Encoding::UTF8CoercionError) do
543
- table.transform(junk).to_a
544
- end
545
- end
546
-
547
- def test_should_strip_captured_rawtext
548
- unwanted_white_space = <<-STR.each_line
549
- 111
550
- Trailing whitespace end_of_line
551
- ------
552
- 111
553
- Leading whitespaceend_of_line
554
- ------
555
- 111
556
- Leading and trailing whitespace end_of_line
557
- ------
558
- 111
559
- Should not match this
560
- ------
561
- STR
562
-
563
- table = YAML.load <<-YML.strip_heredoc
564
- --- !ruby/object:NdrImport::NonTabular::Table
565
- start_line_pattern: !ruby/regexp /^111$/
566
- end_in_a_record: true
567
- klass: SomeTestKlass
568
- columns:
569
- - column: one
570
- non_tabular_cell:
571
- lines: 0
572
- capture: !ruby/regexp /^(.*)end_of_line$/i
573
- trim_rawtext: left
574
- YML
575
-
576
- enum = table.transform(unwanted_white_space)
577
- assert_instance_of Enumerator, enum
578
-
579
- output = []
580
- enum.each do |klass, fields, index|
581
- output << [klass, fields, index]
582
- end
583
-
584
- expected_rawtext_ouput = [{ 'one' => 'Trailing whitespace' },
585
- { 'one' => 'Leading whitespace' },
586
- { 'one' => 'Leading and trailing whitespace' },
587
- { 'one' => '' }]
588
- assert_equal expected_rawtext_ouput, (output.map { |row| row[1][:rawtext] })
589
- end
590
- end