ndr_import 8.5.0 → 8.5.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (94)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +7 -0
  3. data/Gemfile +0 -3
  4. data/README.md +6 -0
  5. data/bin/console +14 -0
  6. data/bin/setup +8 -0
  7. data/code_safety.yml +27 -11
  8. data/exe/pdf_acro_form_to_yaml +23 -0
  9. data/exe/pdf_to_text +28 -0
  10. data/exe/word_to_text +26 -0
  11. data/gemfiles/Gemfile.rails52 +0 -3
  12. data/gemfiles/Gemfile.rails60 +5 -0
  13. data/lib/ndr_import/version.rb +1 -1
  14. data/ndr_import.gemspec +9 -7
  15. metadata +23 -164
  16. data/gemfiles/Gemfile.rails50 +0 -8
  17. data/gemfiles/Gemfile.rails51 +0 -9
  18. data/test/file/acro_form_test.rb +0 -39
  19. data/test/file/base_test.rb +0 -54
  20. data/test/file/delimited_test.rb +0 -233
  21. data/test/file/docx_test.rb +0 -53
  22. data/test/file/excel_test.rb +0 -124
  23. data/test/file/pdf_test.rb +0 -36
  24. data/test/file/registry_test.rb +0 -62
  25. data/test/file/seven_zip_test.rb +0 -59
  26. data/test/file/text_test.rb +0 -92
  27. data/test/file/word_test.rb +0 -35
  28. data/test/file/xml_test.rb +0 -21
  29. data/test/file/zip_test.rb +0 -47
  30. data/test/fixed_width/table_test.rb +0 -35
  31. data/test/helpers/file/delimited_test.rb +0 -105
  32. data/test/helpers/file/excel_test.rb +0 -82
  33. data/test/helpers/file/pdf_test.rb +0 -27
  34. data/test/helpers/file/word_test.rb +0 -26
  35. data/test/helpers/file/xml_test.rb +0 -131
  36. data/test/helpers/file/zip_test.rb +0 -75
  37. data/test/mapper_test.rb +0 -676
  38. data/test/non_tabular/mapping_test.rb +0 -36
  39. data/test/non_tabular/table_test.rb +0 -590
  40. data/test/non_tabular_file_helper_test.rb +0 -501
  41. data/test/pdf_form/table_test.rb +0 -119
  42. data/test/readme_test.rb +0 -53
  43. data/test/resources/acro_form.pdf +0 -0
  44. data/test/resources/blank_tab_test.xlsx +0 -0
  45. data/test/resources/bomd.csv +0 -3
  46. data/test/resources/broken.csv +0 -3
  47. data/test/resources/filesystem_paths.yml +0 -26
  48. data/test/resources/flat_file.pdf +0 -0
  49. data/test/resources/flat_file.txt +0 -27
  50. data/test/resources/flat_file.yml +0 -20
  51. data/test/resources/hello_utf16be.txt +0 -0
  52. data/test/resources/hello_utf16le.txt +0 -0
  53. data/test/resources/hello_utf8.txt +0 -2
  54. data/test/resources/hello_windows.txt +0 -2
  55. data/test/resources/hello_world.doc +0 -0
  56. data/test/resources/hello_world.docx +0 -0
  57. data/test/resources/hello_world.pdf +0 -0
  58. data/test/resources/hello_world.txt +0 -2
  59. data/test/resources/high_ascii_delimited.txt +0 -2
  60. data/test/resources/high_ascii_delimited_example_two.txt +0 -3
  61. data/test/resources/malformed.csv +0 -3
  62. data/test/resources/malformed.xml +0 -6
  63. data/test/resources/malformed_pipe.csv +0 -3
  64. data/test/resources/normal.7z +0 -0
  65. data/test/resources/normal.csv +0 -3
  66. data/test/resources/normal.csv.zip +0 -0
  67. data/test/resources/normal_pipe.csv +0 -3
  68. data/test/resources/normal_thorn.csv +0 -3
  69. data/test/resources/not_a_pdf.pdf +0 -0
  70. data/test/resources/not_a_word_file.doc +0 -0
  71. data/test/resources/not_a_word_file.docx +0 -0
  72. data/test/resources/not_sign_delimited.txt +0 -3
  73. data/test/resources/password_protected_hello_world.docx +0 -0
  74. data/test/resources/password_protected_sample_xlsx.xlsx +0 -0
  75. data/test/resources/sample.xml +0 -34
  76. data/test/resources/sample_xls.xls +0 -0
  77. data/test/resources/sample_xlsx.xlsx +0 -0
  78. data/test/resources/sheet_streaming.xls +0 -0
  79. data/test/resources/sheet_streaming.xlsx +0 -0
  80. data/test/resources/standard_mappings.yml +0 -39
  81. data/test/resources/txt_file_xls_extension.xls +0 -1
  82. data/test/resources/txt_file_xlsx_extension.xlsx +0 -1
  83. data/test/resources/utf-16be_xml.xml +0 -0
  84. data/test/resources/utf-16be_xml_with_declaration.xml +0 -0
  85. data/test/resources/utf-16le_xml.xml +0 -0
  86. data/test/resources/utf-8_xml.xml +0 -9
  87. data/test/resources/windows-1252_xml.xml +0 -9
  88. data/test/resources/windows.csv +0 -5
  89. data/test/resources/xlsx_file_xls_extension.xls +0 -0
  90. data/test/standard_mappings_test.rb +0 -22
  91. data/test/table_test.rb +0 -545
  92. data/test/test_helper.rb +0 -35
  93. data/test/universal_importer_helper_test.rb +0 -86
  94. data/test/xml/table_test.rb +0 -90
@@ -1,36 +0,0 @@
1
- require 'test_helper'
2
-
3
- # This tests the NdrImport::NonTabular::Mapping mapping class
4
- class MappingTestTest < ActiveSupport::TestCase
5
- def test_should_raise_error_with_no_non_tabular_row
6
- assert_raise NdrImport::MappingError do
7
- NdrImport::NonTabular::Mapping.new(
8
- 'columns' => [{ 'column' => 'one' }]
9
- )
10
- end
11
- end
12
-
13
- def test_should_raise_error_with_no_non_tabular_row_start_line_pattern
14
- assert_raise NdrImport::MappingError do
15
- NdrImport::NonTabular::Mapping.new(
16
- 'non_tabular_row' => nil,
17
- 'columns' => [{ 'column' => 'one' }]
18
- )
19
- end
20
-
21
- assert_raise NdrImport::MappingError do
22
- NdrImport::NonTabular::Mapping.new(
23
- 'non_tabular_row' => { 'start_line_pattern' => nil },
24
- 'columns' => [{ 'column' => 'one' }]
25
- )
26
- end
27
- end
28
-
29
- def test_should_initialize_with_non_tabular_row
30
- mapping = NdrImport::NonTabular::Mapping.new(
31
- 'non_tabular_row' => { 'start_line_pattern' => /\A-*\z/ },
32
- 'columns' => [{ 'column' => 'one' }]
33
- )
34
- assert_equal(/\A-*\z/, mapping.start_line_pattern)
35
- end
36
- end
@@ -1,590 +0,0 @@
1
- require 'test_helper'
2
-
3
- # This tests the NdrImport::NonTabular::Table mapping class
4
- class TableTest < ActiveSupport::TestCase
5
- def setup
6
- @simple_divider_example = <<-STR.split(/\n/).map
7
- 111
8
- Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt.
9
- ------
10
- 222
11
- Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo.
12
- ------
13
- 333
14
- Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla.
15
- ------
16
- 444
17
- Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim.
18
- STR
19
-
20
- @no_divider_example = <<-STR.split(/\n/).map
21
- 111
22
- Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt.
23
- STR
24
-
25
- @simple_start_and_end_divider_example = <<-STR.split(/\n/).map
26
- ----- START -----
27
- 111
28
- Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt.
29
- ------ END ------
30
- This is never captured
31
- ----- START -----
32
- 222
33
- Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo.
34
- ------ END ------
35
- This is never captured
36
- ----- START -----
37
- 333
38
- Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla.
39
- ------ END ------
40
- This is never captured
41
- ----- START -----
42
- 444
43
- This is captured
44
- Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim.
45
- ------ END ------
46
- STR
47
- end
48
-
49
- def test_all_valid_options
50
- valid_options = %w[
51
- canonical_name capture_end_line capture_start_line columns end_in_a_record end_line_pattern
52
- filename_pattern file_password format klass remove_lines row_identifier start_in_a_record
53
- start_line_pattern
54
- ]
55
- assert_equal valid_options.sort,
56
- NdrImport::NonTabular::Table.all_valid_options.sort
57
- end
58
-
59
- def test_should_raise_error_with_no_start_line_pattern
60
- assert_raise NdrImport::MappingError do
61
- NdrImport::NonTabular::Table.new(
62
- 'columns' => [{ 'column' => 'one' }]
63
- )
64
- end
65
-
66
- assert_raise NdrImport::MappingError do
67
- NdrImport::NonTabular::Table.new(
68
- 'start_line_pattern' => nil,
69
- 'columns' => [{ 'column' => 'one' }]
70
- )
71
- end
72
- end
73
-
74
- def test_should_initialize_with_non_tabular_row
75
- table = NdrImport::NonTabular::Table.new(
76
- 'start_line_pattern' => /\A-*\z/,
77
- 'columns' => [{ 'column' => 'one' }]
78
- )
79
- assert_equal(/\A-*\z/, table.start_line_pattern)
80
- end
81
-
82
- def test_should_test_flat_file_txt
83
- table = YAML.load_file(SafePath.new('permanent_test_files').join('flat_file.yml'))
84
- assert table.is_a?(NdrImport::NonTabular::Table)
85
- filename = SafePath.new('permanent_test_files').join('flat_file.txt')
86
- enum = table.transform(File.new(filename).each)
87
- # puts enum.to_a.inspect
88
-
89
- results = []
90
- enum.each do |_klass, fields, _index|
91
- results << fields[:rawtext]['one']
92
- end
93
-
94
- assert_equal 4, results.count
95
- assert results.first.start_with?('1')
96
- assert results.last.start_with?('4')
97
-
98
- assert results.any? { |result| result =~ /This is captured/ }
99
- refute results.any? { |result| result =~ /This is never captured/ }
100
- refute results.any? { |result| result =~ /== Page/ }
101
- end
102
-
103
- def test_should_raise_error_with_no_column_non_tabular_cell
104
- table = YAML.load <<-YML.strip_heredoc
105
- --- !ruby/object:NdrImport::NonTabular::Table
106
- start_line_pattern: !ruby/regexp /^-{6}$/
107
- klass: SomeTestKlass
108
- columns:
109
- - column: one
110
- YML
111
- assert_raise NdrImport::MappingError do
112
- table.transform(@simple_divider_example).to_a
113
- end
114
- end
115
-
116
- def test_should_raise_error_with_no_column_non_tabular_cell_lines
117
- table = YAML.load <<-YML.strip_heredoc
118
- --- !ruby/object:NdrImport::NonTabular::Table
119
- start_line_pattern: !ruby/regexp /^-{6}$/
120
- klass: SomeTestKlass
121
- columns:
122
- - column: one
123
- non_tabular_cell:
124
- YML
125
- assert_raise NdrImport::MappingError do
126
- table.transform(@simple_divider_example).to_a
127
- end
128
-
129
- table = YAML.load <<-YML.strip_heredoc
130
- --- !ruby/object:NdrImport::NonTabular::Table
131
- start_line_pattern: !ruby/regexp /^-{6}$/
132
- klass: SomeTestKlass
133
- columns:
134
- - column: one
135
- non_tabular_cell:
136
- lines:
137
- YML
138
- assert_raise NdrImport::MappingError do
139
- table.transform(@simple_divider_example).to_a
140
- end
141
- end
142
-
143
- def test_should_raise_error_with_no_column_non_tabular_cell_capture
144
- table = YAML.load <<-YML.strip_heredoc
145
- --- !ruby/object:NdrImport::NonTabular::Table
146
- start_line_pattern: !ruby/regexp /^-{6}$/
147
- klass: SomeTestKlass
148
- columns:
149
- - column: one
150
- non_tabular_cell:
151
- lines: !ruby/range
152
- begin: 0
153
- end: -1
154
- excl: false
155
- YML
156
- assert_raise NdrImport::MappingError do
157
- table.transform(@simple_divider_example).to_a
158
- end
159
-
160
- table = YAML.load <<-YML.strip_heredoc
161
- --- !ruby/object:NdrImport::NonTabular::Table
162
- start_line_pattern: !ruby/regexp /^-{6}$/
163
- klass: SomeTestKlass
164
- columns:
165
- - column: one
166
- non_tabular_cell:
167
- lines: !ruby/range
168
- begin: 0
169
- end: -1
170
- excl: false
171
- capture:
172
- YML
173
- assert_raise NdrImport::MappingError do
174
- table.transform(@simple_divider_example).to_a
175
- end
176
- end
177
-
178
- def test_should_only_return_two_results_with_no_start_in_a_record_or_end_in_a_record
179
- table = YAML.load <<-YML.strip_heredoc
180
- --- !ruby/object:NdrImport::NonTabular::Table
181
- start_line_pattern: !ruby/regexp /^-{6}$/
182
- klass: SomeTestKlass
183
- columns:
184
- - column: one
185
- non_tabular_cell:
186
- lines: !ruby/range
187
- begin: 0
188
- end: -1
189
- excl: false
190
- capture: !ruby/regexp /^(.*)$/i
191
- YML
192
- enum = table.transform(@simple_divider_example)
193
- assert_instance_of Enumerator, enum
194
- results = enum.map { |_klass, fields, _index| fields[:rawtext]['one'] }
195
-
196
- assert_equal 2, results.count
197
- assert results.first.start_with?('222')
198
- assert results.last.start_with?('333')
199
- end
200
-
201
- def test_should_return_three_results_with_start_in_a_record
202
- table = YAML.load <<-YML.strip_heredoc
203
- --- !ruby/object:NdrImport::NonTabular::Table
204
- start_line_pattern: !ruby/regexp /^-{6}$/
205
- start_in_a_record: true
206
- klass: SomeTestKlass
207
- columns:
208
- - column: one
209
- non_tabular_cell:
210
- lines: !ruby/range
211
- begin: 0
212
- end: -1
213
- excl: false
214
- capture: !ruby/regexp /^(.*)$/i
215
- YML
216
- enum = table.transform(@simple_divider_example)
217
- assert_instance_of Enumerator, enum
218
- results = enum.map { |_klass, fields, _index| fields[:rawtext]['one'] }
219
-
220
- assert_equal 3, results.count
221
- assert results.first.start_with?('111')
222
- assert results.last.start_with?('333')
223
- end
224
-
225
- def test_should_return_three_results_with_end_in_a_record
226
- table = YAML.load <<-YML.strip_heredoc
227
- --- !ruby/object:NdrImport::NonTabular::Table
228
- start_line_pattern: !ruby/regexp /^-{6}$/
229
- end_in_a_record: true
230
- klass: SomeTestKlass
231
- columns:
232
- - column: one
233
- non_tabular_cell:
234
- lines: !ruby/range
235
- begin: 0
236
- end: -1
237
- excl: false
238
- capture: !ruby/regexp /^(.*)$/i
239
- YML
240
- enum = table.transform(@simple_divider_example)
241
- assert_instance_of Enumerator, enum
242
- results = enum.map { |_klass, fields, _index| fields[:rawtext]['one'] }
243
-
244
- assert_equal 3, results.count
245
- assert results.first.start_with?('222')
246
- assert results.last.start_with?('444')
247
- end
248
-
249
- def test_should_return_four_results_with_start_in_a_record_and_end_in_a_record
250
- table = YAML.load <<-YML.strip_heredoc
251
- --- !ruby/object:NdrImport::NonTabular::Table
252
- start_line_pattern: !ruby/regexp /^-{6}$/
253
- start_in_a_record: true
254
- end_in_a_record: true
255
- klass: SomeTestKlass
256
- columns:
257
- - column: one
258
- non_tabular_cell:
259
- lines: !ruby/range
260
- begin: 0
261
- end: -1
262
- excl: false
263
- capture: !ruby/regexp /^(.*)$/i
264
- YML
265
- enum = table.transform(@simple_divider_example)
266
- assert_instance_of Enumerator, enum
267
- results = enum.map { |_klass, fields, _index| fields[:rawtext]['one'] }
268
-
269
- assert_equal 4, results.count
270
- assert results.first.start_with?('111')
271
- assert results.last.start_with?('444')
272
- end
273
-
274
- def test_should_return_one_results_with_start_in_a_record_and_end_in_a_record
275
- table = YAML.load <<-YML.strip_heredoc
276
- --- !ruby/object:NdrImport::NonTabular::Table
277
- start_line_pattern: !ruby/regexp /^-{6}$/
278
- start_in_a_record: true
279
- end_in_a_record: true
280
- klass: SomeTestKlass
281
- columns:
282
- - column: one
283
- non_tabular_cell:
284
- lines: !ruby/range
285
- begin: 0
286
- end: -1
287
- excl: false
288
- capture: !ruby/regexp /^(.*)$/i
289
- YML
290
- enum = table.transform(@no_divider_example)
291
- assert_instance_of Enumerator, enum
292
- results = enum.map { |_klass, fields, _index| fields[:rawtext]['one'] }
293
-
294
- assert_equal 1, results.count
295
- assert results.first.start_with?('111')
296
- end
297
-
298
- def test_should_return_four_results_with_start_and_end_dividers
299
- table = YAML.load <<-YML.strip_heredoc
300
- --- !ruby/object:NdrImport::NonTabular::Table
301
- start_line_pattern: !ruby/regexp /^----- START -----$/
302
- end_line_pattern: !ruby/regexp /^------ END ------$/
303
- klass: SomeTestKlass
304
- columns:
305
- - column: one
306
- non_tabular_cell:
307
- lines: !ruby/range
308
- begin: 0
309
- end: -1
310
- excl: false
311
- capture: !ruby/regexp /^(.*)$/i
312
- YML
313
- enum = table.transform(@simple_start_and_end_divider_example)
314
- assert_instance_of Enumerator, enum
315
-
316
- results = enum.map { |_klass, fields, _index| fields[:rawtext]['one'] }
317
-
318
- assert_equal 4, results.count
319
- assert results.first.start_with?('111')
320
- assert results.last.start_with?('444')
321
-
322
- assert results.any? { |result| result =~ /This is captured/ }
323
- refute results.any? { |result| result =~ /This is never captured/ }
324
- end
325
-
326
- def test_should_capture_end_line
327
- data = <<~STR.each_line
328
- 111
329
- Lorem ipsum dolor sit amet.
330
- CAPTURE THIS CODE ABC
331
- 111
332
- Lorem ipsum dolor sit amet.
333
- CAPTURE THIS CODE XYZ
334
- 111
335
- Lorem ipsum dolor sit amet.
336
- CAPTURE THIS CODE 123
337
- STR
338
-
339
- table = YAML.load <<-YML.strip_heredoc
340
- --- !ruby/object:NdrImport::NonTabular::Table
341
- start_line_pattern: !ruby/regexp /\\A111\\z/
342
- end_line_pattern: !ruby/regexp /\\ACAPTURE THIS CODE/
343
- capture_start_line: true
344
- capture_end_line: true
345
- klass: SomeTestKlass
346
- columns:
347
- - column: one
348
- non_tabular_cell:
349
- lines: -1
350
- capture: !ruby/regexp /\\A(.*)\\z/i
351
- YML
352
- enum = table.transform(data)
353
- assert_instance_of Enumerator, enum
354
-
355
- results = enum.map { |_klass, fields, _index| fields[:rawtext]['one'] }
356
-
357
- assert_equal 3, results.count
358
- assert_equal 'CAPTURE THIS CODE ABC', results.first
359
- end
360
-
361
- def test_should_capture
362
- table = YAML.load <<-YML.strip_heredoc
363
- --- !ruby/object:NdrImport::NonTabular::Table
364
- start_line_pattern: !ruby/regexp /^-{6}$/
365
- klass: SomeTestKlass
366
- columns:
367
- - standard_mapping: nhsnumber
368
- non_tabular_cell:
369
- lines: 0
370
- capture: !ruby/regexp /^(\\d*)$/i
371
- - column: address
372
- non_tabular_cell:
373
- lines: !ruby/range
374
- begin: 1
375
- end: 5
376
- excl: false
377
- capture: !ruby/regexp /^.{50}(.*)$/i
378
- join: ", "
379
- - standard_mapping: postcode
380
- non_tabular_cell:
381
- lines: 6
382
- capture: !ruby/regexp /^.{50}(.*)$/i
383
- - column: capture_inclusive
384
- non_tabular_cell:
385
- lines: !ruby/object:RegexpRange
386
- begin: !ruby/regexp /^CAPTURE INCLUSIVE$/
387
- end: !ruby/regexp /^Capture me.$/i
388
- excl: false
389
- capture: !ruby/regexp /^(.*)$/i
390
- join: "\\n"
391
- - column: capture_exclusive
392
- non_tabular_cell:
393
- lines: !ruby/object:RegexpRange
394
- begin: !ruby/regexp /^CAPTURE EXCLUSIVE$/
395
- end: !ruby/regexp /^Do NOT capture me.$/i
396
- excl: true
397
- capture: !ruby/regexp /^(.*)$/i
398
- join: "\\n"
399
- - column: capture_to_end
400
- non_tabular_cell:
401
- lines: !ruby/object:RegexpRange
402
- begin: !ruby/regexp /^CAPTURE TO END$/
403
- end: -1
404
- excl: false
405
- capture: !ruby/regexp /^(.*)$/i
406
- join: "\\n"
407
- YML
408
- capture_example = <<-STR
409
- This is never captured
410
- ------
411
- 1111111111
412
- <----------------- 50 characters ---------------->Unit C, Magog Court
413
- Shelford Bottom
414
- Hinton Way
415
- Cambridge
416
-
417
- CB22 3AD
418
-
419
- CAPTURE INCLUSIVE
420
- Lorem ipsum dolor sit amet,
421
- consectetur adipisicing elit,
422
- Capture me.
423
-
424
- CAPTURE EXCLUSIVE
425
- Ut enim ad minim veniam, quis nostrud exercitation.
426
- Do NOT capture me.
427
-
428
- CAPTURE TO END
429
- Lorem ipsum dolor sit amet, consectetur adipisicing elit.
430
- Ut enim ad minim veniam, quis nostrud exercitation ullamco.
431
- Duis aute irure dolor in reprehenderit in voluptate velit.
432
- Excepteur sint occaecat cupidatat non proident, sunt in culpa.
433
- ------
434
- This is never captured
435
- STR
436
- enum = table.transform(capture_example.split(/\n/).map)
437
- assert_instance_of Enumerator, enum
438
-
439
- output = []
440
- enum.each do |klass, fields, index|
441
- output << [klass, fields, index]
442
- end
443
-
444
- expected_output = [
445
- [
446
- 'SomeTestKlass', {
447
- 'nhsnumber' => '1111111111',
448
- 'postcode' => 'CB223AD',
449
- :rawtext => {
450
- 'nhsnumber' => '1111111111',
451
- 'address' => 'Unit C, Magog Court, Shelford Bottom, Hinton Way, Cambridge',
452
- 'postcode' => 'CB22 3AD',
453
- 'capture_inclusive' => "CAPTURE INCLUSIVE\nLorem ipsum dolor sit amet,\n" \
454
- "consectetur adipisicing elit,\nCapture me.",
455
- 'capture_exclusive' => "CAPTURE EXCLUSIVE\n" \
456
- 'Ut enim ad minim veniam, quis nostrud exercitation.',
457
- 'capture_to_end' => "CAPTURE TO END\n" \
458
- "Lorem ipsum dolor sit amet, consectetur adipisicing elit.\n" \
459
- "Ut enim ad minim veniam, quis nostrud exercitation ullamco.\n" \
460
- "Duis aute irure dolor in reprehenderit in voluptate velit.\n" \
461
- 'Excepteur sint occaecat cupidatat non proident, sunt in culpa.'
462
- }
463
- },
464
- 0
465
- ]
466
- ]
467
- assert_equal expected_output.sort, output.sort
468
- assert_equal 25, table.non_tabular_lines.last.absolute_line_number
469
- end
470
-
471
- def test_handles_non_utf8_characters
472
- mixed_encoding_example = <<-STR.each_line
473
- 111
474
- Lorem ipsum dolor sit amet.
475
- ------
476
- 111
477
- Lorem ipsum dolor\xBE sit amet.
478
- ------
479
- 111
480
- Lorem ipsum dolor sit amet.
481
- ------
482
- STR
483
-
484
- table = YAML.load <<-YML.strip_heredoc
485
- --- !ruby/object:NdrImport::NonTabular::Table
486
- start_line_pattern: !ruby/regexp /^111$/
487
- end_in_a_record: true
488
- klass: SomeTestKlass
489
- columns:
490
- - column: one
491
- non_tabular_cell:
492
- lines: !ruby/range
493
- begin: 0
494
- end: -1
495
- excl: true
496
- capture: !ruby/regexp /^(.*)$/i
497
- YML
498
-
499
- enum = table.transform(mixed_encoding_example)
500
- assert_instance_of Enumerator, enum
501
- results = enum.map { |_klass, fields, _index| fields[:rawtext]['one'] }
502
-
503
- assert_equal 3, results.count, 'records were lost'
504
-
505
- assert_equal [27, 28, 27], results.map { |row| row.chars.to_a.length }
506
- assert_equal [27, 29, 27], results.map { |row| row.bytes.to_a.length }
507
-
508
- results.each do |row|
509
- assert row.first.valid_encoding?
510
- assert_equal Encoding.find('UTF-8'), row.first.encoding
511
- end
512
- end
513
-
514
- def test_should_not_allow_junk_bytes
515
- junk = <<-STR.each_line
516
- 111
517
- Lorem ipsum dolor sit amet.
518
- ------
519
- 111
520
- Lorem ipsum dolor\x8D sit amet.
521
- ------
522
- 111
523
- Lorem ipsum dolor sit amet.
524
- ------
525
- STR
526
-
527
- table = YAML.load <<-YML.strip_heredoc
528
- --- !ruby/object:NdrImport::NonTabular::Table
529
- start_line_pattern: !ruby/regexp /^111$/
530
- end_in_a_record: true
531
- klass: SomeTestKlass
532
- columns:
533
- - column: one
534
- non_tabular_cell:
535
- lines: !ruby/range
536
- begin: 0
537
- end: -1
538
- excl: true
539
- capture: !ruby/regexp /^(.*)$/i
540
- YML
541
-
542
- assert_raises(UTF8Encoding::UTF8CoercionError) do
543
- table.transform(junk).to_a
544
- end
545
- end
546
-
547
- def test_should_strip_captured_rawtext
548
- unwanted_white_space = <<-STR.each_line
549
- 111
550
- Trailing whitespace end_of_line
551
- ------
552
- 111
553
- Leading whitespaceend_of_line
554
- ------
555
- 111
556
- Leading and trailing whitespace end_of_line
557
- ------
558
- 111
559
- Should not match this
560
- ------
561
- STR
562
-
563
- table = YAML.load <<-YML.strip_heredoc
564
- --- !ruby/object:NdrImport::NonTabular::Table
565
- start_line_pattern: !ruby/regexp /^111$/
566
- end_in_a_record: true
567
- klass: SomeTestKlass
568
- columns:
569
- - column: one
570
- non_tabular_cell:
571
- lines: 0
572
- capture: !ruby/regexp /^(.*)end_of_line$/i
573
- trim_rawtext: left
574
- YML
575
-
576
- enum = table.transform(unwanted_white_space)
577
- assert_instance_of Enumerator, enum
578
-
579
- output = []
580
- enum.each do |klass, fields, index|
581
- output << [klass, fields, index]
582
- end
583
-
584
- expected_rawtext_ouput = [{ 'one' => 'Trailing whitespace' },
585
- { 'one' => 'Leading whitespace' },
586
- { 'one' => 'Leading and trailing whitespace' },
587
- { 'one' => '' }]
588
- assert_equal expected_rawtext_ouput, (output.map { |row| row[1][:rawtext] })
589
- end
590
- end