ndr_import 8.5.0 → 8.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/Gemfile +0 -3
- data/README.md +6 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/code_safety.yml +27 -11
- data/exe/pdf_acro_form_to_yaml +23 -0
- data/exe/pdf_to_text +28 -0
- data/exe/word_to_text +26 -0
- data/gemfiles/Gemfile.rails52 +0 -3
- data/gemfiles/Gemfile.rails60 +5 -0
- data/lib/ndr_import/version.rb +1 -1
- data/ndr_import.gemspec +9 -7
- metadata +23 -164
- data/gemfiles/Gemfile.rails50 +0 -8
- data/gemfiles/Gemfile.rails51 +0 -9
- data/test/file/acro_form_test.rb +0 -39
- data/test/file/base_test.rb +0 -54
- data/test/file/delimited_test.rb +0 -233
- data/test/file/docx_test.rb +0 -53
- data/test/file/excel_test.rb +0 -124
- data/test/file/pdf_test.rb +0 -36
- data/test/file/registry_test.rb +0 -62
- data/test/file/seven_zip_test.rb +0 -59
- data/test/file/text_test.rb +0 -92
- data/test/file/word_test.rb +0 -35
- data/test/file/xml_test.rb +0 -21
- data/test/file/zip_test.rb +0 -47
- data/test/fixed_width/table_test.rb +0 -35
- data/test/helpers/file/delimited_test.rb +0 -105
- data/test/helpers/file/excel_test.rb +0 -82
- data/test/helpers/file/pdf_test.rb +0 -27
- data/test/helpers/file/word_test.rb +0 -26
- data/test/helpers/file/xml_test.rb +0 -131
- data/test/helpers/file/zip_test.rb +0 -75
- data/test/mapper_test.rb +0 -676
- data/test/non_tabular/mapping_test.rb +0 -36
- data/test/non_tabular/table_test.rb +0 -590
- data/test/non_tabular_file_helper_test.rb +0 -501
- data/test/pdf_form/table_test.rb +0 -119
- data/test/readme_test.rb +0 -53
- data/test/resources/acro_form.pdf +0 -0
- data/test/resources/blank_tab_test.xlsx +0 -0
- data/test/resources/bomd.csv +0 -3
- data/test/resources/broken.csv +0 -3
- data/test/resources/filesystem_paths.yml +0 -26
- data/test/resources/flat_file.pdf +0 -0
- data/test/resources/flat_file.txt +0 -27
- data/test/resources/flat_file.yml +0 -20
- data/test/resources/hello_utf16be.txt +0 -0
- data/test/resources/hello_utf16le.txt +0 -0
- data/test/resources/hello_utf8.txt +0 -2
- data/test/resources/hello_windows.txt +0 -2
- data/test/resources/hello_world.doc +0 -0
- data/test/resources/hello_world.docx +0 -0
- data/test/resources/hello_world.pdf +0 -0
- data/test/resources/hello_world.txt +0 -2
- data/test/resources/high_ascii_delimited.txt +0 -2
- data/test/resources/high_ascii_delimited_example_two.txt +0 -3
- data/test/resources/malformed.csv +0 -3
- data/test/resources/malformed.xml +0 -6
- data/test/resources/malformed_pipe.csv +0 -3
- data/test/resources/normal.7z +0 -0
- data/test/resources/normal.csv +0 -3
- data/test/resources/normal.csv.zip +0 -0
- data/test/resources/normal_pipe.csv +0 -3
- data/test/resources/normal_thorn.csv +0 -3
- data/test/resources/not_a_pdf.pdf +0 -0
- data/test/resources/not_a_word_file.doc +0 -0
- data/test/resources/not_a_word_file.docx +0 -0
- data/test/resources/not_sign_delimited.txt +0 -3
- data/test/resources/password_protected_hello_world.docx +0 -0
- data/test/resources/password_protected_sample_xlsx.xlsx +0 -0
- data/test/resources/sample.xml +0 -34
- data/test/resources/sample_xls.xls +0 -0
- data/test/resources/sample_xlsx.xlsx +0 -0
- data/test/resources/sheet_streaming.xls +0 -0
- data/test/resources/sheet_streaming.xlsx +0 -0
- data/test/resources/standard_mappings.yml +0 -39
- data/test/resources/txt_file_xls_extension.xls +0 -1
- data/test/resources/txt_file_xlsx_extension.xlsx +0 -1
- data/test/resources/utf-16be_xml.xml +0 -0
- data/test/resources/utf-16be_xml_with_declaration.xml +0 -0
- data/test/resources/utf-16le_xml.xml +0 -0
- data/test/resources/utf-8_xml.xml +0 -9
- data/test/resources/windows-1252_xml.xml +0 -9
- data/test/resources/windows.csv +0 -5
- data/test/resources/xlsx_file_xls_extension.xls +0 -0
- data/test/standard_mappings_test.rb +0 -22
- data/test/table_test.rb +0 -545
- data/test/test_helper.rb +0 -35
- data/test/universal_importer_helper_test.rb +0 -86
- data/test/xml/table_test.rb +0 -90
@@ -1,501 +0,0 @@
|
|
1
|
-
# encoding: UTF-8
|
2
|
-
|
3
|
-
require 'test_helper'
|
4
|
-
|
5
|
-
# Test non tabular mapper class that expose private method(s) for testing
|
6
|
-
class NonTabularTestMapper
|
7
|
-
# include NdrImport::Mapper
|
8
|
-
include NdrImport::NonTabularFileHelper
|
9
|
-
|
10
|
-
attr_accessor :mappings
|
11
|
-
|
12
|
-
public :read_non_tabular_string
|
13
|
-
end
|
14
|
-
|
15
|
-
# This tests the NonTabularFileHelper class
|
16
|
-
class NonTabularFileHelperTest < ActiveSupport::TestCase
|
17
|
-
simple_divider_example = <<-STR
|
18
|
-
111
|
19
|
-
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt.
|
20
|
-
------
|
21
|
-
222
|
22
|
-
Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo.
|
23
|
-
------
|
24
|
-
333
|
25
|
-
Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla.
|
26
|
-
------
|
27
|
-
444
|
28
|
-
Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim.
|
29
|
-
STR
|
30
|
-
|
31
|
-
test 'should raise error with no non_tabular_row' do
|
32
|
-
mapper = NonTabularTestMapper.new
|
33
|
-
mapper.mappings = YAML.load <<-YML
|
34
|
-
columns:
|
35
|
-
- column: one
|
36
|
-
YML
|
37
|
-
assert_raise NdrImport::MappingError do
|
38
|
-
mapper.read_non_tabular_string(simple_divider_example)
|
39
|
-
end
|
40
|
-
end
|
41
|
-
|
42
|
-
test 'should raise error with no non_tabular_row start_line_pattern' do
|
43
|
-
mapper = NonTabularTestMapper.new
|
44
|
-
mapper.mappings = YAML.load <<-YML
|
45
|
-
non_tabular_row:
|
46
|
-
columns:
|
47
|
-
- column: one
|
48
|
-
YML
|
49
|
-
assert_raise NdrImport::MappingError do
|
50
|
-
mapper.read_non_tabular_string(simple_divider_example)
|
51
|
-
end
|
52
|
-
|
53
|
-
mapper = NonTabularTestMapper.new
|
54
|
-
mapper.mappings = YAML.load <<-YML
|
55
|
-
non_tabular_row:
|
56
|
-
start_line_pattern:
|
57
|
-
columns:
|
58
|
-
- column: one
|
59
|
-
YML
|
60
|
-
assert_raise NdrImport::MappingError do
|
61
|
-
mapper.read_non_tabular_string(simple_divider_example)
|
62
|
-
end
|
63
|
-
end
|
64
|
-
|
65
|
-
test 'should raise error with no column non_tabular_cell' do
|
66
|
-
mapper = NonTabularTestMapper.new
|
67
|
-
mapper.mappings = YAML.load <<-YML
|
68
|
-
non_tabular_row:
|
69
|
-
start_line_pattern: !ruby/regexp /^-{6}$/
|
70
|
-
columns:
|
71
|
-
- column: one
|
72
|
-
YML
|
73
|
-
assert_raise NdrImport::MappingError do
|
74
|
-
mapper.read_non_tabular_string(simple_divider_example)
|
75
|
-
end
|
76
|
-
end
|
77
|
-
|
78
|
-
test 'should raise error with no column non_tabular_cell lines' do
|
79
|
-
mapper = NonTabularTestMapper.new
|
80
|
-
mapper.mappings = YAML.load <<-YML
|
81
|
-
non_tabular_row:
|
82
|
-
start_line_pattern: !ruby/regexp /^-{6}$/
|
83
|
-
columns:
|
84
|
-
- column: one
|
85
|
-
non_tabular_cell:
|
86
|
-
YML
|
87
|
-
assert_raise NdrImport::MappingError do
|
88
|
-
mapper.read_non_tabular_string(simple_divider_example)
|
89
|
-
end
|
90
|
-
|
91
|
-
mapper = NonTabularTestMapper.new
|
92
|
-
mapper.mappings = YAML.load <<-YML
|
93
|
-
non_tabular_row:
|
94
|
-
start_line_pattern: !ruby/regexp /^-{6}$/
|
95
|
-
columns:
|
96
|
-
- column: one
|
97
|
-
non_tabular_cell:
|
98
|
-
lines:
|
99
|
-
YML
|
100
|
-
assert_raise NdrImport::MappingError do
|
101
|
-
mapper.read_non_tabular_string(simple_divider_example)
|
102
|
-
end
|
103
|
-
end
|
104
|
-
|
105
|
-
test 'should raise error with no column non_tabular_cell capture' do
|
106
|
-
mapper = NonTabularTestMapper.new
|
107
|
-
mapper.mappings = YAML.load <<-YML
|
108
|
-
non_tabular_row:
|
109
|
-
start_line_pattern: !ruby/regexp /^-{6}$/
|
110
|
-
columns:
|
111
|
-
- column: one
|
112
|
-
non_tabular_cell:
|
113
|
-
lines: !ruby/range
|
114
|
-
begin: 0
|
115
|
-
end: -1
|
116
|
-
excl: false
|
117
|
-
YML
|
118
|
-
assert_raise NdrImport::MappingError do
|
119
|
-
mapper.read_non_tabular_string(simple_divider_example)
|
120
|
-
end
|
121
|
-
|
122
|
-
mapper = NonTabularTestMapper.new
|
123
|
-
mapper.mappings = YAML.load <<-YML
|
124
|
-
non_tabular_row:
|
125
|
-
start_line_pattern: !ruby/regexp /^-{6}$/
|
126
|
-
columns:
|
127
|
-
- column: one
|
128
|
-
non_tabular_cell:
|
129
|
-
lines: !ruby/range
|
130
|
-
begin: 0
|
131
|
-
end: -1
|
132
|
-
excl: false
|
133
|
-
capture:
|
134
|
-
YML
|
135
|
-
assert_raise NdrImport::MappingError do
|
136
|
-
mapper.read_non_tabular_string(simple_divider_example)
|
137
|
-
end
|
138
|
-
end
|
139
|
-
|
140
|
-
test 'should only return two results with no start_in_a_record or end_in_a_record' do
|
141
|
-
mapper = NonTabularTestMapper.new
|
142
|
-
mapper.mappings = YAML.load <<-YML
|
143
|
-
non_tabular_row:
|
144
|
-
start_line_pattern: !ruby/regexp /^-{6}$/
|
145
|
-
columns:
|
146
|
-
- column: one
|
147
|
-
non_tabular_cell:
|
148
|
-
lines: !ruby/range
|
149
|
-
begin: 0
|
150
|
-
end: -1
|
151
|
-
excl: false
|
152
|
-
capture: !ruby/regexp /^(.*)$/i
|
153
|
-
YML
|
154
|
-
results = mapper.read_non_tabular_string(simple_divider_example)
|
155
|
-
assert_equal 2, results.count
|
156
|
-
assert results.first[0].start_with?('222')
|
157
|
-
assert results.last[0].start_with?('333')
|
158
|
-
end
|
159
|
-
|
160
|
-
test 'should return three results with start_in_a_record' do
|
161
|
-
mapper = NonTabularTestMapper.new
|
162
|
-
mapper.mappings = YAML.load <<-YML
|
163
|
-
non_tabular_row:
|
164
|
-
start_line_pattern: !ruby/regexp /^-{6}$/
|
165
|
-
start_in_a_record: true
|
166
|
-
columns:
|
167
|
-
- column: one
|
168
|
-
non_tabular_cell:
|
169
|
-
lines: !ruby/range
|
170
|
-
begin: 0
|
171
|
-
end: -1
|
172
|
-
excl: false
|
173
|
-
capture: !ruby/regexp /^(.*)$/i
|
174
|
-
YML
|
175
|
-
results = mapper.read_non_tabular_string(simple_divider_example)
|
176
|
-
assert_equal 3, results.count
|
177
|
-
assert results.first[0].start_with?('111')
|
178
|
-
assert results.last[0].start_with?('333')
|
179
|
-
end
|
180
|
-
|
181
|
-
test 'should return three results with end_in_a_record' do
|
182
|
-
mapper = NonTabularTestMapper.new
|
183
|
-
mapper.mappings = YAML.load <<-YML
|
184
|
-
non_tabular_row:
|
185
|
-
start_line_pattern: !ruby/regexp /^-{6}$/
|
186
|
-
end_in_a_record: true
|
187
|
-
columns:
|
188
|
-
- column: one
|
189
|
-
non_tabular_cell:
|
190
|
-
lines: !ruby/range
|
191
|
-
begin: 0
|
192
|
-
end: -1
|
193
|
-
excl: false
|
194
|
-
capture: !ruby/regexp /^(.*)$/i
|
195
|
-
YML
|
196
|
-
results = mapper.read_non_tabular_string(simple_divider_example)
|
197
|
-
assert_equal 3, results.count
|
198
|
-
assert results.first[0].start_with?('222')
|
199
|
-
assert results.last[0].start_with?('444')
|
200
|
-
end
|
201
|
-
|
202
|
-
test 'should return four results with start_in_a_record and end_in_a_record' do
|
203
|
-
mapper = NonTabularTestMapper.new
|
204
|
-
mapper.mappings = YAML.load <<-YML
|
205
|
-
non_tabular_row:
|
206
|
-
start_line_pattern: !ruby/regexp /^-{6}$/
|
207
|
-
start_in_a_record: true
|
208
|
-
end_in_a_record: true
|
209
|
-
columns:
|
210
|
-
- column: one
|
211
|
-
non_tabular_cell:
|
212
|
-
lines: !ruby/range
|
213
|
-
begin: 0
|
214
|
-
end: -1
|
215
|
-
excl: false
|
216
|
-
capture: !ruby/regexp /^(.*)$/i
|
217
|
-
YML
|
218
|
-
results = mapper.read_non_tabular_string(simple_divider_example)
|
219
|
-
assert_equal 4, results.count
|
220
|
-
assert results.first[0].start_with?('111')
|
221
|
-
assert results.last[0].start_with?('444')
|
222
|
-
end
|
223
|
-
|
224
|
-
no_divider_example = <<-STR
|
225
|
-
111
|
226
|
-
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt.
|
227
|
-
STR
|
228
|
-
|
229
|
-
test 'should return one results with start_in_a_record and end_in_a_record' do
|
230
|
-
mapper = NonTabularTestMapper.new
|
231
|
-
mapper.mappings = YAML.load <<-YML
|
232
|
-
non_tabular_row:
|
233
|
-
start_line_pattern: !ruby/regexp /^-{6}$/
|
234
|
-
start_in_a_record: true
|
235
|
-
end_in_a_record: true
|
236
|
-
columns:
|
237
|
-
- column: one
|
238
|
-
non_tabular_cell:
|
239
|
-
lines: !ruby/range
|
240
|
-
begin: 0
|
241
|
-
end: -1
|
242
|
-
excl: false
|
243
|
-
capture: !ruby/regexp /^(.*)$/i
|
244
|
-
YML
|
245
|
-
results = mapper.read_non_tabular_string(no_divider_example)
|
246
|
-
assert_equal 1, results.count
|
247
|
-
assert results.first[0].start_with?('111')
|
248
|
-
end
|
249
|
-
|
250
|
-
simple_start_and_end_divider_example = <<-STR
|
251
|
-
----- START -----
|
252
|
-
111
|
253
|
-
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt.
|
254
|
-
------ END ------
|
255
|
-
This is never captured
|
256
|
-
----- START -----
|
257
|
-
222
|
258
|
-
Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo.
|
259
|
-
------ END ------
|
260
|
-
This is never captured
|
261
|
-
----- START -----
|
262
|
-
333
|
263
|
-
Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla.
|
264
|
-
------ END ------
|
265
|
-
This is never captured
|
266
|
-
----- START -----
|
267
|
-
444
|
268
|
-
This is captured
|
269
|
-
Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim.
|
270
|
-
------ END ------
|
271
|
-
STR
|
272
|
-
|
273
|
-
test 'should return four results with start and end dividers' do
|
274
|
-
mapper = NonTabularTestMapper.new
|
275
|
-
mapper.mappings = YAML.load <<-YML
|
276
|
-
non_tabular_row:
|
277
|
-
start_line_pattern: !ruby/regexp /^----- START -----$/
|
278
|
-
end_line_pattern: !ruby/regexp /^------ END ------$/
|
279
|
-
columns:
|
280
|
-
- column: one
|
281
|
-
non_tabular_cell:
|
282
|
-
lines: !ruby/range
|
283
|
-
begin: 0
|
284
|
-
end: -1
|
285
|
-
excl: false
|
286
|
-
capture: !ruby/regexp /^(.*)$/i
|
287
|
-
YML
|
288
|
-
results = mapper.read_non_tabular_string(simple_start_and_end_divider_example)
|
289
|
-
assert_equal 4, results.count
|
290
|
-
assert results.first[0].start_with?('111')
|
291
|
-
assert results.last[0].start_with?('444')
|
292
|
-
|
293
|
-
assert results.flatten.any? { |result| result =~ /This is captured/ }
|
294
|
-
refute results.flatten.any? { |result| result =~ /This is never captured/ }
|
295
|
-
end
|
296
|
-
|
297
|
-
test 'documentation example' do
|
298
|
-
mapper = NonTabularTestMapper.new
|
299
|
-
mapper.mappings = YAML.load <<-YML
|
300
|
-
non_tabular_row:
|
301
|
-
start_line_pattern: !ruby/regexp /^D\\|/
|
302
|
-
capture_start_line: true
|
303
|
-
end_in_a_record: true
|
304
|
-
columns:
|
305
|
-
- standard_mapping: nhsnumber
|
306
|
-
non_tabular_cell:
|
307
|
-
lines: 0
|
308
|
-
capture:
|
309
|
-
- !ruby/regexp /^D\\|([^|]*).*/
|
310
|
-
- column: fulltextreport
|
311
|
-
non_tabular_cell:
|
312
|
-
lines: !ruby/range
|
313
|
-
begin: 1
|
314
|
-
end: -1
|
315
|
-
excl: false
|
316
|
-
capture: !ruby/regexp /^(?:R|\\d+)\\|(.*)$/i
|
317
|
-
join: "\\n"
|
318
|
-
YML
|
319
|
-
documentation_example = [
|
320
|
-
'D|1111111111|...',
|
321
|
-
'R|This is a',
|
322
|
-
'1|multiline report'
|
323
|
-
].join("\n")
|
324
|
-
|
325
|
-
results = mapper.read_non_tabular_string(documentation_example)
|
326
|
-
assert_equal 1, results.count
|
327
|
-
result = results.first
|
328
|
-
assert_equal '1111111111', result[0]
|
329
|
-
assert_equal "This is a\nmultiline report", result[1]
|
330
|
-
end
|
331
|
-
|
332
|
-
test 'should capture' do
|
333
|
-
mapper = NonTabularTestMapper.new
|
334
|
-
mapper.mappings = YAML.load <<-YML
|
335
|
-
non_tabular_row:
|
336
|
-
start_line_pattern: !ruby/regexp /^-{6}$/
|
337
|
-
columns:
|
338
|
-
- standard_mapping: nhsnumber
|
339
|
-
non_tabular_cell:
|
340
|
-
lines: 0
|
341
|
-
capture: !ruby/regexp /^(\\d*)$/i
|
342
|
-
- column: address
|
343
|
-
non_tabular_cell:
|
344
|
-
lines: !ruby/range
|
345
|
-
begin: 1
|
346
|
-
end: 5
|
347
|
-
excl: false
|
348
|
-
capture: !ruby/regexp /^.{50}(.*)$/i
|
349
|
-
join: ", "
|
350
|
-
- standard_mapping: postcode
|
351
|
-
non_tabular_cell:
|
352
|
-
lines: 6
|
353
|
-
capture: !ruby/regexp /^.{50}(.*)$/i
|
354
|
-
- column: capture_inclusive
|
355
|
-
non_tabular_cell:
|
356
|
-
lines: !ruby/object:RegexpRange
|
357
|
-
begin: !ruby/regexp /^CAPTURE INCLUSIVE$/
|
358
|
-
end: !ruby/regexp /^Capture me.$/i
|
359
|
-
excl: false
|
360
|
-
capture: !ruby/regexp /^(.*)$/i
|
361
|
-
join: "\\n"
|
362
|
-
- column: capture_exclusive
|
363
|
-
non_tabular_cell:
|
364
|
-
lines: !ruby/object:RegexpRange
|
365
|
-
begin: !ruby/regexp /^CAPTURE EXCLUSIVE$/
|
366
|
-
end: !ruby/regexp /^Do NOT capture me.$/i
|
367
|
-
excl: true
|
368
|
-
capture: !ruby/regexp /^(.*)$/i
|
369
|
-
join: "\\n"
|
370
|
-
- column: capture_to_end
|
371
|
-
non_tabular_cell:
|
372
|
-
lines: !ruby/object:RegexpRange
|
373
|
-
begin: !ruby/regexp /^CAPTURE TO END$/
|
374
|
-
end: -1
|
375
|
-
excl: false
|
376
|
-
capture: !ruby/regexp /^(.*)$/i
|
377
|
-
join: "\\n"
|
378
|
-
YML
|
379
|
-
capture_example = <<-STR
|
380
|
-
This is never captured
|
381
|
-
------
|
382
|
-
1111111111
|
383
|
-
<----------------- 50 characters ---------------->Unit C, Magog Court
|
384
|
-
Shelford Bottom
|
385
|
-
Hinton Way
|
386
|
-
Cambridge
|
387
|
-
|
388
|
-
CB22 3AD
|
389
|
-
|
390
|
-
CAPTURE INCLUSIVE
|
391
|
-
Lorem ipsum dolor sit amet,
|
392
|
-
consectetur adipisicing elit,
|
393
|
-
Capture me.
|
394
|
-
|
395
|
-
CAPTURE EXCLUSIVE
|
396
|
-
Ut enim ad minim veniam, quis nostrud exercitation.
|
397
|
-
Do NOT capture me.
|
398
|
-
|
399
|
-
CAPTURE TO END
|
400
|
-
Lorem ipsum dolor sit amet, consectetur adipisicing elit.
|
401
|
-
Ut enim ad minim veniam, quis nostrud exercitation ullamco.
|
402
|
-
Duis aute irure dolor in reprehenderit in voluptate velit.
|
403
|
-
Excepteur sint occaecat cupidatat non proident, sunt in culpa.
|
404
|
-
------
|
405
|
-
This is never captured
|
406
|
-
STR
|
407
|
-
results = mapper.read_non_tabular_string(capture_example)
|
408
|
-
assert_equal 1, results.count
|
409
|
-
result = results.first
|
410
|
-
assert_equal '1111111111', result[0]
|
411
|
-
assert_equal 'Unit C, Magog Court, Shelford Bottom, Hinton Way, Cambridge', result[1]
|
412
|
-
assert_equal 'CB22 3AD', result[2]
|
413
|
-
assert_equal "CAPTURE INCLUSIVE\nLorem ipsum dolor sit amet,\n" \
|
414
|
-
"consectetur adipisicing elit,\nCapture me.",
|
415
|
-
result[3]
|
416
|
-
assert_equal "CAPTURE EXCLUSIVE\nUt enim ad minim veniam, quis nostrud exercitation.",
|
417
|
-
result[4]
|
418
|
-
assert_equal "CAPTURE TO END\n" \
|
419
|
-
"Lorem ipsum dolor sit amet, consectetur adipisicing elit.\n" \
|
420
|
-
"Ut enim ad minim veniam, quis nostrud exercitation ullamco.\n" \
|
421
|
-
"Duis aute irure dolor in reprehenderit in voluptate velit.\n" \
|
422
|
-
'Excepteur sint occaecat cupidatat non proident, sunt in culpa.',
|
423
|
-
result[5]
|
424
|
-
|
425
|
-
assert_equal 25, mapper.non_tabular_lines.last.absolute_line_number
|
426
|
-
end
|
427
|
-
|
428
|
-
test 'handles non utf8 characters' do
|
429
|
-
mixed_encoding_example = <<-STR
|
430
|
-
111
|
431
|
-
Lorem ipsum dolor sit amet.
|
432
|
-
------
|
433
|
-
111
|
434
|
-
Lorem ipsum dolor\xBE sit amet.
|
435
|
-
------
|
436
|
-
111
|
437
|
-
Lorem ipsum dolor sit amet.
|
438
|
-
------
|
439
|
-
STR
|
440
|
-
|
441
|
-
mapper = NonTabularTestMapper.new
|
442
|
-
mapper.mappings = YAML.load <<-YML
|
443
|
-
non_tabular_row:
|
444
|
-
start_line_pattern: !ruby/regexp /^111$/
|
445
|
-
end_in_a_record: true
|
446
|
-
columns:
|
447
|
-
- column: one
|
448
|
-
non_tabular_cell:
|
449
|
-
lines: !ruby/range
|
450
|
-
begin: 0
|
451
|
-
end: -1
|
452
|
-
excl: true
|
453
|
-
capture: !ruby/regexp /^(.*)$/i
|
454
|
-
YML
|
455
|
-
|
456
|
-
results = mapper.read_non_tabular_string(mixed_encoding_example)
|
457
|
-
|
458
|
-
assert_equal 3, results.count, 'records were lost'
|
459
|
-
|
460
|
-
assert_equal [27, 28, 27], results.map { |row| row.first.chars.to_a.length }
|
461
|
-
assert_equal [27, 29, 27], results.map { |row| row.first.bytes.to_a.length }
|
462
|
-
|
463
|
-
results.each do |row|
|
464
|
-
assert row.first.valid_encoding?
|
465
|
-
assert_equal Encoding.find('UTF-8'), row.first.encoding
|
466
|
-
end
|
467
|
-
end
|
468
|
-
|
469
|
-
test 'should not allow junk bytes' do
|
470
|
-
junk = <<-STR
|
471
|
-
111
|
472
|
-
Lorem ipsum dolor sit amet.
|
473
|
-
------
|
474
|
-
111
|
475
|
-
Lorem ipsum dolor\x8D sit amet.
|
476
|
-
------
|
477
|
-
111
|
478
|
-
Lorem ipsum dolor sit amet.
|
479
|
-
------
|
480
|
-
STR
|
481
|
-
|
482
|
-
mapper = NonTabularTestMapper.new
|
483
|
-
mapper.mappings = YAML.load <<-YML
|
484
|
-
non_tabular_row:
|
485
|
-
start_line_pattern: !ruby/regexp /^111$/
|
486
|
-
end_in_a_record: true
|
487
|
-
columns:
|
488
|
-
- column: one
|
489
|
-
non_tabular_cell:
|
490
|
-
lines: !ruby/range
|
491
|
-
begin: 0
|
492
|
-
end: -1
|
493
|
-
excl: true
|
494
|
-
capture: !ruby/regexp /^(.*)$/i
|
495
|
-
YML
|
496
|
-
|
497
|
-
assert_raises(UTF8Encoding::UTF8CoercionError) do
|
498
|
-
mapper.read_non_tabular_string(junk)
|
499
|
-
end
|
500
|
-
end
|
501
|
-
end
|