ndr_import 3.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/.gitignore +14 -0
- data/.rubocop.yml +27 -0
- data/.ruby-version +1 -0
- data/.travis.yml +22 -0
- data/CODE_OF_CONDUCT.md +13 -0
- data/Gemfile +4 -0
- data/Guardfile +16 -0
- data/LICENSE.txt +21 -0
- data/README.md +69 -0
- data/Rakefile +13 -0
- data/code_safety.yml +374 -0
- data/gemfiles/Gemfile.rails32 +5 -0
- data/gemfiles/Gemfile.rails32.lock +142 -0
- data/gemfiles/Gemfile.rails41 +5 -0
- data/gemfiles/Gemfile.rails41.lock +145 -0
- data/gemfiles/Gemfile.rails42 +5 -0
- data/gemfiles/Gemfile.rails42.lock +145 -0
- data/lib/ndr_import.rb +13 -0
- data/lib/ndr_import/csv_library.rb +40 -0
- data/lib/ndr_import/file/all.rb +8 -0
- data/lib/ndr_import/file/base.rb +76 -0
- data/lib/ndr_import/file/delimited.rb +86 -0
- data/lib/ndr_import/file/excel.rb +131 -0
- data/lib/ndr_import/file/pdf.rb +38 -0
- data/lib/ndr_import/file/registry.rb +50 -0
- data/lib/ndr_import/file/text.rb +52 -0
- data/lib/ndr_import/file/word.rb +30 -0
- data/lib/ndr_import/file/zip.rb +67 -0
- data/lib/ndr_import/helpers/file/delimited.rb +105 -0
- data/lib/ndr_import/helpers/file/excel.rb +181 -0
- data/lib/ndr_import/helpers/file/pdf.rb +29 -0
- data/lib/ndr_import/helpers/file/word.rb +27 -0
- data/lib/ndr_import/helpers/file/xml.rb +45 -0
- data/lib/ndr_import/helpers/file/zip.rb +44 -0
- data/lib/ndr_import/mapper.rb +220 -0
- data/lib/ndr_import/mapping_error.rb +5 -0
- data/lib/ndr_import/non_tabular/column_mapping.rb +73 -0
- data/lib/ndr_import/non_tabular/line.rb +46 -0
- data/lib/ndr_import/non_tabular/mapping.rb +35 -0
- data/lib/ndr_import/non_tabular/record.rb +99 -0
- data/lib/ndr_import/non_tabular/table.rb +193 -0
- data/lib/ndr_import/non_tabular_file_helper.rb +160 -0
- data/lib/ndr_import/standard_mappings.rb +23 -0
- data/lib/ndr_import/table.rb +179 -0
- data/lib/ndr_import/version.rb +4 -0
- data/ndr_import.gemspec +44 -0
- data/test/file/base_test.rb +54 -0
- data/test/file/delimited_test.rb +143 -0
- data/test/file/excel_test.rb +85 -0
- data/test/file/pdf_test.rb +35 -0
- data/test/file/registry_test.rb +60 -0
- data/test/file/text_test.rb +92 -0
- data/test/file/word_test.rb +35 -0
- data/test/file/zip_test.rb +47 -0
- data/test/helpers/file/delimited_test.rb +113 -0
- data/test/helpers/file/excel_test.rb +97 -0
- data/test/helpers/file/pdf_test.rb +26 -0
- data/test/helpers/file/word_test.rb +26 -0
- data/test/helpers/file/xml_test.rb +131 -0
- data/test/helpers/file/zip_test.rb +75 -0
- data/test/mapper_test.rb +551 -0
- data/test/non_tabular/mapping_test.rb +36 -0
- data/test/non_tabular/table_test.rb +510 -0
- data/test/non_tabular_file_helper_test.rb +501 -0
- data/test/readme_test.rb +53 -0
- data/test/resources/bomd.csv +3 -0
- data/test/resources/broken.csv +3 -0
- data/test/resources/filesystem_paths.yml +26 -0
- data/test/resources/flat_file.pdf +0 -0
- data/test/resources/flat_file.txt +27 -0
- data/test/resources/flat_file.yml +20 -0
- data/test/resources/hello_utf16be.txt +0 -0
- data/test/resources/hello_utf16le.txt +0 -0
- data/test/resources/hello_utf8.txt +2 -0
- data/test/resources/hello_windows.txt +2 -0
- data/test/resources/hello_world.doc +0 -0
- data/test/resources/hello_world.pdf +0 -0
- data/test/resources/hello_world.txt +2 -0
- data/test/resources/high_ascii_delimited.txt +2 -0
- data/test/resources/malformed.xml +6 -0
- data/test/resources/normal.csv +3 -0
- data/test/resources/normal.csv.zip +0 -0
- data/test/resources/normal_pipe.csv +3 -0
- data/test/resources/normal_thorn.csv +3 -0
- data/test/resources/not_a_pdf.pdf +0 -0
- data/test/resources/not_a_word_file.doc +0 -0
- data/test/resources/sample_xls.xls +0 -0
- data/test/resources/sample_xlsx.xlsx +0 -0
- data/test/resources/standard_mappings.yml +39 -0
- data/test/resources/txt_file_xls_extension.xls +1 -0
- data/test/resources/txt_file_xlsx_extension.xlsx +1 -0
- data/test/resources/utf-16be_xml.xml +0 -0
- data/test/resources/utf-16be_xml_with_declaration.xml +0 -0
- data/test/resources/utf-16le_xml.xml +0 -0
- data/test/resources/utf-8_xml.xml +9 -0
- data/test/resources/windows-1252_xml.xml +9 -0
- data/test/resources/windows.csv +5 -0
- data/test/resources/xlsx_file_xls_extension.xls +0 -0
- data/test/standard_mappings_test.rb +22 -0
- data/test/table_test.rb +288 -0
- data/test/test_helper.rb +13 -0
- metadata +443 -0
@@ -0,0 +1,501 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
require 'test_helper'
|
4
|
+
|
5
|
+
# Minimal host class for NdrImport::NonTabularFileHelper, used only by the tests below.
# It re-declares read_non_tabular_string as public so the tests can call it directly.
class NonTabularTestMapper
  # include NdrImport::Mapper
  include NdrImport::NonTabularFileHelper

  # Mapping configuration; each test assigns it before reading any text.
  attr_reader :mappings
  attr_writer :mappings

  public(:read_non_tabular_string)
end
|
14
|
+
|
15
|
+
# Exercises NdrImport::NonTabularFileHelper through NonTabularTestMapper: mapping
# validation errors, record splitting on start/end line patterns, cell capture
# (numeric, range and RegexpRange line selectors) and encoding handling.
#
# NOTE(review): the text fixtures are expected to start at column 0, because the
# mapping patterns are anchored with ^...$ and the encoding test asserts exact
# character counts. Confirm the fixture whitespace against the canonical file.
class NonTabularFileHelperTest < ActiveSupport::TestCase
  # Four records separated by a six-dash divider line.
  simple_divider_example = <<-STR
111
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt.
------
222
Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo.
------
333
Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla.
------
444
Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim.
  STR

  test 'should raise error with no non_tabular_row' do
    assert_mapping_error(<<-YML, simple_divider_example)
      columns:
        - column: one
    YML
  end

  test 'should raise error with no non_tabular_row start_line_pattern' do
    # non_tabular_row present but empty
    assert_mapping_error(<<-YML, simple_divider_example)
      non_tabular_row:
      columns:
        - column: one
    YML

    # start_line_pattern key present but blank
    assert_mapping_error(<<-YML, simple_divider_example)
      non_tabular_row:
        start_line_pattern:
      columns:
        - column: one
    YML
  end

  test 'should raise error with no column non_tabular_cell' do
    assert_mapping_error(<<-YML, simple_divider_example)
      non_tabular_row:
        start_line_pattern: !ruby/regexp /^-{6}$/
      columns:
        - column: one
    YML
  end

  test 'should raise error with no column non_tabular_cell lines' do
    # non_tabular_cell present but empty
    assert_mapping_error(<<-YML, simple_divider_example)
      non_tabular_row:
        start_line_pattern: !ruby/regexp /^-{6}$/
      columns:
        - column: one
          non_tabular_cell:
    YML

    # lines key present but blank
    assert_mapping_error(<<-YML, simple_divider_example)
      non_tabular_row:
        start_line_pattern: !ruby/regexp /^-{6}$/
      columns:
        - column: one
          non_tabular_cell:
            lines:
    YML
  end

  test 'should raise error with no column non_tabular_cell capture' do
    # capture key missing altogether
    assert_mapping_error(<<-YML, simple_divider_example)
      non_tabular_row:
        start_line_pattern: !ruby/regexp /^-{6}$/
      columns:
        - column: one
          non_tabular_cell:
            lines: !ruby/range
              begin: 0
              end: -1
              excl: false
    YML

    # capture key present but blank
    assert_mapping_error(<<-YML, simple_divider_example)
      non_tabular_row:
        start_line_pattern: !ruby/regexp /^-{6}$/
      columns:
        - column: one
          non_tabular_cell:
            lines: !ruby/range
              begin: 0
              end: -1
              excl: false
            capture:
    YML
  end

  test 'should only return two results with no start_in_a_record or end_in_a_record' do
    mapper = mapper_with(<<-YML)
      non_tabular_row:
        start_line_pattern: !ruby/regexp /^-{6}$/
      columns:
        - column: one
          non_tabular_cell:
            lines: !ruby/range
              begin: 0
              end: -1
              excl: false
            capture: !ruby/regexp /^(.*)$/i
    YML
    results = mapper.read_non_tabular_string(simple_divider_example)
    assert_equal 2, results.count
    assert results.first[0].start_with?('222')
    assert results.last[0].start_with?('333')
  end

  test 'should return three results with start_in_a_record' do
    mapper = mapper_with(<<-YML)
      non_tabular_row:
        start_line_pattern: !ruby/regexp /^-{6}$/
        start_in_a_record: true
      columns:
        - column: one
          non_tabular_cell:
            lines: !ruby/range
              begin: 0
              end: -1
              excl: false
            capture: !ruby/regexp /^(.*)$/i
    YML
    results = mapper.read_non_tabular_string(simple_divider_example)
    assert_equal 3, results.count
    assert results.first[0].start_with?('111')
    assert results.last[0].start_with?('333')
  end

  test 'should return three results with end_in_a_record' do
    mapper = mapper_with(<<-YML)
      non_tabular_row:
        start_line_pattern: !ruby/regexp /^-{6}$/
        end_in_a_record: true
      columns:
        - column: one
          non_tabular_cell:
            lines: !ruby/range
              begin: 0
              end: -1
              excl: false
            capture: !ruby/regexp /^(.*)$/i
    YML
    results = mapper.read_non_tabular_string(simple_divider_example)
    assert_equal 3, results.count
    assert results.first[0].start_with?('222')
    assert results.last[0].start_with?('444')
  end

  test 'should return four results with start_in_a_record and end_in_a_record' do
    mapper = mapper_with(<<-YML)
      non_tabular_row:
        start_line_pattern: !ruby/regexp /^-{6}$/
        start_in_a_record: true
        end_in_a_record: true
      columns:
        - column: one
          non_tabular_cell:
            lines: !ruby/range
              begin: 0
              end: -1
              excl: false
            capture: !ruby/regexp /^(.*)$/i
    YML
    results = mapper.read_non_tabular_string(simple_divider_example)
    assert_equal 4, results.count
    assert results.first[0].start_with?('111')
    assert results.last[0].start_with?('444')
  end

  # A single record with no divider line at all.
  no_divider_example = <<-STR
111
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt.
  STR

  test 'should return one results with start_in_a_record and end_in_a_record' do
    mapper = mapper_with(<<-YML)
      non_tabular_row:
        start_line_pattern: !ruby/regexp /^-{6}$/
        start_in_a_record: true
        end_in_a_record: true
      columns:
        - column: one
          non_tabular_cell:
            lines: !ruby/range
              begin: 0
              end: -1
              excl: false
            capture: !ruby/regexp /^(.*)$/i
    YML
    results = mapper.read_non_tabular_string(no_divider_example)
    assert_equal 1, results.count
    assert results.first[0].start_with?('111')
  end

  # Records wrapped in explicit START/END marker lines, with filler text between them.
  simple_start_and_end_divider_example = <<-STR
----- START -----
111
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt.
------ END ------
This is never captured
----- START -----
222
Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo.
------ END ------
This is never captured
----- START -----
333
Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla.
------ END ------
This is never captured
----- START -----
444
This is captured
Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim.
------ END ------
  STR

  test 'should return four results with start and end dividers' do
    mapper = mapper_with(<<-YML)
      non_tabular_row:
        start_line_pattern: !ruby/regexp /^----- START -----$/
        end_line_pattern: !ruby/regexp /^------ END ------$/
      columns:
        - column: one
          non_tabular_cell:
            lines: !ruby/range
              begin: 0
              end: -1
              excl: false
            capture: !ruby/regexp /^(.*)$/i
    YML
    results = mapper.read_non_tabular_string(simple_start_and_end_divider_example)
    assert_equal 4, results.count
    assert results.first[0].start_with?('111')
    assert results.last[0].start_with?('444')

    assert results.flatten.any? { |result| result =~ /This is captured/ }
    refute results.flatten.any? { |result| result =~ /This is never captured/ }
  end

  test 'documentation example' do
    mapper = mapper_with(<<-YML)
      non_tabular_row:
        start_line_pattern: !ruby/regexp /^D\\|/
        capture_start_line: true
        end_in_a_record: true
      columns:
        - standard_mapping: nhsnumber
          non_tabular_cell:
            lines: 0
            capture:
              - !ruby/regexp /^D\\|([^|]*).*/
        - column: fulltextreport
          non_tabular_cell:
            lines: !ruby/range
              begin: 1
              end: -1
              excl: false
            capture: !ruby/regexp /^(?:R|\\d+)\\|(.*)$/i
            join: "\\n"
    YML
    documentation_example = [
      'D|1111111111|...',
      'R|This is a',
      '1|multiline report'
    ].join("\n")

    results = mapper.read_non_tabular_string(documentation_example)
    assert_equal 1, results.count
    result = results.first
    assert_equal '1111111111', result[0]
    assert_equal "This is a\nmultiline report", result[1]
  end

  test 'should capture' do
    mapper = mapper_with(<<-YML)
      non_tabular_row:
        start_line_pattern: !ruby/regexp /^-{6}$/
      columns:
        - standard_mapping: nhsnumber
          non_tabular_cell:
            lines: 0
            capture: !ruby/regexp /^(\\d*)$/i
        - column: address
          non_tabular_cell:
            lines: !ruby/range
              begin: 1
              end: 5
              excl: false
            capture: !ruby/regexp /^.{50}(.*)$/i
            join: ", "
        - standard_mapping: postcode
          non_tabular_cell:
            lines: 6
            capture: !ruby/regexp /^.{50}(.*)$/i
        - column: capture_inclusive
          non_tabular_cell:
            lines: !ruby/object:RegexpRange
              begin: !ruby/regexp /^CAPTURE INCLUSIVE$/
              end: !ruby/regexp /^Capture me.$/i
              excl: false
            capture: !ruby/regexp /^(.*)$/i
            join: "\\n"
        - column: capture_exclusive
          non_tabular_cell:
            lines: !ruby/object:RegexpRange
              begin: !ruby/regexp /^CAPTURE EXCLUSIVE$/
              end: !ruby/regexp /^Do NOT capture me.$/i
              excl: true
            capture: !ruby/regexp /^(.*)$/i
            join: "\\n"
        - column: capture_to_end
          non_tabular_cell:
            lines: !ruby/object:RegexpRange
              begin: !ruby/regexp /^CAPTURE TO END$/
              end: -1
              excl: false
            capture: !ruby/regexp /^(.*)$/i
            join: "\\n"
    YML
    # NOTE(review): the address and postcode lines carry a 50-character prefix so that
    # the /^.{50}(.*)$/ captures above pick up only the trailing text; the continuation
    # lines are padded with 50 spaces here - confirm against the canonical fixture.
    capture_example = <<-STR
This is never captured
------
1111111111
<----------------- 50 characters ---------------->Unit C, Magog Court
                                                  Shelford Bottom
                                                  Hinton Way
                                                  Cambridge

                                                  CB22 3AD

CAPTURE INCLUSIVE
Lorem ipsum dolor sit amet,
consectetur adipisicing elit,
Capture me.

CAPTURE EXCLUSIVE
Ut enim ad minim veniam, quis nostrud exercitation.
Do NOT capture me.

CAPTURE TO END
Lorem ipsum dolor sit amet, consectetur adipisicing elit.
Ut enim ad minim veniam, quis nostrud exercitation ullamco.
Duis aute irure dolor in reprehenderit in voluptate velit.
Excepteur sint occaecat cupidatat non proident, sunt in culpa.
------
This is never captured
    STR
    results = mapper.read_non_tabular_string(capture_example)
    assert_equal 1, results.count
    result = results.first
    assert_equal '1111111111', result[0]
    assert_equal 'Unit C, Magog Court, Shelford Bottom, Hinton Way, Cambridge', result[1]
    assert_equal 'CB22 3AD', result[2]
    assert_equal "CAPTURE INCLUSIVE\nLorem ipsum dolor sit amet,\n" \
                 "consectetur adipisicing elit,\nCapture me.",
                 result[3]
    assert_equal "CAPTURE EXCLUSIVE\nUt enim ad minim veniam, quis nostrud exercitation.",
                 result[4]
    assert_equal "CAPTURE TO END\n" \
                 "Lorem ipsum dolor sit amet, consectetur adipisicing elit.\n" \
                 "Ut enim ad minim veniam, quis nostrud exercitation ullamco.\n" \
                 "Duis aute irure dolor in reprehenderit in voluptate velit.\n" \
                 'Excepteur sint occaecat cupidatat non proident, sunt in culpa.',
                 result[5]

    assert_equal 25, mapper.non_tabular_lines.last.absolute_line_number
  end

  test 'handles non utf8 characters' do
    # The middle record contains a raw \xBE byte, which is not valid UTF-8 on its own.
    mixed_encoding_example = <<-STR
111
Lorem ipsum dolor sit amet.
------
111
Lorem ipsum dolor\xBE sit amet.
------
111
Lorem ipsum dolor sit amet.
------
    STR

    mapper = mapper_with(<<-YML)
      non_tabular_row:
        start_line_pattern: !ruby/regexp /^111$/
        end_in_a_record: true
      columns:
        - column: one
          non_tabular_cell:
            lines: !ruby/range
              begin: 0
              end: -1
              excl: true
            capture: !ruby/regexp /^(.*)$/i
    YML

    results = mapper.read_non_tabular_string(mixed_encoding_example)

    assert_equal 3, results.count, 'records were lost'

    # One extra character but two extra bytes: the stray byte ends up as a multi-byte
    # UTF-8 character in the middle record.
    assert_equal [27, 28, 27], results.map { |row| row.first.chars.to_a.length }
    assert_equal [27, 29, 27], results.map { |row| row.first.bytes.to_a.length }

    results.each do |row|
      assert row.first.valid_encoding?
      assert_equal Encoding.find('UTF-8'), row.first.encoding
    end
  end

  test 'should not allow junk bytes' do
    # Same layout as the previous test, but with a \x8D byte that is expected to be rejected.
    junk = <<-STR
111
Lorem ipsum dolor sit amet.
------
111
Lorem ipsum dolor\x8D sit amet.
------
111
Lorem ipsum dolor sit amet.
------
    STR

    mapper = mapper_with(<<-YML)
      non_tabular_row:
        start_line_pattern: !ruby/regexp /^111$/
        end_in_a_record: true
      columns:
        - column: one
          non_tabular_cell:
            lines: !ruby/range
              begin: 0
              end: -1
              excl: true
            capture: !ruby/regexp /^(.*)$/i
    YML

    assert_raises(UTF8Encoding::UTF8CoercionError) do
      mapper.read_non_tabular_string(junk)
    end
  end

  private

  # Returns a fresh NonTabularTestMapper whose mappings are parsed from +yaml+.
  def mapper_with(yaml)
    NonTabularTestMapper.new.tap { |mapper| mapper.mappings = load_mappings(yaml) }
  end

  # Parses an inline (trusted) YAML fixture.
  #
  # The fixtures rely on !ruby/regexp, !ruby/range and !ruby/object tags. From Psych 4
  # onwards YAML.load behaves like safe_load and rejects those, so prefer unsafe_load
  # where it exists and fall back to load on older Psych versions that lack it.
  def load_mappings(yaml)
    YAML.respond_to?(:unsafe_load) ? YAML.unsafe_load(yaml) : YAML.load(yaml)
  end

  # Asserts that reading +text+ with the mappings described by +yaml+ raises
  # NdrImport::MappingError. The mapper is built outside the assertion block so that
  # only the read itself is expected to raise.
  def assert_mapping_error(yaml, text)
    mapper = mapper_with(yaml)
    assert_raises(NdrImport::MappingError) do
      mapper.read_non_tabular_string(text)
    end
  end
end
|