ndr_import 3.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/.gitignore +14 -0
- data/.rubocop.yml +27 -0
- data/.ruby-version +1 -0
- data/.travis.yml +22 -0
- data/CODE_OF_CONDUCT.md +13 -0
- data/Gemfile +4 -0
- data/Guardfile +16 -0
- data/LICENSE.txt +21 -0
- data/README.md +69 -0
- data/Rakefile +13 -0
- data/code_safety.yml +374 -0
- data/gemfiles/Gemfile.rails32 +5 -0
- data/gemfiles/Gemfile.rails32.lock +142 -0
- data/gemfiles/Gemfile.rails41 +5 -0
- data/gemfiles/Gemfile.rails41.lock +145 -0
- data/gemfiles/Gemfile.rails42 +5 -0
- data/gemfiles/Gemfile.rails42.lock +145 -0
- data/lib/ndr_import.rb +13 -0
- data/lib/ndr_import/csv_library.rb +40 -0
- data/lib/ndr_import/file/all.rb +8 -0
- data/lib/ndr_import/file/base.rb +76 -0
- data/lib/ndr_import/file/delimited.rb +86 -0
- data/lib/ndr_import/file/excel.rb +131 -0
- data/lib/ndr_import/file/pdf.rb +38 -0
- data/lib/ndr_import/file/registry.rb +50 -0
- data/lib/ndr_import/file/text.rb +52 -0
- data/lib/ndr_import/file/word.rb +30 -0
- data/lib/ndr_import/file/zip.rb +67 -0
- data/lib/ndr_import/helpers/file/delimited.rb +105 -0
- data/lib/ndr_import/helpers/file/excel.rb +181 -0
- data/lib/ndr_import/helpers/file/pdf.rb +29 -0
- data/lib/ndr_import/helpers/file/word.rb +27 -0
- data/lib/ndr_import/helpers/file/xml.rb +45 -0
- data/lib/ndr_import/helpers/file/zip.rb +44 -0
- data/lib/ndr_import/mapper.rb +220 -0
- data/lib/ndr_import/mapping_error.rb +5 -0
- data/lib/ndr_import/non_tabular/column_mapping.rb +73 -0
- data/lib/ndr_import/non_tabular/line.rb +46 -0
- data/lib/ndr_import/non_tabular/mapping.rb +35 -0
- data/lib/ndr_import/non_tabular/record.rb +99 -0
- data/lib/ndr_import/non_tabular/table.rb +193 -0
- data/lib/ndr_import/non_tabular_file_helper.rb +160 -0
- data/lib/ndr_import/standard_mappings.rb +23 -0
- data/lib/ndr_import/table.rb +179 -0
- data/lib/ndr_import/version.rb +4 -0
- data/ndr_import.gemspec +44 -0
- data/test/file/base_test.rb +54 -0
- data/test/file/delimited_test.rb +143 -0
- data/test/file/excel_test.rb +85 -0
- data/test/file/pdf_test.rb +35 -0
- data/test/file/registry_test.rb +60 -0
- data/test/file/text_test.rb +92 -0
- data/test/file/word_test.rb +35 -0
- data/test/file/zip_test.rb +47 -0
- data/test/helpers/file/delimited_test.rb +113 -0
- data/test/helpers/file/excel_test.rb +97 -0
- data/test/helpers/file/pdf_test.rb +26 -0
- data/test/helpers/file/word_test.rb +26 -0
- data/test/helpers/file/xml_test.rb +131 -0
- data/test/helpers/file/zip_test.rb +75 -0
- data/test/mapper_test.rb +551 -0
- data/test/non_tabular/mapping_test.rb +36 -0
- data/test/non_tabular/table_test.rb +510 -0
- data/test/non_tabular_file_helper_test.rb +501 -0
- data/test/readme_test.rb +53 -0
- data/test/resources/bomd.csv +3 -0
- data/test/resources/broken.csv +3 -0
- data/test/resources/filesystem_paths.yml +26 -0
- data/test/resources/flat_file.pdf +0 -0
- data/test/resources/flat_file.txt +27 -0
- data/test/resources/flat_file.yml +20 -0
- data/test/resources/hello_utf16be.txt +0 -0
- data/test/resources/hello_utf16le.txt +0 -0
- data/test/resources/hello_utf8.txt +2 -0
- data/test/resources/hello_windows.txt +2 -0
- data/test/resources/hello_world.doc +0 -0
- data/test/resources/hello_world.pdf +0 -0
- data/test/resources/hello_world.txt +2 -0
- data/test/resources/high_ascii_delimited.txt +2 -0
- data/test/resources/malformed.xml +6 -0
- data/test/resources/normal.csv +3 -0
- data/test/resources/normal.csv.zip +0 -0
- data/test/resources/normal_pipe.csv +3 -0
- data/test/resources/normal_thorn.csv +3 -0
- data/test/resources/not_a_pdf.pdf +0 -0
- data/test/resources/not_a_word_file.doc +0 -0
- data/test/resources/sample_xls.xls +0 -0
- data/test/resources/sample_xlsx.xlsx +0 -0
- data/test/resources/standard_mappings.yml +39 -0
- data/test/resources/txt_file_xls_extension.xls +1 -0
- data/test/resources/txt_file_xlsx_extension.xlsx +1 -0
- data/test/resources/utf-16be_xml.xml +0 -0
- data/test/resources/utf-16be_xml_with_declaration.xml +0 -0
- data/test/resources/utf-16le_xml.xml +0 -0
- data/test/resources/utf-8_xml.xml +9 -0
- data/test/resources/windows-1252_xml.xml +9 -0
- data/test/resources/windows.csv +5 -0
- data/test/resources/xlsx_file_xls_extension.xls +0 -0
- data/test/standard_mappings_test.rb +22 -0
- data/test/table_test.rb +288 -0
- data/test/test_helper.rb +13 -0
- metadata +443 -0
@@ -0,0 +1,36 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
# Tests for the NdrImport::NonTabular::Mapping class
|
4
|
+
# Construction checks for NdrImport::NonTabular::Mapping: every test builds a
# mapping from an options hash and asserts on the outcome.
class MappingTestTest < ActiveSupport::TestCase
  # Expects a MappingError when no 'non_tabular_row' key is given at all.
  def test_should_raise_error_with_no_non_tabular_row
    column_list = [{ 'column' => 'one' }]

    assert_raise(NdrImport::MappingError) do
      NdrImport::NonTabular::Mapping.new('columns' => column_list)
    end
  end

  # Expects a MappingError both when 'non_tabular_row' is nil and when it is a
  # hash whose 'start_line_pattern' is nil.
  def test_should_raise_error_with_no_non_tabular_row_start_line_pattern
    [nil, { 'start_line_pattern' => nil }].each do |row_options|
      assert_raise(NdrImport::MappingError) do
        NdrImport::NonTabular::Mapping.new(
          'non_tabular_row' => row_options,
          'columns' => [{ 'column' => 'one' }]
        )
      end
    end
  end

  # Expects the supplied start_line_pattern to be readable from the mapping.
  def test_should_initialize_with_non_tabular_row
    divider = /\A-*\z/
    mapping = NdrImport::NonTabular::Mapping.new(
      'non_tabular_row' => { 'start_line_pattern' => divider },
      'columns' => [{ 'column' => 'one' }]
    )

    assert_equal(divider, mapping.start_line_pattern)
  end
end
|
@@ -0,0 +1,510 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
# This tests the NdrImport::NonTabular::Table mapping class
|
4
|
+
class TableTest < ActiveSupport::TestCase
|
5
|
+
# Prepares three line-by-line fixtures used by the tests in this class. Each
# one is the Enumerator returned by Array#map (called without a block) over
# the lines of a sample text.
def setup
  # `<<-` only allows the terminator to be indented; the heredoc body is
  # taken verbatim, so the sample lines below start at column 0.
  as_line_enum = ->(sample) { sample.split(/\n/).map }

  # Four numbered records separated by '------' dividers.
  @simple_divider_example = as_line_enum.call(<<-STR)
111
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt.
------
222
Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo.
------
333
Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla.
------
444
Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim.
  STR

  # A single record with no divider at all.
  @no_divider_example = as_line_enum.call(<<-STR)
111
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt.
  STR

  # Records wrapped in explicit START / END marker lines, with filler text
  # between an END and the next START.
  @simple_start_and_end_divider_example = as_line_enum.call(<<-STR)
----- START -----
111
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt.
------ END ------
This is never captured
----- START -----
222
Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo.
------ END ------
This is never captured
----- START -----
333
Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla.
------ END ------
This is never captured
----- START -----
444
This is captured
Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim.
------ END ------
  STR
end
|
48
|
+
|
49
|
+
# Expects Table.all_valid_options to contain exactly these option names;
# both sides are sorted, so ordering does not matter.
def test_all_valid_options
  expected = %w(
    canonical_name capture_start_line columns end_in_a_record end_line_pattern filename_pattern
    format klass remove_lines start_in_a_record start_line_pattern
  ).sort
  actual = NdrImport::NonTabular::Table.all_valid_options.sort

  assert_equal expected, actual
end
|
57
|
+
|
58
|
+
# Expects a MappingError when 'start_line_pattern' is missing or nil.
def test_should_raise_error_with_no_start_line_pattern
  # Key absent altogether.
  assert_raise(NdrImport::MappingError) do
    NdrImport::NonTabular::Table.new('columns' => [{ 'column' => 'one' }])
  end

  # Key present, but nil.
  assert_raise(NdrImport::MappingError) do
    NdrImport::NonTabular::Table.new(
      'start_line_pattern' => nil,
      'columns' => [{ 'column' => 'one' }]
    )
  end
end
|
72
|
+
|
73
|
+
# Expects the supplied start_line_pattern to be readable from the table.
def test_should_initialize_with_non_tabular_row
  divider = /\A-*\z/
  table = NdrImport::NonTabular::Table.new(
    'start_line_pattern' => divider,
    'columns' => [{ 'column' => 'one' }]
  )

  assert_equal(divider, table.start_line_pattern)
end
|
80
|
+
|
81
|
+
# Loads the flat_file.yml mapping and runs it over flat_file.txt, then checks
# the number of records and which pieces of text were (not) captured.
def test_should_test_flat_file_txt
  table = YAML.load_file(SafePath.new('permanent_test_files').join('flat_file.yml'))
  assert table.is_a?(NdrImport::NonTabular::Table)

  filename = SafePath.new('permanent_test_files').join('flat_file.txt')

  # Use the block form so the file handle is closed once the records have
  # been read. The enumerator returned by #transform is consumed with #map
  # inside the block, i.e. before the file is closed.
  results = File.open(filename) do |file|
    table.transform(file.each).map { |_klass, fields, _index| fields[:rawtext]['one'] }
  end

  assert_equal 4, results.count
  assert results.first.start_with?('1')
  assert results.last.start_with?('4')

  assert results.any? { |result| result =~ /This is captured/ }
  refute results.any? { |result| result =~ /This is never captured/ }
  refute results.any? { |result| result =~ /== Page/ }
end
|
101
|
+
|
102
|
+
# Expects a MappingError on transform when the only column has no
# non_tabular_cell definition.
def test_should_raise_error_with_no_column_non_tabular_cell
  mapping_yaml = <<-YML.strip_heredoc
    --- !ruby/object:NdrImport::NonTabular::Table
    start_line_pattern: !ruby/regexp /^-{6}$/
    klass: SomeTestKlass
    columns:
    - column: one
  YML
  table = YAML.load(mapping_yaml)

  # #to_a forces the enumerator to be evaluated.
  assert_raise(NdrImport::MappingError) { table.transform(@simple_divider_example).to_a }
end
|
114
|
+
|
115
|
+
# Expects a MappingError on transform when a column's non_tabular_cell is
# empty, and also when it has a 'lines' key with no value.
#
# The indentation inside the YAML heredocs is significant: non_tabular_cell
# must be nested under the column entry (and lines under non_tabular_cell),
# otherwise they would be parsed as keys of the table itself.
def test_should_raise_error_with_no_column_non_tabular_cell_lines
  table = YAML.load <<-YML.strip_heredoc
    --- !ruby/object:NdrImport::NonTabular::Table
    start_line_pattern: !ruby/regexp /^-{6}$/
    klass: SomeTestKlass
    columns:
    - column: one
      non_tabular_cell:
  YML
  assert_raise NdrImport::MappingError do
    table.transform(@simple_divider_example).to_a
  end

  table = YAML.load <<-YML.strip_heredoc
    --- !ruby/object:NdrImport::NonTabular::Table
    start_line_pattern: !ruby/regexp /^-{6}$/
    klass: SomeTestKlass
    columns:
    - column: one
      non_tabular_cell:
        lines:
  YML
  assert_raise NdrImport::MappingError do
    table.transform(@simple_divider_example).to_a
  end
end
|
141
|
+
|
142
|
+
# Expects a MappingError on transform when a column's non_tabular_cell has
# 'lines' but no 'capture' key, and also when 'capture' has no value.
#
# The indentation inside the YAML heredocs is significant: begin/end/excl
# belong to the !ruby/range node, which in turn belongs to non_tabular_cell
# of the column entry.
def test_should_raise_error_with_no_column_non_tabular_cell_capture
  table = YAML.load <<-YML.strip_heredoc
    --- !ruby/object:NdrImport::NonTabular::Table
    start_line_pattern: !ruby/regexp /^-{6}$/
    klass: SomeTestKlass
    columns:
    - column: one
      non_tabular_cell:
        lines: !ruby/range
          begin: 0
          end: -1
          excl: false
  YML
  assert_raise NdrImport::MappingError do
    table.transform(@simple_divider_example).to_a
  end

  table = YAML.load <<-YML.strip_heredoc
    --- !ruby/object:NdrImport::NonTabular::Table
    start_line_pattern: !ruby/regexp /^-{6}$/
    klass: SomeTestKlass
    columns:
    - column: one
      non_tabular_cell:
        lines: !ruby/range
          begin: 0
          end: -1
          excl: false
        capture:
  YML
  assert_raise NdrImport::MappingError do
    table.transform(@simple_divider_example).to_a
  end
end
|
176
|
+
|
177
|
+
# With neither start_in_a_record nor end_in_a_record set, expects two records
# from the simple divider sample: the ones beginning '222' and '333'.
#
# The indentation inside the YAML heredoc is significant: non_tabular_cell is
# nested under the column entry, and begin/end/excl under the !ruby/range node.
def test_should_only_return_two_results_with_no_start_in_a_record_or_end_in_a_record
  table = YAML.load <<-YML.strip_heredoc
    --- !ruby/object:NdrImport::NonTabular::Table
    start_line_pattern: !ruby/regexp /^-{6}$/
    klass: SomeTestKlass
    columns:
    - column: one
      non_tabular_cell:
        lines: !ruby/range
          begin: 0
          end: -1
          excl: false
        capture: !ruby/regexp /^(.*)$/i
  YML
  enum = table.transform(@simple_divider_example)
  assert_instance_of Enumerator, enum
  results = enum.map { |_klass, fields, _index| fields[:rawtext]['one'] }

  assert_equal 2, results.count
  assert results.first.start_with?('222')
  assert results.last.start_with?('333')
end
|
199
|
+
|
200
|
+
# With start_in_a_record set, expects three records from the simple divider
# sample: the first beginning '111' and the last beginning '333'.
#
# The indentation inside the YAML heredoc is significant: non_tabular_cell is
# nested under the column entry, and begin/end/excl under the !ruby/range node.
def test_should_return_three_results_with_start_in_a_record
  table = YAML.load <<-YML.strip_heredoc
    --- !ruby/object:NdrImport::NonTabular::Table
    start_line_pattern: !ruby/regexp /^-{6}$/
    start_in_a_record: true
    klass: SomeTestKlass
    columns:
    - column: one
      non_tabular_cell:
        lines: !ruby/range
          begin: 0
          end: -1
          excl: false
        capture: !ruby/regexp /^(.*)$/i
  YML
  enum = table.transform(@simple_divider_example)
  assert_instance_of Enumerator, enum
  results = enum.map { |_klass, fields, _index| fields[:rawtext]['one'] }

  assert_equal 3, results.count
  assert results.first.start_with?('111')
  assert results.last.start_with?('333')
end
|
223
|
+
|
224
|
+
# With end_in_a_record set, expects three records from the simple divider
# sample: the first beginning '222' and the last beginning '444'.
#
# The indentation inside the YAML heredoc is significant: non_tabular_cell is
# nested under the column entry, and begin/end/excl under the !ruby/range node.
def test_should_return_three_results_with_end_in_a_record
  table = YAML.load <<-YML.strip_heredoc
    --- !ruby/object:NdrImport::NonTabular::Table
    start_line_pattern: !ruby/regexp /^-{6}$/
    end_in_a_record: true
    klass: SomeTestKlass
    columns:
    - column: one
      non_tabular_cell:
        lines: !ruby/range
          begin: 0
          end: -1
          excl: false
        capture: !ruby/regexp /^(.*)$/i
  YML
  enum = table.transform(@simple_divider_example)
  assert_instance_of Enumerator, enum
  results = enum.map { |_klass, fields, _index| fields[:rawtext]['one'] }

  assert_equal 3, results.count
  assert results.first.start_with?('222')
  assert results.last.start_with?('444')
end
|
247
|
+
|
248
|
+
# With both start_in_a_record and end_in_a_record set, expects all four
# records from the simple divider sample ('111' through '444').
#
# The indentation inside the YAML heredoc is significant: non_tabular_cell is
# nested under the column entry, and begin/end/excl under the !ruby/range node.
def test_should_return_four_results_with_start_in_a_record_and_end_in_a_record
  table = YAML.load <<-YML.strip_heredoc
    --- !ruby/object:NdrImport::NonTabular::Table
    start_line_pattern: !ruby/regexp /^-{6}$/
    start_in_a_record: true
    end_in_a_record: true
    klass: SomeTestKlass
    columns:
    - column: one
      non_tabular_cell:
        lines: !ruby/range
          begin: 0
          end: -1
          excl: false
        capture: !ruby/regexp /^(.*)$/i
  YML
  enum = table.transform(@simple_divider_example)
  assert_instance_of Enumerator, enum
  results = enum.map { |_klass, fields, _index| fields[:rawtext]['one'] }

  assert_equal 4, results.count
  assert results.first.start_with?('111')
  assert results.last.start_with?('444')
end
|
272
|
+
|
273
|
+
# With both start_in_a_record and end_in_a_record set, expects the sample
# that contains no divider at all to yield a single record beginning '111'.
#
# The indentation inside the YAML heredoc is significant: non_tabular_cell is
# nested under the column entry, and begin/end/excl under the !ruby/range node.
def test_should_return_one_results_with_start_in_a_record_and_end_in_a_record
  table = YAML.load <<-YML.strip_heredoc
    --- !ruby/object:NdrImport::NonTabular::Table
    start_line_pattern: !ruby/regexp /^-{6}$/
    start_in_a_record: true
    end_in_a_record: true
    klass: SomeTestKlass
    columns:
    - column: one
      non_tabular_cell:
        lines: !ruby/range
          begin: 0
          end: -1
          excl: false
        capture: !ruby/regexp /^(.*)$/i
  YML
  enum = table.transform(@no_divider_example)
  assert_instance_of Enumerator, enum
  results = enum.map { |_klass, fields, _index| fields[:rawtext]['one'] }

  assert_equal 1, results.count
  assert results.first.start_with?('111')
end
|
296
|
+
|
297
|
+
# With explicit start and end line patterns, expects four records from the
# START/END sample, and that text between an END and the next START is not
# part of any record.
#
# The indentation inside the YAML heredoc is significant: non_tabular_cell is
# nested under the column entry, and begin/end/excl under the !ruby/range node.
def test_should_return_four_results_with_start_and_end_dividers
  table = YAML.load <<-YML.strip_heredoc
    --- !ruby/object:NdrImport::NonTabular::Table
    start_line_pattern: !ruby/regexp /^----- START -----$/
    end_line_pattern: !ruby/regexp /^------ END ------$/
    klass: SomeTestKlass
    columns:
    - column: one
      non_tabular_cell:
        lines: !ruby/range
          begin: 0
          end: -1
          excl: false
        capture: !ruby/regexp /^(.*)$/i
  YML
  enum = table.transform(@simple_start_and_end_divider_example)
  assert_instance_of Enumerator, enum

  results = enum.map { |_klass, fields, _index| fields[:rawtext]['one'] }

  assert_equal 4, results.count
  assert results.first.start_with?('111')
  assert results.last.start_with?('444')

  assert results.any? { |result| result =~ /This is captured/ }
  refute results.any? { |result| result =~ /This is never captured/ }
end
|
324
|
+
|
325
|
+
# End-to-end capture test: one record is cut out of a sample document and six
# columns are filled from it using a single line index, an Integer range and
# RegexpRange line selections (inclusive, exclusive and open-ended).
#
# The indentation inside the YAML heredoc is significant: every
# non_tabular_cell is nested under its column entry, and begin/end/excl under
# the range node they describe.
def test_should_capture
  table = YAML.load <<-YML.strip_heredoc
    --- !ruby/object:NdrImport::NonTabular::Table
    start_line_pattern: !ruby/regexp /^-{6}$/
    klass: SomeTestKlass
    columns:
    - standard_mapping: nhsnumber
      non_tabular_cell:
        lines: 0
        capture: !ruby/regexp /^(\\d*)$/i
    - column: address
      non_tabular_cell:
        lines: !ruby/range
          begin: 1
          end: 5
          excl: false
        capture: !ruby/regexp /^.{50}(.*)$/i
        join: ", "
    - standard_mapping: postcode
      non_tabular_cell:
        lines: 6
        capture: !ruby/regexp /^.{50}(.*)$/i
    - column: capture_inclusive
      non_tabular_cell:
        lines: !ruby/object:RegexpRange
          begin: !ruby/regexp /^CAPTURE INCLUSIVE$/
          end: !ruby/regexp /^Capture me.$/i
          excl: false
        capture: !ruby/regexp /^(.*)$/i
        join: "\\n"
    - column: capture_exclusive
      non_tabular_cell:
        lines: !ruby/object:RegexpRange
          begin: !ruby/regexp /^CAPTURE EXCLUSIVE$/
          end: !ruby/regexp /^Do NOT capture me.$/i
          excl: true
        capture: !ruby/regexp /^(.*)$/i
        join: "\\n"
    - column: capture_to_end
      non_tabular_cell:
        lines: !ruby/object:RegexpRange
          begin: !ruby/regexp /^CAPTURE TO END$/
          end: -1
          excl: false
        capture: !ruby/regexp /^(.*)$/i
        join: "\\n"
  YML

  # The address and postcode patterns skip the first 50 characters of a line
  # (/^.{50}(.*)$/), so those lines carry a 50-column left margin. It is
  # interpolated rather than typed so the significant whitespace is visible.
  # NOTE(review): the margin width is inferred from the capture patterns and
  # the expected values below -- confirm against the original fixture.
  margin = ' ' * 50
  capture_example = <<-STR
This is never captured
------
1111111111
<----------------- 50 characters ---------------->Unit C, Magog Court
#{margin}Shelford Bottom
#{margin}Hinton Way
#{margin}Cambridge

#{margin}CB22 3AD

CAPTURE INCLUSIVE
Lorem ipsum dolor sit amet,
consectetur adipisicing elit,
Capture me.

CAPTURE EXCLUSIVE
Ut enim ad minim veniam, quis nostrud exercitation.
Do NOT capture me.

CAPTURE TO END
Lorem ipsum dolor sit amet, consectetur adipisicing elit.
Ut enim ad minim veniam, quis nostrud exercitation ullamco.
Duis aute irure dolor in reprehenderit in voluptate velit.
Excepteur sint occaecat cupidatat non proident, sunt in culpa.
------
This is never captured
  STR
  enum = table.transform(capture_example.split(/\n/).map)
  assert_instance_of Enumerator, enum

  output = enum.map { |klass, fields, index| [klass, fields, index] }

  expected_output = [
    [
      'SomeTestKlass', {
        'nhsnumber' => '1111111111',
        'postcode' => 'CB223AD',
        :rawtext => {
          'nhsnumber' => '1111111111',
          'address' => 'Unit C, Magog Court, Shelford Bottom, Hinton Way, Cambridge',
          'postcode' => 'CB22 3AD',
          'capture_inclusive' => "CAPTURE INCLUSIVE\nLorem ipsum dolor sit amet,\n" \
                                 "consectetur adipisicing elit,\nCapture me.",
          'capture_exclusive' => "CAPTURE EXCLUSIVE\n" \
                                 'Ut enim ad minim veniam, quis nostrud exercitation.',
          'capture_to_end' => "CAPTURE TO END\n" \
                              "Lorem ipsum dolor sit amet, consectetur adipisicing elit.\n" \
                              "Ut enim ad minim veniam, quis nostrud exercitation ullamco.\n" \
                              "Duis aute irure dolor in reprehenderit in voluptate velit.\n" \
                              'Excepteur sint occaecat cupidatat non proident, sunt in culpa.'
        }
      },
      0
    ]
  ]
  assert_equal expected_output.sort, output.sort
  # The sample has 26 lines, so the last one read is expected to be number 25.
  assert_equal 25, table.non_tabular_lines.last.absolute_line_number
end
|
434
|
+
|
435
|
+
# Feeds in a sample whose second record contains the raw byte \xBE and
# expects all three records back, each as a valid UTF-8 string, with the
# offending byte turned into one character that occupies two bytes.
#
# The indentation inside the YAML heredoc is significant: non_tabular_cell is
# nested under the column entry, and begin/end/excl under the !ruby/range node.
def test_handles_non_utf8_characters
  # `<<-` keeps the body verbatim, so the sample lines start at column 0.
  mixed_encoding_example = <<-STR.each_line
111
Lorem ipsum dolor sit amet.
------
111
Lorem ipsum dolor\xBE sit amet.
------
111
Lorem ipsum dolor sit amet.
------
  STR

  table = YAML.load <<-YML.strip_heredoc
    --- !ruby/object:NdrImport::NonTabular::Table
    start_line_pattern: !ruby/regexp /^111$/
    end_in_a_record: true
    klass: SomeTestKlass
    columns:
    - column: one
      non_tabular_cell:
        lines: !ruby/range
          begin: 0
          end: -1
          excl: true
        capture: !ruby/regexp /^(.*)$/i
  YML

  enum = table.transform(mixed_encoding_example)
  assert_instance_of Enumerator, enum
  results = enum.map { |_klass, fields, _index| fields[:rawtext]['one'] }

  assert_equal 3, results.count, 'records were lost'

  assert_equal [27, 28, 27], results.map(&:length)
  assert_equal [27, 29, 27], results.map(&:bytesize)

  results.each do |row|
    # Validate the whole string: the converted byte sits in the middle of the
    # row, so checking only its first character would not cover it.
    assert row.valid_encoding?
    assert_equal Encoding.find('UTF-8'), row.encoding
  end
end
|
477
|
+
|
478
|
+
# Feeds in a sample whose second record contains the raw byte \x8D and
# expects the transform to raise UTF8Encoding::UTF8CoercionError.
#
# The indentation inside the YAML heredoc is significant: non_tabular_cell is
# nested under the column entry, and begin/end/excl under the !ruby/range node.
def test_should_not_allow_junk_bytes
  # `<<-` keeps the body verbatim, so the sample lines start at column 0.
  junk = <<-STR.each_line
111
Lorem ipsum dolor sit amet.
------
111
Lorem ipsum dolor\x8D sit amet.
------
111
Lorem ipsum dolor sit amet.
------
  STR

  table = YAML.load <<-YML.strip_heredoc
    --- !ruby/object:NdrImport::NonTabular::Table
    start_line_pattern: !ruby/regexp /^111$/
    end_in_a_record: true
    klass: SomeTestKlass
    columns:
    - column: one
      non_tabular_cell:
        lines: !ruby/range
          begin: 0
          end: -1
          excl: true
        capture: !ruby/regexp /^(.*)$/i
  YML

  # #to_a forces the enumerator to be evaluated.
  assert_raises(UTF8Encoding::UTF8CoercionError) do
    table.transform(junk).to_a
  end
end
|
510
|
+
end
|