ndr_import 8.5.0 → 8.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/Gemfile +0 -3
- data/README.md +6 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/code_safety.yml +27 -11
- data/exe/pdf_acro_form_to_yaml +23 -0
- data/exe/pdf_to_text +28 -0
- data/exe/word_to_text +26 -0
- data/gemfiles/Gemfile.rails52 +0 -3
- data/gemfiles/Gemfile.rails60 +5 -0
- data/lib/ndr_import/version.rb +1 -1
- data/ndr_import.gemspec +9 -7
- metadata +23 -164
- data/gemfiles/Gemfile.rails50 +0 -8
- data/gemfiles/Gemfile.rails51 +0 -9
- data/test/file/acro_form_test.rb +0 -39
- data/test/file/base_test.rb +0 -54
- data/test/file/delimited_test.rb +0 -233
- data/test/file/docx_test.rb +0 -53
- data/test/file/excel_test.rb +0 -124
- data/test/file/pdf_test.rb +0 -36
- data/test/file/registry_test.rb +0 -62
- data/test/file/seven_zip_test.rb +0 -59
- data/test/file/text_test.rb +0 -92
- data/test/file/word_test.rb +0 -35
- data/test/file/xml_test.rb +0 -21
- data/test/file/zip_test.rb +0 -47
- data/test/fixed_width/table_test.rb +0 -35
- data/test/helpers/file/delimited_test.rb +0 -105
- data/test/helpers/file/excel_test.rb +0 -82
- data/test/helpers/file/pdf_test.rb +0 -27
- data/test/helpers/file/word_test.rb +0 -26
- data/test/helpers/file/xml_test.rb +0 -131
- data/test/helpers/file/zip_test.rb +0 -75
- data/test/mapper_test.rb +0 -676
- data/test/non_tabular/mapping_test.rb +0 -36
- data/test/non_tabular/table_test.rb +0 -590
- data/test/non_tabular_file_helper_test.rb +0 -501
- data/test/pdf_form/table_test.rb +0 -119
- data/test/readme_test.rb +0 -53
- data/test/resources/acro_form.pdf +0 -0
- data/test/resources/blank_tab_test.xlsx +0 -0
- data/test/resources/bomd.csv +0 -3
- data/test/resources/broken.csv +0 -3
- data/test/resources/filesystem_paths.yml +0 -26
- data/test/resources/flat_file.pdf +0 -0
- data/test/resources/flat_file.txt +0 -27
- data/test/resources/flat_file.yml +0 -20
- data/test/resources/hello_utf16be.txt +0 -0
- data/test/resources/hello_utf16le.txt +0 -0
- data/test/resources/hello_utf8.txt +0 -2
- data/test/resources/hello_windows.txt +0 -2
- data/test/resources/hello_world.doc +0 -0
- data/test/resources/hello_world.docx +0 -0
- data/test/resources/hello_world.pdf +0 -0
- data/test/resources/hello_world.txt +0 -2
- data/test/resources/high_ascii_delimited.txt +0 -2
- data/test/resources/high_ascii_delimited_example_two.txt +0 -3
- data/test/resources/malformed.csv +0 -3
- data/test/resources/malformed.xml +0 -6
- data/test/resources/malformed_pipe.csv +0 -3
- data/test/resources/normal.7z +0 -0
- data/test/resources/normal.csv +0 -3
- data/test/resources/normal.csv.zip +0 -0
- data/test/resources/normal_pipe.csv +0 -3
- data/test/resources/normal_thorn.csv +0 -3
- data/test/resources/not_a_pdf.pdf +0 -0
- data/test/resources/not_a_word_file.doc +0 -0
- data/test/resources/not_a_word_file.docx +0 -0
- data/test/resources/not_sign_delimited.txt +0 -3
- data/test/resources/password_protected_hello_world.docx +0 -0
- data/test/resources/password_protected_sample_xlsx.xlsx +0 -0
- data/test/resources/sample.xml +0 -34
- data/test/resources/sample_xls.xls +0 -0
- data/test/resources/sample_xlsx.xlsx +0 -0
- data/test/resources/sheet_streaming.xls +0 -0
- data/test/resources/sheet_streaming.xlsx +0 -0
- data/test/resources/standard_mappings.yml +0 -39
- data/test/resources/txt_file_xls_extension.xls +0 -1
- data/test/resources/txt_file_xlsx_extension.xlsx +0 -1
- data/test/resources/utf-16be_xml.xml +0 -0
- data/test/resources/utf-16be_xml_with_declaration.xml +0 -0
- data/test/resources/utf-16le_xml.xml +0 -0
- data/test/resources/utf-8_xml.xml +0 -9
- data/test/resources/windows-1252_xml.xml +0 -9
- data/test/resources/windows.csv +0 -5
- data/test/resources/xlsx_file_xls_extension.xls +0 -0
- data/test/standard_mappings_test.rb +0 -22
- data/test/table_test.rb +0 -545
- data/test/test_helper.rb +0 -35
- data/test/universal_importer_helper_test.rb +0 -86
- data/test/xml/table_test.rb +0 -90
@@ -1,36 +0,0 @@
|
|
1
|
-
require 'test_helper'
|
2
|
-
|
3
|
-
# This tests the NdrImport::NonTabular::Mapping mapping class
|
4
|
-
class MappingTestTest < ActiveSupport::TestCase
|
5
|
-
def test_should_raise_error_with_no_non_tabular_row
|
6
|
-
assert_raise NdrImport::MappingError do
|
7
|
-
NdrImport::NonTabular::Mapping.new(
|
8
|
-
'columns' => [{ 'column' => 'one' }]
|
9
|
-
)
|
10
|
-
end
|
11
|
-
end
|
12
|
-
|
13
|
-
def test_should_raise_error_with_no_non_tabular_row_start_line_pattern
|
14
|
-
assert_raise NdrImport::MappingError do
|
15
|
-
NdrImport::NonTabular::Mapping.new(
|
16
|
-
'non_tabular_row' => nil,
|
17
|
-
'columns' => [{ 'column' => 'one' }]
|
18
|
-
)
|
19
|
-
end
|
20
|
-
|
21
|
-
assert_raise NdrImport::MappingError do
|
22
|
-
NdrImport::NonTabular::Mapping.new(
|
23
|
-
'non_tabular_row' => { 'start_line_pattern' => nil },
|
24
|
-
'columns' => [{ 'column' => 'one' }]
|
25
|
-
)
|
26
|
-
end
|
27
|
-
end
|
28
|
-
|
29
|
-
def test_should_initialize_with_non_tabular_row
|
30
|
-
mapping = NdrImport::NonTabular::Mapping.new(
|
31
|
-
'non_tabular_row' => { 'start_line_pattern' => /\A-*\z/ },
|
32
|
-
'columns' => [{ 'column' => 'one' }]
|
33
|
-
)
|
34
|
-
assert_equal(/\A-*\z/, mapping.start_line_pattern)
|
35
|
-
end
|
36
|
-
end
|
@@ -1,590 +0,0 @@
|
|
1
|
-
require 'test_helper'
|
2
|
-
|
3
|
-
# This tests the NdrImport::NonTabular::Table mapping class
|
4
|
-
class TableTest < ActiveSupport::TestCase
|
5
|
-
def setup
|
6
|
-
@simple_divider_example = <<-STR.split(/\n/).map
|
7
|
-
111
|
8
|
-
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt.
|
9
|
-
------
|
10
|
-
222
|
11
|
-
Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo.
|
12
|
-
------
|
13
|
-
333
|
14
|
-
Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla.
|
15
|
-
------
|
16
|
-
444
|
17
|
-
Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim.
|
18
|
-
STR
|
19
|
-
|
20
|
-
@no_divider_example = <<-STR.split(/\n/).map
|
21
|
-
111
|
22
|
-
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt.
|
23
|
-
STR
|
24
|
-
|
25
|
-
@simple_start_and_end_divider_example = <<-STR.split(/\n/).map
|
26
|
-
----- START -----
|
27
|
-
111
|
28
|
-
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt.
|
29
|
-
------ END ------
|
30
|
-
This is never captured
|
31
|
-
----- START -----
|
32
|
-
222
|
33
|
-
Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo.
|
34
|
-
------ END ------
|
35
|
-
This is never captured
|
36
|
-
----- START -----
|
37
|
-
333
|
38
|
-
Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla.
|
39
|
-
------ END ------
|
40
|
-
This is never captured
|
41
|
-
----- START -----
|
42
|
-
444
|
43
|
-
This is captured
|
44
|
-
Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim.
|
45
|
-
------ END ------
|
46
|
-
STR
|
47
|
-
end
|
48
|
-
|
49
|
-
def test_all_valid_options
|
50
|
-
valid_options = %w[
|
51
|
-
canonical_name capture_end_line capture_start_line columns end_in_a_record end_line_pattern
|
52
|
-
filename_pattern file_password format klass remove_lines row_identifier start_in_a_record
|
53
|
-
start_line_pattern
|
54
|
-
]
|
55
|
-
assert_equal valid_options.sort,
|
56
|
-
NdrImport::NonTabular::Table.all_valid_options.sort
|
57
|
-
end
|
58
|
-
|
59
|
-
def test_should_raise_error_with_no_start_line_pattern
|
60
|
-
assert_raise NdrImport::MappingError do
|
61
|
-
NdrImport::NonTabular::Table.new(
|
62
|
-
'columns' => [{ 'column' => 'one' }]
|
63
|
-
)
|
64
|
-
end
|
65
|
-
|
66
|
-
assert_raise NdrImport::MappingError do
|
67
|
-
NdrImport::NonTabular::Table.new(
|
68
|
-
'start_line_pattern' => nil,
|
69
|
-
'columns' => [{ 'column' => 'one' }]
|
70
|
-
)
|
71
|
-
end
|
72
|
-
end
|
73
|
-
|
74
|
-
def test_should_initialize_with_non_tabular_row
|
75
|
-
table = NdrImport::NonTabular::Table.new(
|
76
|
-
'start_line_pattern' => /\A-*\z/,
|
77
|
-
'columns' => [{ 'column' => 'one' }]
|
78
|
-
)
|
79
|
-
assert_equal(/\A-*\z/, table.start_line_pattern)
|
80
|
-
end
|
81
|
-
|
82
|
-
def test_should_test_flat_file_txt
|
83
|
-
table = YAML.load_file(SafePath.new('permanent_test_files').join('flat_file.yml'))
|
84
|
-
assert table.is_a?(NdrImport::NonTabular::Table)
|
85
|
-
filename = SafePath.new('permanent_test_files').join('flat_file.txt')
|
86
|
-
enum = table.transform(File.new(filename).each)
|
87
|
-
# puts enum.to_a.inspect
|
88
|
-
|
89
|
-
results = []
|
90
|
-
enum.each do |_klass, fields, _index|
|
91
|
-
results << fields[:rawtext]['one']
|
92
|
-
end
|
93
|
-
|
94
|
-
assert_equal 4, results.count
|
95
|
-
assert results.first.start_with?('1')
|
96
|
-
assert results.last.start_with?('4')
|
97
|
-
|
98
|
-
assert results.any? { |result| result =~ /This is captured/ }
|
99
|
-
refute results.any? { |result| result =~ /This is never captured/ }
|
100
|
-
refute results.any? { |result| result =~ /== Page/ }
|
101
|
-
end
|
102
|
-
|
103
|
-
def test_should_raise_error_with_no_column_non_tabular_cell
|
104
|
-
table = YAML.load <<-YML.strip_heredoc
|
105
|
-
--- !ruby/object:NdrImport::NonTabular::Table
|
106
|
-
start_line_pattern: !ruby/regexp /^-{6}$/
|
107
|
-
klass: SomeTestKlass
|
108
|
-
columns:
|
109
|
-
- column: one
|
110
|
-
YML
|
111
|
-
assert_raise NdrImport::MappingError do
|
112
|
-
table.transform(@simple_divider_example).to_a
|
113
|
-
end
|
114
|
-
end
|
115
|
-
|
116
|
-
def test_should_raise_error_with_no_column_non_tabular_cell_lines
|
117
|
-
table = YAML.load <<-YML.strip_heredoc
|
118
|
-
--- !ruby/object:NdrImport::NonTabular::Table
|
119
|
-
start_line_pattern: !ruby/regexp /^-{6}$/
|
120
|
-
klass: SomeTestKlass
|
121
|
-
columns:
|
122
|
-
- column: one
|
123
|
-
non_tabular_cell:
|
124
|
-
YML
|
125
|
-
assert_raise NdrImport::MappingError do
|
126
|
-
table.transform(@simple_divider_example).to_a
|
127
|
-
end
|
128
|
-
|
129
|
-
table = YAML.load <<-YML.strip_heredoc
|
130
|
-
--- !ruby/object:NdrImport::NonTabular::Table
|
131
|
-
start_line_pattern: !ruby/regexp /^-{6}$/
|
132
|
-
klass: SomeTestKlass
|
133
|
-
columns:
|
134
|
-
- column: one
|
135
|
-
non_tabular_cell:
|
136
|
-
lines:
|
137
|
-
YML
|
138
|
-
assert_raise NdrImport::MappingError do
|
139
|
-
table.transform(@simple_divider_example).to_a
|
140
|
-
end
|
141
|
-
end
|
142
|
-
|
143
|
-
def test_should_raise_error_with_no_column_non_tabular_cell_capture
|
144
|
-
table = YAML.load <<-YML.strip_heredoc
|
145
|
-
--- !ruby/object:NdrImport::NonTabular::Table
|
146
|
-
start_line_pattern: !ruby/regexp /^-{6}$/
|
147
|
-
klass: SomeTestKlass
|
148
|
-
columns:
|
149
|
-
- column: one
|
150
|
-
non_tabular_cell:
|
151
|
-
lines: !ruby/range
|
152
|
-
begin: 0
|
153
|
-
end: -1
|
154
|
-
excl: false
|
155
|
-
YML
|
156
|
-
assert_raise NdrImport::MappingError do
|
157
|
-
table.transform(@simple_divider_example).to_a
|
158
|
-
end
|
159
|
-
|
160
|
-
table = YAML.load <<-YML.strip_heredoc
|
161
|
-
--- !ruby/object:NdrImport::NonTabular::Table
|
162
|
-
start_line_pattern: !ruby/regexp /^-{6}$/
|
163
|
-
klass: SomeTestKlass
|
164
|
-
columns:
|
165
|
-
- column: one
|
166
|
-
non_tabular_cell:
|
167
|
-
lines: !ruby/range
|
168
|
-
begin: 0
|
169
|
-
end: -1
|
170
|
-
excl: false
|
171
|
-
capture:
|
172
|
-
YML
|
173
|
-
assert_raise NdrImport::MappingError do
|
174
|
-
table.transform(@simple_divider_example).to_a
|
175
|
-
end
|
176
|
-
end
|
177
|
-
|
178
|
-
def test_should_only_return_two_results_with_no_start_in_a_record_or_end_in_a_record
|
179
|
-
table = YAML.load <<-YML.strip_heredoc
|
180
|
-
--- !ruby/object:NdrImport::NonTabular::Table
|
181
|
-
start_line_pattern: !ruby/regexp /^-{6}$/
|
182
|
-
klass: SomeTestKlass
|
183
|
-
columns:
|
184
|
-
- column: one
|
185
|
-
non_tabular_cell:
|
186
|
-
lines: !ruby/range
|
187
|
-
begin: 0
|
188
|
-
end: -1
|
189
|
-
excl: false
|
190
|
-
capture: !ruby/regexp /^(.*)$/i
|
191
|
-
YML
|
192
|
-
enum = table.transform(@simple_divider_example)
|
193
|
-
assert_instance_of Enumerator, enum
|
194
|
-
results = enum.map { |_klass, fields, _index| fields[:rawtext]['one'] }
|
195
|
-
|
196
|
-
assert_equal 2, results.count
|
197
|
-
assert results.first.start_with?('222')
|
198
|
-
assert results.last.start_with?('333')
|
199
|
-
end
|
200
|
-
|
201
|
-
def test_should_return_three_results_with_start_in_a_record
|
202
|
-
table = YAML.load <<-YML.strip_heredoc
|
203
|
-
--- !ruby/object:NdrImport::NonTabular::Table
|
204
|
-
start_line_pattern: !ruby/regexp /^-{6}$/
|
205
|
-
start_in_a_record: true
|
206
|
-
klass: SomeTestKlass
|
207
|
-
columns:
|
208
|
-
- column: one
|
209
|
-
non_tabular_cell:
|
210
|
-
lines: !ruby/range
|
211
|
-
begin: 0
|
212
|
-
end: -1
|
213
|
-
excl: false
|
214
|
-
capture: !ruby/regexp /^(.*)$/i
|
215
|
-
YML
|
216
|
-
enum = table.transform(@simple_divider_example)
|
217
|
-
assert_instance_of Enumerator, enum
|
218
|
-
results = enum.map { |_klass, fields, _index| fields[:rawtext]['one'] }
|
219
|
-
|
220
|
-
assert_equal 3, results.count
|
221
|
-
assert results.first.start_with?('111')
|
222
|
-
assert results.last.start_with?('333')
|
223
|
-
end
|
224
|
-
|
225
|
-
def test_should_return_three_results_with_end_in_a_record
|
226
|
-
table = YAML.load <<-YML.strip_heredoc
|
227
|
-
--- !ruby/object:NdrImport::NonTabular::Table
|
228
|
-
start_line_pattern: !ruby/regexp /^-{6}$/
|
229
|
-
end_in_a_record: true
|
230
|
-
klass: SomeTestKlass
|
231
|
-
columns:
|
232
|
-
- column: one
|
233
|
-
non_tabular_cell:
|
234
|
-
lines: !ruby/range
|
235
|
-
begin: 0
|
236
|
-
end: -1
|
237
|
-
excl: false
|
238
|
-
capture: !ruby/regexp /^(.*)$/i
|
239
|
-
YML
|
240
|
-
enum = table.transform(@simple_divider_example)
|
241
|
-
assert_instance_of Enumerator, enum
|
242
|
-
results = enum.map { |_klass, fields, _index| fields[:rawtext]['one'] }
|
243
|
-
|
244
|
-
assert_equal 3, results.count
|
245
|
-
assert results.first.start_with?('222')
|
246
|
-
assert results.last.start_with?('444')
|
247
|
-
end
|
248
|
-
|
249
|
-
def test_should_return_four_results_with_start_in_a_record_and_end_in_a_record
|
250
|
-
table = YAML.load <<-YML.strip_heredoc
|
251
|
-
--- !ruby/object:NdrImport::NonTabular::Table
|
252
|
-
start_line_pattern: !ruby/regexp /^-{6}$/
|
253
|
-
start_in_a_record: true
|
254
|
-
end_in_a_record: true
|
255
|
-
klass: SomeTestKlass
|
256
|
-
columns:
|
257
|
-
- column: one
|
258
|
-
non_tabular_cell:
|
259
|
-
lines: !ruby/range
|
260
|
-
begin: 0
|
261
|
-
end: -1
|
262
|
-
excl: false
|
263
|
-
capture: !ruby/regexp /^(.*)$/i
|
264
|
-
YML
|
265
|
-
enum = table.transform(@simple_divider_example)
|
266
|
-
assert_instance_of Enumerator, enum
|
267
|
-
results = enum.map { |_klass, fields, _index| fields[:rawtext]['one'] }
|
268
|
-
|
269
|
-
assert_equal 4, results.count
|
270
|
-
assert results.first.start_with?('111')
|
271
|
-
assert results.last.start_with?('444')
|
272
|
-
end
|
273
|
-
|
274
|
-
def test_should_return_one_results_with_start_in_a_record_and_end_in_a_record
|
275
|
-
table = YAML.load <<-YML.strip_heredoc
|
276
|
-
--- !ruby/object:NdrImport::NonTabular::Table
|
277
|
-
start_line_pattern: !ruby/regexp /^-{6}$/
|
278
|
-
start_in_a_record: true
|
279
|
-
end_in_a_record: true
|
280
|
-
klass: SomeTestKlass
|
281
|
-
columns:
|
282
|
-
- column: one
|
283
|
-
non_tabular_cell:
|
284
|
-
lines: !ruby/range
|
285
|
-
begin: 0
|
286
|
-
end: -1
|
287
|
-
excl: false
|
288
|
-
capture: !ruby/regexp /^(.*)$/i
|
289
|
-
YML
|
290
|
-
enum = table.transform(@no_divider_example)
|
291
|
-
assert_instance_of Enumerator, enum
|
292
|
-
results = enum.map { |_klass, fields, _index| fields[:rawtext]['one'] }
|
293
|
-
|
294
|
-
assert_equal 1, results.count
|
295
|
-
assert results.first.start_with?('111')
|
296
|
-
end
|
297
|
-
|
298
|
-
def test_should_return_four_results_with_start_and_end_dividers
|
299
|
-
table = YAML.load <<-YML.strip_heredoc
|
300
|
-
--- !ruby/object:NdrImport::NonTabular::Table
|
301
|
-
start_line_pattern: !ruby/regexp /^----- START -----$/
|
302
|
-
end_line_pattern: !ruby/regexp /^------ END ------$/
|
303
|
-
klass: SomeTestKlass
|
304
|
-
columns:
|
305
|
-
- column: one
|
306
|
-
non_tabular_cell:
|
307
|
-
lines: !ruby/range
|
308
|
-
begin: 0
|
309
|
-
end: -1
|
310
|
-
excl: false
|
311
|
-
capture: !ruby/regexp /^(.*)$/i
|
312
|
-
YML
|
313
|
-
enum = table.transform(@simple_start_and_end_divider_example)
|
314
|
-
assert_instance_of Enumerator, enum
|
315
|
-
|
316
|
-
results = enum.map { |_klass, fields, _index| fields[:rawtext]['one'] }
|
317
|
-
|
318
|
-
assert_equal 4, results.count
|
319
|
-
assert results.first.start_with?('111')
|
320
|
-
assert results.last.start_with?('444')
|
321
|
-
|
322
|
-
assert results.any? { |result| result =~ /This is captured/ }
|
323
|
-
refute results.any? { |result| result =~ /This is never captured/ }
|
324
|
-
end
|
325
|
-
|
326
|
-
def test_should_capture_end_line
|
327
|
-
data = <<~STR.each_line
|
328
|
-
111
|
329
|
-
Lorem ipsum dolor sit amet.
|
330
|
-
CAPTURE THIS CODE ABC
|
331
|
-
111
|
332
|
-
Lorem ipsum dolor sit amet.
|
333
|
-
CAPTURE THIS CODE XYZ
|
334
|
-
111
|
335
|
-
Lorem ipsum dolor sit amet.
|
336
|
-
CAPTURE THIS CODE 123
|
337
|
-
STR
|
338
|
-
|
339
|
-
table = YAML.load <<-YML.strip_heredoc
|
340
|
-
--- !ruby/object:NdrImport::NonTabular::Table
|
341
|
-
start_line_pattern: !ruby/regexp /\\A111\\z/
|
342
|
-
end_line_pattern: !ruby/regexp /\\ACAPTURE THIS CODE/
|
343
|
-
capture_start_line: true
|
344
|
-
capture_end_line: true
|
345
|
-
klass: SomeTestKlass
|
346
|
-
columns:
|
347
|
-
- column: one
|
348
|
-
non_tabular_cell:
|
349
|
-
lines: -1
|
350
|
-
capture: !ruby/regexp /\\A(.*)\\z/i
|
351
|
-
YML
|
352
|
-
enum = table.transform(data)
|
353
|
-
assert_instance_of Enumerator, enum
|
354
|
-
|
355
|
-
results = enum.map { |_klass, fields, _index| fields[:rawtext]['one'] }
|
356
|
-
|
357
|
-
assert_equal 3, results.count
|
358
|
-
assert_equal 'CAPTURE THIS CODE ABC', results.first
|
359
|
-
end
|
360
|
-
|
361
|
-
def test_should_capture
|
362
|
-
table = YAML.load <<-YML.strip_heredoc
|
363
|
-
--- !ruby/object:NdrImport::NonTabular::Table
|
364
|
-
start_line_pattern: !ruby/regexp /^-{6}$/
|
365
|
-
klass: SomeTestKlass
|
366
|
-
columns:
|
367
|
-
- standard_mapping: nhsnumber
|
368
|
-
non_tabular_cell:
|
369
|
-
lines: 0
|
370
|
-
capture: !ruby/regexp /^(\\d*)$/i
|
371
|
-
- column: address
|
372
|
-
non_tabular_cell:
|
373
|
-
lines: !ruby/range
|
374
|
-
begin: 1
|
375
|
-
end: 5
|
376
|
-
excl: false
|
377
|
-
capture: !ruby/regexp /^.{50}(.*)$/i
|
378
|
-
join: ", "
|
379
|
-
- standard_mapping: postcode
|
380
|
-
non_tabular_cell:
|
381
|
-
lines: 6
|
382
|
-
capture: !ruby/regexp /^.{50}(.*)$/i
|
383
|
-
- column: capture_inclusive
|
384
|
-
non_tabular_cell:
|
385
|
-
lines: !ruby/object:RegexpRange
|
386
|
-
begin: !ruby/regexp /^CAPTURE INCLUSIVE$/
|
387
|
-
end: !ruby/regexp /^Capture me.$/i
|
388
|
-
excl: false
|
389
|
-
capture: !ruby/regexp /^(.*)$/i
|
390
|
-
join: "\\n"
|
391
|
-
- column: capture_exclusive
|
392
|
-
non_tabular_cell:
|
393
|
-
lines: !ruby/object:RegexpRange
|
394
|
-
begin: !ruby/regexp /^CAPTURE EXCLUSIVE$/
|
395
|
-
end: !ruby/regexp /^Do NOT capture me.$/i
|
396
|
-
excl: true
|
397
|
-
capture: !ruby/regexp /^(.*)$/i
|
398
|
-
join: "\\n"
|
399
|
-
- column: capture_to_end
|
400
|
-
non_tabular_cell:
|
401
|
-
lines: !ruby/object:RegexpRange
|
402
|
-
begin: !ruby/regexp /^CAPTURE TO END$/
|
403
|
-
end: -1
|
404
|
-
excl: false
|
405
|
-
capture: !ruby/regexp /^(.*)$/i
|
406
|
-
join: "\\n"
|
407
|
-
YML
|
408
|
-
capture_example = <<-STR
|
409
|
-
This is never captured
|
410
|
-
------
|
411
|
-
1111111111
|
412
|
-
<----------------- 50 characters ---------------->Unit C, Magog Court
|
413
|
-
Shelford Bottom
|
414
|
-
Hinton Way
|
415
|
-
Cambridge
|
416
|
-
|
417
|
-
CB22 3AD
|
418
|
-
|
419
|
-
CAPTURE INCLUSIVE
|
420
|
-
Lorem ipsum dolor sit amet,
|
421
|
-
consectetur adipisicing elit,
|
422
|
-
Capture me.
|
423
|
-
|
424
|
-
CAPTURE EXCLUSIVE
|
425
|
-
Ut enim ad minim veniam, quis nostrud exercitation.
|
426
|
-
Do NOT capture me.
|
427
|
-
|
428
|
-
CAPTURE TO END
|
429
|
-
Lorem ipsum dolor sit amet, consectetur adipisicing elit.
|
430
|
-
Ut enim ad minim veniam, quis nostrud exercitation ullamco.
|
431
|
-
Duis aute irure dolor in reprehenderit in voluptate velit.
|
432
|
-
Excepteur sint occaecat cupidatat non proident, sunt in culpa.
|
433
|
-
------
|
434
|
-
This is never captured
|
435
|
-
STR
|
436
|
-
enum = table.transform(capture_example.split(/\n/).map)
|
437
|
-
assert_instance_of Enumerator, enum
|
438
|
-
|
439
|
-
output = []
|
440
|
-
enum.each do |klass, fields, index|
|
441
|
-
output << [klass, fields, index]
|
442
|
-
end
|
443
|
-
|
444
|
-
expected_output = [
|
445
|
-
[
|
446
|
-
'SomeTestKlass', {
|
447
|
-
'nhsnumber' => '1111111111',
|
448
|
-
'postcode' => 'CB223AD',
|
449
|
-
:rawtext => {
|
450
|
-
'nhsnumber' => '1111111111',
|
451
|
-
'address' => 'Unit C, Magog Court, Shelford Bottom, Hinton Way, Cambridge',
|
452
|
-
'postcode' => 'CB22 3AD',
|
453
|
-
'capture_inclusive' => "CAPTURE INCLUSIVE\nLorem ipsum dolor sit amet,\n" \
|
454
|
-
"consectetur adipisicing elit,\nCapture me.",
|
455
|
-
'capture_exclusive' => "CAPTURE EXCLUSIVE\n" \
|
456
|
-
'Ut enim ad minim veniam, quis nostrud exercitation.',
|
457
|
-
'capture_to_end' => "CAPTURE TO END\n" \
|
458
|
-
"Lorem ipsum dolor sit amet, consectetur adipisicing elit.\n" \
|
459
|
-
"Ut enim ad minim veniam, quis nostrud exercitation ullamco.\n" \
|
460
|
-
"Duis aute irure dolor in reprehenderit in voluptate velit.\n" \
|
461
|
-
'Excepteur sint occaecat cupidatat non proident, sunt in culpa.'
|
462
|
-
}
|
463
|
-
},
|
464
|
-
0
|
465
|
-
]
|
466
|
-
]
|
467
|
-
assert_equal expected_output.sort, output.sort
|
468
|
-
assert_equal 25, table.non_tabular_lines.last.absolute_line_number
|
469
|
-
end
|
470
|
-
|
471
|
-
def test_handles_non_utf8_characters
|
472
|
-
mixed_encoding_example = <<-STR.each_line
|
473
|
-
111
|
474
|
-
Lorem ipsum dolor sit amet.
|
475
|
-
------
|
476
|
-
111
|
477
|
-
Lorem ipsum dolor\xBE sit amet.
|
478
|
-
------
|
479
|
-
111
|
480
|
-
Lorem ipsum dolor sit amet.
|
481
|
-
------
|
482
|
-
STR
|
483
|
-
|
484
|
-
table = YAML.load <<-YML.strip_heredoc
|
485
|
-
--- !ruby/object:NdrImport::NonTabular::Table
|
486
|
-
start_line_pattern: !ruby/regexp /^111$/
|
487
|
-
end_in_a_record: true
|
488
|
-
klass: SomeTestKlass
|
489
|
-
columns:
|
490
|
-
- column: one
|
491
|
-
non_tabular_cell:
|
492
|
-
lines: !ruby/range
|
493
|
-
begin: 0
|
494
|
-
end: -1
|
495
|
-
excl: true
|
496
|
-
capture: !ruby/regexp /^(.*)$/i
|
497
|
-
YML
|
498
|
-
|
499
|
-
enum = table.transform(mixed_encoding_example)
|
500
|
-
assert_instance_of Enumerator, enum
|
501
|
-
results = enum.map { |_klass, fields, _index| fields[:rawtext]['one'] }
|
502
|
-
|
503
|
-
assert_equal 3, results.count, 'records were lost'
|
504
|
-
|
505
|
-
assert_equal [27, 28, 27], results.map { |row| row.chars.to_a.length }
|
506
|
-
assert_equal [27, 29, 27], results.map { |row| row.bytes.to_a.length }
|
507
|
-
|
508
|
-
results.each do |row|
|
509
|
-
assert row.first.valid_encoding?
|
510
|
-
assert_equal Encoding.find('UTF-8'), row.first.encoding
|
511
|
-
end
|
512
|
-
end
|
513
|
-
|
514
|
-
def test_should_not_allow_junk_bytes
|
515
|
-
junk = <<-STR.each_line
|
516
|
-
111
|
517
|
-
Lorem ipsum dolor sit amet.
|
518
|
-
------
|
519
|
-
111
|
520
|
-
Lorem ipsum dolor\x8D sit amet.
|
521
|
-
------
|
522
|
-
111
|
523
|
-
Lorem ipsum dolor sit amet.
|
524
|
-
------
|
525
|
-
STR
|
526
|
-
|
527
|
-
table = YAML.load <<-YML.strip_heredoc
|
528
|
-
--- !ruby/object:NdrImport::NonTabular::Table
|
529
|
-
start_line_pattern: !ruby/regexp /^111$/
|
530
|
-
end_in_a_record: true
|
531
|
-
klass: SomeTestKlass
|
532
|
-
columns:
|
533
|
-
- column: one
|
534
|
-
non_tabular_cell:
|
535
|
-
lines: !ruby/range
|
536
|
-
begin: 0
|
537
|
-
end: -1
|
538
|
-
excl: true
|
539
|
-
capture: !ruby/regexp /^(.*)$/i
|
540
|
-
YML
|
541
|
-
|
542
|
-
assert_raises(UTF8Encoding::UTF8CoercionError) do
|
543
|
-
table.transform(junk).to_a
|
544
|
-
end
|
545
|
-
end
|
546
|
-
|
547
|
-
def test_should_strip_captured_rawtext
|
548
|
-
unwanted_white_space = <<-STR.each_line
|
549
|
-
111
|
550
|
-
Trailing whitespace end_of_line
|
551
|
-
------
|
552
|
-
111
|
553
|
-
Leading whitespaceend_of_line
|
554
|
-
------
|
555
|
-
111
|
556
|
-
Leading and trailing whitespace end_of_line
|
557
|
-
------
|
558
|
-
111
|
559
|
-
Should not match this
|
560
|
-
------
|
561
|
-
STR
|
562
|
-
|
563
|
-
table = YAML.load <<-YML.strip_heredoc
|
564
|
-
--- !ruby/object:NdrImport::NonTabular::Table
|
565
|
-
start_line_pattern: !ruby/regexp /^111$/
|
566
|
-
end_in_a_record: true
|
567
|
-
klass: SomeTestKlass
|
568
|
-
columns:
|
569
|
-
- column: one
|
570
|
-
non_tabular_cell:
|
571
|
-
lines: 0
|
572
|
-
capture: !ruby/regexp /^(.*)end_of_line$/i
|
573
|
-
trim_rawtext: left
|
574
|
-
YML
|
575
|
-
|
576
|
-
enum = table.transform(unwanted_white_space)
|
577
|
-
assert_instance_of Enumerator, enum
|
578
|
-
|
579
|
-
output = []
|
580
|
-
enum.each do |klass, fields, index|
|
581
|
-
output << [klass, fields, index]
|
582
|
-
end
|
583
|
-
|
584
|
-
expected_rawtext_ouput = [{ 'one' => 'Trailing whitespace' },
|
585
|
-
{ 'one' => 'Leading whitespace' },
|
586
|
-
{ 'one' => 'Leading and trailing whitespace' },
|
587
|
-
{ 'one' => '' }]
|
588
|
-
assert_equal expected_rawtext_ouput, (output.map { |row| row[1][:rawtext] })
|
589
|
-
end
|
590
|
-
end
|