ndr_import 8.5.0 → 8.5.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/Gemfile +0 -3
- data/README.md +6 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/code_safety.yml +27 -11
- data/exe/pdf_acro_form_to_yaml +23 -0
- data/exe/pdf_to_text +28 -0
- data/exe/word_to_text +26 -0
- data/gemfiles/Gemfile.rails52 +0 -3
- data/gemfiles/Gemfile.rails60 +5 -0
- data/lib/ndr_import/version.rb +1 -1
- data/ndr_import.gemspec +9 -7
- metadata +23 -164
- data/gemfiles/Gemfile.rails50 +0 -8
- data/gemfiles/Gemfile.rails51 +0 -9
- data/test/file/acro_form_test.rb +0 -39
- data/test/file/base_test.rb +0 -54
- data/test/file/delimited_test.rb +0 -233
- data/test/file/docx_test.rb +0 -53
- data/test/file/excel_test.rb +0 -124
- data/test/file/pdf_test.rb +0 -36
- data/test/file/registry_test.rb +0 -62
- data/test/file/seven_zip_test.rb +0 -59
- data/test/file/text_test.rb +0 -92
- data/test/file/word_test.rb +0 -35
- data/test/file/xml_test.rb +0 -21
- data/test/file/zip_test.rb +0 -47
- data/test/fixed_width/table_test.rb +0 -35
- data/test/helpers/file/delimited_test.rb +0 -105
- data/test/helpers/file/excel_test.rb +0 -82
- data/test/helpers/file/pdf_test.rb +0 -27
- data/test/helpers/file/word_test.rb +0 -26
- data/test/helpers/file/xml_test.rb +0 -131
- data/test/helpers/file/zip_test.rb +0 -75
- data/test/mapper_test.rb +0 -676
- data/test/non_tabular/mapping_test.rb +0 -36
- data/test/non_tabular/table_test.rb +0 -590
- data/test/non_tabular_file_helper_test.rb +0 -501
- data/test/pdf_form/table_test.rb +0 -119
- data/test/readme_test.rb +0 -53
- data/test/resources/acro_form.pdf +0 -0
- data/test/resources/blank_tab_test.xlsx +0 -0
- data/test/resources/bomd.csv +0 -3
- data/test/resources/broken.csv +0 -3
- data/test/resources/filesystem_paths.yml +0 -26
- data/test/resources/flat_file.pdf +0 -0
- data/test/resources/flat_file.txt +0 -27
- data/test/resources/flat_file.yml +0 -20
- data/test/resources/hello_utf16be.txt +0 -0
- data/test/resources/hello_utf16le.txt +0 -0
- data/test/resources/hello_utf8.txt +0 -2
- data/test/resources/hello_windows.txt +0 -2
- data/test/resources/hello_world.doc +0 -0
- data/test/resources/hello_world.docx +0 -0
- data/test/resources/hello_world.pdf +0 -0
- data/test/resources/hello_world.txt +0 -2
- data/test/resources/high_ascii_delimited.txt +0 -2
- data/test/resources/high_ascii_delimited_example_two.txt +0 -3
- data/test/resources/malformed.csv +0 -3
- data/test/resources/malformed.xml +0 -6
- data/test/resources/malformed_pipe.csv +0 -3
- data/test/resources/normal.7z +0 -0
- data/test/resources/normal.csv +0 -3
- data/test/resources/normal.csv.zip +0 -0
- data/test/resources/normal_pipe.csv +0 -3
- data/test/resources/normal_thorn.csv +0 -3
- data/test/resources/not_a_pdf.pdf +0 -0
- data/test/resources/not_a_word_file.doc +0 -0
- data/test/resources/not_a_word_file.docx +0 -0
- data/test/resources/not_sign_delimited.txt +0 -3
- data/test/resources/password_protected_hello_world.docx +0 -0
- data/test/resources/password_protected_sample_xlsx.xlsx +0 -0
- data/test/resources/sample.xml +0 -34
- data/test/resources/sample_xls.xls +0 -0
- data/test/resources/sample_xlsx.xlsx +0 -0
- data/test/resources/sheet_streaming.xls +0 -0
- data/test/resources/sheet_streaming.xlsx +0 -0
- data/test/resources/standard_mappings.yml +0 -39
- data/test/resources/txt_file_xls_extension.xls +0 -1
- data/test/resources/txt_file_xlsx_extension.xlsx +0 -1
- data/test/resources/utf-16be_xml.xml +0 -0
- data/test/resources/utf-16be_xml_with_declaration.xml +0 -0
- data/test/resources/utf-16le_xml.xml +0 -0
- data/test/resources/utf-8_xml.xml +0 -9
- data/test/resources/windows-1252_xml.xml +0 -9
- data/test/resources/windows.csv +0 -5
- data/test/resources/xlsx_file_xls_extension.xls +0 -0
- data/test/standard_mappings_test.rb +0 -22
- data/test/table_test.rb +0 -545
- data/test/test_helper.rb +0 -35
- data/test/universal_importer_helper_test.rb +0 -86
- data/test/xml/table_test.rb +0 -90
@@ -1,36 +0,0 @@
|
|
1
|
-
require 'test_helper'
|
2
|
-
|
3
|
-
# This tests the NdrImport::NonTabular::Mapping mapping class
|
4
|
-
class MappingTestTest < ActiveSupport::TestCase
|
5
|
-
def test_should_raise_error_with_no_non_tabular_row
|
6
|
-
assert_raise NdrImport::MappingError do
|
7
|
-
NdrImport::NonTabular::Mapping.new(
|
8
|
-
'columns' => [{ 'column' => 'one' }]
|
9
|
-
)
|
10
|
-
end
|
11
|
-
end
|
12
|
-
|
13
|
-
def test_should_raise_error_with_no_non_tabular_row_start_line_pattern
|
14
|
-
assert_raise NdrImport::MappingError do
|
15
|
-
NdrImport::NonTabular::Mapping.new(
|
16
|
-
'non_tabular_row' => nil,
|
17
|
-
'columns' => [{ 'column' => 'one' }]
|
18
|
-
)
|
19
|
-
end
|
20
|
-
|
21
|
-
assert_raise NdrImport::MappingError do
|
22
|
-
NdrImport::NonTabular::Mapping.new(
|
23
|
-
'non_tabular_row' => { 'start_line_pattern' => nil },
|
24
|
-
'columns' => [{ 'column' => 'one' }]
|
25
|
-
)
|
26
|
-
end
|
27
|
-
end
|
28
|
-
|
29
|
-
def test_should_initialize_with_non_tabular_row
|
30
|
-
mapping = NdrImport::NonTabular::Mapping.new(
|
31
|
-
'non_tabular_row' => { 'start_line_pattern' => /\A-*\z/ },
|
32
|
-
'columns' => [{ 'column' => 'one' }]
|
33
|
-
)
|
34
|
-
assert_equal(/\A-*\z/, mapping.start_line_pattern)
|
35
|
-
end
|
36
|
-
end
|
@@ -1,590 +0,0 @@
|
|
1
|
-
require 'test_helper'
|
2
|
-
|
3
|
-
# This tests the NdrImport::NonTabular::Table mapping class
|
4
|
-
class TableTest < ActiveSupport::TestCase
|
5
|
-
def setup
|
6
|
-
@simple_divider_example = <<-STR.split(/\n/).map
|
7
|
-
111
|
8
|
-
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt.
|
9
|
-
------
|
10
|
-
222
|
11
|
-
Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo.
|
12
|
-
------
|
13
|
-
333
|
14
|
-
Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla.
|
15
|
-
------
|
16
|
-
444
|
17
|
-
Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim.
|
18
|
-
STR
|
19
|
-
|
20
|
-
@no_divider_example = <<-STR.split(/\n/).map
|
21
|
-
111
|
22
|
-
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt.
|
23
|
-
STR
|
24
|
-
|
25
|
-
@simple_start_and_end_divider_example = <<-STR.split(/\n/).map
|
26
|
-
----- START -----
|
27
|
-
111
|
28
|
-
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt.
|
29
|
-
------ END ------
|
30
|
-
This is never captured
|
31
|
-
----- START -----
|
32
|
-
222
|
33
|
-
Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo.
|
34
|
-
------ END ------
|
35
|
-
This is never captured
|
36
|
-
----- START -----
|
37
|
-
333
|
38
|
-
Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla.
|
39
|
-
------ END ------
|
40
|
-
This is never captured
|
41
|
-
----- START -----
|
42
|
-
444
|
43
|
-
This is captured
|
44
|
-
Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim.
|
45
|
-
------ END ------
|
46
|
-
STR
|
47
|
-
end
|
48
|
-
|
49
|
-
def test_all_valid_options
|
50
|
-
valid_options = %w[
|
51
|
-
canonical_name capture_end_line capture_start_line columns end_in_a_record end_line_pattern
|
52
|
-
filename_pattern file_password format klass remove_lines row_identifier start_in_a_record
|
53
|
-
start_line_pattern
|
54
|
-
]
|
55
|
-
assert_equal valid_options.sort,
|
56
|
-
NdrImport::NonTabular::Table.all_valid_options.sort
|
57
|
-
end
|
58
|
-
|
59
|
-
def test_should_raise_error_with_no_start_line_pattern
|
60
|
-
assert_raise NdrImport::MappingError do
|
61
|
-
NdrImport::NonTabular::Table.new(
|
62
|
-
'columns' => [{ 'column' => 'one' }]
|
63
|
-
)
|
64
|
-
end
|
65
|
-
|
66
|
-
assert_raise NdrImport::MappingError do
|
67
|
-
NdrImport::NonTabular::Table.new(
|
68
|
-
'start_line_pattern' => nil,
|
69
|
-
'columns' => [{ 'column' => 'one' }]
|
70
|
-
)
|
71
|
-
end
|
72
|
-
end
|
73
|
-
|
74
|
-
def test_should_initialize_with_non_tabular_row
|
75
|
-
table = NdrImport::NonTabular::Table.new(
|
76
|
-
'start_line_pattern' => /\A-*\z/,
|
77
|
-
'columns' => [{ 'column' => 'one' }]
|
78
|
-
)
|
79
|
-
assert_equal(/\A-*\z/, table.start_line_pattern)
|
80
|
-
end
|
81
|
-
|
82
|
-
def test_should_test_flat_file_txt
|
83
|
-
table = YAML.load_file(SafePath.new('permanent_test_files').join('flat_file.yml'))
|
84
|
-
assert table.is_a?(NdrImport::NonTabular::Table)
|
85
|
-
filename = SafePath.new('permanent_test_files').join('flat_file.txt')
|
86
|
-
enum = table.transform(File.new(filename).each)
|
87
|
-
# puts enum.to_a.inspect
|
88
|
-
|
89
|
-
results = []
|
90
|
-
enum.each do |_klass, fields, _index|
|
91
|
-
results << fields[:rawtext]['one']
|
92
|
-
end
|
93
|
-
|
94
|
-
assert_equal 4, results.count
|
95
|
-
assert results.first.start_with?('1')
|
96
|
-
assert results.last.start_with?('4')
|
97
|
-
|
98
|
-
assert results.any? { |result| result =~ /This is captured/ }
|
99
|
-
refute results.any? { |result| result =~ /This is never captured/ }
|
100
|
-
refute results.any? { |result| result =~ /== Page/ }
|
101
|
-
end
|
102
|
-
|
103
|
-
def test_should_raise_error_with_no_column_non_tabular_cell
|
104
|
-
table = YAML.load <<-YML.strip_heredoc
|
105
|
-
--- !ruby/object:NdrImport::NonTabular::Table
|
106
|
-
start_line_pattern: !ruby/regexp /^-{6}$/
|
107
|
-
klass: SomeTestKlass
|
108
|
-
columns:
|
109
|
-
- column: one
|
110
|
-
YML
|
111
|
-
assert_raise NdrImport::MappingError do
|
112
|
-
table.transform(@simple_divider_example).to_a
|
113
|
-
end
|
114
|
-
end
|
115
|
-
|
116
|
-
def test_should_raise_error_with_no_column_non_tabular_cell_lines
|
117
|
-
table = YAML.load <<-YML.strip_heredoc
|
118
|
-
--- !ruby/object:NdrImport::NonTabular::Table
|
119
|
-
start_line_pattern: !ruby/regexp /^-{6}$/
|
120
|
-
klass: SomeTestKlass
|
121
|
-
columns:
|
122
|
-
- column: one
|
123
|
-
non_tabular_cell:
|
124
|
-
YML
|
125
|
-
assert_raise NdrImport::MappingError do
|
126
|
-
table.transform(@simple_divider_example).to_a
|
127
|
-
end
|
128
|
-
|
129
|
-
table = YAML.load <<-YML.strip_heredoc
|
130
|
-
--- !ruby/object:NdrImport::NonTabular::Table
|
131
|
-
start_line_pattern: !ruby/regexp /^-{6}$/
|
132
|
-
klass: SomeTestKlass
|
133
|
-
columns:
|
134
|
-
- column: one
|
135
|
-
non_tabular_cell:
|
136
|
-
lines:
|
137
|
-
YML
|
138
|
-
assert_raise NdrImport::MappingError do
|
139
|
-
table.transform(@simple_divider_example).to_a
|
140
|
-
end
|
141
|
-
end
|
142
|
-
|
143
|
-
def test_should_raise_error_with_no_column_non_tabular_cell_capture
|
144
|
-
table = YAML.load <<-YML.strip_heredoc
|
145
|
-
--- !ruby/object:NdrImport::NonTabular::Table
|
146
|
-
start_line_pattern: !ruby/regexp /^-{6}$/
|
147
|
-
klass: SomeTestKlass
|
148
|
-
columns:
|
149
|
-
- column: one
|
150
|
-
non_tabular_cell:
|
151
|
-
lines: !ruby/range
|
152
|
-
begin: 0
|
153
|
-
end: -1
|
154
|
-
excl: false
|
155
|
-
YML
|
156
|
-
assert_raise NdrImport::MappingError do
|
157
|
-
table.transform(@simple_divider_example).to_a
|
158
|
-
end
|
159
|
-
|
160
|
-
table = YAML.load <<-YML.strip_heredoc
|
161
|
-
--- !ruby/object:NdrImport::NonTabular::Table
|
162
|
-
start_line_pattern: !ruby/regexp /^-{6}$/
|
163
|
-
klass: SomeTestKlass
|
164
|
-
columns:
|
165
|
-
- column: one
|
166
|
-
non_tabular_cell:
|
167
|
-
lines: !ruby/range
|
168
|
-
begin: 0
|
169
|
-
end: -1
|
170
|
-
excl: false
|
171
|
-
capture:
|
172
|
-
YML
|
173
|
-
assert_raise NdrImport::MappingError do
|
174
|
-
table.transform(@simple_divider_example).to_a
|
175
|
-
end
|
176
|
-
end
|
177
|
-
|
178
|
-
def test_should_only_return_two_results_with_no_start_in_a_record_or_end_in_a_record
|
179
|
-
table = YAML.load <<-YML.strip_heredoc
|
180
|
-
--- !ruby/object:NdrImport::NonTabular::Table
|
181
|
-
start_line_pattern: !ruby/regexp /^-{6}$/
|
182
|
-
klass: SomeTestKlass
|
183
|
-
columns:
|
184
|
-
- column: one
|
185
|
-
non_tabular_cell:
|
186
|
-
lines: !ruby/range
|
187
|
-
begin: 0
|
188
|
-
end: -1
|
189
|
-
excl: false
|
190
|
-
capture: !ruby/regexp /^(.*)$/i
|
191
|
-
YML
|
192
|
-
enum = table.transform(@simple_divider_example)
|
193
|
-
assert_instance_of Enumerator, enum
|
194
|
-
results = enum.map { |_klass, fields, _index| fields[:rawtext]['one'] }
|
195
|
-
|
196
|
-
assert_equal 2, results.count
|
197
|
-
assert results.first.start_with?('222')
|
198
|
-
assert results.last.start_with?('333')
|
199
|
-
end
|
200
|
-
|
201
|
-
def test_should_return_three_results_with_start_in_a_record
|
202
|
-
table = YAML.load <<-YML.strip_heredoc
|
203
|
-
--- !ruby/object:NdrImport::NonTabular::Table
|
204
|
-
start_line_pattern: !ruby/regexp /^-{6}$/
|
205
|
-
start_in_a_record: true
|
206
|
-
klass: SomeTestKlass
|
207
|
-
columns:
|
208
|
-
- column: one
|
209
|
-
non_tabular_cell:
|
210
|
-
lines: !ruby/range
|
211
|
-
begin: 0
|
212
|
-
end: -1
|
213
|
-
excl: false
|
214
|
-
capture: !ruby/regexp /^(.*)$/i
|
215
|
-
YML
|
216
|
-
enum = table.transform(@simple_divider_example)
|
217
|
-
assert_instance_of Enumerator, enum
|
218
|
-
results = enum.map { |_klass, fields, _index| fields[:rawtext]['one'] }
|
219
|
-
|
220
|
-
assert_equal 3, results.count
|
221
|
-
assert results.first.start_with?('111')
|
222
|
-
assert results.last.start_with?('333')
|
223
|
-
end
|
224
|
-
|
225
|
-
def test_should_return_three_results_with_end_in_a_record
|
226
|
-
table = YAML.load <<-YML.strip_heredoc
|
227
|
-
--- !ruby/object:NdrImport::NonTabular::Table
|
228
|
-
start_line_pattern: !ruby/regexp /^-{6}$/
|
229
|
-
end_in_a_record: true
|
230
|
-
klass: SomeTestKlass
|
231
|
-
columns:
|
232
|
-
- column: one
|
233
|
-
non_tabular_cell:
|
234
|
-
lines: !ruby/range
|
235
|
-
begin: 0
|
236
|
-
end: -1
|
237
|
-
excl: false
|
238
|
-
capture: !ruby/regexp /^(.*)$/i
|
239
|
-
YML
|
240
|
-
enum = table.transform(@simple_divider_example)
|
241
|
-
assert_instance_of Enumerator, enum
|
242
|
-
results = enum.map { |_klass, fields, _index| fields[:rawtext]['one'] }
|
243
|
-
|
244
|
-
assert_equal 3, results.count
|
245
|
-
assert results.first.start_with?('222')
|
246
|
-
assert results.last.start_with?('444')
|
247
|
-
end
|
248
|
-
|
249
|
-
def test_should_return_four_results_with_start_in_a_record_and_end_in_a_record
|
250
|
-
table = YAML.load <<-YML.strip_heredoc
|
251
|
-
--- !ruby/object:NdrImport::NonTabular::Table
|
252
|
-
start_line_pattern: !ruby/regexp /^-{6}$/
|
253
|
-
start_in_a_record: true
|
254
|
-
end_in_a_record: true
|
255
|
-
klass: SomeTestKlass
|
256
|
-
columns:
|
257
|
-
- column: one
|
258
|
-
non_tabular_cell:
|
259
|
-
lines: !ruby/range
|
260
|
-
begin: 0
|
261
|
-
end: -1
|
262
|
-
excl: false
|
263
|
-
capture: !ruby/regexp /^(.*)$/i
|
264
|
-
YML
|
265
|
-
enum = table.transform(@simple_divider_example)
|
266
|
-
assert_instance_of Enumerator, enum
|
267
|
-
results = enum.map { |_klass, fields, _index| fields[:rawtext]['one'] }
|
268
|
-
|
269
|
-
assert_equal 4, results.count
|
270
|
-
assert results.first.start_with?('111')
|
271
|
-
assert results.last.start_with?('444')
|
272
|
-
end
|
273
|
-
|
274
|
-
def test_should_return_one_results_with_start_in_a_record_and_end_in_a_record
|
275
|
-
table = YAML.load <<-YML.strip_heredoc
|
276
|
-
--- !ruby/object:NdrImport::NonTabular::Table
|
277
|
-
start_line_pattern: !ruby/regexp /^-{6}$/
|
278
|
-
start_in_a_record: true
|
279
|
-
end_in_a_record: true
|
280
|
-
klass: SomeTestKlass
|
281
|
-
columns:
|
282
|
-
- column: one
|
283
|
-
non_tabular_cell:
|
284
|
-
lines: !ruby/range
|
285
|
-
begin: 0
|
286
|
-
end: -1
|
287
|
-
excl: false
|
288
|
-
capture: !ruby/regexp /^(.*)$/i
|
289
|
-
YML
|
290
|
-
enum = table.transform(@no_divider_example)
|
291
|
-
assert_instance_of Enumerator, enum
|
292
|
-
results = enum.map { |_klass, fields, _index| fields[:rawtext]['one'] }
|
293
|
-
|
294
|
-
assert_equal 1, results.count
|
295
|
-
assert results.first.start_with?('111')
|
296
|
-
end
|
297
|
-
|
298
|
-
def test_should_return_four_results_with_start_and_end_dividers
|
299
|
-
table = YAML.load <<-YML.strip_heredoc
|
300
|
-
--- !ruby/object:NdrImport::NonTabular::Table
|
301
|
-
start_line_pattern: !ruby/regexp /^----- START -----$/
|
302
|
-
end_line_pattern: !ruby/regexp /^------ END ------$/
|
303
|
-
klass: SomeTestKlass
|
304
|
-
columns:
|
305
|
-
- column: one
|
306
|
-
non_tabular_cell:
|
307
|
-
lines: !ruby/range
|
308
|
-
begin: 0
|
309
|
-
end: -1
|
310
|
-
excl: false
|
311
|
-
capture: !ruby/regexp /^(.*)$/i
|
312
|
-
YML
|
313
|
-
enum = table.transform(@simple_start_and_end_divider_example)
|
314
|
-
assert_instance_of Enumerator, enum
|
315
|
-
|
316
|
-
results = enum.map { |_klass, fields, _index| fields[:rawtext]['one'] }
|
317
|
-
|
318
|
-
assert_equal 4, results.count
|
319
|
-
assert results.first.start_with?('111')
|
320
|
-
assert results.last.start_with?('444')
|
321
|
-
|
322
|
-
assert results.any? { |result| result =~ /This is captured/ }
|
323
|
-
refute results.any? { |result| result =~ /This is never captured/ }
|
324
|
-
end
|
325
|
-
|
326
|
-
def test_should_capture_end_line
|
327
|
-
data = <<~STR.each_line
|
328
|
-
111
|
329
|
-
Lorem ipsum dolor sit amet.
|
330
|
-
CAPTURE THIS CODE ABC
|
331
|
-
111
|
332
|
-
Lorem ipsum dolor sit amet.
|
333
|
-
CAPTURE THIS CODE XYZ
|
334
|
-
111
|
335
|
-
Lorem ipsum dolor sit amet.
|
336
|
-
CAPTURE THIS CODE 123
|
337
|
-
STR
|
338
|
-
|
339
|
-
table = YAML.load <<-YML.strip_heredoc
|
340
|
-
--- !ruby/object:NdrImport::NonTabular::Table
|
341
|
-
start_line_pattern: !ruby/regexp /\\A111\\z/
|
342
|
-
end_line_pattern: !ruby/regexp /\\ACAPTURE THIS CODE/
|
343
|
-
capture_start_line: true
|
344
|
-
capture_end_line: true
|
345
|
-
klass: SomeTestKlass
|
346
|
-
columns:
|
347
|
-
- column: one
|
348
|
-
non_tabular_cell:
|
349
|
-
lines: -1
|
350
|
-
capture: !ruby/regexp /\\A(.*)\\z/i
|
351
|
-
YML
|
352
|
-
enum = table.transform(data)
|
353
|
-
assert_instance_of Enumerator, enum
|
354
|
-
|
355
|
-
results = enum.map { |_klass, fields, _index| fields[:rawtext]['one'] }
|
356
|
-
|
357
|
-
assert_equal 3, results.count
|
358
|
-
assert_equal 'CAPTURE THIS CODE ABC', results.first
|
359
|
-
end
|
360
|
-
|
361
|
-
def test_should_capture
|
362
|
-
table = YAML.load <<-YML.strip_heredoc
|
363
|
-
--- !ruby/object:NdrImport::NonTabular::Table
|
364
|
-
start_line_pattern: !ruby/regexp /^-{6}$/
|
365
|
-
klass: SomeTestKlass
|
366
|
-
columns:
|
367
|
-
- standard_mapping: nhsnumber
|
368
|
-
non_tabular_cell:
|
369
|
-
lines: 0
|
370
|
-
capture: !ruby/regexp /^(\\d*)$/i
|
371
|
-
- column: address
|
372
|
-
non_tabular_cell:
|
373
|
-
lines: !ruby/range
|
374
|
-
begin: 1
|
375
|
-
end: 5
|
376
|
-
excl: false
|
377
|
-
capture: !ruby/regexp /^.{50}(.*)$/i
|
378
|
-
join: ", "
|
379
|
-
- standard_mapping: postcode
|
380
|
-
non_tabular_cell:
|
381
|
-
lines: 6
|
382
|
-
capture: !ruby/regexp /^.{50}(.*)$/i
|
383
|
-
- column: capture_inclusive
|
384
|
-
non_tabular_cell:
|
385
|
-
lines: !ruby/object:RegexpRange
|
386
|
-
begin: !ruby/regexp /^CAPTURE INCLUSIVE$/
|
387
|
-
end: !ruby/regexp /^Capture me.$/i
|
388
|
-
excl: false
|
389
|
-
capture: !ruby/regexp /^(.*)$/i
|
390
|
-
join: "\\n"
|
391
|
-
- column: capture_exclusive
|
392
|
-
non_tabular_cell:
|
393
|
-
lines: !ruby/object:RegexpRange
|
394
|
-
begin: !ruby/regexp /^CAPTURE EXCLUSIVE$/
|
395
|
-
end: !ruby/regexp /^Do NOT capture me.$/i
|
396
|
-
excl: true
|
397
|
-
capture: !ruby/regexp /^(.*)$/i
|
398
|
-
join: "\\n"
|
399
|
-
- column: capture_to_end
|
400
|
-
non_tabular_cell:
|
401
|
-
lines: !ruby/object:RegexpRange
|
402
|
-
begin: !ruby/regexp /^CAPTURE TO END$/
|
403
|
-
end: -1
|
404
|
-
excl: false
|
405
|
-
capture: !ruby/regexp /^(.*)$/i
|
406
|
-
join: "\\n"
|
407
|
-
YML
|
408
|
-
capture_example = <<-STR
|
409
|
-
This is never captured
|
410
|
-
------
|
411
|
-
1111111111
|
412
|
-
<----------------- 50 characters ---------------->Unit C, Magog Court
|
413
|
-
Shelford Bottom
|
414
|
-
Hinton Way
|
415
|
-
Cambridge
|
416
|
-
|
417
|
-
CB22 3AD
|
418
|
-
|
419
|
-
CAPTURE INCLUSIVE
|
420
|
-
Lorem ipsum dolor sit amet,
|
421
|
-
consectetur adipisicing elit,
|
422
|
-
Capture me.
|
423
|
-
|
424
|
-
CAPTURE EXCLUSIVE
|
425
|
-
Ut enim ad minim veniam, quis nostrud exercitation.
|
426
|
-
Do NOT capture me.
|
427
|
-
|
428
|
-
CAPTURE TO END
|
429
|
-
Lorem ipsum dolor sit amet, consectetur adipisicing elit.
|
430
|
-
Ut enim ad minim veniam, quis nostrud exercitation ullamco.
|
431
|
-
Duis aute irure dolor in reprehenderit in voluptate velit.
|
432
|
-
Excepteur sint occaecat cupidatat non proident, sunt in culpa.
|
433
|
-
------
|
434
|
-
This is never captured
|
435
|
-
STR
|
436
|
-
enum = table.transform(capture_example.split(/\n/).map)
|
437
|
-
assert_instance_of Enumerator, enum
|
438
|
-
|
439
|
-
output = []
|
440
|
-
enum.each do |klass, fields, index|
|
441
|
-
output << [klass, fields, index]
|
442
|
-
end
|
443
|
-
|
444
|
-
expected_output = [
|
445
|
-
[
|
446
|
-
'SomeTestKlass', {
|
447
|
-
'nhsnumber' => '1111111111',
|
448
|
-
'postcode' => 'CB223AD',
|
449
|
-
:rawtext => {
|
450
|
-
'nhsnumber' => '1111111111',
|
451
|
-
'address' => 'Unit C, Magog Court, Shelford Bottom, Hinton Way, Cambridge',
|
452
|
-
'postcode' => 'CB22 3AD',
|
453
|
-
'capture_inclusive' => "CAPTURE INCLUSIVE\nLorem ipsum dolor sit amet,\n" \
|
454
|
-
"consectetur adipisicing elit,\nCapture me.",
|
455
|
-
'capture_exclusive' => "CAPTURE EXCLUSIVE\n" \
|
456
|
-
'Ut enim ad minim veniam, quis nostrud exercitation.',
|
457
|
-
'capture_to_end' => "CAPTURE TO END\n" \
|
458
|
-
"Lorem ipsum dolor sit amet, consectetur adipisicing elit.\n" \
|
459
|
-
"Ut enim ad minim veniam, quis nostrud exercitation ullamco.\n" \
|
460
|
-
"Duis aute irure dolor in reprehenderit in voluptate velit.\n" \
|
461
|
-
'Excepteur sint occaecat cupidatat non proident, sunt in culpa.'
|
462
|
-
}
|
463
|
-
},
|
464
|
-
0
|
465
|
-
]
|
466
|
-
]
|
467
|
-
assert_equal expected_output.sort, output.sort
|
468
|
-
assert_equal 25, table.non_tabular_lines.last.absolute_line_number
|
469
|
-
end
|
470
|
-
|
471
|
-
def test_handles_non_utf8_characters
|
472
|
-
mixed_encoding_example = <<-STR.each_line
|
473
|
-
111
|
474
|
-
Lorem ipsum dolor sit amet.
|
475
|
-
------
|
476
|
-
111
|
477
|
-
Lorem ipsum dolor\xBE sit amet.
|
478
|
-
------
|
479
|
-
111
|
480
|
-
Lorem ipsum dolor sit amet.
|
481
|
-
------
|
482
|
-
STR
|
483
|
-
|
484
|
-
table = YAML.load <<-YML.strip_heredoc
|
485
|
-
--- !ruby/object:NdrImport::NonTabular::Table
|
486
|
-
start_line_pattern: !ruby/regexp /^111$/
|
487
|
-
end_in_a_record: true
|
488
|
-
klass: SomeTestKlass
|
489
|
-
columns:
|
490
|
-
- column: one
|
491
|
-
non_tabular_cell:
|
492
|
-
lines: !ruby/range
|
493
|
-
begin: 0
|
494
|
-
end: -1
|
495
|
-
excl: true
|
496
|
-
capture: !ruby/regexp /^(.*)$/i
|
497
|
-
YML
|
498
|
-
|
499
|
-
enum = table.transform(mixed_encoding_example)
|
500
|
-
assert_instance_of Enumerator, enum
|
501
|
-
results = enum.map { |_klass, fields, _index| fields[:rawtext]['one'] }
|
502
|
-
|
503
|
-
assert_equal 3, results.count, 'records were lost'
|
504
|
-
|
505
|
-
assert_equal [27, 28, 27], results.map { |row| row.chars.to_a.length }
|
506
|
-
assert_equal [27, 29, 27], results.map { |row| row.bytes.to_a.length }
|
507
|
-
|
508
|
-
results.each do |row|
|
509
|
-
assert row.first.valid_encoding?
|
510
|
-
assert_equal Encoding.find('UTF-8'), row.first.encoding
|
511
|
-
end
|
512
|
-
end
|
513
|
-
|
514
|
-
def test_should_not_allow_junk_bytes
|
515
|
-
junk = <<-STR.each_line
|
516
|
-
111
|
517
|
-
Lorem ipsum dolor sit amet.
|
518
|
-
------
|
519
|
-
111
|
520
|
-
Lorem ipsum dolor\x8D sit amet.
|
521
|
-
------
|
522
|
-
111
|
523
|
-
Lorem ipsum dolor sit amet.
|
524
|
-
------
|
525
|
-
STR
|
526
|
-
|
527
|
-
table = YAML.load <<-YML.strip_heredoc
|
528
|
-
--- !ruby/object:NdrImport::NonTabular::Table
|
529
|
-
start_line_pattern: !ruby/regexp /^111$/
|
530
|
-
end_in_a_record: true
|
531
|
-
klass: SomeTestKlass
|
532
|
-
columns:
|
533
|
-
- column: one
|
534
|
-
non_tabular_cell:
|
535
|
-
lines: !ruby/range
|
536
|
-
begin: 0
|
537
|
-
end: -1
|
538
|
-
excl: true
|
539
|
-
capture: !ruby/regexp /^(.*)$/i
|
540
|
-
YML
|
541
|
-
|
542
|
-
assert_raises(UTF8Encoding::UTF8CoercionError) do
|
543
|
-
table.transform(junk).to_a
|
544
|
-
end
|
545
|
-
end
|
546
|
-
|
547
|
-
def test_should_strip_captured_rawtext
|
548
|
-
unwanted_white_space = <<-STR.each_line
|
549
|
-
111
|
550
|
-
Trailing whitespace end_of_line
|
551
|
-
------
|
552
|
-
111
|
553
|
-
Leading whitespaceend_of_line
|
554
|
-
------
|
555
|
-
111
|
556
|
-
Leading and trailing whitespace end_of_line
|
557
|
-
------
|
558
|
-
111
|
559
|
-
Should not match this
|
560
|
-
------
|
561
|
-
STR
|
562
|
-
|
563
|
-
table = YAML.load <<-YML.strip_heredoc
|
564
|
-
--- !ruby/object:NdrImport::NonTabular::Table
|
565
|
-
start_line_pattern: !ruby/regexp /^111$/
|
566
|
-
end_in_a_record: true
|
567
|
-
klass: SomeTestKlass
|
568
|
-
columns:
|
569
|
-
- column: one
|
570
|
-
non_tabular_cell:
|
571
|
-
lines: 0
|
572
|
-
capture: !ruby/regexp /^(.*)end_of_line$/i
|
573
|
-
trim_rawtext: left
|
574
|
-
YML
|
575
|
-
|
576
|
-
enum = table.transform(unwanted_white_space)
|
577
|
-
assert_instance_of Enumerator, enum
|
578
|
-
|
579
|
-
output = []
|
580
|
-
enum.each do |klass, fields, index|
|
581
|
-
output << [klass, fields, index]
|
582
|
-
end
|
583
|
-
|
584
|
-
expected_rawtext_ouput = [{ 'one' => 'Trailing whitespace' },
|
585
|
-
{ 'one' => 'Leading whitespace' },
|
586
|
-
{ 'one' => 'Leading and trailing whitespace' },
|
587
|
-
{ 'one' => '' }]
|
588
|
-
assert_equal expected_rawtext_ouput, (output.map { |row| row[1][:rawtext] })
|
589
|
-
end
|
590
|
-
end
|