ndr_import 8.3.0 → 8.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 82e0bb47cb3a5c62b7cd5701c7196760cf9977357490a0e9ce708c2d814a51fc
4
- data.tar.gz: 111f781ab628de952d6b305e4fd7f0f8eb412b6c7b7b539dca2786e46904b05d
3
+ metadata.gz: 1ff3d3bfb186b79ee65c5f10a9c67508bbd91668424197b7dad3f256fbc7c78b
4
+ data.tar.gz: 73a0245645a2996d35d0bc3eefcec7c6cdfa37808b9ffb4c82dd49cb70ecc2ab
5
5
  SHA512:
6
- metadata.gz: f6d269d1d9e73ce154f7e8886a9109c16b74ba83d5b5c8f296f2c241609f1f5c9309afd08bee8538b34c36e534d48bacc5e1af6ff1fe89f89d9d30846c8841bc
7
- data.tar.gz: e332a7c3af8857095e5f1dc0de7fd7693f7df4b639c3f1a34858b4f5c7af68b73dbff751142e485a0f836e7addf656043dd944816ad03ecf9b56797d8cd5a740
6
+ metadata.gz: 04d43b45585b8577b98b2b7c77295071a73db86dc1bdd0b213e6479ff8ed5027d7c6dc3248a04dc83d6398f17683e7f0421e5205f46e57d328b5efd57c427fc5
7
+ data.tar.gz: 3ac7c8948019c4d45ab0ae2b29e21fca1f21657e845252e659f437b8a81521f052a03aa320089ab1afa33dfdeee2c27d5f97766724c0453fcb15f50f77b52476
data/CHANGELOG.md CHANGED
@@ -1,6 +1,9 @@
1
1
  ## [Unreleased]
2
2
  *no unreleased changes*
3
3
 
4
+ ## 8.4.0 / 2019-03-15
5
+ * Added ability to extract and transform PDF form data (#24)
6
+
4
7
  ## 8.3.0 / 2019-03-04
5
8
  ### Added
6
9
  * Allow `klass` in the column level mapping to be embedded array.
data/README.md CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  This is the Public Health England (PHE) National Disease Registers (NDR) Import ETL ruby gem, providing:
4
4
 
5
- 1. file import handlers for *extracting* data from delimited files (csv, pipe, tab, thorn), .xls(x) spreadsheets, .doc(x) word documents, PDF, XML, 7-Zip and Zip files.
5
+ 1. file import handlers for *extracting* data from delimited files (csv, pipe, tab, thorn), .xls(x) spreadsheets, .doc(x) word documents, PDF, PDF AcroForms, XML, 7-Zip and Zip files.
6
6
  2. table mappers for *transforming* tabular and non-tabular data into key value pairs grouped by a common "klass".
7
7
 
8
8
  ## Installation
data/code_safety.yml CHANGED
@@ -19,7 +19,7 @@ file safety:
19
19
  CHANGELOG.md:
20
20
  comments:
21
21
  reviewed_by: josh.pencheon
22
- safe_revision: 6271165699e2d886f94d45277baa955023e3f094
22
+ safe_revision: 9dd7962195b259862a3294aba3de061b391435eb
23
23
  CODE_OF_CONDUCT.md:
24
24
  comments:
25
25
  reviewed_by: timgentry
@@ -39,7 +39,7 @@ file safety:
39
39
  README.md:
40
40
  comments:
41
41
  reviewed_by: josh.pencheon
42
- safe_revision: f2feb0c430947839183d7223e60e7c29b2c0f846
42
+ safe_revision: 5cd2cd0b3a1e254d30d4acc28c6731825a1f84f5
43
43
  Rakefile:
44
44
  comments:
45
45
  reviewed_by: josh.pencheon
@@ -59,15 +59,23 @@ file safety:
59
59
  lib/ndr_import.rb:
60
60
  comments:
61
61
  reviewed_by: josh.pencheon
62
- safe_revision: eca44583e9989159b45e90021dd1c65228447180
62
+ safe_revision: 24d6449fd0612552f132dfbf4cada2ae28d0469e
63
+ lib/ndr_import/acroform_reader.rb:
64
+ comments:
65
+ reviewed_by: josh.pencheon
66
+ safe_revision: 24d6449fd0612552f132dfbf4cada2ae28d0469e
63
67
  lib/ndr_import/csv_library.rb:
64
68
  comments:
65
69
  reviewed_by: josh.pencheon
66
70
  safe_revision: be12e57519d3737e8d3901d7b01485c6995708dd
71
+ lib/ndr_import/file/acro_form.rb:
72
+ comments:
73
+ reviewed_by: josh.pencheon
74
+ safe_revision: 8a6ea666616c5b5d7c93cdf5aa019e8fc69d19e1
67
75
  lib/ndr_import/file/all.rb:
68
76
  comments:
69
77
  reviewed_by: josh.pencheon
70
- safe_revision: f2feb0c430947839183d7223e60e7c29b2c0f846
78
+ safe_revision: 5cd2cd0b3a1e254d30d4acc28c6731825a1f84f5
71
79
  lib/ndr_import/file/base.rb:
72
80
  comments:
73
81
  reviewed_by: timgentry
@@ -180,6 +188,10 @@ file safety:
180
188
  comments:
181
189
  reviewed_by: josh.pencheon
182
190
  safe_revision: ac30f66578ab380649be800a4426d917ddbcb329
191
+ lib/ndr_import/pdf_form/table.rb:
192
+ comments:
193
+ reviewed_by: josh.pencheon
194
+ safe_revision: 5fd247eeb13a3f1356ab2d76ac9fabf9e19d5d36
183
195
  lib/ndr_import/standard_mappings.rb:
184
196
  comments:
185
197
  reviewed_by: josh.pencheon
@@ -192,10 +204,14 @@ file safety:
192
204
  comments:
193
205
  reviewed_by: josh.pencheon
194
206
  safe_revision: 3e3a852b58e8b169535e29029e535a10f6b9cd42
207
+ lib/ndr_import/unmapped_data_error.rb:
208
+ comments:
209
+ reviewed_by: josh.pencheon
210
+ safe_revision: 5cd2cd0b3a1e254d30d4acc28c6731825a1f84f5
195
211
  lib/ndr_import/version.rb:
196
212
  comments: another check?
197
213
  reviewed_by: josh.pencheon
198
- safe_revision: 6271165699e2d886f94d45277baa955023e3f094
214
+ safe_revision: 9dd7962195b259862a3294aba3de061b391435eb
199
215
  lib/ndr_import/xml/table.rb:
200
216
  comments:
201
217
  reviewed_by: josh.pencheon
@@ -204,6 +220,10 @@ file safety:
204
220
  comments:
205
221
  reviewed_by: josh.pencheon
206
222
  safe_revision: 607c0668f1fffd70d181bc1a31c4f56eed5f6189
223
+ test/file/acro_form_test.rb:
224
+ comments:
225
+ reviewed_by: josh.pencheon
226
+ safe_revision: 5cd2cd0b3a1e254d30d4acc28c6731825a1f84f5
207
227
  test/file/base_test.rb:
208
228
  comments:
209
229
  reviewed_by: timgentry
@@ -227,7 +247,7 @@ file safety:
227
247
  test/file/registry_test.rb:
228
248
  comments:
229
249
  reviewed_by: josh.pencheon
230
- safe_revision: f2feb0c430947839183d7223e60e7c29b2c0f846
250
+ safe_revision: 5cd2cd0b3a1e254d30d4acc28c6731825a1f84f5
231
251
  test/file/seven_zip_test.rb:
232
252
  comments:
233
253
  reviewed_by: josh.pencheon
@@ -292,10 +312,18 @@ file safety:
292
312
  comments:
293
313
  reviewed_by: timgentry
294
314
  safe_revision: cf382902508a21a95b80ac4582fbbd117164e80e
315
+ test/pdf_form/table_test.rb:
316
+ comments:
317
+ reviewed_by: josh.pencheon
318
+ safe_revision: 5cd2cd0b3a1e254d30d4acc28c6731825a1f84f5
295
319
  test/readme_test.rb:
296
320
  comments:
297
321
  reviewed_by: timgentry
298
322
  safe_revision: cf382902508a21a95b80ac4582fbbd117164e80e
323
+ test/resources/acro_form.pdf:
324
+ comments:
325
+ reviewed_by: josh.pencheon
326
+ safe_revision: 5cd2cd0b3a1e254d30d4acc28c6731825a1f84f5
299
327
  test/resources/blank_tab_test.xlsx:
300
328
  comments: reviewed contents
301
329
  reviewed_by: joshpencheon
@@ -0,0 +1,33 @@
1
+ require 'pdf-reader'
2
+
3
+ module NdrImport
4
+ # PDF AcroForm reader using the pdf-reader gem
5
+ class AcroFormReader < ::PDF::Reader
6
+ def fields_hash
7
+ fields = {}
8
+ fields_from(acroform[:Fields]).each do |field|
9
+ field_name = field[:T]
10
+ unless field[:Subtype] == :Widget || field.key?(:Kids)
11
+ raise "Widgets or Radio boxes expected, found a #{field[:Subtype].inspect}"
12
+ end
13
+ raise "Non-unique column name #{field_name}" if fields.key?(field_name)
14
+ fields[field_name] = field[:V]
15
+ end
16
+ fields
17
+ end
18
+
19
+ private
20
+
21
+ def acroform
22
+ @objects.deref(root[:AcroForm])
23
+ end
24
+
25
+ def fields_from(refs)
26
+ Array(refs).flat_map do |ref|
27
+ value = @objects[ref]
28
+ # PDF has its own Hash class
29
+ value.is_a?(::Hash) ? value : fields_from(value)
30
+ end
31
+ end
32
+ end
33
+ end
@@ -0,0 +1,28 @@
1
+ require 'pdf-reader'
2
+ require 'ndr_support/safe_file'
3
+ require_relative 'registry'
4
+
5
+ module NdrImport
6
+ # This is one of a collection of file handlers that deal with individual formats of data.
7
+ # They can be instantiated directly or via the factory method Registry.tables
8
+ module File
9
+ # This class is an AcroForm PDF file handler that returns a single table
10
+ class AcroForm < Base
11
+ private
12
+
13
+ def rows(&block)
14
+ return enum_for(:rows) unless block
15
+
16
+ [reader.fields_hash].each(&block)
17
+ rescue NoMethodError
18
+ raise "Failed to read #{SafeFile.basename(@filename)} as an AcroForm PDF"
19
+ end
20
+
21
+ def reader
22
+ @reader ||= AcroFormReader.new(SafeFile.safepath_to_string(@filename))
23
+ end
24
+ end
25
+
26
+ Registry.register(AcroForm, 'acroform')
27
+ end
28
+ end
@@ -1,4 +1,5 @@
1
1
  require_relative 'base'
2
+ require_relative 'acro_form'
2
3
  require_relative 'delimited'
3
4
  require_relative 'docx'
4
5
  require_relative 'excel'
@@ -0,0 +1,71 @@
1
+ require 'ndr_import/table'
2
+
3
+ module NdrImport
4
+ module PdfForm
5
+ # This class maintains the state of a PDF form table mapping and encapsulates
6
+ # the logic required to transform a table of data into "records". Particular
7
+ # attention has been paid to using enumerables throughout to help with the
8
+ # transformation of large quantities of data.
9
+ class Table < ::NdrImport::Table
10
+ def self.all_valid_options
11
+ super - %w[delimiter footer_lines format header_lines]
12
+ end
13
+
14
+ def footer_lines
15
+ 0
16
+ end
17
+
18
+ def format
19
+ 'acroform'
20
+ end
21
+
22
+ def header_lines
23
+ 0
24
+ end
25
+
26
+ # This method transforms an incoming line (Hash) of data.
27
+ # Each of the klass masked mappings is applied to the hash values, which are reordered by
28
+ # the mapping definition, yielding the klass and fields for each mapped klass.
29
+ def transform_line(line, index)
30
+ return enum_for(:transform_line, line, index) unless block_given?
31
+
32
+ raise 'NdrImport::PdfForm::Table expects a Hash!' unless line.is_a? Hash
33
+
34
+ validate_column_mappings(line)
35
+
36
+ masked_mappings.each do |klass, klass_mappings|
37
+ ordered_line = order_values_by_mappings(line, klass_mappings)
38
+ fields = mapped_line(ordered_line, klass_mappings)
39
+ next if fields[:skip].to_s == 'true'.freeze
40
+ yield(klass, fields, index)
41
+ end
42
+ end
43
+
44
+ private
45
+
46
+ # Ensure every key has a column mapping
47
+ def validate_column_mappings(line)
48
+ unmapped = []
49
+ line.each_key do |key|
50
+ next if column_names.include? key
51
+ unmapped << key
52
+ end
53
+ raise NdrImport::UnmappedDataError, unmapped if unmapped.any?
54
+ end
55
+
56
+ def column_name_from(column)
57
+ column[Strings::COLUMN] || column[Strings::STANDARD_MAPPING]
58
+ end
59
+
60
+ def column_names
61
+ @column_names ||= columns.map { |column| column_name_from(column) }
62
+ end
63
+
64
+ # Return an Array of the `hash` values in the order the columns are defined in the mapping,
65
+ # allowing mapped_line to work as normal
66
+ def order_values_by_mappings(hash, column_mappings)
67
+ column_mappings.map { |column_mapping| hash[column_name_from(column_mapping)].to_s }
68
+ end
69
+ end
70
+ end
71
+ end
@@ -0,0 +1,14 @@
1
+ require 'active_support/core_ext/array/conversions'
2
+
3
+ module NdrImport
4
+ # Raised if incoming data has not been mapped.
5
+ class UnmappedDataError < StandardError
6
+ attr_reader :keys
7
+
8
+ def initialize(keys)
9
+ @keys = keys
10
+ message = "Unmapped data: #{keys.to_sentence}"
11
+ super(message)
12
+ end
13
+ end
14
+ end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
  # This stores the current version of the NdrImport gem
3
3
  module NdrImport
4
- VERSION = '8.3.0'.freeze
4
+ VERSION = '8.4.0'.freeze
5
5
  end
data/lib/ndr_import.rb CHANGED
@@ -8,6 +8,9 @@ require 'ndr_import/table'
8
8
  require 'ndr_import/non_tabular/table'
9
9
  require 'ndr_import/fixed_width/table'
10
10
  require 'ndr_import/xml/table'
11
+ require 'ndr_import/pdf_form/table'
12
+ require 'ndr_import/unmapped_data_error'
13
+ require 'ndr_import/acroform_reader'
11
14
 
12
15
  module NdrImport
13
16
  def self.root
@@ -0,0 +1,39 @@
1
+ require 'test_helper'
2
+ require 'ndr_import/file/acro_form'
3
+
4
+ module NdrImport
5
+ module File
6
+ # Acro Form file handler tests
7
+ class AcroFormTest < ActiveSupport::TestCase
8
+ def setup
9
+ @permanent_test_files = SafePath.new('permanent_test_files')
10
+ end
11
+
12
+ test 'should read pdf correctly' do
13
+ file_path = @permanent_test_files.join('acro_form.pdf')
14
+ handler = NdrImport::File::AcroForm.new(file_path, nil)
15
+
16
+ expected_row = { 'Group3' => nil,
17
+ 'Textbox1_required' => nil,
18
+ 'Textbox2_required' => nil,
19
+ 'List Box_required' => '3',
20
+ 'Dropdown_required' => '3',
21
+ 'Textbox1_optional' => nil,
22
+ 'Textbox2_optional' => nil,
23
+ 'List Box_optional' => '3',
24
+ 'Dropdown_optional' => '3',
25
+ 'Date_required' => nil,
26
+ 'Date__optional' => nil,
27
+ 'Textbox3_numerical_required' => nil,
28
+ 'Textbox3_numerical_optional' => nil }
29
+
30
+ handler.tables.each do |tablename, sheet|
31
+ assert_nil tablename
32
+ assert_instance_of Enumerator, sheet
33
+ assert_equal 1, sheet.to_a.size
34
+ assert_equal expected_row, sheet.to_a.first
35
+ end
36
+ end
37
+ end
38
+ end
39
+ end
@@ -12,7 +12,8 @@ module NdrImport
12
12
 
13
13
  test 'Registry.handlers' do
14
14
  assert_instance_of Hash, NdrImport::File::Registry.handlers
15
- assert_equal %w[7z csv delimited doc docx nontabular pdf text txt xls xlsx xml_table zip],
15
+ assert_equal %w[7z acroform csv delimited doc docx nontabular
16
+ pdf text txt xls xlsx xml_table zip],
16
17
  NdrImport::File::Registry.handlers.keys.sort
17
18
  end
18
19
 
@@ -0,0 +1,119 @@
1
+ require 'test_helper'
2
+
3
+ # This tests the NdrImport::PdfForm::Table mapping class
4
+ module PdfForm
5
+ class TableTest < ActiveSupport::TestCase
6
+ def setup
7
+ @form_data = [{
8
+ 'address2' => 'Address 2',
9
+ 'should_be_blank' => '',
10
+ 'date_2' => '2018-12-01',
11
+ 'date_1' => '2018-01-01',
12
+ 'address1' => 'Address'
13
+ }]
14
+ end
15
+
16
+ def test_should_transform_pdf_form_hash
17
+ table = NdrImport::PdfForm::Table.new(klass: 'SomeTestKlass',
18
+ columns: pdf_form_column_mapping)
19
+
20
+ assert_equal 'acroform', table.format
21
+
22
+ transformed_data = table.transform(@form_data)
23
+ assert_equal 1, transformed_data.count
24
+
25
+ expected_data = ['SomeTestKlass', { rawtext: {
26
+ 'address1' => 'Address',
27
+ 'address2' => 'Address 2',
28
+ 'missing_from_data' => '',
29
+ 'date_1' => '2018-01-01',
30
+ 'date_2' => '2018-12-01',
31
+ 'should_be_blank' => ''
32
+ } }, 0]
33
+
34
+ klass, fields, index = *transformed_data.first
35
+ assert_equal expected_data[0], klass
36
+ assert_equal expected_data[1], fields
37
+ assert_equal expected_data[-1], index
38
+ end
39
+
40
+ def test_should_transform_mutli_klass_pdf_form_hash
41
+ table = NdrImport::PdfForm::Table.new(columns: multi_klass_pdf_form_column_mapping)
42
+
43
+ expected_data = [
44
+ ['TestKlassOne',
45
+ { rawtext:
46
+ { 'address1' => 'Address',
47
+ 'address2' => 'Address 2',
48
+ 'missing_from_data' => '' } },
49
+ 0],
50
+ ['TestKlassTwo',
51
+ { rawtext:
52
+ { 'address1' => 'Address',
53
+ 'date_1' => '2018-01-01',
54
+ 'date_2' => '2018-12-01',
55
+ 'should_be_blank' => '' } },
56
+ 0]
57
+ ]
58
+
59
+ transformed_data = table.transform(@form_data).to_a
60
+ assert_equal 2, transformed_data.count
61
+
62
+ expected_data.each_with_index do |expected, index|
63
+ transformed = transformed_data[index]
64
+ assert_equal expected, transformed
65
+ end
66
+ end
67
+
68
+ def test_should_fail_with_unmappped_form_data
69
+ table = NdrImport::PdfForm::Table.new(klass: 'SomeTestKlass',
70
+ columns: partial_pdf_form_column_mapping)
71
+
72
+ exception = assert_raises(NdrImport::UnmappedDataError) { table.transform(@form_data).to_a }
73
+ assert exception.message == 'Unmapped data: address2 and date_1'
74
+ end
75
+
76
+ def test_should_not_be_valid_with_bespoke_format
77
+ exception = assert_raises(ArgumentError) { NdrImport::PdfForm::Table.new(format: 'a_format') }
78
+ exception.message == 'Unrecognised options: ["format"]'
79
+ end
80
+
81
+ private
82
+
83
+ def pdf_form_column_mapping
84
+ [
85
+ { 'column' => 'address1' },
86
+ { 'column' => 'address2' },
87
+ { 'column' => 'missing_from_data' },
88
+ { 'column' => 'date_1' },
89
+ { 'column' => 'date_2' },
90
+ { 'column' => 'should_be_blank' }
91
+ ]
92
+ end
93
+
94
+ def multi_klass_pdf_form_column_mapping
95
+ [
96
+ { 'column' => 'address1',
97
+ 'klass' => %w[TestKlassOne TestKlassTwo] },
98
+ { 'column' => 'address2',
99
+ 'klass' => 'TestKlassOne' },
100
+ { 'column' => 'missing_from_data',
101
+ 'klass' => 'TestKlassOne' },
102
+ { 'column' => 'date_1',
103
+ 'klass' => 'TestKlassTwo' },
104
+ { 'column' => 'date_2',
105
+ 'klass' => 'TestKlassTwo' },
106
+ { 'column' => 'should_be_blank',
107
+ 'klass' => 'TestKlassTwo' }
108
+ ]
109
+ end
110
+
111
+ def partial_pdf_form_column_mapping
112
+ [
113
+ { 'column' => 'address1' },
114
+ { 'column' => 'date_2' },
115
+ { 'column' => 'should_be_blank' }
116
+ ]
117
+ end
118
+ end
119
+ end
Binary file
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ndr_import
3
3
  version: !ruby/object:Gem::Version
4
- version: 8.3.0
4
+ version: 8.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - NCRS Development Team
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-03-04 00:00:00.000000000 Z
11
+ date: 2019-03-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activemodel
@@ -363,7 +363,9 @@ files:
363
363
  - gemfiles/Gemfile.rails51
364
364
  - gemfiles/Gemfile.rails52
365
365
  - lib/ndr_import.rb
366
+ - lib/ndr_import/acroform_reader.rb
366
367
  - lib/ndr_import/csv_library.rb
368
+ - lib/ndr_import/file/acro_form.rb
367
369
  - lib/ndr_import/file/all.rb
368
370
  - lib/ndr_import/file/base.rb
369
371
  - lib/ndr_import/file/delimited.rb
@@ -393,12 +395,15 @@ files:
393
395
  - lib/ndr_import/non_tabular/record.rb
394
396
  - lib/ndr_import/non_tabular/table.rb
395
397
  - lib/ndr_import/non_tabular_file_helper.rb
398
+ - lib/ndr_import/pdf_form/table.rb
396
399
  - lib/ndr_import/standard_mappings.rb
397
400
  - lib/ndr_import/table.rb
398
401
  - lib/ndr_import/universal_importer_helper.rb
402
+ - lib/ndr_import/unmapped_data_error.rb
399
403
  - lib/ndr_import/version.rb
400
404
  - lib/ndr_import/xml/table.rb
401
405
  - ndr_import.gemspec
406
+ - test/file/acro_form_test.rb
402
407
  - test/file/base_test.rb
403
408
  - test/file/delimited_test.rb
404
409
  - test/file/docx_test.rb
@@ -421,7 +426,9 @@ files:
421
426
  - test/non_tabular/mapping_test.rb
422
427
  - test/non_tabular/table_test.rb
423
428
  - test/non_tabular_file_helper_test.rb
429
+ - test/pdf_form/table_test.rb
424
430
  - test/readme_test.rb
431
+ - test/resources/acro_form.pdf
425
432
  - test/resources/blank_tab_test.xlsx
426
433
  - test/resources/bomd.csv
427
434
  - test/resources/broken.csv
@@ -495,6 +502,7 @@ signing_key:
495
502
  specification_version: 4
496
503
  summary: NDR Import
497
504
  test_files:
505
+ - test/file/acro_form_test.rb
498
506
  - test/file/base_test.rb
499
507
  - test/file/delimited_test.rb
500
508
  - test/file/docx_test.rb
@@ -517,7 +525,9 @@ test_files:
517
525
  - test/non_tabular/mapping_test.rb
518
526
  - test/non_tabular/table_test.rb
519
527
  - test/non_tabular_file_helper_test.rb
528
+ - test/pdf_form/table_test.rb
520
529
  - test/readme_test.rb
530
+ - test/resources/acro_form.pdf
521
531
  - test/resources/blank_tab_test.xlsx
522
532
  - test/resources/bomd.csv
523
533
  - test/resources/broken.csv