ndr_import 3.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/.gitignore +14 -0
- data/.rubocop.yml +27 -0
- data/.ruby-version +1 -0
- data/.travis.yml +22 -0
- data/CODE_OF_CONDUCT.md +13 -0
- data/Gemfile +4 -0
- data/Guardfile +16 -0
- data/LICENSE.txt +21 -0
- data/README.md +69 -0
- data/Rakefile +13 -0
- data/code_safety.yml +374 -0
- data/gemfiles/Gemfile.rails32 +5 -0
- data/gemfiles/Gemfile.rails32.lock +142 -0
- data/gemfiles/Gemfile.rails41 +5 -0
- data/gemfiles/Gemfile.rails41.lock +145 -0
- data/gemfiles/Gemfile.rails42 +5 -0
- data/gemfiles/Gemfile.rails42.lock +145 -0
- data/lib/ndr_import.rb +13 -0
- data/lib/ndr_import/csv_library.rb +40 -0
- data/lib/ndr_import/file/all.rb +8 -0
- data/lib/ndr_import/file/base.rb +76 -0
- data/lib/ndr_import/file/delimited.rb +86 -0
- data/lib/ndr_import/file/excel.rb +131 -0
- data/lib/ndr_import/file/pdf.rb +38 -0
- data/lib/ndr_import/file/registry.rb +50 -0
- data/lib/ndr_import/file/text.rb +52 -0
- data/lib/ndr_import/file/word.rb +30 -0
- data/lib/ndr_import/file/zip.rb +67 -0
- data/lib/ndr_import/helpers/file/delimited.rb +105 -0
- data/lib/ndr_import/helpers/file/excel.rb +181 -0
- data/lib/ndr_import/helpers/file/pdf.rb +29 -0
- data/lib/ndr_import/helpers/file/word.rb +27 -0
- data/lib/ndr_import/helpers/file/xml.rb +45 -0
- data/lib/ndr_import/helpers/file/zip.rb +44 -0
- data/lib/ndr_import/mapper.rb +220 -0
- data/lib/ndr_import/mapping_error.rb +5 -0
- data/lib/ndr_import/non_tabular/column_mapping.rb +73 -0
- data/lib/ndr_import/non_tabular/line.rb +46 -0
- data/lib/ndr_import/non_tabular/mapping.rb +35 -0
- data/lib/ndr_import/non_tabular/record.rb +99 -0
- data/lib/ndr_import/non_tabular/table.rb +193 -0
- data/lib/ndr_import/non_tabular_file_helper.rb +160 -0
- data/lib/ndr_import/standard_mappings.rb +23 -0
- data/lib/ndr_import/table.rb +179 -0
- data/lib/ndr_import/version.rb +4 -0
- data/ndr_import.gemspec +44 -0
- data/test/file/base_test.rb +54 -0
- data/test/file/delimited_test.rb +143 -0
- data/test/file/excel_test.rb +85 -0
- data/test/file/pdf_test.rb +35 -0
- data/test/file/registry_test.rb +60 -0
- data/test/file/text_test.rb +92 -0
- data/test/file/word_test.rb +35 -0
- data/test/file/zip_test.rb +47 -0
- data/test/helpers/file/delimited_test.rb +113 -0
- data/test/helpers/file/excel_test.rb +97 -0
- data/test/helpers/file/pdf_test.rb +26 -0
- data/test/helpers/file/word_test.rb +26 -0
- data/test/helpers/file/xml_test.rb +131 -0
- data/test/helpers/file/zip_test.rb +75 -0
- data/test/mapper_test.rb +551 -0
- data/test/non_tabular/mapping_test.rb +36 -0
- data/test/non_tabular/table_test.rb +510 -0
- data/test/non_tabular_file_helper_test.rb +501 -0
- data/test/readme_test.rb +53 -0
- data/test/resources/bomd.csv +3 -0
- data/test/resources/broken.csv +3 -0
- data/test/resources/filesystem_paths.yml +26 -0
- data/test/resources/flat_file.pdf +0 -0
- data/test/resources/flat_file.txt +27 -0
- data/test/resources/flat_file.yml +20 -0
- data/test/resources/hello_utf16be.txt +0 -0
- data/test/resources/hello_utf16le.txt +0 -0
- data/test/resources/hello_utf8.txt +2 -0
- data/test/resources/hello_windows.txt +2 -0
- data/test/resources/hello_world.doc +0 -0
- data/test/resources/hello_world.pdf +0 -0
- data/test/resources/hello_world.txt +2 -0
- data/test/resources/high_ascii_delimited.txt +2 -0
- data/test/resources/malformed.xml +6 -0
- data/test/resources/normal.csv +3 -0
- data/test/resources/normal.csv.zip +0 -0
- data/test/resources/normal_pipe.csv +3 -0
- data/test/resources/normal_thorn.csv +3 -0
- data/test/resources/not_a_pdf.pdf +0 -0
- data/test/resources/not_a_word_file.doc +0 -0
- data/test/resources/sample_xls.xls +0 -0
- data/test/resources/sample_xlsx.xlsx +0 -0
- data/test/resources/standard_mappings.yml +39 -0
- data/test/resources/txt_file_xls_extension.xls +1 -0
- data/test/resources/txt_file_xlsx_extension.xlsx +1 -0
- data/test/resources/utf-16be_xml.xml +0 -0
- data/test/resources/utf-16be_xml_with_declaration.xml +0 -0
- data/test/resources/utf-16le_xml.xml +0 -0
- data/test/resources/utf-8_xml.xml +9 -0
- data/test/resources/windows-1252_xml.xml +9 -0
- data/test/resources/windows.csv +5 -0
- data/test/resources/xlsx_file_xls_extension.xls +0 -0
- data/test/standard_mappings_test.rb +22 -0
- data/test/table_test.rb +288 -0
- data/test/test_helper.rb +13 -0
- metadata +443 -0
@@ -0,0 +1,23 @@
|
|
1
|
+
module NdrImport
  # NdrImport::StandardMappings stores the standard mappings hash.
  #
  # The hash must be assigned (via .mappings=) before it is read; reading it
  # beforehand raises rather than returning nil.
  class StandardMappings
    # mappings are stored as a class level instance variable
    class << self
      # Gets the standard mappings.
      #
      # Returns whatever was last assigned through .mappings=.
      # Raises RuntimeError if no mappings have been assigned yet.
      def mappings
        return @standard_mappings if defined?(@standard_mappings)

        fail 'NdrImport::StandardMappings not configured!'
      end

      # Sets the standard mappings.
      #
      # hash - the Hash of standard mappings to store.
      #
      # Raises ArgumentError (leaving any previous value untouched) if +hash+
      # is not a Hash; the message names the class that was actually given.
      def mappings=(hash)
        unless hash.is_a?(Hash)
          fail ArgumentError, "standard mappings must be a Hash, got #{hash.class}"
        end

        @standard_mappings = hash
      end
    end
  end
end
|
@@ -0,0 +1,179 @@
|
|
1
|
+
require 'ndr_import/mapper'
|
2
|
+
require 'active_support/core_ext/hash'
|
3
|
+
|
4
|
+
module NdrImport
  # This class maintains the state of a table mapping and encapsulates the logic
  # required to transform a table of data into "records". Particular attention
  # has been made to use enumerables throughout to help with the transformation
  # of large quantities of data.
  class Table
    include NdrImport::Mapper

    # The names of every option accepted by #initialize. A reader is defined
    # for each of them.
    def self.all_valid_options
      %w(canonical_name filename_pattern tablename_pattern header_lines footer_lines format klass columns)
    end

    # Instance level convenience for Table.all_valid_options
    def all_valid_options
      self.class.all_valid_options
    end

    attr_reader(*all_valid_options)
    attr_writer :notifier

    # options - a Hash keyed by any of all_valid_options (string or symbol keys).
    #
    # Raises ArgumentError if options is not a Hash, or contains unrecognised keys.
    def initialize(options = {})
      # Work on a stringified copy, so that the caller's hash is never mutated.
      options = options.stringify_keys if options.is_a?(Hash)
      validate_options(options)

      all_valid_options.each do |key|
        # This pattern is used to only set attributes if option specified,
        # which makes for more concise YAML serialization.
        options[key] && instance_variable_set("@#{key}", options[key])
      end

      @row_index = 0
    end

    # Returns a truthy value if this table mapping applies to the given file
    # (matched on its basename) and table name. A missing pattern matches
    # anything, as does a nil tablename.
    def match(filename, tablename)
      ::File.basename(filename) =~ (filename_pattern || /\A.*\z/) &&
        (tablename.nil? || tablename =~ (tablename_pattern || /\A.*\z/))
    end

    # This method transforms a table of data, given a line array/enumerator and yields
    # klass, fields and index (input row number) for each record that it would create
    # as a result of the transformation process.
    #
    # Returns an enumerator if no block is given. The row index and header
    # validity are reset at the start of every call.
    def transform(lines, &block)
      return enum_for(:transform, lines) unless block

      @row_index = 0
      @header_valid = false
      @notifier.try(:started)

      skip_footer_lines(lines, footer_line_count).each do |line|
        process_line(line, &block)
      end

      @notifier.try(:finished)
    end

    # This method processes a line of data. If it is a header line it validates it, otherwise
    # it transforms it. It also increments the row index and notifies the number of lines
    # processed.
    #
    # Raises RuntimeError if a data line is reached without any of the header
    # lines having been validated successfully.
    def process_line(line, &block)
      return enum_for(:process_line, line) unless block

      header_count = header_line_count

      if @row_index < header_count
        validate_header(line, @columns)
      else
        fail 'Header is not valid' if header_count > 0 && !header_valid?
        transform_line(line, @row_index, &block)
      end

      @row_index += 1
      @notifier.try(:processed, @row_index)
    end

    # This method transforms an incoming line of data by applying each of the klass masked
    # mappings to the line and yielding the klass and fields for each mapped klass.
    # Mapped lines whose :skip field reads 'true' are not yielded.
    def transform_line(line, index)
      return enum_for(:transform_line, line, index) unless block_given?

      masked_mappings.each do |klass, klass_mappings|
        fields = mapped_line(line, klass_mappings)
        next if fields[:skip].to_s == 'true'
        yield(klass, fields, index)
      end
    end

    # Has a header line matching the expected column names been seen?
    def header_valid?
      @header_valid == true
    end

    private

    # The number of header lines to expect; zero when the option was not specified.
    def header_line_count
      header_lines || 0
    end

    # The number of footer lines to discard; zero when the option was not specified.
    def footer_line_count
      footer_lines || 0
    end

    # This method uses a buffer to not yield the last <buffer_size> iterations of an enumerable.
    # We use it to skip footer lines (without having to convert the enumerable to an array).
    def skip_footer_lines(lines, buffer_size)
      return enum_for(:skip_footer_lines, lines, buffer_size) unless block_given?

      buffer = []
      lines.each do |line|
        buffer.unshift(line)

        # Only release the oldest line once more than buffer_size lines are held back
        yield buffer.pop if buffer.length > buffer_size
      end
    end

    # This method memoizes the klass masked mappings. Where a table level
    # klass is defined it is used with the whole mapping, otherwise the masks are generated.
    def masked_mappings
      @masked_mappings ||= begin
        if @klass
          { @klass => @columns }
        else
          column_level_klass_masked_mappings
        end
      end
    end

    # This method generates a hash of klass based mappings, one for each defined klass
    # where the whole line mapping is masked to just the data items of that klass.
    def column_level_klass_masked_mappings
      ensure_mappings_define_klass

      # Loop through each klass
      masked_mappings = {}
      @columns.map { |mapping| mapping['klass'] }.flatten.compact.uniq.each do |klass|
        # Duplicate the column mappings and do not capture fields that relate to other klasses
        masked_mappings[klass] = mask_mappings_by_klass(klass)
      end
      masked_mappings
    end

    # This method ensures that every column mapping defines a klass (unless it is a column that
    # we do not capture). It is only used where a table level klass is not defined.
    #
    # A nil column mapping cannot define a klass either, so it is reported
    # (by position) along with the others, rather than being dereferenced.
    def ensure_mappings_define_klass
      klassless_mappings = []
      @columns.each_with_index do |mapping, index|
        if mapping.nil?
          klassless_mappings << "(blank mapping at position #{index + 1})"
        elsif mapping['klass'].nil? && !mapping['do_not_capture']
          klassless_mappings << (mapping['column'] || mapping['standard_mapping'])
        end
      end

      return if klassless_mappings.empty?

      # All column mappings for the single item file require a klass definition.
      fail "Missing klass for column(s): #{klassless_mappings.to_sentence}"
    end

    # This method duplicates the mappings and applies a do_not_capture mask to those that do not
    # relate to this klass, returning the masked mappings
    def mask_mappings_by_klass(klass)
      @columns.dup.map do |mapping|
        if Array(mapping['klass']).include?(klass)
          mapping
        else
          { 'do_not_capture' => true }
        end
      end
    end

    # Raises ArgumentError unless hash is a Hash containing only recognised option keys.
    def validate_options(hash)
      fail ArgumentError unless hash.is_a?(Hash)

      unrecognised_options = hash.keys - all_valid_options
      return if unrecognised_options.empty?
      fail ArgumentError, "Unrecognised options: #{unrecognised_options.inspect}"
    end

    # if there is a header, then check the column headings are as expected in the correct order
    #
    # Raises RuntimeError if the number of cells differs from the number of columns.
    def validate_header(line, column_mappings)
      columns = column_names(column_mappings)
      fail 'Number of columns does not match' if line.length != columns.length

      # Header cells can be blank (nil) or non-strings; coerce them before
      # normalising case so that such a line is simply treated as not matching.
      return unless line.map { |cell| cell.to_s.downcase } == columns
      @header_valid = true
    end

    # returns the column names as we expect to receive them
    def column_names(column_mappings)
      column_mappings.map { |c| (c['column'] || c['standard_mapping']).downcase }
    end
  end # class Table
end
|
data/ndr_import.gemspec
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
# coding: utf-8
# Make the gem's own lib directory loadable, so the version constant can be read.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'ndr_import/version'

Gem::Specification.new do |gem|
  gem.name        = 'ndr_import'
  gem.version     = NdrImport::VERSION
  gem.authors     = ['NCRS Development Team']
  gem.email       = []
  gem.summary     = 'NDR Import'
  gem.description = 'NDR ETL Importer'
  gem.homepage    = 'https://github.com/PublicHealthEngland/ndr_import'
  gem.license     = 'MIT'

  # Package every git-tracked file, except previously built versions of this gem (pkg/).
  tracked_files     = `git ls-files -z`.split("\x0")
  gem.files         = tracked_files.reject { |path| path =~ %r{^pkg/} }
  gem.executables   = gem.files.grep(%r{^bin/}).map { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ['lib']

  # Runtime dependencies, as [name, *version requirements]
  [
    ['activesupport', '>= 3.2.18', '< 5.0.0'],
    ['ndr_support', '~> 3.0'],
    ['rubyzip', '~> 1.1'],
    ['roo', '~> 2.0'],
    # roo requires nokogiri >=1.5, but nokogiri (1.6.1) requires Ruby version >= 1.9.2.
    ['nokogiri', '~> 1.6'],
    ['roo-xls'],
    ['spreadsheet', '1.0.3'], # Aligning with encore
    ['pdf-reader', '1.2.0'],
    ['msworddoc-extractor', '0.2.0']
  ].each do |name, *requirements|
    gem.add_dependency(name, *requirements)
  end

  gem.required_ruby_version = '>= 1.9.3'

  # Development-only dependencies, as [name, *version requirements]
  development_dependencies = [
    ['bundler', '~> 1.7'],
    ['rake', '~> 10.0'],
    ['minitest'],
    ['guard'],
    ['guard-rubocop'],
    ['guard-test']
  ]
  # The notifier is only added when running on a darwin platform
  development_dependencies << ['terminal-notifier-guard'] if RUBY_PLATFORM =~ /darwin/
  development_dependencies << ['simplecov']

  development_dependencies.each do |name, *requirements|
    gem.add_development_dependency(name, *requirements)
  end
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
require 'ndr_import/file/registry'
|
3
|
+
|
4
|
+
module NdrImport
  module File
    # Tests for the base file handler
    class BaseTest < ActiveSupport::TestCase
      # A single table file handler which, for test purposes,
      # deliberately does not implement anything itself.
      class SingleTableLazyDeveloper < ::NdrImport::File::Base
      end

      def setup
        @permanent_test_files = SafePath.new('permanent_test_files')
      end

      test 'should fail on not implementing rows' do
        begin
          Registry.register(SingleTableLazyDeveloper, 'lazy_dev')

          # Enumerating the sheet is expected to be what raises
          error = assert_raises(RuntimeError) do
            path = @permanent_test_files.join('normal.csv')
            lazy_handler = SingleTableLazyDeveloper.new(path, 'lazy_dev')

            lazy_handler.tables.each do |tablename, sheet|
              assert_nil tablename
              assert_instance_of Enumerator, sheet
              sheet.to_a
            end
          end

          expected = 'Implement NdrImport::File::BaseTest::SingleTableLazyDeveloper#rows'
          assert_equal expected, error.message
        ensure
          # Always remove the temporary registration again
          Registry.unregister('lazy_dev')
        end
      end

      test 'should not fail when set up with an readable safepath' do
        assert Base.new(SafePath.new('test_space_r'), nil)
        assert Base.new(SafePath.new('test_space_rw'), nil)
      end

      test 'should fail when set up with an unreadable safepath' do
        assert_raises(SecurityError) do
          Base.new(SafePath.new('test_space_w'), nil)
        end
      end

      test 'should fail when set up with a non-safepath' do
        error = assert_raises(ArgumentError) do
          Base.new(NdrImport.root, nil)
        end

        assert_match(/file_name should be of type SafePath, but it is String/, error.message)
      end
    end
  end
end
|
@@ -0,0 +1,143 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
require 'ndr_import/file/delimited'
|
3
|
+
|
4
|
+
module NdrImport
  module File
    # Tests for the delimited file handler
    class DelimitedTest < ActiveSupport::TestCase
      def setup
        @permanent_test_files = SafePath.new('permanent_test_files')
      end

      test 'should read csv correctly' do
        handler_for('normal.csv', 'csv', 'col_sep' => nil).tables.each do |tablename, sheet|
          assert_nil tablename
          assert_alphabet_grid(sheet.to_a)
        end
      end

      test 'should read pipe correctly' do
        handler_for('normal_pipe.csv', 'pipe', 'col_sep' => nil).tables.each do |tablename, sheet|
          assert_nil tablename
          assert_alphabet_grid(sheet.to_a)
        end
      end

      test 'should read thorn correctly' do
        handler_for('normal_thorn.csv', 'thorn', 'col_sep' => nil).tables.each do |tablename, sheet|
          assert_nil tablename
          assert_alphabet_grid(sheet.to_a)
        end
      end

      test 'should read csv with a BOM' do
        handler_for('bomd.csv', 'csv', 'col_sep' => nil).tables.each do |tablename, sheet|
          assert_nil tablename
          assert_instance_of Enumerator, sheet
          assert_alphabet_grid(sheet.to_a)
        end
      end

      test 'should read windows-1252 csv' do
        handler_for('windows.csv', 'csv', 'col_sep' => nil).tables.each do |tablename, sheet|
          assert_nil tablename
          assert_instance_of Enumerator, sheet
          assert_equal 1, sheet.to_a.length
        end
      end

      test 'should read acsii-delimited csv' do
        handler = handler_for('high_ascii_delimited.txt', 'csv', 'col_sep' => "\xfe")
        handler.tables.each do |tablename, sheet|
          assert_nil tablename
          assert_instance_of Enumerator, sheet

          rows = sheet.to_a
          assert_equal 2, rows.length
          assert_equal '1234567890', rows[0][1]
          assert_equal '1234567890', rows[1][1]
        end
      end

      test 'should read line-by-line' do
        collected = []

        handler_for('normal.csv', 'csv').tables.each do |tablename, sheet|
          assert_nil tablename
          assert_instance_of Enumerator, sheet
          sheet.each { |row| collected << row }
        end

        assert_alphabet_grid(collected)
      end

      test 'should report addition details upon failure to slurp csv' do
        error = assert_raises(CSVLibrary::MalformedCSVError) do
          handler_for('broken.csv', 'csv', 'col_sep' => nil).tables.each do |tablename, sheet|
            assert_nil tablename
            assert_instance_of Enumerator, sheet
            sheet.to_a
          end
        end

        assert_equal broken_csv_message, error.message
      end

      test 'should report addition details upon failure to read csv line-by-line' do
        collected = []

        error = assert_raises(CSVLibrary::MalformedCSVError) do
          handler_for('broken.csv', 'csv').tables.each do |tablename, sheet|
            assert_nil tablename
            assert_instance_of Enumerator, sheet
            sheet.each { |row| collected << row }
          end
        end

        assert collected.empty?, 'no rows should have been yielded'
        assert_equal broken_csv_message, error.message
      end

      # Helpers are defined after the tests, and left public, so that the
      # visibility of the test methods above is untouched.

      # Builds a delimited handler for the named fixture file; any further
      # arguments (format, options) are passed to the handler as given.
      def handler_for(filename, *handler_args)
        path = @permanent_test_files.join(filename)
        NdrImport::File::Delimited.new(path, *handler_args)
      end

      # Asserts the first three rows are A..Z, then twenty-six '1's, then twenty-six '2's
      def assert_alphabet_grid(rows)
        assert_equal(('A'..'Z').to_a, rows[0])
        assert_equal ['1'] * 26, rows[1]
        assert_equal ['2'] * 26, rows[2]
      end

      # The message expected when reading the broken.csv fixture
      def broken_csv_message
        'Invalid CSV format on row 2 of broken.csv. ' \
        'Original: Missing or stray quote in line 2'
      end
    end
  end
end
|