ndr_import 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/.gitignore +14 -0
- data/.rubocop.yml +27 -0
- data/.ruby-version +1 -0
- data/.travis.yml +22 -0
- data/CODE_OF_CONDUCT.md +13 -0
- data/Gemfile +4 -0
- data/Guardfile +16 -0
- data/LICENSE.txt +21 -0
- data/README.md +69 -0
- data/Rakefile +13 -0
- data/code_safety.yml +374 -0
- data/gemfiles/Gemfile.rails32 +5 -0
- data/gemfiles/Gemfile.rails32.lock +142 -0
- data/gemfiles/Gemfile.rails41 +5 -0
- data/gemfiles/Gemfile.rails41.lock +145 -0
- data/gemfiles/Gemfile.rails42 +5 -0
- data/gemfiles/Gemfile.rails42.lock +145 -0
- data/lib/ndr_import.rb +13 -0
- data/lib/ndr_import/csv_library.rb +40 -0
- data/lib/ndr_import/file/all.rb +8 -0
- data/lib/ndr_import/file/base.rb +76 -0
- data/lib/ndr_import/file/delimited.rb +86 -0
- data/lib/ndr_import/file/excel.rb +131 -0
- data/lib/ndr_import/file/pdf.rb +38 -0
- data/lib/ndr_import/file/registry.rb +50 -0
- data/lib/ndr_import/file/text.rb +52 -0
- data/lib/ndr_import/file/word.rb +30 -0
- data/lib/ndr_import/file/zip.rb +67 -0
- data/lib/ndr_import/helpers/file/delimited.rb +105 -0
- data/lib/ndr_import/helpers/file/excel.rb +181 -0
- data/lib/ndr_import/helpers/file/pdf.rb +29 -0
- data/lib/ndr_import/helpers/file/word.rb +27 -0
- data/lib/ndr_import/helpers/file/xml.rb +45 -0
- data/lib/ndr_import/helpers/file/zip.rb +44 -0
- data/lib/ndr_import/mapper.rb +220 -0
- data/lib/ndr_import/mapping_error.rb +5 -0
- data/lib/ndr_import/non_tabular/column_mapping.rb +73 -0
- data/lib/ndr_import/non_tabular/line.rb +46 -0
- data/lib/ndr_import/non_tabular/mapping.rb +35 -0
- data/lib/ndr_import/non_tabular/record.rb +99 -0
- data/lib/ndr_import/non_tabular/table.rb +193 -0
- data/lib/ndr_import/non_tabular_file_helper.rb +160 -0
- data/lib/ndr_import/standard_mappings.rb +23 -0
- data/lib/ndr_import/table.rb +179 -0
- data/lib/ndr_import/version.rb +4 -0
- data/ndr_import.gemspec +44 -0
- data/test/file/base_test.rb +54 -0
- data/test/file/delimited_test.rb +143 -0
- data/test/file/excel_test.rb +85 -0
- data/test/file/pdf_test.rb +35 -0
- data/test/file/registry_test.rb +60 -0
- data/test/file/text_test.rb +92 -0
- data/test/file/word_test.rb +35 -0
- data/test/file/zip_test.rb +47 -0
- data/test/helpers/file/delimited_test.rb +113 -0
- data/test/helpers/file/excel_test.rb +97 -0
- data/test/helpers/file/pdf_test.rb +26 -0
- data/test/helpers/file/word_test.rb +26 -0
- data/test/helpers/file/xml_test.rb +131 -0
- data/test/helpers/file/zip_test.rb +75 -0
- data/test/mapper_test.rb +551 -0
- data/test/non_tabular/mapping_test.rb +36 -0
- data/test/non_tabular/table_test.rb +510 -0
- data/test/non_tabular_file_helper_test.rb +501 -0
- data/test/readme_test.rb +53 -0
- data/test/resources/bomd.csv +3 -0
- data/test/resources/broken.csv +3 -0
- data/test/resources/filesystem_paths.yml +26 -0
- data/test/resources/flat_file.pdf +0 -0
- data/test/resources/flat_file.txt +27 -0
- data/test/resources/flat_file.yml +20 -0
- data/test/resources/hello_utf16be.txt +0 -0
- data/test/resources/hello_utf16le.txt +0 -0
- data/test/resources/hello_utf8.txt +2 -0
- data/test/resources/hello_windows.txt +2 -0
- data/test/resources/hello_world.doc +0 -0
- data/test/resources/hello_world.pdf +0 -0
- data/test/resources/hello_world.txt +2 -0
- data/test/resources/high_ascii_delimited.txt +2 -0
- data/test/resources/malformed.xml +6 -0
- data/test/resources/normal.csv +3 -0
- data/test/resources/normal.csv.zip +0 -0
- data/test/resources/normal_pipe.csv +3 -0
- data/test/resources/normal_thorn.csv +3 -0
- data/test/resources/not_a_pdf.pdf +0 -0
- data/test/resources/not_a_word_file.doc +0 -0
- data/test/resources/sample_xls.xls +0 -0
- data/test/resources/sample_xlsx.xlsx +0 -0
- data/test/resources/standard_mappings.yml +39 -0
- data/test/resources/txt_file_xls_extension.xls +1 -0
- data/test/resources/txt_file_xlsx_extension.xlsx +1 -0
- data/test/resources/utf-16be_xml.xml +0 -0
- data/test/resources/utf-16be_xml_with_declaration.xml +0 -0
- data/test/resources/utf-16le_xml.xml +0 -0
- data/test/resources/utf-8_xml.xml +9 -0
- data/test/resources/windows-1252_xml.xml +9 -0
- data/test/resources/windows.csv +5 -0
- data/test/resources/xlsx_file_xls_extension.xls +0 -0
- data/test/standard_mappings_test.rb +22 -0
- data/test/table_test.rb +288 -0
- data/test/test_helper.rb +13 -0
- metadata +443 -0
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
module NdrImport
  # NdrImport::StandardMappings holds the standard mappings hash. The hash is
  # kept in a class level instance variable, so it has to be assigned (with
  # `mappings=`) before it can be read.
  class StandardMappings
    # Returns the standard mappings hash.
    # Raises RuntimeError if no mappings have been assigned yet.
    def self.mappings
      raise 'NdrImport::StandardMappings not configured!' unless defined?(@standard_mappings)

      @standard_mappings
    end

    # Stores the given hash as the standard mappings.
    # Raises ArgumentError unless the argument is a Hash.
    def self.mappings=(hash)
      raise ArgumentError unless hash.is_a?(Hash)

      @standard_mappings = hash
    end
  end
end
|
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
require 'ndr_import/mapper'
|
|
2
|
+
require 'active_support/core_ext/hash'
|
|
3
|
+
|
|
4
|
+
module NdrImport
  # This class maintains the state of a table mapping and encapsulates the logic
  # required to transform a table of data into "records". Particular attention
  # has been made to use enumerables throughout to help with the transformation
  # of large quantities of data.
  class Table
    include NdrImport::Mapper

    # Returns the names (as strings) of every option a table mapping accepts.
    def self.all_valid_options
      %w(canonical_name filename_pattern tablename_pattern header_lines footer_lines format klass columns)
    end

    # Instance level shortcut to Table.all_valid_options.
    def all_valid_options
      self.class.all_valid_options
    end

    attr_reader(*all_valid_options)
    attr_writer :notifier

    # Builds a table mapping from an options hash. The keys of the hash are
    # stringified in place and must all be listed in all_valid_options,
    # otherwise an ArgumentError is raised. ArgumentError is also raised if
    # options is not a Hash.
    def initialize(options = {})
      options.stringify_keys! if options.is_a?(Hash)
      validate_options(options)

      all_valid_options.each do |key|
        # This pattern is used to only set attributes if option specified,
        # which makes for more concise YAML serialization.
        options[key] && instance_variable_set("@#{key}", options[key])
      end

      @row_index = 0
    end

    # Returns a truthy value when the basename of filename matches
    # filename_pattern and tablename is either nil or matches tablename_pattern.
    # A pattern that has not been configured matches anything.
    def match(filename, tablename)
      ::File.basename(filename) =~ (filename_pattern || /\A.*\z/) &&
        (tablename.nil? || tablename =~ (tablename_pattern || /\A.*\z/))
    end

    # This method transforms a table of data, given a line array/enumerator and yields
    # klass, fields and index (input row number) for each record that it would create
    # as a result of the transformation process. Without a block it returns an enumerator.
    # The notifier (if one has been set) is told when the transformation starts and finishes.
    def transform(lines, &block)
      return enum_for(:transform, lines) unless block

      @row_index = 0
      @header_valid = false
      @notifier.try(:started)

      skip_footer_lines(lines, footer_lines).each do |line|
        process_line(line, &block)
      end

      @notifier.try(:finished)
    end

    # This method processes a line of data. If it is a header line it validates it,
    # otherwise it transforms it. It also increments the row index and notifies the
    # number of lines processed. Raises RuntimeError ('Header is not valid') if a data
    # line is reached when header lines were expected but none of them matched.
    def process_line(line, &block)
      return enum_for(:process_line, line) unless block

      # header_lines is nil when the option was not supplied; treat that as
      # "no header" rather than failing on a comparison with nil.
      header_line_count = header_lines || 0

      if @row_index < header_line_count
        validate_header(line, @columns)
      else
        fail 'Header is not valid' if header_line_count > 0 && !header_valid?
        transform_line(line, @row_index, &block)
      end

      @row_index += 1
      @notifier.try(:processed, @row_index)
    end

    # This method transforms an incoming line of data by applying each of the klass masked
    # mappings to the line and yielding the klass, fields and index for each mapped klass.
    # A klass is not yielded when its mapped fields carry a :skip value of 'true'.
    # Without a block it returns an enumerator.
    def transform_line(line, index)
      return enum_for(:transform_line, line, index) unless block_given?

      masked_mappings.each do |klass, klass_mappings|
        fields = mapped_line(line, klass_mappings)
        next if fields[:skip].to_s == 'true'
        yield(klass, fields, index)
      end
    end

    # Returns true only once a header line has been matched by validate_header.
    def header_valid?
      @header_valid == true
    end

    private

    # This method uses a buffer to not yield the last <buffer_size> iterations of an enumerable.
    # We use it to skip footer lines (without having to convert the enumerable to an array).
    # A nil buffer_size (footer_lines not configured) means that no lines are skipped.
    def skip_footer_lines(lines, buffer_size)
      return enum_for(:skip_footer_lines, lines, buffer_size) unless block_given?

      held_back = buffer_size || 0
      buffer = []
      lines.each do |line|
        buffer.unshift(line)

        yield buffer.pop if buffer.length > held_back
      end
    end

    # This method memoizes the klass masked mappings. Where a table level
    # klass is defined it is used with the whole mapping, otherwise the masks are generated.
    def masked_mappings
      @masked_mappings ||= begin
        if @klass
          { @klass => @columns }
        else
          column_level_klass_masked_mappings
        end
      end
    end

    # This method generates a hash of klass based mappings, one for each defined klass
    # where the whole line mapping is masked to just the data items of that klass.
    def column_level_klass_masked_mappings
      ensure_mappings_define_klass

      # Loop through each klass
      masked_mappings = {}
      @columns.map { |mapping| mapping['klass'] }.flatten.compact.uniq.each do |klass|
        # Duplicate the column mappings and do not capture fields that relate to other klasses
        masked_mappings[klass] = mask_mappings_by_klass(klass)
      end
      masked_mappings
    end

    # This method ensures that every column mapping defines a klass (unless it is a column that
    # we do not capture). It is only used where a table level klass is not defined.
    # Raises RuntimeError naming the offending columns otherwise.
    #
    # NOTE(review): a nil mapping passes the select below but would then raise
    # NoMethodError in the reject - confirm whether nil column mappings can occur.
    def ensure_mappings_define_klass
      klassless_mappings = @columns.
                           select { |mapping| mapping.nil? || mapping['klass'].nil? }.
                           reject { |mapping| mapping['do_not_capture'] }.
                           map { |mapping| mapping['column'] || mapping['standard_mapping'] }

      return if klassless_mappings.empty?

      # All column mappings for the single item file require a klass definition.
      fail "Missing klass for column(s): #{klassless_mappings.to_sentence}"
    end

    # This method duplicates the mappings and applies a do_not_capture mask to those that do not
    # relate to this klass, returning the masked mappings
    def mask_mappings_by_klass(klass)
      @columns.dup.map do |mapping|
        if Array(mapping['klass']).include?(klass)
          mapping
        else
          { 'do_not_capture' => true }
        end
      end
    end

    # Raises ArgumentError unless hash is a Hash whose keys are all valid options.
    def validate_options(hash)
      fail ArgumentError unless hash.is_a?(Hash)

      unrecognised_options = hash.keys - all_valid_options
      return if unrecognised_options.empty?
      fail ArgumentError, "Unrecognised options: #{unrecognised_options.inspect}"
    end

    # if there is a header, then check the column headings are as expected in the correct order.
    # Raises RuntimeError if the number of columns differs; otherwise flags the header as
    # valid when the (case insensitive) headings match.
    def validate_header(line, column_mappings)
      columns = column_names(column_mappings)
      fail 'Number of columns does not match' if line.length != columns.length

      # Header cells may be blank (nil); coerce to a string so that a blank
      # heading is treated as a mismatch instead of raising NoMethodError.
      return unless line.map { |heading| heading.to_s.downcase } == columns
      @header_valid = true
    end

    # returns the (downcased) column names as we expect to receive them
    def column_names(column_mappings)
      column_mappings.map { |c| (c['column'] || c['standard_mapping']).downcase }
    end
  end # class Table
end
|
data/ndr_import.gemspec
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# coding: utf-8
# Make the gem's own lib directory loadable so that the version constant can be required.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'ndr_import/version'

Gem::Specification.new do |gem|
  gem.name        = 'ndr_import'
  gem.version     = NdrImport::VERSION
  gem.authors     = ['NCRS Development Team']
  gem.email       = []
  gem.summary     = 'NDR Import'
  gem.description = 'NDR ETL Importer'
  gem.homepage    = 'https://github.com/PublicHealthEngland/ndr_import'
  gem.license     = 'MIT'

  # Exclude older versions of this gem from the package.
  tracked_files     = `git ls-files -z`.split("\x0")
  gem.files         = tracked_files.reject { |s| s =~ %r{^pkg/} }
  gem.executables   = gem.files.grep(%r{^bin/}) { |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ['lib']

  gem.required_ruby_version = '>= 1.9.3'

  # Runtime dependencies, declared in order as [name, *version requirements].
  runtime_dependencies = [
    ['activesupport', '>= 3.2.18', '< 5.0.0'],
    ['ndr_support', '~> 3.0'],
    ['rubyzip', '~> 1.1'],
    ['roo', '~> 2.0'],
    # roo requires nokogiri >=1.5, but nokogiri (1.6.1) requires Ruby version >= 1.9.2.
    ['nokogiri', '~> 1.6'],
    ['roo-xls'],
    ['spreadsheet', '1.0.3'], # Aligning with encore
    ['pdf-reader', '1.2.0'],
    ['msworddoc-extractor', '0.2.0']
  ]
  runtime_dependencies.each do |name, *requirements|
    gem.add_dependency(name, *requirements)
  end

  # Development dependencies, declared in order as [name, *version requirements].
  development_dependencies = [
    ['bundler', '~> 1.7'],
    ['rake', '~> 10.0'],
    ['minitest'],
    ['guard'],
    ['guard-rubocop'],
    ['guard-test']
  ]
  # The terminal notifier is only declared when building on a darwin platform.
  development_dependencies << ['terminal-notifier-guard'] if RUBY_PLATFORM =~ /darwin/
  development_dependencies << ['simplecov']

  development_dependencies.each do |name, *requirements|
    gem.add_development_dependency(name, *requirements)
  end
end
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
require 'test_helper'
|
|
2
|
+
require 'ndr_import/file/registry'
|
|
3
|
+
|
|
4
|
+
module NdrImport
  module File
    # Tests for the base file handler
    class BaseTest < ActiveSupport::TestCase
      # Handles a single table file, but for test purposes,
      # I couldn't be bothered to implement it fully
      class SingleTableLazyDeveloper < ::NdrImport::File::Base; end

      def setup
        @permanent_test_files = SafePath.new('permanent_test_files')
      end

      test 'should fail on not implementing rows' do
        begin
          Registry.register(SingleTableLazyDeveloper, 'lazy_dev')

          # Enumerating the sheet is what should trigger the error.
          error = assert_raises(RuntimeError) do
            csv_path = @permanent_test_files.join('normal.csv')
            lazy_handler = SingleTableLazyDeveloper.new(csv_path, 'lazy_dev')

            lazy_handler.tables.each do |tablename, sheet|
              assert_nil tablename
              assert_instance_of Enumerator, sheet
              sheet.to_a
            end
          end

          expected = 'Implement NdrImport::File::BaseTest::SingleTableLazyDeveloper#rows'
          assert_equal expected, error.message
        ensure
          # Always deregister the temporary handler, whatever the outcome.
          Registry.unregister('lazy_dev')
        end
      end

      test 'should not fail when set up with an readable safepath' do
        %w(test_space_r test_space_rw).each do |space|
          assert Base.new(SafePath.new(space), nil)
        end
      end

      test 'should fail when set up with an unreadable safepath' do
        assert_raises(SecurityError) do
          Base.new(SafePath.new('test_space_w'), nil)
        end
      end

      test 'should fail when set up with a non-safepath' do
        error = assert_raises(ArgumentError) do
          Base.new(NdrImport.root, nil)
        end

        assert_match(/file_name should be of type SafePath, but it is String/, error.message)
      end
    end
  end
end
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
require 'test_helper'
|
|
2
|
+
require 'ndr_import/file/delimited'
|
|
3
|
+
|
|
4
|
+
module NdrImport
  module File
    # Tests for the delimited file handler
    class DelimitedTest < ActiveSupport::TestCase
      def setup
        @permanent_test_files = SafePath.new('permanent_test_files')
      end

      test 'should read csv correctly' do
        handler_for('normal.csv', 'csv', 'col_sep' => nil).tables.each do |tablename, sheet|
          assert_nil tablename
          assert_alphabet_rows sheet.to_a
        end
      end

      test 'should read pipe correctly' do
        handler_for('normal_pipe.csv', 'pipe', 'col_sep' => nil).tables.each do |tablename, sheet|
          assert_nil tablename
          assert_alphabet_rows sheet.to_a
        end
      end

      test 'should read thorn correctly' do
        handler_for('normal_thorn.csv', 'thorn', 'col_sep' => nil).tables.each do |tablename, sheet|
          assert_nil tablename
          assert_alphabet_rows sheet.to_a
        end
      end

      test 'should read csv with a BOM' do
        handler_for('bomd.csv', 'csv', 'col_sep' => nil).tables.each do |tablename, sheet|
          assert_nil tablename
          assert_instance_of Enumerator, sheet
          assert_alphabet_rows sheet.to_a
        end
      end

      test 'should read windows-1252 csv' do
        handler_for('windows.csv', 'csv', 'col_sep' => nil).tables.each do |tablename, sheet|
          assert_nil tablename
          assert_instance_of Enumerator, sheet
          assert_equal 1, sheet.to_a.length
        end
      end

      test 'should read acsii-delimited csv' do
        handler = handler_for('high_ascii_delimited.txt', 'csv', 'col_sep' => "\xfe")

        handler.tables.each do |tablename, sheet|
          assert_nil tablename
          assert_instance_of Enumerator, sheet

          lines = sheet.to_a
          assert_equal 2, lines.length
          assert_equal '1234567890', lines[0][1]
          assert_equal '1234567890', lines[1][1]
        end
      end

      test 'should read line-by-line' do
        collected = []

        handler_for('normal.csv', 'csv').tables.each do |tablename, sheet|
          assert_nil tablename
          assert_instance_of Enumerator, sheet
          sheet.each { |row| collected << row }
        end

        assert_alphabet_rows collected
      end

      test 'should report addition details upon failure to slurp csv' do
        error = assert_raises(CSVLibrary::MalformedCSVError) do
          handler_for('broken.csv', 'csv', 'col_sep' => nil).tables.each do |tablename, sheet|
            assert_nil tablename
            assert_instance_of Enumerator, sheet
            sheet.to_a
          end
        end

        assert_equal broken_csv_message, error.message
      end

      test 'should report addition details upon failure to read csv line-by-line' do
        rows_yielded = []

        error = assert_raises(CSVLibrary::MalformedCSVError) do
          handler_for('broken.csv', 'csv').tables.each do |tablename, sheet|
            assert_nil tablename
            assert_instance_of Enumerator, sheet
            sheet.each { |row| rows_yielded << row }
          end
        end

        assert rows_yielded.empty?, 'no rows should have been yielded'
        assert_equal broken_csv_message, error.message
      end

      private

      # Builds a delimited handler for the named file in the permanent test files
      # directory, passing on any extra constructor arguments unchanged.
      def handler_for(filename, format, *extra)
        NdrImport::File::Delimited.new(@permanent_test_files.join(filename), format, *extra)
      end

      # Asserts that the first three rows are A..Z, then twenty-six 1s, then twenty-six 2s.
      def assert_alphabet_rows(rows)
        assert_equal(('A'..'Z').to_a, rows[0])
        assert_equal ['1'] * 26, rows[1]
        assert_equal ['2'] * 26, rows[2]
      end

      # The message expected when reading the broken.csv test file fails.
      def broken_csv_message
        'Invalid CSV format on row 2 of broken.csv. ' \
        'Original: Missing or stray quote in line 2'
      end
    end
  end
end
|