ndr_import 3.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/.gitignore +14 -0
- data/.rubocop.yml +27 -0
- data/.ruby-version +1 -0
- data/.travis.yml +22 -0
- data/CODE_OF_CONDUCT.md +13 -0
- data/Gemfile +4 -0
- data/Guardfile +16 -0
- data/LICENSE.txt +21 -0
- data/README.md +69 -0
- data/Rakefile +13 -0
- data/code_safety.yml +374 -0
- data/gemfiles/Gemfile.rails32 +5 -0
- data/gemfiles/Gemfile.rails32.lock +142 -0
- data/gemfiles/Gemfile.rails41 +5 -0
- data/gemfiles/Gemfile.rails41.lock +145 -0
- data/gemfiles/Gemfile.rails42 +5 -0
- data/gemfiles/Gemfile.rails42.lock +145 -0
- data/lib/ndr_import.rb +13 -0
- data/lib/ndr_import/csv_library.rb +40 -0
- data/lib/ndr_import/file/all.rb +8 -0
- data/lib/ndr_import/file/base.rb +76 -0
- data/lib/ndr_import/file/delimited.rb +86 -0
- data/lib/ndr_import/file/excel.rb +131 -0
- data/lib/ndr_import/file/pdf.rb +38 -0
- data/lib/ndr_import/file/registry.rb +50 -0
- data/lib/ndr_import/file/text.rb +52 -0
- data/lib/ndr_import/file/word.rb +30 -0
- data/lib/ndr_import/file/zip.rb +67 -0
- data/lib/ndr_import/helpers/file/delimited.rb +105 -0
- data/lib/ndr_import/helpers/file/excel.rb +181 -0
- data/lib/ndr_import/helpers/file/pdf.rb +29 -0
- data/lib/ndr_import/helpers/file/word.rb +27 -0
- data/lib/ndr_import/helpers/file/xml.rb +45 -0
- data/lib/ndr_import/helpers/file/zip.rb +44 -0
- data/lib/ndr_import/mapper.rb +220 -0
- data/lib/ndr_import/mapping_error.rb +5 -0
- data/lib/ndr_import/non_tabular/column_mapping.rb +73 -0
- data/lib/ndr_import/non_tabular/line.rb +46 -0
- data/lib/ndr_import/non_tabular/mapping.rb +35 -0
- data/lib/ndr_import/non_tabular/record.rb +99 -0
- data/lib/ndr_import/non_tabular/table.rb +193 -0
- data/lib/ndr_import/non_tabular_file_helper.rb +160 -0
- data/lib/ndr_import/standard_mappings.rb +23 -0
- data/lib/ndr_import/table.rb +179 -0
- data/lib/ndr_import/version.rb +4 -0
- data/ndr_import.gemspec +44 -0
- data/test/file/base_test.rb +54 -0
- data/test/file/delimited_test.rb +143 -0
- data/test/file/excel_test.rb +85 -0
- data/test/file/pdf_test.rb +35 -0
- data/test/file/registry_test.rb +60 -0
- data/test/file/text_test.rb +92 -0
- data/test/file/word_test.rb +35 -0
- data/test/file/zip_test.rb +47 -0
- data/test/helpers/file/delimited_test.rb +113 -0
- data/test/helpers/file/excel_test.rb +97 -0
- data/test/helpers/file/pdf_test.rb +26 -0
- data/test/helpers/file/word_test.rb +26 -0
- data/test/helpers/file/xml_test.rb +131 -0
- data/test/helpers/file/zip_test.rb +75 -0
- data/test/mapper_test.rb +551 -0
- data/test/non_tabular/mapping_test.rb +36 -0
- data/test/non_tabular/table_test.rb +510 -0
- data/test/non_tabular_file_helper_test.rb +501 -0
- data/test/readme_test.rb +53 -0
- data/test/resources/bomd.csv +3 -0
- data/test/resources/broken.csv +3 -0
- data/test/resources/filesystem_paths.yml +26 -0
- data/test/resources/flat_file.pdf +0 -0
- data/test/resources/flat_file.txt +27 -0
- data/test/resources/flat_file.yml +20 -0
- data/test/resources/hello_utf16be.txt +0 -0
- data/test/resources/hello_utf16le.txt +0 -0
- data/test/resources/hello_utf8.txt +2 -0
- data/test/resources/hello_windows.txt +2 -0
- data/test/resources/hello_world.doc +0 -0
- data/test/resources/hello_world.pdf +0 -0
- data/test/resources/hello_world.txt +2 -0
- data/test/resources/high_ascii_delimited.txt +2 -0
- data/test/resources/malformed.xml +6 -0
- data/test/resources/normal.csv +3 -0
- data/test/resources/normal.csv.zip +0 -0
- data/test/resources/normal_pipe.csv +3 -0
- data/test/resources/normal_thorn.csv +3 -0
- data/test/resources/not_a_pdf.pdf +0 -0
- data/test/resources/not_a_word_file.doc +0 -0
- data/test/resources/sample_xls.xls +0 -0
- data/test/resources/sample_xlsx.xlsx +0 -0
- data/test/resources/standard_mappings.yml +39 -0
- data/test/resources/txt_file_xls_extension.xls +1 -0
- data/test/resources/txt_file_xlsx_extension.xlsx +1 -0
- data/test/resources/utf-16be_xml.xml +0 -0
- data/test/resources/utf-16be_xml_with_declaration.xml +0 -0
- data/test/resources/utf-16le_xml.xml +0 -0
- data/test/resources/utf-8_xml.xml +9 -0
- data/test/resources/windows-1252_xml.xml +9 -0
- data/test/resources/windows.csv +5 -0
- data/test/resources/xlsx_file_xls_extension.xls +0 -0
- data/test/standard_mappings_test.rb +22 -0
- data/test/table_test.rb +288 -0
- data/test/test_helper.rb +13 -0
- metadata +443 -0
@@ -0,0 +1,27 @@
|
|
1
|
+
require 'ndr_support/safe_file'
|
2
|
+
|
3
|
+
module NdrImport
|
4
|
+
module Helpers
|
5
|
+
module File
|
6
|
+
# This mixin adds Word document functionality to unified importers.
|
7
|
+
# It provides a file reader method.
|
8
|
+
# Currently only works on .doc (97-2003) files, not .docx
|
9
|
+
module Word
|
10
|
+
private
|
11
|
+
|
12
|
+
# Reads the Word document at +path+ and returns its text as an array of
# lines (the extractor's whole_contents split on "\n").
#
# The extractor gem is required inside the method, so it is only loaded when
# a Word file is actually read.
#
# Any StandardError raised while loading or reading the document is re-raised
# as a RuntimeError whose message is the file's basename followed by the
# original error's class and message.
def read_word_file(path)
  require 'msworddoc-extractor'

  begin
    word_path = SafeFile.safepath_to_string(path)
    MSWordDoc::Extractor.load(word_path).whole_contents.split("\n")
  rescue => error
    raise("#{SafeFile.basename(path)} [#{error.class}: #{error.message}]")
  end
end
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
require 'ndr_support/safe_file'
|
2
|
+
require 'ndr_support/utf8_encoding'
|
3
|
+
|
4
|
+
module NdrImport
|
5
|
+
module Helpers
|
6
|
+
module File
|
7
|
+
# This mixin adds XML functionality to unified importers.
|
8
|
+
module Xml
|
9
|
+
include UTF8Encoding
|
10
|
+
|
11
|
+
private
|
12
|
+
|
13
|
+
# Reads the file at +path+ and returns it parsed as a Nokogiri XML document.
#
# The raw file data is passed through ensure_utf8! before parsing, the
# document's encoding is then set to UTF-8, and finally
# emulate_strict_mode_fatal_check! is run against the parsed document (which
# raises if the parser reported fatal errors).
def read_xml_file(path)
  file_data = SafeFile.new(path).read

  require 'nokogiri'

  document = Nokogiri::XML(ensure_utf8!(file_data))
  document.encoding = 'UTF-8'
  emulate_strict_mode_fatal_check!(document)
  document
end
|
23
|
+
|
24
|
+
# Nokogiri can give a `STRICT` parse option to libxml, but our friendly
# handling of muddled encodings causes XML explicitly declared as something
# other than UTF-8 to fail (because it has been recoded to UTF-8 by the
# time it is given to Nokogiri / libxml).
#
# Instead, this inspects the errors collected on the parsed +document+ and
# raises a Nokogiri::XML::SyntaxError if any of them are fatal, other than
# the "labelled X but has UTF-8 content" complaint for one of our
# AUTO_ENCODINGS. Returns nil when there is nothing fatal to report.
def emulate_strict_mode_fatal_check!(document)
  # Build a pattern for the one complaint we deliberately let slide: XML
  # declared as one of our auto encodings, but parsed as UTF-8.
  tolerated_names   = AUTO_ENCODINGS.map { |name| Regexp.escape(name) }.join('|')
  tolerated_warning = /\ADocument labelled (#{tolerated_names}) but has UTF-8 content\z/

  fatal_count = document.errors.count do |error|
    error.fatal? && (tolerated_warning !~ error.message)
  end

  return if fatal_count.zero?
  fail Nokogiri::XML::SyntaxError, "The file had #{fatal_count} fatal error(s)!"
end
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
require 'ndr_support/safe_file'
|
2
|
+
|
3
|
+
module NdrImport
|
4
|
+
module Helpers
|
5
|
+
module File
|
6
|
+
# This mixin adds Zip functionality to unified importers.
|
7
|
+
module Zip
|
8
|
+
private
|
9
|
+
|
10
|
+
# Unzip the file, creating the destination directory if necessary.
# A pattern can be provided to only extract required files.
#
# source      - path of the zip archive; passed through SafeFile.safepath_to_string
# destination - directory to extract into; passed through
#               SafeFile.safepath_to_string, and must respond to #join
# pattern     - matched against each entry's basename; defaults to matching everything
#
# Only file entries are extracted, and each is written directly into
# +destination+ under its basename (any directory part of the entry name is
# dropped).
#
# Error handling: SecurityError and ArgumentError propagate to the caller.
# Any other StandardError is printed with `puts` and swallowed, so the method
# then returns nil.
# NOTE(review): that catch-all also swallows the RuntimeError raised by the
# external-environment guard below - the unzip is still prevented, but the
# caller does not see an exception. Confirm whether that is intended.
def unzip_file(source, destination, pattern = //)
  # SECURE TVB Mon Aug 13 14:41:05 BST 2012 : SafePath will raise exception if insecure
  # path is constructed
  # SafeFile.safepath_to_string will make sure that the arguments are from type SafePath

  # SECURE: BNS 2010-09-21 (for external access)
  fail 'Not allowed in external environment' if defined?(::Rails) && ::Rails.env.external?

  # FileUtils is used below, so load it explicitly rather than relying on
  # something else having required it (a NameError here would otherwise be
  # silently swallowed by the catch-all rescue).
  require 'fileutils'
  require 'zip'
  # TODO: Abort if destination directory already exists...
  FileUtils.mkdir_p(SafeFile.safepath_to_string(destination))

  ::Zip::File.open(SafeFile.safepath_to_string(source)) do |zipfile|
    zipfile.entries.each do |entry|
      # SECURE: TPG 2010-11-1: The path is stripped from the zipfile entry when extracted
      basename = ::File.basename(entry.name)
      zipfile.extract(entry, destination.join(basename)) if entry.file? && basename.match(pattern)
    end
  end
rescue ::Zip::ZipDestinationFileExistsError
  # Deliberately ignored.
  # NOTE(review): the original comment here said the files would simply be
  # overwritten, but rescuing at this level abandons the rest of the
  # extraction rather than overwriting anything - confirm the intended
  # behaviour before relying on it.
rescue SecurityError, ArgumentError
  # These must always reach the caller (and not the catch-all below).
  raise
rescue => ex
  puts ex
end
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
@@ -0,0 +1,220 @@
|
|
1
|
+
require 'ndr_support/string/cleaning'
|
2
|
+
require 'ndr_support/string/conversions'
|
3
|
+
require 'ndr_import/standard_mappings'
|
4
|
+
require 'base64'
|
5
|
+
require 'msworddoc-extractor'
|
6
|
+
|
7
|
+
# This module provides helper logic for mapping unified sources for import into the system
|
8
|
+
module NdrImport::Mapper
|
9
|
+
private
|
10
|
+
|
11
|
+
# Splits a fixed width +line+ into its columns.
#
# Each entry of +line_mappings+ supplies an 'unpack_pattern'; these are
# concatenated, in order, into a single String#unpack format which is then
# applied to the line. Returns the resulting array of column values.
def fixed_width_columns(line, line_mappings)
  patterns = line_mappings.map { |column_mapping| column_mapping['unpack_pattern'] }
  line.unpack(patterns.join)
end
|
17
|
+
|
18
|
+
# Applies the 'replace' option of +field_mapping+ to +original_value+, in
# place (using gsub!), so it takes effect before any other mapping option.
#
# 'replace' may be a single collection of pattern/replacement pairs, or an
# array of them; they are applied in the order given. Nothing happens if
# there is no 'replace' option or no value to work on.
def replace_before_mapping(original_value, field_mapping)
  return unless original_value
  return unless field_mapping.include?('replace')

  [field_mapping['replace']].flatten.each do |substitutions|
    substitutions.each { |pattern, replacement| original_value.gsub!(pattern, replacement) }
  end
end
|
28
|
+
|
29
|
+
# Returns the standard mapping named +mapping_name+, merged with the column's
# own mapping, or nil if no standard mapping with that name exists.
#
# Keys in +column_mapping+ take precedence over those of the standard
# mapping, except for 'mappings': the column's field mappings are appended
# to the standard ones rather than replacing them.
#
# Neither the shared standard mapping nor +column_mapping+ is modified, so
# repeated calls (and different columns using the same standard mapping)
# always get the same, independent result.
def standard_mapping(mapping_name, column_mapping)
  mapping = NdrImport::StandardMappings.mappings[mapping_name]
  return nil if mapping.nil?

  merged = mapping.merge(column_mapping)
  if column_mapping['mappings']
    # Array#+ builds a new array, leaving both source arrays untouched.
    merged['mappings'] = (mapping['mappings'] || []) + column_mapping['mappings']
  end
  merged
end
|
39
|
+
|
40
|
+
# Takes an array of raw values (+line+) and their associated column mappings
# (+line_mappings+, matched up by position) and returns an attribute hash.
#
# For each captured column the raw value is decoded (per the column's 'decode'
# option), optionally cast via cast_raw_value if the includer responds to it
# (raw value casting varies between sources), stored in the rawtext hash, and
# then run through each of the column's field mappings.
#
# Several columns may feed one field:
# * mappings with an 'order' are joined together (using 'join', and dropping
#   blank parts unless 'compact' is false);
# * otherwise the non-blank value with the lowest 'priority' wins (mappings
#   without a priority are treated as priority 1).
#
# The rawtext hash is returned under the :rawtext key. Raises ArgumentError
# if the line has more values than there are column mappings.
def mapped_line(line, line_mappings)
  validate_line_mappings(line_mappings)

  attributes = {}
  rawtext = {}

  line.each_with_index do |raw_value, col|
    column_mapping = line_mappings[col]
    if column_mapping.nil?
      fail ArgumentError,
           "Line has too many columns (expected #{line_mappings.size} but got #{line.size})"
    end

    next if column_mapping['do_not_capture']

    standard_name = column_mapping['standard_mapping']
    column_mapping = standard_mapping(standard_name, column_mapping) if standard_name

    # Establish the rawtext column name we are to use for this column
    rawtext_column_name = (column_mapping['rawtext_name'] || column_mapping['column']).downcase

    # Replace raw_value with decoded raw_value, applying each decoding in turn
    raw_value = Array(column_mapping['decode']).inject(raw_value) do |decoded, encoding|
      decode_raw_value(decoded, encoding)
    end

    # Let the caller apply any source-specific casting
    if respond_to?(:cast_raw_value)
      raw_value = cast_raw_value(rawtext_column_name, raw_value, column_mapping)
    end

    # Store the raw column value
    rawtext[rawtext_column_name] = raw_value

    (column_mapping['mappings'] || []).each do |field_mapping|
      # Work on a duplicate, so the stored raw value is left untouched
      original_value = raw_value.dup if raw_value

      replace_before_mapping(original_value, field_mapping)
      value = mapped_value(original_value, field_mapping)

      # Blank values are only kept for joined fields.
      # (Assumes join is specified in first joined field)
      next if value.blank? && !field_mapping['join']

      # Currently assuming already validated YAML, s.t. no fields have the
      # same priorities
      field = field_mapping['field']
      entry = (attributes[field] ||= {})
      entry[:priority] ||= {}

      if field_mapping['order']
        entry[field_mapping['order']] = value
        entry[:join] = field_mapping['join'] if field_mapping['join']
        entry[:compact] = field_mapping['compact'] if field_mapping.include?('compact')
      elsif field_mapping['priority']
        entry[:priority][field_mapping['priority']] = value
      else
        # No explicit priority: assign the default priority of 1
        entry[:priority][1] = value
        entry[:value] = value
      end
    end
  end

  # Tidy up many to one field mappings, and one to many, for cross-populating
  attributes.each do |field, parts|
    if parts.include?(:join)
      separator = parts.delete(:join) || ','
      parts.delete(:value)
      parts.delete(:priority)
      compact = parts.include?(:compact) ? parts.delete(:compact) : true

      # Only the ordered parts remain; blank ones become nil
      ordered = parts.sort.map { |_part_order, part_value| part_value.blank? ? nil : part_value }
      ordered = ordered.compact if compact
      attributes[field] = ordered.join(separator)
    else
      candidates = parts[:priority]
      candidates.reject! { |_priority, candidate| candidate.blank? }
      attributes[field] = candidates.sort.first[1]
    end
  end

  attributes[:rawtext] = rawtext
  attributes
end
|
142
|
+
|
143
|
+
# Maps +original_value+ according to the first option present in
# +field_mapping+, checked in this order:
#
# 'format'    - blank values become nil; otherwise the value is converted
#               with to_date(format). An ArgumentError from the conversion is
#               re-raised with the offending value appended to the message.
# 'clean'     - blank values become nil; otherwise the value is passed to
#               clean(<clean option>).
# 'map'       - the value is looked up in the given map (nil if the map itself is nil).
# 'match'     - the value is matched against the given regular expression and
#               the first capture group is returned, stripped. Returns nil if
#               there is no match, or the first group captured nothing.
# 'daysafter' - a value that is an integer (or the string form of one) is
#               treated as a number of days after the given date and returned
#               as a date; any other value is returned unchanged.
# (none)      - blank values become nil; strings are stripped; anything else
#               is returned as is.
#
# NOTE(review): blank?, to_date(format), clean and Integer#days are not
# defined in this file - presumably they come from ActiveSupport and the
# ndr_support string extensions required by the file.
def mapped_value(original_value, field_mapping)
  if field_mapping.include?('format')
    begin
      original_value.blank? ? nil : original_value.to_date(field_mapping['format'])
    rescue ArgumentError => e
      e2 = ArgumentError.new("#{e} value #{original_value.inspect}")
      e2.set_backtrace(e.backtrace)
      raise e2
    end
  elsif field_mapping.include?('clean')
    original_value.blank? ? nil : original_value.clean(field_mapping['clean'])
  elsif field_mapping.include?('map')
    field_mapping['map'] ? field_mapping['map'][original_value] : nil
  elsif field_mapping.include?('match')
    # WARNING:TVB Thu Aug  9 17:09:25 BST 2012 field_mapping['match'] regexp
    # may need to be escaped
    matches = Regexp.new(field_mapping['match']).match(original_value)
    # The first group is nil when the pattern has no capture group, or the
    # group did not take part in the match; guard against calling strip on it.
    captured = matches && matches[1]
    captured ? captured.strip : nil
  elsif field_mapping.include?('daysafter')
    if original_value.to_i.to_s == original_value.to_s
      original_value.to_i.days.since(field_mapping['daysafter'].to_time).to_date
    else
      original_value
    end
  elsif original_value.blank?
    nil
  else
    original_value.is_a?(String) ? original_value.strip : original_value
  end
end
|
169
|
+
|
170
|
+
# Validates +line_mappings+, returning true if no problem is found.
#
# Raises a RuntimeError if:
# * a column names a 'standard_mapping' for which standard_mapping returns nil; or
# * a field mapping's 'priority' equals the priority most recently seen for
#   the same field (field mappings without a priority count as priority 1).
def validate_line_mappings(line_mappings)
  last_priority = {}

  line_mappings.each do |column_mapping|
    standard_name = column_mapping['standard_mapping']
    if standard_name && standard_mapping(standard_name, column_mapping).nil?
      fail "Standard mapping \"#{standard_name}\" does not exist"
    end

    (column_mapping['mappings'] || []).each do |field_mapping|
      field = field_mapping['field']
      explicit = field_mapping['priority']

      fail 'Cannot have duplicate priorities' if explicit && last_priority[field] == explicit
      last_priority[field] = explicit || 1
    end
  end

  true
end
|
192
|
+
|
193
|
+
# Decodes +raw_value+ using the given +encoding+, which must be one of:
#
# :base64   - the value is Base64 decoded
# :word_doc - the value is treated as the contents of a Word document and
#             its text is extracted (see read_word_stream)
#
# Any other encoding raises a RuntimeError.
#
# E.g. adding decode to a column:
#
#   - column: base64
#     decode:
#     - :base64
#     - :word_doc
#
# would base64 decode a word document and then 'decode' the word document into plain text
def decode_raw_value(raw_value, encoding)
  return Base64.decode64(raw_value) if encoding == :base64
  return read_word_stream(StringIO.new(raw_value, 'r')) if encoding == :word_doc

  fail "Cannot decode: #{encoding}"
end
|
212
|
+
|
213
|
+
# Given an IO stream representing a .doc word document, extracts and returns
# the document's text as a single string, using the same extractor as
# NdrImport::Helpers::File::Word#read_word_file.
def read_word_stream(stream)
  contents = MSWordDoc::Extractor.load(stream).whole_contents
  # whole_contents adds "\n" to end of stream, we remove it
  contents.sub(/\n\z/, '')
end
|
220
|
+
end
|
@@ -0,0 +1,73 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
module NdrImport
|
4
|
+
module NonTabular
|
5
|
+
# Holds the mapping for a single non-tabular column. It knows which lines of
# the source data the column's value may be found on, and how to capture the
# value from such a line.
class ColumnMapping
  attr_accessor :name, :cell_mapping, :lines, :capture, :join

  # Builds the mapping from a +column_mapping+ hash. The name is taken from
  # 'rawtext_name', 'column' or 'standard_mapping' (in that order of
  # preference) and the cell details from 'non_tabular_cell'.
  #
  # Raises NdrImport::MappingError if the cell mapping, its 'lines' or its
  # 'capture' is missing.
  def initialize(column_mapping)
    @name = column_mapping['rawtext_name'] ||
            column_mapping['column'] ||
            column_mapping['standard_mapping']
    @cell_mapping = column_mapping['non_tabular_cell']

    validate_cell_mapping

    @lines = @cell_mapping['lines']
    @join = @cell_mapping['join']
  end

  # Returns the range of source data lines this column applies to. A
  # RegexpRange is resolved against the +text+ provided; anything else is
  # returned as it was configured.
  def matching_lines(text)
    @lines.is_a?(RegexpRange) ? @lines.to_range(text) : @lines
  end

  # Captures the required part of +line+. Each 'capture' pattern is applied
  # in turn to the result of the previous one, replacing it with the first
  # captured group (or nil if the pattern did not match). This is hardcoded
  # in an attempt to preserve the rawtext as much as possible.
  def capture_value(line)
    [@cell_mapping['capture']].flatten.inject(line.dup) do |value, pattern|
      found = value.to_s.match(pattern)
      found ? found[1] : nil
    end
  end

  # Runs all of the cell mapping validations, in order.
  def validate_cell_mapping
    validate_presence_of_non_tabular_cell
    validate_presence_of_non_tabular_cell_lines
    validate_presence_of_non_tabular_cell_capture
  end

  # Raises NdrImport::MappingError unless a 'non_tabular_cell' was given.
  def validate_presence_of_non_tabular_cell
    unless @cell_mapping
      message = I18n.t('mapping.errors.missing_non_tabular_cell', :name => @name)
      fail NdrImport::MappingError, message
    end
  end

  # Raises NdrImport::MappingError unless the cell mapping has 'lines'.
  def validate_presence_of_non_tabular_cell_lines
    unless @cell_mapping['lines']
      message = I18n.t('mapping.errors.missing_non_tabular_cell_lines', :name => @name)
      fail NdrImport::MappingError, message
    end
  end

  # Raises NdrImport::MappingError unless the cell mapping has a 'capture'.
  def validate_presence_of_non_tabular_cell_capture
    unless @cell_mapping['capture']
      message = I18n.t('mapping.errors.missing_non_tabular_cell_capture', :name => @name)
      fail NdrImport::MappingError, message
    end
  end
end
|
72
|
+
end
|
73
|
+
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
module NdrImport
|
4
|
+
module NonTabular
|
5
|
+
# This class stands in for each line of source text, and can be matched
# against like a string. It also carries additional information about how the
# line has been used, e.g. whether the line is within a record, and which
# fields the line has been used to capture a value for.
class Line
  attr_accessor :absolute_line_number, :captured_fields, :captures_values,
                :in_a_record, :record_line_number, :removed

  # Wraps +line+ (with trailing whitespace removed), recording its
  # +absolute_line_number+. The line starts out not in a record, not removed
  # and with nothing captured from it.
  def initialize(line, absolute_line_number)
    @line = line.rstrip
    @absolute_line_number = absolute_line_number
    @in_a_record = false
    @removed = false
    @captured_fields = []
    @captures_values = []
  end

  # Delegates pattern matching to the underlying text.
  def =~(other)
    @line =~ other
  end

  # Delegates to String#match on the underlying text.
  def match(*args)
    @line.match(*args)
  end

  # Returns the underlying (right-stripped) text.
  def to_s
    @line
  end

  # Records that this line was used to capture +field+, ignoring nil fields
  # and fields that have already been recorded.
  def captured_for(field)
    return unless field
    return if @captured_fields.include?(field)

    @captured_fields << field
  end

  # Records the +value+ that was captured from this line for +field+.
  def matches_for(field, value)
    @captures_values << [field, value]
  end
end
|
45
|
+
end
|
46
|
+
end
|