ndr_import 3.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/.gitignore +14 -0
- data/.rubocop.yml +27 -0
- data/.ruby-version +1 -0
- data/.travis.yml +22 -0
- data/CODE_OF_CONDUCT.md +13 -0
- data/Gemfile +4 -0
- data/Guardfile +16 -0
- data/LICENSE.txt +21 -0
- data/README.md +69 -0
- data/Rakefile +13 -0
- data/code_safety.yml +374 -0
- data/gemfiles/Gemfile.rails32 +5 -0
- data/gemfiles/Gemfile.rails32.lock +142 -0
- data/gemfiles/Gemfile.rails41 +5 -0
- data/gemfiles/Gemfile.rails41.lock +145 -0
- data/gemfiles/Gemfile.rails42 +5 -0
- data/gemfiles/Gemfile.rails42.lock +145 -0
- data/lib/ndr_import.rb +13 -0
- data/lib/ndr_import/csv_library.rb +40 -0
- data/lib/ndr_import/file/all.rb +8 -0
- data/lib/ndr_import/file/base.rb +76 -0
- data/lib/ndr_import/file/delimited.rb +86 -0
- data/lib/ndr_import/file/excel.rb +131 -0
- data/lib/ndr_import/file/pdf.rb +38 -0
- data/lib/ndr_import/file/registry.rb +50 -0
- data/lib/ndr_import/file/text.rb +52 -0
- data/lib/ndr_import/file/word.rb +30 -0
- data/lib/ndr_import/file/zip.rb +67 -0
- data/lib/ndr_import/helpers/file/delimited.rb +105 -0
- data/lib/ndr_import/helpers/file/excel.rb +181 -0
- data/lib/ndr_import/helpers/file/pdf.rb +29 -0
- data/lib/ndr_import/helpers/file/word.rb +27 -0
- data/lib/ndr_import/helpers/file/xml.rb +45 -0
- data/lib/ndr_import/helpers/file/zip.rb +44 -0
- data/lib/ndr_import/mapper.rb +220 -0
- data/lib/ndr_import/mapping_error.rb +5 -0
- data/lib/ndr_import/non_tabular/column_mapping.rb +73 -0
- data/lib/ndr_import/non_tabular/line.rb +46 -0
- data/lib/ndr_import/non_tabular/mapping.rb +35 -0
- data/lib/ndr_import/non_tabular/record.rb +99 -0
- data/lib/ndr_import/non_tabular/table.rb +193 -0
- data/lib/ndr_import/non_tabular_file_helper.rb +160 -0
- data/lib/ndr_import/standard_mappings.rb +23 -0
- data/lib/ndr_import/table.rb +179 -0
- data/lib/ndr_import/version.rb +4 -0
- data/ndr_import.gemspec +44 -0
- data/test/file/base_test.rb +54 -0
- data/test/file/delimited_test.rb +143 -0
- data/test/file/excel_test.rb +85 -0
- data/test/file/pdf_test.rb +35 -0
- data/test/file/registry_test.rb +60 -0
- data/test/file/text_test.rb +92 -0
- data/test/file/word_test.rb +35 -0
- data/test/file/zip_test.rb +47 -0
- data/test/helpers/file/delimited_test.rb +113 -0
- data/test/helpers/file/excel_test.rb +97 -0
- data/test/helpers/file/pdf_test.rb +26 -0
- data/test/helpers/file/word_test.rb +26 -0
- data/test/helpers/file/xml_test.rb +131 -0
- data/test/helpers/file/zip_test.rb +75 -0
- data/test/mapper_test.rb +551 -0
- data/test/non_tabular/mapping_test.rb +36 -0
- data/test/non_tabular/table_test.rb +510 -0
- data/test/non_tabular_file_helper_test.rb +501 -0
- data/test/readme_test.rb +53 -0
- data/test/resources/bomd.csv +3 -0
- data/test/resources/broken.csv +3 -0
- data/test/resources/filesystem_paths.yml +26 -0
- data/test/resources/flat_file.pdf +0 -0
- data/test/resources/flat_file.txt +27 -0
- data/test/resources/flat_file.yml +20 -0
- data/test/resources/hello_utf16be.txt +0 -0
- data/test/resources/hello_utf16le.txt +0 -0
- data/test/resources/hello_utf8.txt +2 -0
- data/test/resources/hello_windows.txt +2 -0
- data/test/resources/hello_world.doc +0 -0
- data/test/resources/hello_world.pdf +0 -0
- data/test/resources/hello_world.txt +2 -0
- data/test/resources/high_ascii_delimited.txt +2 -0
- data/test/resources/malformed.xml +6 -0
- data/test/resources/normal.csv +3 -0
- data/test/resources/normal.csv.zip +0 -0
- data/test/resources/normal_pipe.csv +3 -0
- data/test/resources/normal_thorn.csv +3 -0
- data/test/resources/not_a_pdf.pdf +0 -0
- data/test/resources/not_a_word_file.doc +0 -0
- data/test/resources/sample_xls.xls +0 -0
- data/test/resources/sample_xlsx.xlsx +0 -0
- data/test/resources/standard_mappings.yml +39 -0
- data/test/resources/txt_file_xls_extension.xls +1 -0
- data/test/resources/txt_file_xlsx_extension.xlsx +1 -0
- data/test/resources/utf-16be_xml.xml +0 -0
- data/test/resources/utf-16be_xml_with_declaration.xml +0 -0
- data/test/resources/utf-16le_xml.xml +0 -0
- data/test/resources/utf-8_xml.xml +9 -0
- data/test/resources/windows-1252_xml.xml +9 -0
- data/test/resources/windows.csv +5 -0
- data/test/resources/xlsx_file_xls_extension.xls +0 -0
- data/test/standard_mappings_test.rb +22 -0
- data/test/table_test.rb +288 -0
- data/test/test_helper.rb +13 -0
- metadata +443 -0
@@ -0,0 +1,35 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
require 'ndr_import/non_tabular/table'
|
3
|
+
|
4
|
+
module NdrImport
  module NonTabular
    # Stores the mapping used to break an incoming file into multiple rows/records.
    # The row-level settings are read from the 'non_tabular_row' entry of the options
    # and copied onto the instance before the parent Table initialiser runs.
    class Mapping < Table
      # The parent's valid options plus the 'non_tabular_row' wrapper key.
      def self.all_valid_options
        super + ['non_tabular_row']
      end

      # options - mapping options; must contain a truthy 'non_tabular_row' entry.
      #
      # Raises NdrImport::MappingError when 'non_tabular_row' is missing.
      def initialize(options)
        row_options = options['non_tabular_row']
        unless row_options
          # validate presence of non_tabular_row
          fail NdrImport::MappingError,
               I18n.t('mapping.errors.missing_non_tabular_row')
        end

        initialize_non_tabular_mappings(row_options)
        super(options)
      end

      private

      # Copies every truthy NON_TABULAR_OPTIONS entry found in non_tabular_mappings
      # into the instance variable of the same name; absent/falsy entries are skipped.
      def initialize_non_tabular_mappings(non_tabular_mappings)
        NON_TABULAR_OPTIONS.each do |key|
          setting = non_tabular_mappings[key]
          instance_variable_set("@#{key}", setting) if setting
        end
      end
    end
  end
end
|
@@ -0,0 +1,99 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
module NdrImport
  module NonTabular
    # Behaves like an array of NdrImport::NonTabular::Line elements holding all the
    # source lines of text that relate to a single record of data. It also
    # encapsulates the logic that tabulates those lines into cells.
    class Record
      attr_reader :lines

      def initialize
        @lines = []
      end

      # Adds line to the record, flagging it as being in a record and stamping it
      # with its position within the record. Lines flagged as removed are ignored
      # (nil is returned for them); otherwise the array of lines is returned.
      def <<(line)
        unless line.removed
          line.in_a_record = true
          line.record_line_number = @lines.size
          @lines.push(line)
        end
      end

      # True when no lines have been collected.
      def empty?
        @lines.empty?
      end

      # Call this if it turns out that the collected lines are not a record after all.
      # Every collected line is flagged as not being in a record.
      def not_a_record!
        @lines.each do |line|
          line.in_a_record = false
        end
      end

      # Returns an array of "cells" for the lines of this record, which together
      # represent a single "row" of data, so that the output can be mapped by the
      # standard mapper. A cell is nil when RegexpRange::PatternMatchError is raised
      # while collecting its matches.
      #
      # ==== Signature
      #
      #   tabulate(mappings)
      #
      # ==== Examples
      #
      #   If the YAML mapping is
      #   ---
      #   - standard_mapping: nhsnumber
      #     non_tabular_cell:
      #       lines: 0
      #       capture:
      #       - !ruby/regexp /^D\|([^|]*).*/
      #   - column: fulltextreport
      #     non_tabular_cell:
      #       lines: !ruby/range
      #         begin: 1
      #         end: -1
      #         excl: false
      #       capture: !ruby/regexp /^(?:R|\d+)\|(.*)$/i
      #       join: \n
      #
      #   lines = [
      #     "D|1111111111|...",
      #     "R|This is a",
      #     "1|multiline report"
      #   ]
      #
      #   tabulate(mappings)
      #
      #   # =>
      #   [
      #     "1111111111",
      #     "This is a\nmultiline report"
      #   ]
      #
      def tabulate(mappings)
        row = []
        mappings.each { |column_mapping| row << cell_for(column_mapping) }
        row
      end

      # Returns an array with one captured value per line selected by column_mapping.
      # Each visited line is told which column captured it and what value matched.
      def get_matches(column_mapping)
        selection = column_mapping.matching_lines(@lines)
        # Array() lets a single line and a range of lines be handled uniformly
        Array(@lines[selection]).map do |line|
          line.captured_for(column_mapping.name)
          column_mapping.capture_value(line).tap do |captured|
            line.matches_for(column_mapping.name, captured)
          end
        end
      end

      private

      # Builds a single cell: the non-blank matches joined with the column's join
      # string (or nothing when no join string is defined).
      def cell_for(column_mapping)
        present_values = get_matches(column_mapping).reject(&:blank?)
        present_values.join(column_mapping.join || '')
      rescue RegexpRange::PatternMatchError
        nil
      end
    end
  end
end
|
@@ -0,0 +1,193 @@
|
|
1
|
+
require 'ndr_import/table'
|
2
|
+
|
3
|
+
module NdrImport
  module NonTabular
    # Maintains the state of a non tabular table mapping and encapsulates the logic
    # required to transform a stream of text lines into "records", which are then
    # tabulated and handed to the parent NdrImport::Table for mapping.
    class Table < ::NdrImport::Table
      require 'i18n'
      require 'ndr_support/regexp_range' # TODO: unneeded?
      require 'ndr_support/utf8_encoding'
      require 'ndr_import/non_tabular/column_mapping'
      require 'ndr_import/non_tabular/record'
      require 'ndr_import/non_tabular/line'

      include UTF8Encoding

      NON_TABULAR_OPTIONS = %w(capture_start_line start_line_pattern end_line_pattern remove_lines
                               start_in_a_record end_in_a_record)

      # The parent's options, minus those that make no sense for non tabular data,
      # plus the non tabular specific ones.
      def self.all_valid_options
        tabular_only = %w(tablename_pattern header_lines footer_lines)
        super - tabular_only + NON_TABULAR_OPTIONS
      end

      attr_reader(*NON_TABULAR_OPTIONS)
      attr_reader :non_tabular_lines

      # Non tabular data never has header lines.
      def header_lines
        0
      end

      # Non tabular data never has footer lines.
      def footer_lines
        0
      end

      # Raises NdrImport::MappingError if no start_line_pattern has been set once
      # the parent initialiser has processed the options.
      def initialize(options = {})
        super(options)

        validate_presence_of_start_line_pattern
      end

      # A tablename_pattern is not supported here; any attempt to set one fails.
      def tablename_pattern=(_value)
        fail NdrImport::MappingError, 'Should not define tablename_pattern'
      end

      # Fails with NdrImport::MappingError unless a start_line_pattern is present.
      def validate_presence_of_start_line_pattern
        fail NdrImport::MappingError,
             I18n.t('mapping.errors.missing_start_line_pattern') unless @start_line_pattern
      end

      # Transforms the given line array/enumerator: the lines are wrapped, unwanted
      # lines are flagged as removed, the rest are partitioned into records and
      # tabulated, and the resulting rows are passed (with the block) to the parent
      # implementation of transform. Without a block an enumerator is returned.
      def transform(lines, &block)
        return enum_for(:transform, lines) unless block

        self.non_tabular_lines = ensure_utf8_enum!(lines)
        remove_unwanted_lines

        super(read_non_tabular_array, &block)
      end

      # There is no header to check, so the header is always flagged as valid.
      def validate_header(_line, _column_mappings)
        @header_valid = true
      end

      protected

      # Yields each of the lines after passing it through ensure_utf8_object!.
      # Returns an enumerator when called without a block.
      def ensure_utf8_enum!(lines)
        return enum_for(:ensure_utf8_enum!, lines) unless block_given?

        lines.each { |line| yield ensure_utf8_object!(line) }
      end

      # Flags unwanted lines, typically page headers and footers, as removed,
      # preventing them from being captured in the non tabular record. Especially
      # useful when there are page headers and footers that are out of step with the
      # start and end of each record and could therefore appear anywhere in an
      # individual record if kept.
      #
      # Does nothing unless remove_lines is a Hash; only its values are used.
      def remove_unwanted_lines
        return unless @remove_lines.is_a?(Hash)

        @non_tabular_lines.each_index do |offset|
          @remove_lines.each_value do |lines_to_remove|
            # Compare the run of lines starting at this offset with the unwanted run
            window = @non_tabular_lines[offset, lines_to_remove.length]
            next unless lines_equal(window, lines_to_remove)

            # All lines are equal, so flag them as removed
            window.each { |line| line.removed = true }
          end
        end
      end

      # Resets the parsing state, partitions the stored lines into records and
      # returns the array of tabulated rows.
      def read_non_tabular_array
        @tabular_array = []
        @in_a_record = @start_in_a_record
        @non_tabular_record = NdrImport::NonTabular::Record.new

        partition_and_process_non_tabular_lines
        process_end_of_record

        @tabular_array
      end

      # Reads the array of lines, looking to see if a line matches the
      # start_line_pattern, identifying the start of a record. It then collects all
      # the lines until a line matches the end_line_pattern (if defined, otherwise
      # when it matches the next start_line_pattern) and sends these lines to
      # NdrImport::NonTabular::Record#tabulate.
      #
      # NOTE: Currently the end line is consumed and does not form part of the
      # collected array.
      def partition_and_process_non_tabular_lines
        non_tabular_lines.each do |line|
          if line =~ @start_line_pattern
            # This is a start line
            start_record(line)
          elsif line =~ @end_line_pattern
            # This is an end line
            end_record
          elsif @in_a_record
            @non_tabular_record << line
          end
        end
      end

      # Begins a new record. If a record is already open then, when an
      # end_line_pattern is defined, this is an error (a new start was seen before
      # the previous end); otherwise the open record is tabulated first.
      def start_record(line)
        if @in_a_record
          fail NdrImport::MappingError,
               I18n.t('mapping.errors.start_pattern_before_end') if @end_line_pattern

          # No endline mapping
          @tabular_array << @non_tabular_record.tabulate(column_mappings)
        end

        @non_tabular_record = NdrImport::NonTabular::Record.new
        @non_tabular_record << line if @capture_start_line
        @in_a_record = true
      end

      # Tabulates the record (if in one), flags that we are no longer in a record
      # and sets the record to be a new one.
      def end_record
        @tabular_array << @non_tabular_record.tabulate(column_mappings) if @in_a_record
        @in_a_record = false
        @non_tabular_record = NdrImport::NonTabular::Record.new
      end

      # If the non-tabular data ends in a record (i.e. the last record is terminated
      # by the EOF) then we need to process the last record manually or flag those
      # lines as not being part of a record.
      def process_end_of_record
        return if @non_tabular_record.empty?
        return @non_tabular_record.not_a_record! unless @end_in_a_record

        @tabular_array << @non_tabular_record.tabulate(column_mappings) if @in_a_record
      end

      # Stores the source lines as instances of NdrImport::NonTabular::Line, each
      # created with the line and its zero-based position in the input.
      def non_tabular_lines=(lines)
        @non_tabular_lines = lines.each_with_index.map do |text, position|
          NdrImport::NonTabular::Line.new(text, position)
        end
      end

      # Creates and memoizes the column mappings.
      def column_mappings
        @column_mappings ||= raw_column_mappings.map do |raw_mapping|
          NdrImport::NonTabular::ColumnMapping.new(raw_mapping)
        end
      end

      # The raw column definitions, or an empty array when none are set.
      def raw_column_mappings
        @columns || []
      end

      # Compares two arrays, where the first must be an array of
      # NdrImport::NonTabular::Line or string elements and the second can be a mix
      # of strings and/or regular expressions. Regular expressions are matched
      # against the line's string form; anything else must equal it exactly.
      def lines_equal(lines, other_lines)
        return false unless lines.length == other_lines.length

        lines.each_with_index.all? do |line, position|
          expected = other_lines[position]
          if expected.is_a?(Regexp)
            line.to_s =~ expected
          else
            line.to_s == expected
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,160 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
module NdrImport
  # This mixin adds (multiline) non-tabular file functionality to unified importers.
  # It provides a file reader method and method to capture the rawtext value
  # appropriately. These methods can be overridden or aliased as required.
  #
  # The YAML mapping must define the start_line_pattern which identifies the start
  # of a multiline record (or "row") and can optionally define an end_line_pattern.
  #
  # The including class is expected to provide @mappings (the mapping, including
  # the 'non_tabular_row' and 'columns' entries) and, for read_non_tabular_file,
  # a filename method.
  module NonTabularFileHelper
    require 'i18n'
    require 'ndr_support/regexp_range' # TODO: unneeded?
    require 'ndr_support/utf8_encoding'
    require 'ndr_import/non_tabular/column_mapping'
    require 'ndr_import/non_tabular/record'
    require 'ndr_import/non_tabular/line'
    require 'ndr_import/non_tabular/mapping'

    include UTF8Encoding

    attr_reader :non_tabular_lines

    protected

    # Reads a non-tabular text file and returns an array of tabulated rows of data,
    # where each row is an array of cells.
    def read_non_tabular_file
      self.non_tabular_lines = ensure_utf8_object! SafeFile.readlines(filename)
      remove_unwanted_lines
      read_non_tabular_array
    end

    # Reads a string and returns an array of tabulated data. Use only for prototyping.
    def read_non_tabular_string(text)
      self.non_tabular_lines = ensure_utf8_object!(text).split("\n")
      remove_unwanted_lines
      read_non_tabular_array
    end

    # Flags unwanted lines, typically page headers and footers, as removed,
    # preventing them from being captured in the non tabular record. Especially
    # useful when there are page headers and footers that are out of step with the
    # start and end of each record and could therefore appear anywhere in an
    # individual record if kept.
    #
    # Does nothing unless the row mapping's remove_lines is a Hash.
    def remove_unwanted_lines
      return unless row_mapping.remove_lines.is_a?(Hash)
      @non_tabular_lines.each_with_index do |_line, i|
        row_mapping.remove_lines.each do |_key, lines_to_remove|
          comparable_lines = @non_tabular_lines[i, lines_to_remove.length]
          next unless lines_equal(comparable_lines, lines_to_remove)
          # All lines are equal, so flag them as removed
          comparable_lines.each { |line| line.removed = true }
        end
      end
    end

    # Resets the parsing state, partitions the stored lines into records and returns
    # the array of tabulated rows.
    def read_non_tabular_array
      @tabular_array = []
      @in_a_record = row_mapping.start_in_a_record
      @non_tabular_record = NdrImport::NonTabular::Record.new

      partition_and_process_non_tabular_lines
      process_end_of_record

      # We change the mapping instance variable to only contain the column mappings.
      # This enables the standard mapper to work unaltered.
      @mappings = raw_column_mappings
      @tabular_array
    end

    # Reads the array of lines, looking to see if a line matches the start_line_pattern,
    # identifying the start of a record. It then collects all the lines until a line
    # matches the end_line_pattern (if defined, otherwise when it matches the next
    # start_line_pattern) and sends these lines to NdrImport::NonTabular::Record#tabulate.
    #
    # NOTE: Currently the end line is consumed and does not form part of the
    # collected array.
    def partition_and_process_non_tabular_lines
      non_tabular_lines.each do |line|
        if line =~ row_mapping.start_line_pattern
          # This is a start line
          start_record(line)
        elsif line =~ row_mapping.end_line_pattern
          # This is an end line
          end_record
        else
          @non_tabular_record << line if @in_a_record
        end
      end
    end

    # Checks to see if we get the start of a new record before getting the end of the
    # previous one and fails if so. Otherwise it tabulates the previous record.
    def start_record(line)
      if row_mapping.end_line_pattern
        fail NdrImport::MappingError,
             I18n.t('mapping.errors.start_pattern_before_end') if @in_a_record
      else
        # No endline mapping
        @tabular_array << @non_tabular_record.tabulate(column_mappings) if @in_a_record
      end
      @non_tabular_record = NdrImport::NonTabular::Record.new
      @non_tabular_record << line if row_mapping.capture_start_line
      @in_a_record = true
    end

    # Tabulates the record (if in one), flags that we are no longer in a record
    # and sets the record to be a new one.
    def end_record
      @tabular_array << @non_tabular_record.tabulate(column_mappings) if @in_a_record
      @in_a_record = false
      @non_tabular_record = NdrImport::NonTabular::Record.new
    end

    # If the non-tabular data ends in a record (i.e. the last record is terminated by
    # the EOF) then we need to process the last record manually or flag those lines as
    # not being part of a record.
    def process_end_of_record
      return if @non_tabular_record.empty?
      if row_mapping.end_in_a_record
        @tabular_array << @non_tabular_record.tabulate(column_mappings) if @in_a_record
      else
        @non_tabular_record.not_a_record!
      end
    end

    # Stores the source lines as instances of NdrImport::NonTabular::Line, each
    # created with the line and its zero-based position in the input.
    def non_tabular_lines=(lines)
      @non_tabular_lines = lines.map.with_index do |line, i|
        NdrImport::NonTabular::Line.new(line, i)
      end
    end

    # Creates and memoizes the row mappings.
    def row_mapping
      @row_mapping ||= NdrImport::NonTabular::Mapping.new(@mappings)
    end

    # Creates and memoizes the column mappings.
    def column_mappings
      @column_mappings ||= raw_column_mappings.map do |column_mapping|
        NdrImport::NonTabular::ColumnMapping.new(column_mapping)
      end
    end

    # Returns the raw column definitions, or an empty array when none are defined.
    #
    # read_non_tabular_array replaces @mappings with this very array once a read has
    # completed, so on a repeated read @mappings is already the column Array. In that
    # case it is returned as is; indexing an Array with the 'columns' String would
    # otherwise raise a TypeError.
    def raw_column_mappings
      return @mappings if @mappings.is_a?(Array)
      @mappings['columns'] || []
    end

    # Compares two arrays, where the first must be an array of
    # NdrImport::NonTabular::Line or string elements and the second can be a mix of
    # strings and/or regular expressions. Regular expressions are matched against the
    # line's string form; anything else must equal it exactly.
    def lines_equal(lines, other_lines)
      return false unless lines.length == other_lines.length
      lines.each_with_index.all? do |line, i|
        other_line = other_lines[i]
        other_line.is_a?(Regexp) ? line.to_s =~ other_line : line.to_s == other_line
      end
    end
  end
end
|