datashift 0.15.0 → 0.16.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/README.markdown +91 -55
- data/VERSION +1 -1
- data/datashift.gemspec +8 -23
- data/lib/applications/jexcel_file.rb +1 -2
- data/lib/datashift.rb +34 -15
- data/lib/datashift/column_packer.rb +98 -34
- data/lib/datashift/data_transforms.rb +83 -0
- data/lib/datashift/delimiters.rb +58 -10
- data/lib/datashift/excel_base.rb +123 -0
- data/lib/datashift/exceptions.rb +45 -7
- data/lib/datashift/load_object.rb +25 -0
- data/lib/datashift/mapping_service.rb +91 -0
- data/lib/datashift/method_detail.rb +40 -62
- data/lib/datashift/method_details_manager.rb +18 -2
- data/lib/datashift/method_dictionary.rb +27 -10
- data/lib/datashift/method_mapper.rb +49 -41
- data/lib/datashift/model_mapper.rb +42 -22
- data/lib/datashift/populator.rb +258 -143
- data/lib/datashift/thor_base.rb +38 -0
- data/lib/exporters/csv_exporter.rb +57 -145
- data/lib/exporters/excel_exporter.rb +73 -60
- data/lib/generators/csv_generator.rb +65 -5
- data/lib/generators/generator_base.rb +69 -3
- data/lib/generators/mapping_generator.rb +112 -0
- data/lib/helpers/core_ext/csv_file.rb +33 -0
- data/lib/loaders/csv_loader.rb +41 -39
- data/lib/loaders/excel_loader.rb +130 -116
- data/lib/loaders/loader_base.rb +190 -146
- data/lib/loaders/paperclip/attachment_loader.rb +4 -4
- data/lib/loaders/paperclip/datashift_paperclip.rb +5 -3
- data/lib/loaders/paperclip/image_loading.rb +9 -7
- data/lib/loaders/reporter.rb +17 -8
- data/lib/thor/export.thor +12 -13
- data/lib/thor/generate.thor +1 -9
- data/lib/thor/import.thor +13 -24
- data/lib/thor/mapping.thor +65 -0
- data/spec/Gemfile +13 -11
- data/spec/Gemfile.lock +98 -93
- data/spec/csv_exporter_spec.rb +104 -99
- data/spec/csv_generator_spec.rb +159 -0
- data/spec/csv_loader_spec.rb +197 -16
- data/spec/datashift_spec.rb +9 -0
- data/spec/excel_exporter_spec.rb +149 -58
- data/spec/excel_generator_spec.rb +35 -44
- data/spec/excel_loader_spec.rb +196 -178
- data/spec/excel_spec.rb +8 -5
- data/spec/loader_base_spec.rb +47 -7
- data/spec/mapping_spec.rb +117 -0
- data/spec/method_dictionary_spec.rb +24 -11
- data/spec/method_mapper_spec.rb +5 -7
- data/spec/model_mapper_spec.rb +41 -0
- data/spec/paperclip_loader_spec.rb +3 -6
- data/spec/populator_spec.rb +48 -14
- data/spec/spec_helper.rb +85 -73
- data/spec/thor_spec.rb +40 -5
- metadata +93 -86
- data/lib/applications/excel_base.rb +0 -63
data/lib/loaders/excel_loader.rb
CHANGED
@@ -11,7 +11,7 @@
|
|
11
11
|
# i.e pulls data from each column and sends to object.
|
12
12
|
#
|
13
13
|
require 'datashift/exceptions'
|
14
|
-
|
14
|
+
require 'datashift/exceptions'
|
15
15
|
|
16
16
|
module DataShift
|
17
17
|
|
@@ -21,6 +21,38 @@ module DataShift
|
|
21
21
|
|
22
22
|
module ExcelLoading
|
23
23
|
|
24
|
+
include ExcelBase
|
25
|
+
|
26
|
+
attr_accessor :excel
|
27
|
+
|
28
|
+
# Currently struggling to determine the 'end' of data in a spreadsheet
|
29
|
+
# this reflects if current row had any data at all
|
30
|
+
attr_reader :contains_data
|
31
|
+
|
32
|
+
def start_excel( file_name, options = {} )
|
33
|
+
|
34
|
+
@excel = Excel.new
|
35
|
+
|
36
|
+
excel.open(file_name)
|
37
|
+
|
38
|
+
puts "\n\n\nLoading from Excel file: #{file_name}"
|
39
|
+
logger.info("\nStarting Load from Excel file: #{file_name}")
|
40
|
+
|
41
|
+
sheet_number = options[:sheet_number] || 0
|
42
|
+
|
43
|
+
@sheet = excel.worksheet( sheet_number )
|
44
|
+
|
45
|
+
parse_headers(@sheet, options[:header_row])
|
46
|
+
|
47
|
+
raise MissingHeadersError, "No headers found - Check Sheet #{@sheet} is complete and Row #{header_row_index} contains headers" if(excel_headers.empty?)
|
48
|
+
|
49
|
+
# Create a method_mapper which maps list of headers into suitable calls on the Active Record class
|
50
|
+
# For example if model has an attribute 'price' will map columns called Price or price or PRICE etc to this attribute
|
51
|
+
populate_method_mapper_from_headers(excel_headers, options )
|
52
|
+
|
53
|
+
reporter.reset
|
54
|
+
end
|
55
|
+
|
24
56
|
# Options:
|
25
57
|
# [:dummy] : Perform a dummy run - attempt to load everything but then roll back
|
26
58
|
#
|
@@ -37,173 +69,155 @@ module DataShift
|
|
37
69
|
def perform_excel_load( file_name, options = {} )
|
38
70
|
|
39
71
|
raise MissingHeadersError, "Minimum row for Headers is 0 - passed #{options[:header_row]}" if(options[:header_row] && options[:header_row].to_i < 0)
|
40
|
-
|
41
|
-
@excel = Excel.new
|
42
72
|
|
43
|
-
|
44
|
-
|
45
|
-
puts "\n\n\nLoading from Excel file: #{file_name}"
|
73
|
+
start_excel(file_name, options)
|
46
74
|
|
47
|
-
|
48
|
-
|
49
|
-
@sheet = @excel.worksheet( sheet_number )
|
75
|
+
begin
|
76
|
+
puts "Dummy Run - Changes will be rolled back" if options[:dummy]
|
50
77
|
|
51
|
-
|
52
|
-
@header_row = @sheet.row(header_row_index)
|
78
|
+
load_object_class.transaction do
|
53
79
|
|
54
|
-
|
80
|
+
@sheet.each_with_index do |row, i|
|
55
81
|
|
56
|
-
|
82
|
+
current_row_idx = i
|
83
|
+
@current_row = row
|
57
84
|
|
58
|
-
|
59
|
-
# There is no actual max columns in Excel .. you will run out of memory though at some point
|
60
|
-
(0..1024).each do |column|
|
61
|
-
cell = @header_row[column]
|
62
|
-
break unless cell
|
63
|
-
header = "#{cell.to_s}".strip
|
64
|
-
break if header.empty?
|
65
|
-
@headers << header
|
66
|
-
end
|
85
|
+
next if(current_row_idx == header_row_index)
|
67
86
|
|
68
|
-
raise MissingHeadersError, "No headers found - Check Sheet #{@sheet} is complete and Row #{header_row_index} contains headers" if(@headers.empty?)
|
69
|
-
|
70
|
-
# Create a method_mapper which maps list of headers into suitable calls on the Active Record class
|
71
|
-
# For example if model has an attribute 'price' will map columns called Price, price, PRICE etc to this attribute
|
72
|
-
populate_method_mapper_from_headers( @headers, options )
|
73
|
-
|
74
|
-
# currently pointless num_rows rubbish i.e inaccurate!
|
75
|
-
#logger.info "Excel Loader processing #{@sheet.num_rows} rows"
|
76
|
-
|
77
|
-
@reporter.reset
|
78
|
-
|
79
|
-
begin
|
80
|
-
puts "Dummy Run - Changes will be rolled back" if options[:dummy]
|
81
|
-
|
82
|
-
load_object_class.transaction do
|
83
|
-
|
84
|
-
@sheet.each_with_index do |row, i|
|
85
|
-
|
86
|
-
@current_row = row
|
87
|
-
|
88
|
-
next if(i == header_row_index)
|
89
|
-
|
90
87
|
# Excel num_rows seems to return all 'visible' rows, which appears to be greater than the actual data rows
|
91
88
|
# (TODO - write spec to process .xls with a huge number of rows)
|
92
89
|
#
|
93
90
|
# This is rubbish but currently manually detect when actual data ends, this isn't very smart but
|
94
91
|
# got no better idea than ending once we hit the first completely empty row
|
95
|
-
break if
|
96
|
-
|
97
|
-
logger.info "Processing Row #{
|
98
|
-
|
99
|
-
contains_data = false
|
100
|
-
|
92
|
+
break if(@current_row.nil? || @current_row.compact.empty?)
|
93
|
+
|
94
|
+
logger.info "Processing Row #{current_row_idx} : #{@current_row}"
|
95
|
+
|
96
|
+
@contains_data = false
|
97
|
+
|
101
98
|
begin
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
#
|
106
|
-
#
|
107
|
-
|
108
|
-
|
109
|
-
# as part of this we also attempt to save early, for example before assigning to
|
110
|
-
# has_and_belongs_to associations which require the load_object has an id for the join table
|
111
|
-
|
112
|
-
# Iterate over method_details, working on data out of associated Excel column
|
113
|
-
@method_mapper.method_details.each do |method_detail|
|
114
|
-
|
115
|
-
next unless method_detail # TODO populate unmapped with a real MethodDetail that is 'null' and create is_nil
|
116
|
-
|
117
|
-
logger.info "Processing Column #{method_detail.column_index}"
|
118
|
-
|
119
|
-
value = @current_row[method_detail.column_index]
|
120
|
-
|
121
|
-
contains_data = true unless(value.nil? || value.to_s.empty?)
|
122
|
-
|
123
|
-
prepare_data(method_detail, value)
|
124
|
-
|
125
|
-
process()
|
126
|
-
end
|
127
|
-
|
99
|
+
|
100
|
+
process_excel_row(row)
|
101
|
+
|
102
|
+
# This is rubbish but currently have to manually detect when actual data ends,
|
103
|
+
# no other way to detect when we hit the first completely empty row
|
104
|
+
break unless(contains_data == true)
|
105
|
+
|
128
106
|
rescue => e
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
if(verbose)
|
134
|
-
puts "Failed to process row [#{i}] (#{@current_row})"
|
135
|
-
puts e.inspect, e.backtrace
|
136
|
-
end
|
137
|
-
|
138
|
-
logger.error "Failed to process row [#{i}] (#{@current_row})"
|
139
|
-
logger.error e.backtrace
|
140
|
-
|
141
|
-
# don't forget to reset the load object
|
107
|
+
process_excel_failure(e, true)
|
108
|
+
|
109
|
+
# don't forget to reset the load object
|
142
110
|
new_load_object
|
143
111
|
next
|
144
112
|
end
|
145
|
-
|
113
|
+
|
146
114
|
break unless(contains_data == true)
|
147
115
|
|
148
116
|
# currently here as we can only identify the end of a speadsheet by first empty row
|
149
117
|
@reporter.processed_object_count += 1
|
150
|
-
|
118
|
+
|
151
119
|
# TODO - make optional - all or nothing or carry on and dump out the exception list at end
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
logger.error "Failed to save row [#{@current_row}]"
|
156
|
-
logger.error load_object.errors.inspect if(load_object)
|
157
|
-
else
|
158
|
-
logger.info "Row #{@current_row} succesfully SAVED : ID #{load_object.id}"
|
159
|
-
@reporter.add_loaded_object(@load_object)
|
160
|
-
end
|
161
|
-
|
120
|
+
|
121
|
+
save_and_report
|
122
|
+
|
162
123
|
# don't forget to reset the object or we'll update rather than create
|
163
124
|
new_load_object
|
164
125
|
|
165
|
-
end
|
166
|
-
|
126
|
+
end # all rows processed
|
127
|
+
|
167
128
|
if(options[:dummy])
|
168
129
|
puts "Excel loading stage complete - Dummy run so Rolling Back."
|
169
130
|
raise ActiveRecord::Rollback # Don't actually create/upload to DB if we are doing dummy run
|
170
131
|
end
|
171
|
-
|
132
|
+
|
172
133
|
end # TRANSACTION N.B ActiveRecord::Rollback does not propagate outside of the containing transaction block
|
173
|
-
|
174
|
-
rescue => e
|
134
|
+
|
135
|
+
rescue => e
|
175
136
|
puts "ERROR: Excel loading failed : #{e.inspect}"
|
176
137
|
raise e
|
177
|
-
ensure
|
138
|
+
ensure
|
178
139
|
report
|
179
140
|
end
|
180
|
-
|
141
|
+
|
181
142
|
end
|
182
|
-
|
143
|
+
|
144
|
+
def process_excel_failure( e, delete_object = true)
|
145
|
+
failure(@current_row, delete_object)
|
146
|
+
|
147
|
+
if(verbose)
|
148
|
+
puts "perform_excel_load failed in row [#{current_row_idx}] #{@current_row} - #{e.message} :"
|
149
|
+
puts e.backtrace
|
150
|
+
end
|
151
|
+
|
152
|
+
logger.error "perform_excel_load failed in row [#{current_row_idx}] #{@current_row} - #{e.message} :"
|
153
|
+
logger.error e.backtrace.join("\n")
|
154
|
+
end
|
155
|
+
|
156
|
+
|
183
157
|
def value_at(row, column)
|
184
158
|
@excel[row, column]
|
185
159
|
end
|
186
|
-
|
160
|
+
|
161
|
+
def process_excel_row(row)
|
162
|
+
|
163
|
+
# First assign any default values for columns
|
164
|
+
process_defaults
|
165
|
+
|
166
|
+
# TODO - Smart sorting of column processing order ....
|
167
|
+
# Does not currently ensure mandatory columns (for valid?) processed first but model needs saving
|
168
|
+
# before associations can be processed so user should ensure mandatory columns are prior to associations
|
169
|
+
|
170
|
+
# as part of this we also attempt to save early, for example before assigning to
|
171
|
+
# has_and_belongs_to associations which require the load_object has an id for the join table
|
172
|
+
|
173
|
+
# Iterate over method_details, working on data out of associated Excel column
|
174
|
+
@method_mapper.method_details.each_with_index do |method_detail, i|
|
175
|
+
|
176
|
+
unless method_detail
|
177
|
+
logger.warn("No method_detail found for column (#{i})")
|
178
|
+
next # TODO populate unmapped with a real MethodDetail that is 'null' and create is_nil
|
179
|
+
end
|
180
|
+
|
181
|
+
logger.info "Processing Column #{method_detail.column_index} (#{method_detail.operator})"
|
182
|
+
|
183
|
+
value = row[method_detail.column_index]
|
184
|
+
|
185
|
+
@contains_data = true unless(value.nil? || value.to_s.empty?)
|
186
|
+
|
187
|
+
process(method_detail, value)
|
188
|
+
end
|
189
|
+
|
190
|
+
end
|
191
|
+
|
187
192
|
end
|
188
193
|
|
189
194
|
|
190
195
|
class ExcelLoader < LoaderBase
|
191
196
|
|
192
197
|
include ExcelLoading
|
193
|
-
|
194
|
-
|
195
|
-
|
198
|
+
|
199
|
+
# Setup loading
|
200
|
+
#
|
201
|
+
# Options to drive building the method dictionary for a class, enabling headers to be mapped to operators on that class.
|
202
|
+
#
|
203
|
+
# Options
|
204
|
+
# :reload : Force load of the method dictionary for object_class even if already loaded
|
205
|
+
# :instance_methods : Include setter/delegate style instance methods for assignment, as well as AR columns
|
206
|
+
# :verbose : Verbose logging and to STDOUT
|
207
|
+
#
|
208
|
+
def initialize(klass, object = nil, options = {})
|
209
|
+
super( klass, object, options )
|
196
210
|
raise "Cannot load - failed to create a #{klass}" unless @load_object
|
197
211
|
end
|
198
212
|
|
199
213
|
|
200
214
|
def perform_load( file_name, options = {} )
|
201
|
-
|
215
|
+
|
202
216
|
logger.info "Starting bulk load from Excel : #{file_name}"
|
203
|
-
|
217
|
+
|
204
218
|
perform_excel_load( file_name, options )
|
205
219
|
|
206
|
-
puts "Excel loading stage complete - #{loaded_count} rows added."
|
220
|
+
puts "Excel loading stage complete - #{loaded_count} rows added."
|
207
221
|
end
|
208
222
|
|
209
223
|
end
|
data/lib/loaders/loader_base.rb
CHANGED
@@ -19,70 +19,74 @@ module DataShift
|
|
19
19
|
|
20
20
|
include DataShift::Logging
|
21
21
|
include DataShift::Querying
|
22
|
-
|
22
|
+
|
23
23
|
attr_reader :headers
|
24
24
|
|
25
25
|
attr_accessor :method_mapper
|
26
26
|
|
27
|
+
# The inbound row/line number
|
28
|
+
attr_accessor :current_row_idx
|
29
|
+
|
27
30
|
attr_accessor :load_object_class, :load_object
|
28
31
|
|
29
32
|
attr_accessor :reporter
|
30
33
|
attr_accessor :populator
|
31
|
-
|
34
|
+
|
32
35
|
attr_accessor :config, :verbose
|
33
36
|
|
37
|
+
|
34
38
|
def options() return @config; end
|
35
|
-
|
39
|
+
|
36
40
|
|
37
41
|
# Setup loading
|
38
42
|
#
|
39
43
|
# Options to drive building the method dictionary for a class, enabling headers to be mapped to operators on that class.
|
40
|
-
#
|
41
|
-
# find_operators [default = true] : Populate method dictionary with operators and method details
|
42
44
|
#
|
43
45
|
# Options
|
44
|
-
#
|
45
46
|
# :reload : Force load of the method dictionary for object_class even if already loaded
|
46
47
|
# :instance_methods : Include setter/delegate style instance methods for assignment, as well as AR columns
|
47
|
-
# :verbose :
|
48
|
+
# :verbose : Verbose logging and to STDOUT
|
48
49
|
#
|
49
|
-
def initialize(object_class,
|
50
|
+
def initialize(object_class, object = nil, options = {})
|
50
51
|
@load_object_class = object_class
|
51
|
-
|
52
|
+
|
53
|
+
logger.info("Loading objects of type #{@load_object_class} (#{object}")
|
54
|
+
|
52
55
|
@populator = if(options[:populator].is_a?(String))
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
56
|
+
::Object.const_get(options[:populator]).new
|
57
|
+
elsif(options[:populator].is_a?(Class))
|
58
|
+
options[:populator].new
|
59
|
+
else
|
60
|
+
DataShift::Populator.new
|
61
|
+
end
|
62
|
+
|
60
63
|
# Gather names of all possible 'setter' methods on AR class (instance variables and associations)
|
61
|
-
if(
|
64
|
+
if( !MethodDictionary::for?(object_class) || options[:reload] )
|
62
65
|
#puts "DEBUG Building Method Dictionary for class #{object_class}"
|
63
|
-
|
66
|
+
|
64
67
|
meth_dict_opts = options.extract!(:reload, :instance_methods)
|
65
68
|
DataShift::MethodDictionary.find_operators( @load_object_class, meth_dict_opts)
|
66
|
-
|
69
|
+
|
67
70
|
# Create dictionary of data on all possible 'setter' methods which can be used to
|
68
71
|
# populate or integrate an object of type @load_object_class
|
69
72
|
DataShift::MethodDictionary.build_method_details(@load_object_class)
|
70
73
|
end
|
71
|
-
|
74
|
+
|
72
75
|
@method_mapper = DataShift::MethodMapper.new
|
73
76
|
@config = options.dup # clone can cause issues like 'can't modify frozen hash'
|
74
77
|
|
75
78
|
@verbose = @config[:verbose]
|
76
|
-
|
77
|
-
|
79
|
+
|
80
|
+
@current_row_idx = 0
|
81
|
+
|
78
82
|
@headers = []
|
79
|
-
|
83
|
+
|
80
84
|
@reporter = DataShift::Reporter.new
|
81
|
-
|
85
|
+
|
82
86
|
reset(object)
|
83
87
|
end
|
84
88
|
|
85
|
-
|
89
|
+
|
86
90
|
# Based on filename call appropriate loading function
|
87
91
|
# Currently supports :
|
88
92
|
# Excel/Open Office files saved as .xls
|
@@ -103,11 +107,11 @@ module DataShift
|
|
103
107
|
def perform_load( file_name, options = {} )
|
104
108
|
|
105
109
|
raise DataShift::BadFile, "Cannot load #{file_name} file not found." unless(File.exists?(file_name))
|
106
|
-
|
110
|
+
|
107
111
|
logger.info("Perform Load Options:\n#{options.inspect}")
|
108
|
-
|
112
|
+
|
109
113
|
ext = File.extname(file_name)
|
110
|
-
|
114
|
+
|
111
115
|
# TODO - make more modular - these methods doing too much, for example move the object creation/reset
|
112
116
|
# out of these perform... methods to make it easier to over ride that behaviour
|
113
117
|
if(ext.casecmp('.xls') == 0)
|
@@ -120,9 +124,9 @@ module DataShift
|
|
120
124
|
end
|
121
125
|
|
122
126
|
def report
|
123
|
-
@reporter.report
|
127
|
+
@reporter.report
|
124
128
|
end
|
125
|
-
|
129
|
+
|
126
130
|
# Core API
|
127
131
|
#
|
128
132
|
# Given a list of free text column names from a file,
|
@@ -148,21 +152,21 @@ module DataShift
|
|
148
152
|
#
|
149
153
|
def populate_method_mapper_from_headers( headers, options = {} )
|
150
154
|
@headers = headers
|
151
|
-
|
155
|
+
|
152
156
|
mandatory = options[:mandatory] || []
|
153
|
-
|
157
|
+
|
154
158
|
strict = (options[:strict] == true)
|
155
|
-
|
156
|
-
begin
|
159
|
+
|
160
|
+
begin
|
157
161
|
@method_mapper.map_inbound_headers_to_methods( load_object_class, @headers, options )
|
158
162
|
rescue => e
|
159
163
|
puts e.inspect, e.backtrace
|
160
164
|
logger.error("Failed to map header row to set of database operators : #{e.inspect}")
|
161
165
|
raise MappingDefinitionError, "Failed to map header row to set of database operators"
|
162
166
|
end
|
163
|
-
|
167
|
+
|
164
168
|
unless(@method_mapper.missing_methods.empty?)
|
165
|
-
|
169
|
+
logger.warn("Following headings couldn't be mapped to #{load_object_class} \n#{@method_mapper.missing_methods.inspect}")
|
166
170
|
raise MappingDefinitionError, "Missing mappings for columns : #{@method_mapper.missing_methods.join(",")}" if(strict)
|
167
171
|
end
|
168
172
|
|
@@ -170,44 +174,55 @@ module DataShift
|
|
170
174
|
@method_mapper.missing_mandatory(mandatory).each { |er| puts "ERROR: Mandatory column missing - expected column '#{er}'" }
|
171
175
|
raise MissingMandatoryError, "Mandatory columns missing - please fix and retry."
|
172
176
|
end
|
173
|
-
|
177
|
+
|
174
178
|
@method_mapper
|
175
179
|
end
|
176
180
|
|
177
181
|
|
178
|
-
#
|
179
|
-
#
|
180
|
-
def
|
181
|
-
|
182
|
-
@populator.default_values.each do |
|
183
|
-
|
184
|
-
|
182
|
+
#TODO - Move code into Populator
|
183
|
+
# Process columns with a default value specified
|
184
|
+
def process_defaults()
|
185
|
+
|
186
|
+
@populator.default_values.each do |dname, dv|
|
187
|
+
|
188
|
+
method_detail = MethodDictionary.find_method_detail( load_object_class, dname )
|
189
|
+
|
190
|
+
if(method_detail)
|
191
|
+
logger.debug "Applying default value [#{dname}] on (#{method_detail.operator})"
|
192
|
+
@populator.prepare_and_assign(method_detail, load_object, dv)
|
193
|
+
else
|
194
|
+
logger.warn "No operator found for default [#{dname}] trying basic assignment"
|
195
|
+
begin
|
196
|
+
@populator.insistent_assignment(load_object, dv, dname)
|
197
|
+
rescue
|
198
|
+
logger.error "Badly specified default - could not set #{dname}(#{dv})"
|
199
|
+
end
|
200
|
+
end
|
185
201
|
end
|
186
202
|
end
|
187
|
-
|
203
|
+
|
188
204
|
# Core API - Given a single free text column name from a file, search method mapper for
|
189
205
|
# associated operator on base object class.
|
190
206
|
#
|
191
207
|
# If suitable association found, process row data and then assign to current load_object
|
192
208
|
def find_and_process(column_name, data)
|
193
|
-
|
209
|
+
|
194
210
|
puts "WARNING: MethodDictionary empty for class #{load_object_class}" unless(MethodDictionary.for?(load_object_class))
|
195
|
-
|
211
|
+
|
196
212
|
method_detail = MethodDictionary.find_method_detail( load_object_class, column_name )
|
197
213
|
|
198
214
|
if(method_detail)
|
199
|
-
|
200
|
-
process()
|
215
|
+
process(method_detail, data)
|
201
216
|
else
|
202
217
|
puts "No matching method found for column #{column_name}"
|
203
218
|
@load_object.errors.add(:base, "No matching method found for column #{column_name}")
|
204
219
|
end
|
205
220
|
end
|
206
|
-
|
207
|
-
|
221
|
+
|
222
|
+
|
208
223
|
# Any Config under key 'LoaderBase' is merged over existing options - taking precedence.
|
209
224
|
#
|
210
|
-
# Any Config under a key equal to the full name of the Loader class (e.g DataShift::
|
225
|
+
# Any Config under a key equal to the full name of the Loader class (e.g DataShift::SpreeEcom::ImageLoader)
|
211
226
|
# is merged over existing options - taking precedence.
|
212
227
|
#
|
213
228
|
# Format :
|
@@ -217,161 +232,190 @@ module DataShift
|
|
217
232
|
#
|
218
233
|
def configure_from(yaml_file)
|
219
234
|
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
235
|
+
logger.info("Reading Datashift loader config from: #{yaml_file.inspect}")
|
236
|
+
|
237
|
+
data = YAML::load( ERB.new( IO.read(yaml_file) ).result )
|
238
|
+
|
239
|
+
logger.info("Read Datashift config: #{data.inspect}")
|
240
|
+
|
224
241
|
if(data['LoaderBase'])
|
225
242
|
@config.merge!(data['LoaderBase'])
|
226
243
|
end
|
227
|
-
|
228
|
-
if(data[self.class.name])
|
244
|
+
|
245
|
+
if(data[self.class.name])
|
229
246
|
@config.merge!(data[self.class.name])
|
230
247
|
end
|
231
|
-
|
248
|
+
|
232
249
|
@populator.configure_from(load_object_class, yaml_file)
|
233
250
|
logger.info("Loader Options : #{@config.inspect}")
|
234
251
|
end
|
235
|
-
|
236
|
-
|
237
|
-
#
|
238
|
-
#
|
239
|
-
#
|
240
|
-
#
|
241
|
-
# add any provided postfixes
|
242
|
-
def prepare_data(method_detail, value)
|
243
|
-
return @populator.prepare_data(method_detail, value)
|
244
|
-
end
|
245
|
-
|
246
|
-
# Return the find_by operator and the rest of the (row,columns) data
|
252
|
+
|
253
|
+
|
254
|
+
# Return the find_by (where) operator, if specified, otherwise use the heading operator.
|
255
|
+
# i.e where operator embedded in row ,takes precedence over operator in column heading
|
256
|
+
#
|
257
|
+
# Treat rest of the node as the value to use in the where clause e.g
|
247
258
|
# price:0.99
|
248
|
-
#
|
249
|
-
# Column headings
|
259
|
+
#
|
260
|
+
# Column headings will be used, if the row only contains data e.g
|
250
261
|
# 0.99
|
262
|
+
#
|
251
263
|
# We leave it to caller to manage any other aspects or problems in 'rest'
|
252
264
|
#
|
253
|
-
def
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
265
|
+
def get_operator_and_data(inbound_data)
|
266
|
+
|
267
|
+
where_operator, data = inbound_data.split(Delimiters::name_value_delim)
|
268
|
+
|
269
|
+
md = @populator.current_method_detail
|
270
|
+
|
259
271
|
# Find by operator embedded in row takes precedence over operator in column heading
|
260
|
-
if(
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
272
|
+
if((data.nil? || data.empty?) && md.find_by_operator)
|
273
|
+
if((where_operator.nil? || where_operator.empty?)) #colum completely empty - check for defaults
|
274
|
+
if(md.find_by_value)
|
275
|
+
data = md.find_by_value
|
276
|
+
else
|
277
|
+
data = Populator::header_default_data(md.operator)
|
278
|
+
end
|
279
|
+
else
|
280
|
+
data = where_operator
|
265
281
|
end
|
282
|
+
|
283
|
+
# row contains single entry only so take operator from header via method details
|
284
|
+
where_operator = md.find_by_operator
|
266
285
|
end
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
return
|
286
|
+
|
287
|
+
logger.debug("LoaderBase - get_operator_and_data - [#{where_operator}] - [#{data}]")
|
288
|
+
|
289
|
+
return where_operator, data
|
271
290
|
end
|
272
|
-
|
291
|
+
|
273
292
|
# Process a value string from a column.
|
274
293
|
# Assigning value(s) to correct association on @load_object.
|
275
294
|
# Method detail represents a column from a file and it's correlated AR associations.
|
276
295
|
# Value string which may contain multiple values for a collection association.
|
277
296
|
#
|
278
|
-
def process()
|
279
|
-
|
280
|
-
current_method_detail =
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
297
|
+
def process(method_detail, value)
|
298
|
+
|
299
|
+
current_method_detail = method_detail
|
300
|
+
|
301
|
+
current_value, current_attribute_hash = @populator.prepare_data(method_detail, value)
|
302
|
+
|
303
|
+
# TODO - Move ALL of this into Populator properly
|
285
304
|
if(current_method_detail.operator_for(:has_many))
|
286
305
|
|
287
306
|
if(current_method_detail.operator_class && current_value)
|
288
307
|
|
289
308
|
# there are times when we need to save early, for example before assigning to
|
290
309
|
# has_and_belongs_to associations which require the load_object has an id for the join table
|
291
|
-
|
310
|
+
|
292
311
|
save_if_new
|
293
312
|
|
294
313
|
# A single column can contain multiple associations delimited by special char
|
295
314
|
# Size:large|Colour:red,green,blue => ['Size:large', 'Colour:red,green,blue']
|
296
315
|
columns = current_value.to_s.split( Delimiters::multi_assoc_delim )
|
297
316
|
|
298
|
-
# Size:large|Colour:red,green,blue
|
317
|
+
# Size:large|Colour:red,green,blue =>
|
318
|
+
# find_by_size( 'large' )
|
319
|
+
# find_all_by_colour( ['red','green','blue'] )
|
299
320
|
|
300
321
|
columns.each do |col_str|
|
301
|
-
|
302
|
-
find_operator, col_values =
|
303
|
-
|
322
|
+
|
323
|
+
find_operator, col_values = get_operator_and_data( col_str )
|
324
|
+
|
304
325
|
raise "Cannot perform DB find by #{find_operator}. Expected format key:value" unless(find_operator && col_values)
|
305
|
-
|
326
|
+
|
306
327
|
find_by_values = col_values.split(Delimiters::multi_value_delim)
|
307
|
-
|
328
|
+
|
308
329
|
find_by_values << current_method_detail.find_by_value if(current_method_detail.find_by_value)
|
309
|
-
|
310
|
-
if(find_by_values.size > 1)
|
311
330
|
|
312
|
-
|
313
|
-
current_value = current_method_detail.operator_class.where(find_operator => find_by_values)
|
331
|
+
found_values = []
|
314
332
|
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
333
|
+
#if(find_by_values.size() == 1)
|
334
|
+
# logger.info("Find or create #{current_method_detail.operator_class} with #{find_operator} = #{find_by_values.inspect}")
|
335
|
+
# item = current_method_detail.operator_class.where(find_operator => find_by_values.first).first_or_create
|
336
|
+
#else
|
337
|
+
# logger.info("Find #{current_method_detail.operator_class} with #{find_operator} = values #{find_by_values.inspect}")
|
338
|
+
# current_method_detail.operator_class.where(find_operator => find_by_values).all
|
339
|
+
#end
|
321
340
|
|
322
|
-
|
341
|
+
operator_class = current_method_detail.operator_class
|
323
342
|
|
324
|
-
|
343
|
+
logger.info("Find #{current_method_detail.operator_class} with #{find_operator} = #{find_by_values.inspect}")
|
325
344
|
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
345
|
+
find_by_values.each do |v|
|
346
|
+
begin
|
347
|
+
found_values << operator_class.where(find_operator => v).first_or_create
|
348
|
+
rescue => e
|
349
|
+
logger.error(e.inspect)
|
350
|
+
# TODO some way to define if this is a fatal error or not ?
|
330
351
|
end
|
352
|
+
end
|
353
|
+
|
354
|
+
logger.info("Scan result #{found_values.inspect}")
|
331
355
|
|
356
|
+
unless(find_by_values.size == found_values.size)
|
357
|
+
found = found_values.collect {|f| f.send(find_operator) }
|
358
|
+
@load_object.errors.add( current_method_detail.operator, "Association with key(s) #{(find_by_values - found).inspect} NOT found")
|
359
|
+
logger.error "Association [#{current_method_detail.operator}] with key(s) #{(find_by_values - found).inspect} NOT found - Not added."
|
360
|
+
next if(found_values.empty?)
|
332
361
|
end
|
333
362
|
|
363
|
+
logger.info("Assigning #{found_values.inspect} (#{found_values.class})")
|
364
|
+
|
334
365
|
# Lookup Assoc's Model done, now add the found value(s) to load model's collection
|
335
|
-
@populator.
|
336
|
-
end
|
366
|
+
@populator.prepare_and_assign(current_method_detail, @load_object, found_values)
|
367
|
+
end # END HAS_MANY
|
337
368
|
end
|
338
|
-
# END HAS_MANY
|
339
369
|
else
|
340
370
|
# Nice n simple straight assignment to a column variable
|
341
371
|
#puts "INFO: LOADER BASE processing #{method_detail.name}"
|
342
|
-
@populator.assign(
|
372
|
+
@populator.assign(load_object)
|
343
373
|
end
|
344
374
|
end
|
345
|
-
|
346
|
-
|
375
|
+
|
376
|
+
|
347
377
|
# Loading failed. Store a failed object and if requested roll back (destroy) the current load object
|
348
378
|
# For use case where object saved early but subsequent required columns fail to process
|
349
379
|
# so the load object is invalid
|
350
|
-
|
380
|
+
|
351
381
|
def failure( object = @load_object, rollback = false)
|
352
382
|
if(object)
|
353
383
|
@reporter.add_failed_object(object)
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
384
|
+
|
385
|
+
if(rollback && object.respond_to?('destroy') && !object.new_record?)
|
386
|
+
klass = object.class
|
387
|
+
object.destroy
|
388
|
+
object = klass.new
|
389
|
+
end
|
390
|
+
end
|
391
|
+
end
|
392
|
+
|
393
|
+
def save_and_report
|
394
|
+
unless(save)
|
395
|
+
failure
|
396
|
+
logger.error "Failed to save row (#{current_row_idx}) - [#{@current_row}]"
|
397
|
+
logger.error load_object.errors.inspect if(load_object)
|
398
|
+
else
|
399
|
+
logger.info("Successfully SAVED Object with ID #{load_object.id} for Row #{@current_row}")
|
400
|
+
@reporter.add_loaded_object(@load_object)
|
401
|
+
@reporter.success_inbound_count += 1
|
358
402
|
end
|
359
403
|
end
|
360
404
|
|
361
405
|
def save
|
362
406
|
return unless( @load_object )
|
363
|
-
|
407
|
+
|
364
408
|
puts "DEBUG: SAVING #{@load_object.class} : #{@load_object.inspect}" if(verbose)
|
365
409
|
begin
|
366
410
|
return @load_object.save
|
367
411
|
rescue => e
|
368
|
-
|
369
|
-
|
370
|
-
logger.error e.backtrace
|
371
|
-
raise "Error in save whilst processing column #{@current_method_detail.name}" if(@config[:strict])
|
412
|
+
logger.error( "Save Error : #{e.inspect} on #{@load_object.class}")
|
413
|
+
logger.error(e.backtrace)
|
372
414
|
end
|
373
|
-
|
374
|
-
|
415
|
+
|
416
|
+
false
|
417
|
+
end
|
418
|
+
|
375
419
|
# Reset the loader, including database object to be populated, and load counts
|
376
420
|
#
|
377
421
|
def reset(object = nil)
|
@@ -379,7 +423,7 @@ module DataShift
|
|
379
423
|
@reporter.reset
|
380
424
|
end
|
381
425
|
|
382
|
-
|
426
|
+
|
383
427
|
def new_load_object
|
384
428
|
@load_object = @load_object_class.new
|
385
429
|
@load_object
|
@@ -408,7 +452,7 @@ module DataShift
|
|
408
452
|
def missing_mandatory_headers( mandatory_list )
|
409
453
|
[ [*mandatory_list] - @headers].flatten
|
410
454
|
end
|
411
|
-
|
455
|
+
|
412
456
|
def find_or_new( klass, condition_hash = {} )
|
413
457
|
@records[klass] = klass.find(:all, :conditions => condition_hash)
|
414
458
|
if @records[klass].any?
|
@@ -419,14 +463,14 @@ module DataShift
|
|
419
463
|
end
|
420
464
|
|
421
465
|
protected
|
422
|
-
|
466
|
+
|
423
467
|
# Take current column data and split into each association
|
424
468
|
# Supported Syntax :
|
425
469
|
# assoc_find_name:value | assoc2_find_name:value | etc
|
426
470
|
def get_each_assoc
|
427
|
-
@populator.current_value.to_s.split( Delimiters::multi_assoc_delim )
|
471
|
+
current_value = @populator.current_value.to_s.split( Delimiters::multi_assoc_delim )
|
428
472
|
end
|
429
|
-
|
473
|
+
|
430
474
|
private
|
431
475
|
|
432
476
|
# This method usually called during processing to avoid errors with associations like
|
@@ -436,14 +480,14 @@ module DataShift
|
|
436
480
|
# TODO smart ordering of columns dynamically ourselves rather than relying on incoming data order
|
437
481
|
def save_if_new
|
438
482
|
return unless(load_object.new_record?)
|
439
|
-
|
440
|
-
if(load_object.valid?)
|
483
|
+
|
484
|
+
if(load_object.valid?)
|
441
485
|
save
|
442
486
|
else
|
443
|
-
|
487
|
+
raise DataShift::SaveError.new("Cannot Save - Invalid #{load_object.class} Record - #{load_object.errors.full_messages}")
|
444
488
|
end
|
445
489
|
end
|
446
|
-
|
490
|
+
|
447
491
|
end
|
448
492
|
|
449
493
|
end
|