datashift 0.15.0 → 0.16.0
This diff compares the contents of publicly available package versions as released to their respective public registries, and is provided for informational purposes only.
- checksums.yaml +7 -0
- data/README.markdown +91 -55
- data/VERSION +1 -1
- data/datashift.gemspec +8 -23
- data/lib/applications/jexcel_file.rb +1 -2
- data/lib/datashift.rb +34 -15
- data/lib/datashift/column_packer.rb +98 -34
- data/lib/datashift/data_transforms.rb +83 -0
- data/lib/datashift/delimiters.rb +58 -10
- data/lib/datashift/excel_base.rb +123 -0
- data/lib/datashift/exceptions.rb +45 -7
- data/lib/datashift/load_object.rb +25 -0
- data/lib/datashift/mapping_service.rb +91 -0
- data/lib/datashift/method_detail.rb +40 -62
- data/lib/datashift/method_details_manager.rb +18 -2
- data/lib/datashift/method_dictionary.rb +27 -10
- data/lib/datashift/method_mapper.rb +49 -41
- data/lib/datashift/model_mapper.rb +42 -22
- data/lib/datashift/populator.rb +258 -143
- data/lib/datashift/thor_base.rb +38 -0
- data/lib/exporters/csv_exporter.rb +57 -145
- data/lib/exporters/excel_exporter.rb +73 -60
- data/lib/generators/csv_generator.rb +65 -5
- data/lib/generators/generator_base.rb +69 -3
- data/lib/generators/mapping_generator.rb +112 -0
- data/lib/helpers/core_ext/csv_file.rb +33 -0
- data/lib/loaders/csv_loader.rb +41 -39
- data/lib/loaders/excel_loader.rb +130 -116
- data/lib/loaders/loader_base.rb +190 -146
- data/lib/loaders/paperclip/attachment_loader.rb +4 -4
- data/lib/loaders/paperclip/datashift_paperclip.rb +5 -3
- data/lib/loaders/paperclip/image_loading.rb +9 -7
- data/lib/loaders/reporter.rb +17 -8
- data/lib/thor/export.thor +12 -13
- data/lib/thor/generate.thor +1 -9
- data/lib/thor/import.thor +13 -24
- data/lib/thor/mapping.thor +65 -0
- data/spec/Gemfile +13 -11
- data/spec/Gemfile.lock +98 -93
- data/spec/csv_exporter_spec.rb +104 -99
- data/spec/csv_generator_spec.rb +159 -0
- data/spec/csv_loader_spec.rb +197 -16
- data/spec/datashift_spec.rb +9 -0
- data/spec/excel_exporter_spec.rb +149 -58
- data/spec/excel_generator_spec.rb +35 -44
- data/spec/excel_loader_spec.rb +196 -178
- data/spec/excel_spec.rb +8 -5
- data/spec/loader_base_spec.rb +47 -7
- data/spec/mapping_spec.rb +117 -0
- data/spec/method_dictionary_spec.rb +24 -11
- data/spec/method_mapper_spec.rb +5 -7
- data/spec/model_mapper_spec.rb +41 -0
- data/spec/paperclip_loader_spec.rb +3 -6
- data/spec/populator_spec.rb +48 -14
- data/spec/spec_helper.rb +85 -73
- data/spec/thor_spec.rb +40 -5
- metadata +93 -86
- data/lib/applications/excel_base.rb +0 -63
data/lib/loaders/excel_loader.rb
CHANGED

@@ -11,7 +11,7 @@
 # i.e pulls data from each column and sends to object.
 #
 require 'datashift/exceptions'
-
+require 'datashift/exceptions'
 
 module DataShift
 
@@ -21,6 +21,38 @@ module DataShift
 
   module ExcelLoading
 
+    include ExcelBase
+
+    attr_accessor :excel
+
+    # Currently struggling to determine the 'end' of data in a spreadsheet
+    # this reflects if current row had any data at all
+    attr_reader :contains_data
+
+    def start_excel( file_name, options = {} )
+
+      @excel = Excel.new
+
+      excel.open(file_name)
+
+      puts "\n\n\nLoading from Excel file: #{file_name}"
+      logger.info("\nStarting Load from Excel file: #{file_name}")
+
+      sheet_number = options[:sheet_number] || 0
+
+      @sheet = excel.worksheet( sheet_number )
+
+      parse_headers(@sheet, options[:header_row])
+
+      raise MissingHeadersError, "No headers found - Check Sheet #{@sheet} is complete and Row #{header_row_index} contains headers" if(excel_headers.empty?)
+
+      # Create a method_mapper which maps list of headers into suitable calls on the Active Record class
+      # For example if model has an attribute 'price' will map columns called Price or price or PRICE etc to this attribute
+      populate_method_mapper_from_headers(excel_headers, options )
+
+      reporter.reset
+    end
+
     # Options:
     #   [:dummy] : Perform a dummy run - attempt to load everything but then roll back
     #
@@ -37,173 +69,155 @@ module DataShift
     def perform_excel_load( file_name, options = {} )
 
       raise MissingHeadersError, "Minimum row for Headers is 0 - passed #{options[:header_row]}" if(options[:header_row] && options[:header_row].to_i < 0)
-
-      @excel = Excel.new
 
-
-
-      puts "\n\n\nLoading from Excel file: #{file_name}"
+      start_excel(file_name, options)
 
-
-
-      @sheet = @excel.worksheet( sheet_number )
+      begin
+        puts "Dummy Run - Changes will be rolled back" if options[:dummy]
 
-
-      @header_row = @sheet.row(header_row_index)
+        load_object_class.transaction do
 
-
+          @sheet.each_with_index do |row, i|
 
-
+            current_row_idx = i
+            @current_row = row
 
-
-      # There is no actual max columns in Excel .. you will run out of memory though at some point
-      (0..1024).each do |column|
-        cell = @header_row[column]
-        break unless cell
-        header = "#{cell.to_s}".strip
-        break if header.empty?
-        @headers << header
-      end
+            next if(current_row_idx == header_row_index)
 
-      raise MissingHeadersError, "No headers found - Check Sheet #{@sheet} is complete and Row #{header_row_index} contains headers" if(@headers.empty?)
-
-      # Create a method_mapper which maps list of headers into suitable calls on the Active Record class
-      # For example if model has an attribute 'price' will map columns called Price, price, PRICE etc to this attribute
-      populate_method_mapper_from_headers( @headers, options )
-
-      # currently pointless num_rows rubbish i.e inaccurate!
-      #logger.info "Excel Loader processing #{@sheet.num_rows} rows"
-
-      @reporter.reset
-
-      begin
-        puts "Dummy Run - Changes will be rolled back" if options[:dummy]
-
-        load_object_class.transaction do
-
-          @sheet.each_with_index do |row, i|
-
-            @current_row = row
-
-            next if(i == header_row_index)
-
             # Excel num_rows seems to return all 'visible' rows, which appears to be greater than the actual data rows
             # (TODO - write spec to process .xls with a huge number of rows)
             #
             # This is rubbish but currently manually detect when actual data ends, this isn't very smart but
             # got no better idea than ending once we hit the first completely empty row
-            break if
-
-            logger.info "Processing Row #{
-
-            contains_data = false
-
+            break if(@current_row.nil? || @current_row.compact.empty?)
+
+            logger.info "Processing Row #{current_row_idx} : #{@current_row}"
+
+            @contains_data = false
+
             begin
-
-
-
-              #
-              #
-
-
-              # as part of this we also attempt to save early, for example before assigning to
-              # has_and_belongs_to associations which require the load_object has an id for the join table
-
-              # Iterate over method_details, working on data out of associated Excel column
-              @method_mapper.method_details.each do |method_detail|
-
-                next unless method_detail # TODO populate unmapped with a real MethodDetail that is 'null' and create is_nil
-
-                logger.info "Processing Column #{method_detail.column_index}"
-
-                value = @current_row[method_detail.column_index]
-
-                contains_data = true unless(value.nil? || value.to_s.empty?)
-
-                prepare_data(method_detail, value)
-
-                process()
-              end
-
+
+              process_excel_row(row)
+
+              # This is rubbish but currently have to manually detect when actual data ends,
+              # no other way to detect when we hit the first completely empty row
+              break unless(contains_data == true)
+
             rescue => e
-
-
-
-
-              if(verbose)
-                puts "Failed to process row [#{i}] (#{@current_row})"
-                puts e.inspect, e.backtrace
-              end
-
-              logger.error "Failed to process row [#{i}] (#{@current_row})"
-              logger.error e.backtrace
-
-              # don't forget to reset the load object
+              process_excel_failure(e, true)
+
+              # don't forget to reset the load object
              new_load_object
              next
            end
-
+
            break unless(contains_data == true)
 
            # currently here as we can only identify the end of a speadsheet by first empty row
            @reporter.processed_object_count += 1
-
+
            # TODO - make optional - all or nothing or carry on and dump out the exception list at end
-
-
-
-              logger.error "Failed to save row [#{@current_row}]"
-              logger.error load_object.errors.inspect if(load_object)
-            else
-              logger.info "Row #{@current_row} succesfully SAVED : ID #{load_object.id}"
-              @reporter.add_loaded_object(@load_object)
-            end
-
+
+            save_and_report
+
            # don't forget to reset the object or we'll update rather than create
            new_load_object
 
-          end
-
+          end # all rows processed
+
          if(options[:dummy])
            puts "Excel loading stage complete - Dummy run so Rolling Back."
            raise ActiveRecord::Rollback # Don't actually create/upload to DB if we are doing dummy run
          end
-
+
        end # TRANSACTION N.B ActiveRecord::Rollback does not propagate outside of the containing transaction block
-
-      rescue => e
+
+      rescue => e
        puts "ERROR: Excel loading failed : #{e.inspect}"
        raise e
-      ensure
+      ensure
        report
      end
-
+
    end
-
+
+    def process_excel_failure( e, delete_object = true)
+      failure(@current_row, delete_object)
+
+      if(verbose)
+        puts "perform_excel_load failed in row [#{current_row_idx}] #{@current_row} - #{e.message} :"
+        puts e.backtrace
+      end
+
+      logger.error "perform_excel_load failed in row [#{current_row_idx}] #{@current_row} - #{e.message} :"
+      logger.error e.backtrace.join("\n")
+    end
+
+
    def value_at(row, column)
      @excel[row, column]
    end
-
+
+    def process_excel_row(row)
+
+      # First assign any default values for columns
+      process_defaults
+
+      # TODO - Smart sorting of column processing order ....
+      # Does not currently ensure mandatory columns (for valid?) processed first but model needs saving
+      # before associations can be processed so user should ensure mandatory columns are prior to associations
+
+      # as part of this we also attempt to save early, for example before assigning to
+      # has_and_belongs_to associations which require the load_object has an id for the join table
+
+      # Iterate over method_details, working on data out of associated Excel column
+      @method_mapper.method_details.each_with_index do |method_detail, i|
+
+        unless method_detail
+          logger.warn("No method_detail found for column (#{i})")
+          next # TODO populate unmapped with a real MethodDetail that is 'null' and create is_nil
+        end
+
+        logger.info "Processing Column #{method_detail.column_index} (#{method_detail.operator})"
+
+        value = row[method_detail.column_index]
+
+        @contains_data = true unless(value.nil? || value.to_s.empty?)
+
+        process(method_detail, value)
+      end
+
+    end
+
  end
 
 
  class ExcelLoader < LoaderBase
 
    include ExcelLoading
-
-
-
+
+    # Setup loading
+    #
+    # Options to drive building the method dictionary for a class, enabling headers to be mapped to operators on that class.
+    #
+    # Options
+    #   :reload           : Force load of the method dictionary for object_class even if already loaded
+    #   :instance_methods : Include setter/delegate style instance methods for assignment, as well as AR columns
+    #   :verbose          : Verbose logging and to STDOUT
+    #
+    def initialize(klass, object = nil, options = {})
+      super( klass, object, options )
      raise "Cannot load - failed to create a #{klass}" unless @load_object
    end
 
 
    def perform_load( file_name, options = {} )
-
+
      logger.info "Starting bulk load from Excel : #{file_name}"
-
+
      perform_excel_load( file_name, options )
 
-      puts "Excel loading stage complete - #{loaded_count} rows added."
+      puts "Excel loading stage complete - #{loaded_count} rows added."
    end
 
  end
data/lib/loaders/loader_base.rb
CHANGED

@@ -19,70 +19,74 @@ module DataShift
 
    include DataShift::Logging
    include DataShift::Querying
-
+
    attr_reader :headers
 
    attr_accessor :method_mapper
 
+    # The inbound row/line number
+    attr_accessor :current_row_idx
+
    attr_accessor :load_object_class, :load_object
 
    attr_accessor :reporter
    attr_accessor :populator
-
+
    attr_accessor :config, :verbose
 
+
    def options() return @config; end
-
+
 
    # Setup loading
    #
    # Options to drive building the method dictionary for a class, enabling headers to be mapped to operators on that class.
-    #
-    # find_operators [default = true] : Populate method dictionary with operators and method details
    #
    # Options
-    #
    #   :reload           : Force load of the method dictionary for object_class even if already loaded
    #   :instance_methods : Include setter/delegate style instance methods for assignment, as well as AR columns
-    #   :verbose :
+    #   :verbose          : Verbose logging and to STDOUT
    #
-    def initialize(object_class,
+    def initialize(object_class, object = nil, options = {})
      @load_object_class = object_class
-
+
+      logger.info("Loading objects of type #{@load_object_class} (#{object}")
+
      @populator = if(options[:populator].is_a?(String))
-
-
-
-
-
-
-
+                     ::Object.const_get(options[:populator]).new
+                   elsif(options[:populator].is_a?(Class))
+                     options[:populator].new
+                   else
+                     DataShift::Populator.new
+                   end
+
      # Gather names of all possible 'setter' methods on AR class (instance variables and associations)
-      if(
+      if( !MethodDictionary::for?(object_class) || options[:reload] )
        #puts "DEBUG Building Method Dictionary for class #{object_class}"
-
+
        meth_dict_opts = options.extract!(:reload, :instance_methods)
        DataShift::MethodDictionary.find_operators( @load_object_class, meth_dict_opts)
-
+
        # Create dictionary of data on all possible 'setter' methods which can be used to
        # populate or integrate an object of type @load_object_class
        DataShift::MethodDictionary.build_method_details(@load_object_class)
      end
-
+
      @method_mapper = DataShift::MethodMapper.new
      @config = options.dup # clone can cause issues like 'can't modify frozen hash'
 
      @verbose = @config[:verbose]
-
-
+
+      @current_row_idx = 0
+
      @headers = []
-
+
      @reporter = DataShift::Reporter.new
-
+
      reset(object)
    end
 
-
+
    # Based on filename call appropriate loading function
    # Currently supports :
    #   Excel/Open Office files saved as .xls
@@ -103,11 +107,11 @@ module DataShift
    def perform_load( file_name, options = {} )
 
      raise DataShift::BadFile, "Cannot load #{file_name} file not found." unless(File.exists?(file_name))
-
+
      logger.info("Perform Load Options:\n#{options.inspect}")
-
+
      ext = File.extname(file_name)
-
+
      # TODO - make more modular - these methods doing too much, for example move the object creation/reset
      # out of these perform... methods to make it easier to over ride that behaviour
      if(ext.casecmp('.xls') == 0)
@@ -120,9 +124,9 @@ module DataShift
    end
 
    def report
-      @reporter.report
+      @reporter.report
    end
-
+
    # Core API
    #
    # Given a list of free text column names from a file,
@@ -148,21 +152,21 @@ module DataShift
    #
    def populate_method_mapper_from_headers( headers, options = {} )
      @headers = headers
-
+
      mandatory = options[:mandatory] || []
-
+
      strict = (options[:strict] == true)
-
-      begin
+
+      begin
        @method_mapper.map_inbound_headers_to_methods( load_object_class, @headers, options )
      rescue => e
        puts e.inspect, e.backtrace
        logger.error("Failed to map header row to set of database operators : #{e.inspect}")
        raise MappingDefinitionError, "Failed to map header row to set of database operators"
      end
-
+
      unless(@method_mapper.missing_methods.empty?)
-
+        logger.warn("Following headings couldn't be mapped to #{load_object_class} \n#{@method_mapper.missing_methods.inspect}")
        raise MappingDefinitionError, "Missing mappings for columns : #{@method_mapper.missing_methods.join(",")}" if(strict)
      end
 
@@ -170,44 +174,55 @@ module DataShift
      @method_mapper.missing_mandatory(mandatory).each { |er| puts "ERROR: Mandatory column missing - expected column '#{er}'" }
      raise MissingMandatoryError, "Mandatory columns missing - please fix and retry."
    end
-
+
    @method_mapper
    end
 
 
-    #
-    #
-    def
-
-      @populator.default_values.each do |
-
-
+    #TODO - Move code into Populator
+    # Process columns with a default value specified
+    def process_defaults()
+
+      @populator.default_values.each do |dname, dv|
+
+        method_detail = MethodDictionary.find_method_detail( load_object_class, dname )
+
+        if(method_detail)
+          logger.debug "Applying default value [#{dname}] on (#{method_detail.operator})"
+          @populator.prepare_and_assign(method_detail, load_object, dv)
+        else
+          logger.warn "No operator found for default [#{dname}] trying basic assignment"
+          begin
+            @populator.insistent_assignment(load_object, dv, dname)
+          rescue
+            logger.error "Badly specified default - could not set #{dname}(#{dv})"
+          end
+        end
      end
    end
-
+
    # Core API - Given a single free text column name from a file, search method mapper for
    # associated operator on base object class.
    #
    # If suitable association found, process row data and then assign to current load_object
    def find_and_process(column_name, data)
-
+
      puts "WARNING: MethodDictionary empty for class #{load_object_class}" unless(MethodDictionary.for?(load_object_class))
-
+
      method_detail = MethodDictionary.find_method_detail( load_object_class, column_name )
 
      if(method_detail)
-
-        process()
+        process(method_detail, data)
      else
        puts "No matching method found for column #{column_name}"
        @load_object.errors.add(:base, "No matching method found for column #{column_name}")
      end
    end
-
-
+
+
    # Any Config under key 'LoaderBase' is merged over existing options - taking precedence.
    #
-    # Any Config under a key equal to the full name of the Loader class (e.g DataShift::
+    # Any Config under a key equal to the full name of the Loader class (e.g DataShift::SpreeEcom::ImageLoader)
    # is merged over existing options - taking precedence.
    #
    # Format :
@@ -217,161 +232,190 @@ module DataShift
    #
    def configure_from(yaml_file)
 
-
-
-
-
+      logger.info("Reading Datashift loader config from: #{yaml_file.inspect}")
+
+      data = YAML::load( ERB.new( IO.read(yaml_file) ).result )
+
+      logger.info("Read Datashift config: #{data.inspect}")
+
      if(data['LoaderBase'])
        @config.merge!(data['LoaderBase'])
      end
-
-      if(data[self.class.name])
+
+      if(data[self.class.name])
        @config.merge!(data[self.class.name])
      end
-
+
      @populator.configure_from(load_object_class, yaml_file)
      logger.info("Loader Options : #{@config.inspect}")
    end
-
-
-    #
-    #
-    #
-    #
-    # add any provided postfixes
-    def prepare_data(method_detail, value)
-      return @populator.prepare_data(method_detail, value)
-    end
-
-    # Return the find_by operator and the rest of the (row,columns) data
+
+
+    # Return the find_by (where) operator, if specified, otherwise use the heading operator.
+    # i.e where operator embedded in row ,takes precedence over operator in column heading
+    #
+    # Treat rest of the node as the value to use in the where clause e.g
    #   price:0.99
-    #
-    # Column headings
+    #
+    # Column headings will be used, if the row only contains data e.g
    #   0.99
+    #
    # We leave it to caller to manage any other aspects or problems in 'rest'
    #
-    def
-
-
-
-
-
+    def get_operator_and_data(inbound_data)
+
+      where_operator, data = inbound_data.split(Delimiters::name_value_delim)
+
+      md = @populator.current_method_detail
+
      # Find by operator embedded in row takes precedence over operator in column heading
-      if(
-
-
-
-
+      if((data.nil? || data.empty?) && md.find_by_operator)
+        if((where_operator.nil? || where_operator.empty?)) #colum completely empty - check for defaults
+          if(md.find_by_value)
+            data = md.find_by_value
+          else
+            data = Populator::header_default_data(md.operator)
+          end
+        else
+          data = where_operator
        end
+
+        # row contains single entry only so take operator from header via method details
+        where_operator = md.find_by_operator
      end
-
-
-
-      return
+
+      logger.debug("LoaderBase - get_operator_and_data - [#{where_operator}] - [#{data}]")
+
+      return where_operator, data
    end
-
+
    # Process a value string from a column.
    # Assigning value(s) to correct association on @load_object.
    # Method detail represents a column from a file and it's correlated AR associations.
    # Value string which may contain multiple values for a collection association.
    #
-    def process()
-
-      current_method_detail =
-
-
-
-
+    def process(method_detail, value)
+
+      current_method_detail = method_detail
+
+      current_value, current_attribute_hash = @populator.prepare_data(method_detail, value)
+
+      # TODO - Move ALL of this into Populator properly
      if(current_method_detail.operator_for(:has_many))
 
        if(current_method_detail.operator_class && current_value)
 
          # there are times when we need to save early, for example before assigning to
          # has_and_belongs_to associations which require the load_object has an id for the join table
-
+
          save_if_new
 
          # A single column can contain multiple associations delimited by special char
          # Size:large|Colour:red,green,blue => ['Size:large', 'Colour:red,green,blue']
          columns = current_value.to_s.split( Delimiters::multi_assoc_delim )
 
-          # Size:large|Colour:red,green,blue
+          # Size:large|Colour:red,green,blue =>
+          #   find_by_size( 'large' )
+          #   find_all_by_colour( ['red','green','blue'] )
 
          columns.each do |col_str|
-
-            find_operator, col_values =
-
+
+            find_operator, col_values = get_operator_and_data( col_str )
+
            raise "Cannot perform DB find by #{find_operator}. Expected format key:value" unless(find_operator && col_values)
-
+
            find_by_values = col_values.split(Delimiters::multi_value_delim)
-
+
            find_by_values << current_method_detail.find_by_value if(current_method_detail.find_by_value)
-
-            if(find_by_values.size > 1)
 
-
-              current_value = current_method_detail.operator_class.where(find_operator => find_by_values)
+            found_values = []
 
-
-
-
-
-
+            #if(find_by_values.size() == 1)
+            #  logger.info("Find or create #{current_method_detail.operator_class} with #{find_operator} = #{find_by_values.inspect}")
+            #  item = current_method_detail.operator_class.where(find_operator => find_by_values.first).first_or_create
+            #else
+            #  logger.info("Find #{current_method_detail.operator_class} with #{find_operator} = values #{find_by_values.inspect}")
+            #  current_method_detail.operator_class.where(find_operator => find_by_values).all
+            #end
 
-
+            operator_class = current_method_detail.operator_class
 
-
+            logger.info("Find #{current_method_detail.operator_class} with #{find_operator} = #{find_by_values.inspect}")
 
-
-
-
-
+            find_by_values.each do |v|
+              begin
+                found_values << operator_class.where(find_operator => v).first_or_create
+              rescue => e
+                logger.error(e.inspect)
+                # TODO some way to define if this is a fatal error or not ?
              end
+            end
+
+            logger.info("Scan result #{found_values.inspect}")
 
+            unless(find_by_values.size == found_values.size)
+              found = found_values.collect {|f| f.send(find_operator) }
+              @load_object.errors.add( current_method_detail.operator, "Association with key(s) #{(find_by_values - found).inspect} NOT found")
+              logger.error "Association [#{current_method_detail.operator}] with key(s) #{(find_by_values - found).inspect} NOT found - Not added."
+              next if(found_values.empty?)
            end
 
+            logger.info("Assigning #{found_values.inspect} (#{found_values.class})")
+
            # Lookup Assoc's Model done, now add the found value(s) to load model's collection
-            @populator.
-          end
+            @populator.prepare_and_assign(current_method_detail, @load_object, found_values)
+          end # END HAS_MANY
        end
-        # END HAS_MANY
      else
        # Nice n simple straight assignment to a column variable
        #puts "INFO: LOADER BASE processing #{method_detail.name}"
-        @populator.assign(
+        @populator.assign(load_object)
      end
    end
-
-
+
+
    # Loading failed. Store a failed object and if requested roll back (destroy) the current load object
    # For use case where object saved early but subsequent required columns fail to process
    # so the load object is invalid
-
+
    def failure( object = @load_object, rollback = false)
      if(object)
        @reporter.add_failed_object(object)
-
-
-
-
+
+        if(rollback && object.respond_to?('destroy') && !object.new_record?)
+          klass = object.class
+          object.destroy
+          object = klass.new
+        end
+      end
+    end
+
+    def save_and_report
+      unless(save)
+        failure
+        logger.error "Failed to save row (#{current_row_idx}) - [#{@current_row}]"
+        logger.error load_object.errors.inspect if(load_object)
+      else
+        logger.info("Successfully SAVED Object with ID #{load_object.id} for Row #{@current_row}")
+        @reporter.add_loaded_object(@load_object)
+        @reporter.success_inbound_count += 1
      end
    end
 
    def save
      return unless( @load_object )
-
+
      puts "DEBUG: SAVING #{@load_object.class} : #{@load_object.inspect}" if(verbose)
      begin
        return @load_object.save
      rescue => e
-
-
-        logger.error e.backtrace
-        raise "Error in save whilst processing column #{@current_method_detail.name}" if(@config[:strict])
+        logger.error( "Save Error : #{e.inspect} on #{@load_object.class}")
+        logger.error(e.backtrace)
      end
-
-
+
+      false
+    end
+
    # Reset the loader, including database object to be populated, and load counts
    #
    def reset(object = nil)
@@ -379,7 +423,7 @@ module DataShift
      @reporter.reset
    end
 
-
+
    def new_load_object
      @load_object = @load_object_class.new
      @load_object
@@ -408,7 +452,7 @@ module DataShift
    def missing_mandatory_headers( mandatory_list )
      [ [*mandatory_list] - @headers].flatten
    end
-
+
    def find_or_new( klass, condition_hash = {} )
      @records[klass] = klass.find(:all, :conditions => condition_hash)
      if @records[klass].any?
@@ -419,14 +463,14 @@ module DataShift
    end
 
    protected
-
+
    # Take current column data and split into each association
    # Supported Syntax :
    #   assoc_find_name:value | assoc2_find_name:value | etc
    def get_each_assoc
-      @populator.current_value.to_s.split( Delimiters::multi_assoc_delim )
+      current_value = @populator.current_value.to_s.split( Delimiters::multi_assoc_delim )
    end
-
+
    private
 
    # This method usually called during processing to avoid errors with associations like
@@ -436,14 +480,14 @@ module DataShift
    # TODO smart ordering of columns dynamically ourselves rather than relying on incoming data order
    def save_if_new
      return unless(load_object.new_record?)
-
-      if(load_object.valid?)
+
+      if(load_object.valid?)
        save
      else
-
+        raise DataShift::SaveError.new("Cannot Save - Invalid #{load_object.class} Record - #{load_object.errors.full_messages}")
      end
    end
-
+
  end
 
 end