datashift 0.15.0 → 0.16.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (58) hide show
  1. checksums.yaml +7 -0
  2. data/README.markdown +91 -55
  3. data/VERSION +1 -1
  4. data/datashift.gemspec +8 -23
  5. data/lib/applications/jexcel_file.rb +1 -2
  6. data/lib/datashift.rb +34 -15
  7. data/lib/datashift/column_packer.rb +98 -34
  8. data/lib/datashift/data_transforms.rb +83 -0
  9. data/lib/datashift/delimiters.rb +58 -10
  10. data/lib/datashift/excel_base.rb +123 -0
  11. data/lib/datashift/exceptions.rb +45 -7
  12. data/lib/datashift/load_object.rb +25 -0
  13. data/lib/datashift/mapping_service.rb +91 -0
  14. data/lib/datashift/method_detail.rb +40 -62
  15. data/lib/datashift/method_details_manager.rb +18 -2
  16. data/lib/datashift/method_dictionary.rb +27 -10
  17. data/lib/datashift/method_mapper.rb +49 -41
  18. data/lib/datashift/model_mapper.rb +42 -22
  19. data/lib/datashift/populator.rb +258 -143
  20. data/lib/datashift/thor_base.rb +38 -0
  21. data/lib/exporters/csv_exporter.rb +57 -145
  22. data/lib/exporters/excel_exporter.rb +73 -60
  23. data/lib/generators/csv_generator.rb +65 -5
  24. data/lib/generators/generator_base.rb +69 -3
  25. data/lib/generators/mapping_generator.rb +112 -0
  26. data/lib/helpers/core_ext/csv_file.rb +33 -0
  27. data/lib/loaders/csv_loader.rb +41 -39
  28. data/lib/loaders/excel_loader.rb +130 -116
  29. data/lib/loaders/loader_base.rb +190 -146
  30. data/lib/loaders/paperclip/attachment_loader.rb +4 -4
  31. data/lib/loaders/paperclip/datashift_paperclip.rb +5 -3
  32. data/lib/loaders/paperclip/image_loading.rb +9 -7
  33. data/lib/loaders/reporter.rb +17 -8
  34. data/lib/thor/export.thor +12 -13
  35. data/lib/thor/generate.thor +1 -9
  36. data/lib/thor/import.thor +13 -24
  37. data/lib/thor/mapping.thor +65 -0
  38. data/spec/Gemfile +13 -11
  39. data/spec/Gemfile.lock +98 -93
  40. data/spec/csv_exporter_spec.rb +104 -99
  41. data/spec/csv_generator_spec.rb +159 -0
  42. data/spec/csv_loader_spec.rb +197 -16
  43. data/spec/datashift_spec.rb +9 -0
  44. data/spec/excel_exporter_spec.rb +149 -58
  45. data/spec/excel_generator_spec.rb +35 -44
  46. data/spec/excel_loader_spec.rb +196 -178
  47. data/spec/excel_spec.rb +8 -5
  48. data/spec/loader_base_spec.rb +47 -7
  49. data/spec/mapping_spec.rb +117 -0
  50. data/spec/method_dictionary_spec.rb +24 -11
  51. data/spec/method_mapper_spec.rb +5 -7
  52. data/spec/model_mapper_spec.rb +41 -0
  53. data/spec/paperclip_loader_spec.rb +3 -6
  54. data/spec/populator_spec.rb +48 -14
  55. data/spec/spec_helper.rb +85 -73
  56. data/spec/thor_spec.rb +40 -5
  57. metadata +93 -86
  58. data/lib/applications/excel_base.rb +0 -63
@@ -11,7 +11,7 @@
11
11
  # i.e pulls data from each column and sends to object.
12
12
  #
13
13
  require 'datashift/exceptions'
14
-
14
+ require 'datashift/exceptions'
15
15
 
16
16
  module DataShift
17
17
 
@@ -21,6 +21,38 @@ module DataShift
21
21
 
22
22
  module ExcelLoading
23
23
 
24
+ include ExcelBase
25
+
26
+ attr_accessor :excel
27
+
28
+ # Currently struggling to determine the 'end' of data in a spreadsheet
29
+ # this reflects if current row had any data at all
30
+ attr_reader :contains_data
31
+
32
+ def start_excel( file_name, options = {} )
33
+
34
+ @excel = Excel.new
35
+
36
+ excel.open(file_name)
37
+
38
+ puts "\n\n\nLoading from Excel file: #{file_name}"
39
+ logger.info("\nStarting Load from Excel file: #{file_name}")
40
+
41
+ sheet_number = options[:sheet_number] || 0
42
+
43
+ @sheet = excel.worksheet( sheet_number )
44
+
45
+ parse_headers(@sheet, options[:header_row])
46
+
47
+ raise MissingHeadersError, "No headers found - Check Sheet #{@sheet} is complete and Row #{header_row_index} contains headers" if(excel_headers.empty?)
48
+
49
+ # Create a method_mapper which maps list of headers into suitable calls on the Active Record class
50
+ # For example if model has an attribute 'price' will map columns called Price or price or PRICE etc to this attribute
51
+ populate_method_mapper_from_headers(excel_headers, options )
52
+
53
+ reporter.reset
54
+ end
55
+
24
56
  # Options:
25
57
  # [:dummy] : Perform a dummy run - attempt to load everything but then roll back
26
58
  #
@@ -37,173 +69,155 @@ module DataShift
37
69
  def perform_excel_load( file_name, options = {} )
38
70
 
39
71
  raise MissingHeadersError, "Minimum row for Headers is 0 - passed #{options[:header_row]}" if(options[:header_row] && options[:header_row].to_i < 0)
40
-
41
- @excel = Excel.new
42
72
 
43
- @excel.open(file_name)
44
-
45
- puts "\n\n\nLoading from Excel file: #{file_name}"
73
+ start_excel(file_name, options)
46
74
 
47
- sheet_number = options[:sheet_number] || 0
48
-
49
- @sheet = @excel.worksheet( sheet_number )
75
+ begin
76
+ puts "Dummy Run - Changes will be rolled back" if options[:dummy]
50
77
 
51
- header_row_index = options[:header_row] || 0
52
- @header_row = @sheet.row(header_row_index)
78
+ load_object_class.transaction do
53
79
 
54
- raise MissingHeadersError, "No headers found - Check Sheet #{@sheet} is complete and Row #{header_row_index} contains headers" unless(@header_row)
80
+ @sheet.each_with_index do |row, i|
55
81
 
56
- @headers = []
82
+ current_row_idx = i
83
+ @current_row = row
57
84
 
58
- # TODO - make more robust - currently end on first empty column
59
- # There is no actual max columns in Excel .. you will run out of memory though at some point
60
- (0..1024).each do |column|
61
- cell = @header_row[column]
62
- break unless cell
63
- header = "#{cell.to_s}".strip
64
- break if header.empty?
65
- @headers << header
66
- end
85
+ next if(current_row_idx == header_row_index)
67
86
 
68
- raise MissingHeadersError, "No headers found - Check Sheet #{@sheet} is complete and Row #{header_row_index} contains headers" if(@headers.empty?)
69
-
70
- # Create a method_mapper which maps list of headers into suitable calls on the Active Record class
71
- # For example if model has an attribute 'price' will map columns called Price, price, PRICE etc to this attribute
72
- populate_method_mapper_from_headers( @headers, options )
73
-
74
- # currently pointless num_rows rubbish i.e inaccurate!
75
- #logger.info "Excel Loader processing #{@sheet.num_rows} rows"
76
-
77
- @reporter.reset
78
-
79
- begin
80
- puts "Dummy Run - Changes will be rolled back" if options[:dummy]
81
-
82
- load_object_class.transaction do
83
-
84
- @sheet.each_with_index do |row, i|
85
-
86
- @current_row = row
87
-
88
- next if(i == header_row_index)
89
-
90
87
  # Excel num_rows seems to return all 'visible' rows, which appears to be greater than the actual data rows
91
88
  # (TODO - write spec to process .xls with a huge number of rows)
92
89
  #
93
90
  # This is rubbish but currently manually detect when actual data ends, this isn't very smart but
94
91
  # got no better idea than ending once we hit the first completely empty row
95
- break if @current_row.nil?
96
-
97
- logger.info "Processing Row #{i} : #{@current_row}"
98
-
99
- contains_data = false
100
-
92
+ break if(@current_row.nil? || @current_row.compact.empty?)
93
+
94
+ logger.info "Processing Row #{current_row_idx} : #{@current_row}"
95
+
96
+ @contains_data = false
97
+
101
98
  begin
102
- # First assign any default values for columns not included in parsed_file
103
- process_missing_columns_with_defaults
104
-
105
- # TODO - Smart sorting of column processing order ....
106
- # Does not currently ensure mandatory columns (for valid?) processed first but model needs saving
107
- # before associations can be processed so user should ensure mandatory columns are prior to associations
108
-
109
- # as part of this we also attempt to save early, for example before assigning to
110
- # has_and_belongs_to associations which require the load_object has an id for the join table
111
-
112
- # Iterate over method_details, working on data out of associated Excel column
113
- @method_mapper.method_details.each do |method_detail|
114
-
115
- next unless method_detail # TODO populate unmapped with a real MethodDetail that is 'null' and create is_nil
116
-
117
- logger.info "Processing Column #{method_detail.column_index}"
118
-
119
- value = @current_row[method_detail.column_index]
120
-
121
- contains_data = true unless(value.nil? || value.to_s.empty?)
122
-
123
- prepare_data(method_detail, value)
124
-
125
- process()
126
- end
127
-
99
+
100
+ process_excel_row(row)
101
+
102
+ # This is rubbish but currently have to manually detect when actual data ends,
103
+ # no other way to detect when we hit the first completely empty row
104
+ break unless(contains_data == true)
105
+
128
106
  rescue => e
129
- @reporter.processed_object_count += 1
130
-
131
- failure(@current_row, true)
132
-
133
- if(verbose)
134
- puts "Failed to process row [#{i}] (#{@current_row})"
135
- puts e.inspect, e.backtrace
136
- end
137
-
138
- logger.error "Failed to process row [#{i}] (#{@current_row})"
139
- logger.error e.backtrace
140
-
141
- # don't forget to reset the load object
107
+ process_excel_failure(e, true)
108
+
109
+ # don't forget to reset the load object
142
110
  new_load_object
143
111
  next
144
112
  end
145
-
113
+
146
114
  break unless(contains_data == true)
147
115
 
148
116
  # currently here as we can only identify the end of a spreadsheet by first empty row
149
117
  @reporter.processed_object_count += 1
150
-
118
+
151
119
  # TODO - make optional - all or nothing or carry on and dump out the exception list at end
152
-
153
- unless(save)
154
- failure
155
- logger.error "Failed to save row [#{@current_row}]"
156
- logger.error load_object.errors.inspect if(load_object)
157
- else
158
- logger.info "Row #{@current_row} succesfully SAVED : ID #{load_object.id}"
159
- @reporter.add_loaded_object(@load_object)
160
- end
161
-
120
+
121
+ save_and_report
122
+
162
123
  # don't forget to reset the object or we'll update rather than create
163
124
  new_load_object
164
125
 
165
- end
166
-
126
+ end # all rows processed
127
+
167
128
  if(options[:dummy])
168
129
  puts "Excel loading stage complete - Dummy run so Rolling Back."
169
130
  raise ActiveRecord::Rollback # Don't actually create/upload to DB if we are doing dummy run
170
131
  end
171
-
132
+
172
133
  end # TRANSACTION N.B ActiveRecord::Rollback does not propagate outside of the containing transaction block
173
-
174
- rescue => e
134
+
135
+ rescue => e
175
136
  puts "ERROR: Excel loading failed : #{e.inspect}"
176
137
  raise e
177
- ensure
138
+ ensure
178
139
  report
179
140
  end
180
-
141
+
181
142
  end
182
-
143
+
144
+ def process_excel_failure( e, delete_object = true)
145
+ failure(@current_row, delete_object)
146
+
147
+ if(verbose)
148
+ puts "perform_excel_load failed in row [#{current_row_idx}] #{@current_row} - #{e.message} :"
149
+ puts e.backtrace
150
+ end
151
+
152
+ logger.error "perform_excel_load failed in row [#{current_row_idx}] #{@current_row} - #{e.message} :"
153
+ logger.error e.backtrace.join("\n")
154
+ end
155
+
156
+
183
157
  def value_at(row, column)
184
158
  @excel[row, column]
185
159
  end
186
-
160
+
161
+ def process_excel_row(row)
162
+
163
+ # First assign any default values for columns
164
+ process_defaults
165
+
166
+ # TODO - Smart sorting of column processing order ....
167
+ # Does not currently ensure mandatory columns (for valid?) processed first but model needs saving
168
+ # before associations can be processed so user should ensure mandatory columns are prior to associations
169
+
170
+ # as part of this we also attempt to save early, for example before assigning to
171
+ # has_and_belongs_to associations which require the load_object has an id for the join table
172
+
173
+ # Iterate over method_details, working on data out of associated Excel column
174
+ @method_mapper.method_details.each_with_index do |method_detail, i|
175
+
176
+ unless method_detail
177
+ logger.warn("No method_detail found for column (#{i})")
178
+ next # TODO populate unmapped with a real MethodDetail that is 'null' and create is_nil
179
+ end
180
+
181
+ logger.info "Processing Column #{method_detail.column_index} (#{method_detail.operator})"
182
+
183
+ value = row[method_detail.column_index]
184
+
185
+ @contains_data = true unless(value.nil? || value.to_s.empty?)
186
+
187
+ process(method_detail, value)
188
+ end
189
+
190
+ end
191
+
187
192
  end
188
193
 
189
194
 
190
195
  class ExcelLoader < LoaderBase
191
196
 
192
197
  include ExcelLoading
193
-
194
- def initialize(klass, find_operators = true, object = nil, options = {})
195
- super( klass, find_operators, object, options )
198
+
199
+ # Setup loading
200
+ #
201
+ # Options to drive building the method dictionary for a class, enabling headers to be mapped to operators on that class.
202
+ #
203
+ # Options
204
+ # :reload : Force load of the method dictionary for object_class even if already loaded
205
+ # :instance_methods : Include setter/delegate style instance methods for assignment, as well as AR columns
206
+ # :verbose : Verbose logging and to STDOUT
207
+ #
208
+ def initialize(klass, object = nil, options = {})
209
+ super( klass, object, options )
196
210
  raise "Cannot load - failed to create a #{klass}" unless @load_object
197
211
  end
198
212
 
199
213
 
200
214
  def perform_load( file_name, options = {} )
201
-
215
+
202
216
  logger.info "Starting bulk load from Excel : #{file_name}"
203
-
217
+
204
218
  perform_excel_load( file_name, options )
205
219
 
206
- puts "Excel loading stage complete - #{loaded_count} rows added."
220
+ puts "Excel loading stage complete - #{loaded_count} rows added."
207
221
  end
208
222
 
209
223
  end
@@ -19,70 +19,74 @@ module DataShift
19
19
 
20
20
  include DataShift::Logging
21
21
  include DataShift::Querying
22
-
22
+
23
23
  attr_reader :headers
24
24
 
25
25
  attr_accessor :method_mapper
26
26
 
27
+ # The inbound row/line number
28
+ attr_accessor :current_row_idx
29
+
27
30
  attr_accessor :load_object_class, :load_object
28
31
 
29
32
  attr_accessor :reporter
30
33
  attr_accessor :populator
31
-
34
+
32
35
  attr_accessor :config, :verbose
33
36
 
37
+
34
38
  def options() return @config; end
35
-
39
+
36
40
 
37
41
  # Setup loading
38
42
  #
39
43
  # Options to drive building the method dictionary for a class, enabling headers to be mapped to operators on that class.
40
- #
41
- # find_operators [default = true] : Populate method dictionary with operators and method details
42
44
  #
43
45
  # Options
44
- #
45
46
  # :reload : Force load of the method dictionary for object_class even if already loaded
46
47
  # :instance_methods : Include setter/delegate style instance methods for assignment, as well as AR columns
47
- # :verbose : Verboise logging and to STDOUT
48
+ # :verbose : Verbose logging and to STDOUT
48
49
  #
49
- def initialize(object_class, find_operators = true, object = nil, options = {})
50
+ def initialize(object_class, object = nil, options = {})
50
51
  @load_object_class = object_class
51
-
52
+
53
+ logger.info("Loading objects of type #{@load_object_class} (#{object}")
54
+
52
55
  @populator = if(options[:populator].is_a?(String))
53
- ::Object.const_get(options[:populator]).new
54
- elsif(options[:populator].is_a?(Class))
55
- options[:populator].new
56
- else
57
- DataShift::Populator.new
58
- end
59
-
56
+ ::Object.const_get(options[:populator]).new
57
+ elsif(options[:populator].is_a?(Class))
58
+ options[:populator].new
59
+ else
60
+ DataShift::Populator.new
61
+ end
62
+
60
63
  # Gather names of all possible 'setter' methods on AR class (instance variables and associations)
61
- if((find_operators && !MethodDictionary::for?(object_class)) || options[:reload])
64
+ if( !MethodDictionary::for?(object_class) || options[:reload] )
62
65
  #puts "DEBUG Building Method Dictionary for class #{object_class}"
63
-
66
+
64
67
  meth_dict_opts = options.extract!(:reload, :instance_methods)
65
68
  DataShift::MethodDictionary.find_operators( @load_object_class, meth_dict_opts)
66
-
69
+
67
70
  # Create dictionary of data on all possible 'setter' methods which can be used to
68
71
  # populate or integrate an object of type @load_object_class
69
72
  DataShift::MethodDictionary.build_method_details(@load_object_class)
70
73
  end
71
-
74
+
72
75
  @method_mapper = DataShift::MethodMapper.new
73
76
  @config = options.dup # clone can cause issues like 'can't modify frozen hash'
74
77
 
75
78
  @verbose = @config[:verbose]
76
-
77
- puts "Verbose Mode" if(verbose)
79
+
80
+ @current_row_idx = 0
81
+
78
82
  @headers = []
79
-
83
+
80
84
  @reporter = DataShift::Reporter.new
81
-
85
+
82
86
  reset(object)
83
87
  end
84
88
 
85
-
89
+
86
90
  # Based on filename call appropriate loading function
87
91
  # Currently supports :
88
92
  # Excel/Open Office files saved as .xls
@@ -103,11 +107,11 @@ module DataShift
103
107
  def perform_load( file_name, options = {} )
104
108
 
105
109
  raise DataShift::BadFile, "Cannot load #{file_name} file not found." unless(File.exists?(file_name))
106
-
110
+
107
111
  logger.info("Perform Load Options:\n#{options.inspect}")
108
-
112
+
109
113
  ext = File.extname(file_name)
110
-
114
+
111
115
  # TODO - make more modular - these methods doing too much, for example move the object creation/reset
112
116
  # out of these perform... methods to make it easier to over ride that behaviour
113
117
  if(ext.casecmp('.xls') == 0)
@@ -120,9 +124,9 @@ module DataShift
120
124
  end
121
125
 
122
126
  def report
123
- @reporter.report
127
+ @reporter.report
124
128
  end
125
-
129
+
126
130
  # Core API
127
131
  #
128
132
  # Given a list of free text column names from a file,
@@ -148,21 +152,21 @@ module DataShift
148
152
  #
149
153
  def populate_method_mapper_from_headers( headers, options = {} )
150
154
  @headers = headers
151
-
155
+
152
156
  mandatory = options[:mandatory] || []
153
-
157
+
154
158
  strict = (options[:strict] == true)
155
-
156
- begin
159
+
160
+ begin
157
161
  @method_mapper.map_inbound_headers_to_methods( load_object_class, @headers, options )
158
162
  rescue => e
159
163
  puts e.inspect, e.backtrace
160
164
  logger.error("Failed to map header row to set of database operators : #{e.inspect}")
161
165
  raise MappingDefinitionError, "Failed to map header row to set of database operators"
162
166
  end
163
-
167
+
164
168
  unless(@method_mapper.missing_methods.empty?)
165
- puts "WARNING: These headings couldn't be mapped to class #{load_object_class} :\n#{@method_mapper.missing_methods.inspect}"
169
+ logger.warn("Following headings couldn't be mapped to #{load_object_class} \n#{@method_mapper.missing_methods.inspect}")
166
170
  raise MappingDefinitionError, "Missing mappings for columns : #{@method_mapper.missing_methods.join(",")}" if(strict)
167
171
  end
168
172
 
@@ -170,44 +174,55 @@ module DataShift
170
174
  @method_mapper.missing_mandatory(mandatory).each { |er| puts "ERROR: Mandatory column missing - expected column '#{er}'" }
171
175
  raise MissingMandatoryError, "Mandatory columns missing - please fix and retry."
172
176
  end
173
-
177
+
174
178
  @method_mapper
175
179
  end
176
180
 
177
181
 
178
- # Process any defaults user has specified, for those columns that are not included in
179
- # the incoming import format
180
- def process_missing_columns_with_defaults()
181
- inbound_ops = @method_mapper.operator_names
182
- @populator.default_values.each do |dn, dv|
183
- logger.debug "Processing default value #{dn} : #{dv}"
184
- @populator.assignment(dn, @load_object, dv) unless(inbound_ops.include?(dn))
182
+ #TODO - Move code into Populator
183
+ # Process columns with a default value specified
184
+ def process_defaults()
185
+
186
+ @populator.default_values.each do |dname, dv|
187
+
188
+ method_detail = MethodDictionary.find_method_detail( load_object_class, dname )
189
+
190
+ if(method_detail)
191
+ logger.debug "Applying default value [#{dname}] on (#{method_detail.operator})"
192
+ @populator.prepare_and_assign(method_detail, load_object, dv)
193
+ else
194
+ logger.warn "No operator found for default [#{dname}] trying basic assignment"
195
+ begin
196
+ @populator.insistent_assignment(load_object, dv, dname)
197
+ rescue
198
+ logger.error "Badly specified default - could not set #{dname}(#{dv})"
199
+ end
200
+ end
185
201
  end
186
202
  end
187
-
203
+
188
204
  # Core API - Given a single free text column name from a file, search method mapper for
189
205
  # associated operator on base object class.
190
206
  #
191
207
  # If suitable association found, process row data and then assign to current load_object
192
208
  def find_and_process(column_name, data)
193
-
209
+
194
210
  puts "WARNING: MethodDictionary empty for class #{load_object_class}" unless(MethodDictionary.for?(load_object_class))
195
-
211
+
196
212
  method_detail = MethodDictionary.find_method_detail( load_object_class, column_name )
197
213
 
198
214
  if(method_detail)
199
- prepare_data(method_detail, data)
200
- process()
215
+ process(method_detail, data)
201
216
  else
202
217
  puts "No matching method found for column #{column_name}"
203
218
  @load_object.errors.add(:base, "No matching method found for column #{column_name}")
204
219
  end
205
220
  end
206
-
207
-
221
+
222
+
208
223
  # Any Config under key 'LoaderBase' is merged over existing options - taking precedence.
209
224
  #
210
- # Any Config under a key equal to the full name of the Loader class (e.g DataShift::SpreeHelper::ImageLoader)
225
+ # Any Config under a key equal to the full name of the Loader class (e.g DataShift::SpreeEcom::ImageLoader)
211
226
  # is merged over existing options - taking precedence.
212
227
  #
213
228
  # Format :
@@ -217,161 +232,190 @@ module DataShift
217
232
  #
218
233
  def configure_from(yaml_file)
219
234
 
220
- data = YAML::load( File.open(yaml_file) )
221
-
222
- logger.info("Read Datashift loading config: #{data.inspect}")
223
-
235
+ logger.info("Reading Datashift loader config from: #{yaml_file.inspect}")
236
+
237
+ data = YAML::load( ERB.new( IO.read(yaml_file) ).result )
238
+
239
+ logger.info("Read Datashift config: #{data.inspect}")
240
+
224
241
  if(data['LoaderBase'])
225
242
  @config.merge!(data['LoaderBase'])
226
243
  end
227
-
228
- if(data[self.class.name])
244
+
245
+ if(data[self.class.name])
229
246
  @config.merge!(data[self.class.name])
230
247
  end
231
-
248
+
232
249
  @populator.configure_from(load_object_class, yaml_file)
233
250
  logger.info("Loader Options : #{@config.inspect}")
234
251
  end
235
-
236
- # Set member variables to hold details and value.
237
- #
238
- # Check supplied value, validate it, and if required :
239
- # set to provided default value
240
- # prepend any provided prefixes
241
- # add any provided postfixes
242
- def prepare_data(method_detail, value)
243
- return @populator.prepare_data(method_detail, value)
244
- end
245
-
246
- # Return the find_by operator and the rest of the (row,columns) data
252
+
253
+
254
+ # Return the find_by (where) operator, if specified, otherwise use the heading operator.
255
+ # i.e. the where operator embedded in the row takes precedence over the operator in the column heading
256
+ #
257
+ # Treat rest of the node as the value to use in the where clause e.g
247
258
  # price:0.99
248
- #
249
- # Column headings can already contain the operator so possible that row only contains
259
+ #
260
+ # Column headings will be used, if the row only contains data e.g
250
261
  # 0.99
262
+ #
251
263
  # We leave it to caller to manage any other aspects or problems in 'rest'
252
264
  #
253
- def get_find_operator_and_rest(inbound_data)
254
-
255
- operator, rest = inbound_data.split(Delimiters::name_value_delim)
256
-
257
- #puts "DEBUG inbound_data: #{inbound_data} => #{operator} , #{rest}"
258
-
265
+ def get_operator_and_data(inbound_data)
266
+
267
+ where_operator, data = inbound_data.split(Delimiters::name_value_delim)
268
+
269
+ md = @populator.current_method_detail
270
+
259
271
  # Find by operator embedded in row takes precedence over operator in column heading
260
- if(@populator.current_method_detail.find_by_operator)
261
- # row contains 0.99 so rest is effectively operator, and operator is in method details
262
- if(rest.nil?)
263
- rest = operator
264
- operator = @populator.current_method_detail.find_by_operator
272
+ if((data.nil? || data.empty?) && md.find_by_operator)
273
+ if((where_operator.nil? || where_operator.empty?)) #colum completely empty - check for defaults
274
+ if(md.find_by_value)
275
+ data = md.find_by_value
276
+ else
277
+ data = Populator::header_default_data(md.operator)
278
+ end
279
+ else
280
+ data = where_operator
265
281
  end
282
+
283
+ # row contains single entry only so take operator from header via method details
284
+ where_operator = md.find_by_operator
266
285
  end
267
-
268
- #puts "DEBUG: get_find_operator_and_rest: #{operator} => #{rest}"
269
-
270
- return operator, rest
286
+
287
+ logger.debug("LoaderBase - get_operator_and_data - [#{where_operator}] - [#{data}]")
288
+
289
+ return where_operator, data
271
290
  end
272
-
291
+
273
292
  # Process a value string from a column.
274
293
  # Assigning value(s) to correct association on @load_object.
275
294
  # Method detail represents a column from a file and its correlated AR associations.
276
295
  # Value string which may contain multiple values for a collection association.
277
296
  #
278
- def process()
279
-
280
- current_method_detail = @populator.current_method_detail
281
- current_value = @populator.current_value
282
-
283
- logger.info("Current value to assign : #{current_value}")
284
-
297
+ def process(method_detail, value)
298
+
299
+ current_method_detail = method_detail
300
+
301
+ current_value, current_attribute_hash = @populator.prepare_data(method_detail, value)
302
+
303
+ # TODO - Move ALL of this into Populator properly
285
304
  if(current_method_detail.operator_for(:has_many))
286
305
 
287
306
  if(current_method_detail.operator_class && current_value)
288
307
 
289
308
  # there are times when we need to save early, for example before assigning to
290
309
  # has_and_belongs_to associations which require the load_object has an id for the join table
291
-
310
+
292
311
  save_if_new
293
312
 
294
313
  # A single column can contain multiple associations delimited by special char
295
314
  # Size:large|Colour:red,green,blue => ['Size:large', 'Colour:red,green,blue']
296
315
  columns = current_value.to_s.split( Delimiters::multi_assoc_delim )
297
316
 
298
- # Size:large|Colour:red,green,blue => generates find_by_size( 'large' ) and find_all_by_colour( ['red','green','blue'] )
317
+ # Size:large|Colour:red,green,blue =>
318
+ # find_by_size( 'large' )
319
+ # find_all_by_colour( ['red','green','blue'] )
299
320
 
300
321
  columns.each do |col_str|
301
-
302
- find_operator, col_values = get_find_operator_and_rest( col_str )
303
-
322
+
323
+ find_operator, col_values = get_operator_and_data( col_str )
324
+
304
325
  raise "Cannot perform DB find by #{find_operator}. Expected format key:value" unless(find_operator && col_values)
305
-
326
+
306
327
  find_by_values = col_values.split(Delimiters::multi_value_delim)
307
-
328
+
308
329
  find_by_values << current_method_detail.find_by_value if(current_method_detail.find_by_value)
309
-
310
- if(find_by_values.size > 1)
311
330
 
312
- #RAILS 4 current_value = current_method_detail.operator_class.send("find_all_by_#{find_operator}", find_by_values )
313
- current_value = current_method_detail.operator_class.where(find_operator => find_by_values)
331
+ found_values = []
314
332
 
315
- unless(find_by_values.size == current_value.size)
316
- found = current_value.collect {|f| f.send(find_operator) }
317
- @load_object.errors.add( current_method_detail.operator, "Association with key(s) #{(find_by_values - found).inspect} NOT found")
318
- puts "WARNING: Association #{current_method_detail.operator} with key(s) #{(find_by_values - found).inspect} NOT found - Not added."
319
- next if(@current_value.empty?)
320
- end
333
+ #if(find_by_values.size() == 1)
334
+ # logger.info("Find or create #{current_method_detail.operator_class} with #{find_operator} = #{find_by_values.inspect}")
335
+ # item = current_method_detail.operator_class.where(find_operator => find_by_values.first).first_or_create
336
+ #else
337
+ # logger.info("Find #{current_method_detail.operator_class} with #{find_operator} = values #{find_by_values.inspect}")
338
+ # current_method_detail.operator_class.where(find_operator => find_by_values).all
339
+ #end
321
340
 
322
- else
341
+ operator_class = current_method_detail.operator_class
323
342
 
324
- current_value = current_method_detail.operator_class.send("find_by_#{find_operator}", find_by_values )
343
+ logger.info("Find #{current_method_detail.operator_class} with #{find_operator} = #{find_by_values.inspect}")
325
344
 
326
- unless(current_value)
327
- @load_object.errors.add( current_method_detail.operator, "Association with key #{find_by_values} NOT found")
328
- puts "WARNING: Association with key #{find_by_values} NOT found - Not added."
329
- next
345
+ find_by_values.each do |v|
346
+ begin
347
+ found_values << operator_class.where(find_operator => v).first_or_create
348
+ rescue => e
349
+ logger.error(e.inspect)
350
+ # TODO some way to define if this is a fatal error or not ?
330
351
  end
352
+ end
353
+
354
+ logger.info("Scan result #{found_values.inspect}")
331
355
 
356
+ unless(find_by_values.size == found_values.size)
357
+ found = found_values.collect {|f| f.send(find_operator) }
358
+ @load_object.errors.add( current_method_detail.operator, "Association with key(s) #{(find_by_values - found).inspect} NOT found")
359
+ logger.error "Association [#{current_method_detail.operator}] with key(s) #{(find_by_values - found).inspect} NOT found - Not added."
360
+ next if(found_values.empty?)
332
361
  end
333
362
 
363
+ logger.info("Assigning #{found_values.inspect} (#{found_values.class})")
364
+
334
365
  # Lookup Assoc's Model done, now add the found value(s) to load model's collection
335
- @populator.assign(current_method_detail, @load_object, current_value)
336
- end
366
+ @populator.prepare_and_assign(current_method_detail, @load_object, found_values)
367
+ end # END HAS_MANY
337
368
  end
338
- # END HAS_MANY
339
369
  else
340
370
  # Nice n simple straight assignment to a column variable
341
371
  #puts "INFO: LOADER BASE processing #{method_detail.name}"
342
- @populator.assign(current_method_detail, @load_object, current_value)
372
+ @populator.assign(load_object)
343
373
  end
344
374
  end
345
-
346
-
375
+
376
+
347
377
  # Loading failed. Store a failed object and if requested roll back (destroy) the current load object
348
378
  # For use case where object saved early but subsequent required columns fail to process
349
379
  # so the load object is invalid
350
-
380
+
351
381
  def failure( object = @load_object, rollback = false)
352
382
  if(object)
353
383
  @reporter.add_failed_object(object)
354
-
355
- object.destroy if(rollback && object.respond_to?('destroy') && !object.new_record?)
356
-
357
- new_load_object # don't forget to reset the load object
384
+
385
+ if(rollback && object.respond_to?('destroy') && !object.new_record?)
386
+ klass = object.class
387
+ object.destroy
388
+ object = klass.new
389
+ end
390
+ end
391
+ end
392
+
393
+ def save_and_report
394
+ unless(save)
395
+ failure
396
+ logger.error "Failed to save row (#{current_row_idx}) - [#{@current_row}]"
397
+ logger.error load_object.errors.inspect if(load_object)
398
+ else
399
+ logger.info("Successfully SAVED Object with ID #{load_object.id} for Row #{@current_row}")
400
+ @reporter.add_loaded_object(@load_object)
401
+ @reporter.success_inbound_count += 1
358
402
  end
359
403
  end
360
404
 
361
405
  def save
362
406
  return unless( @load_object )
363
-
407
+
364
408
  puts "DEBUG: SAVING #{@load_object.class} : #{@load_object.inspect}" if(verbose)
365
409
  begin
366
410
  return @load_object.save
367
411
  rescue => e
368
- failure
369
- puts "Error saving #{@load_object.class} : #{e.inspect}"
370
- logger.error e.backtrace
371
- raise "Error in save whilst processing column #{@current_method_detail.name}" if(@config[:strict])
412
+ logger.error( "Save Error : #{e.inspect} on #{@load_object.class}")
413
+ logger.error(e.backtrace)
372
414
  end
373
- end
374
-
415
+
416
+ false
417
+ end
418
+
375
419
  # Reset the loader, including database object to be populated, and load counts
376
420
  #
377
421
  def reset(object = nil)
@@ -379,7 +423,7 @@ module DataShift
379
423
  @reporter.reset
380
424
  end
381
425
 
382
-
426
+
383
427
  def new_load_object
384
428
  @load_object = @load_object_class.new
385
429
  @load_object
@@ -408,7 +452,7 @@ module DataShift
408
452
  def missing_mandatory_headers( mandatory_list )
409
453
  [ [*mandatory_list] - @headers].flatten
410
454
  end
411
-
455
+
412
456
  def find_or_new( klass, condition_hash = {} )
413
457
  @records[klass] = klass.find(:all, :conditions => condition_hash)
414
458
  if @records[klass].any?
@@ -419,14 +463,14 @@ module DataShift
419
463
  end
420
464
 
421
465
  protected
422
-
466
+
423
467
  # Take current column data and split into each association
424
468
  # Supported Syntax :
425
469
  # assoc_find_name:value | assoc2_find_name:value | etc
426
470
  def get_each_assoc
427
- @populator.current_value.to_s.split( Delimiters::multi_assoc_delim )
471
+ current_value = @populator.current_value.to_s.split( Delimiters::multi_assoc_delim )
428
472
  end
429
-
473
+
430
474
  private
431
475
 
432
476
  # This method is usually called during processing to avoid errors with associations like
@@ -436,14 +480,14 @@ module DataShift
436
480
  # TODO smart ordering of columns dynamically ourselves rather than relying on incoming data order
437
481
  def save_if_new
438
482
  return unless(load_object.new_record?)
439
-
440
- if(load_object.valid?)
483
+
484
+ if(load_object.valid?)
441
485
  save
442
486
  else
443
- puts "Cannot Save - Invalid #{load_object.class} - #{load_object.errors.full_messages}" if(verbose)
487
+ raise DataShift::SaveError.new("Cannot Save - Invalid #{load_object.class} Record - #{load_object.errors.full_messages}")
444
488
  end
445
489
  end
446
-
490
+
447
491
  end
448
492
 
449
493
  end