datashift 0.15.0 → 0.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. checksums.yaml +7 -0
  2. data/README.markdown +91 -55
  3. data/VERSION +1 -1
  4. data/datashift.gemspec +8 -23
  5. data/lib/applications/jexcel_file.rb +1 -2
  6. data/lib/datashift.rb +34 -15
  7. data/lib/datashift/column_packer.rb +98 -34
  8. data/lib/datashift/data_transforms.rb +83 -0
  9. data/lib/datashift/delimiters.rb +58 -10
  10. data/lib/datashift/excel_base.rb +123 -0
  11. data/lib/datashift/exceptions.rb +45 -7
  12. data/lib/datashift/load_object.rb +25 -0
  13. data/lib/datashift/mapping_service.rb +91 -0
  14. data/lib/datashift/method_detail.rb +40 -62
  15. data/lib/datashift/method_details_manager.rb +18 -2
  16. data/lib/datashift/method_dictionary.rb +27 -10
  17. data/lib/datashift/method_mapper.rb +49 -41
  18. data/lib/datashift/model_mapper.rb +42 -22
  19. data/lib/datashift/populator.rb +258 -143
  20. data/lib/datashift/thor_base.rb +38 -0
  21. data/lib/exporters/csv_exporter.rb +57 -145
  22. data/lib/exporters/excel_exporter.rb +73 -60
  23. data/lib/generators/csv_generator.rb +65 -5
  24. data/lib/generators/generator_base.rb +69 -3
  25. data/lib/generators/mapping_generator.rb +112 -0
  26. data/lib/helpers/core_ext/csv_file.rb +33 -0
  27. data/lib/loaders/csv_loader.rb +41 -39
  28. data/lib/loaders/excel_loader.rb +130 -116
  29. data/lib/loaders/loader_base.rb +190 -146
  30. data/lib/loaders/paperclip/attachment_loader.rb +4 -4
  31. data/lib/loaders/paperclip/datashift_paperclip.rb +5 -3
  32. data/lib/loaders/paperclip/image_loading.rb +9 -7
  33. data/lib/loaders/reporter.rb +17 -8
  34. data/lib/thor/export.thor +12 -13
  35. data/lib/thor/generate.thor +1 -9
  36. data/lib/thor/import.thor +13 -24
  37. data/lib/thor/mapping.thor +65 -0
  38. data/spec/Gemfile +13 -11
  39. data/spec/Gemfile.lock +98 -93
  40. data/spec/csv_exporter_spec.rb +104 -99
  41. data/spec/csv_generator_spec.rb +159 -0
  42. data/spec/csv_loader_spec.rb +197 -16
  43. data/spec/datashift_spec.rb +9 -0
  44. data/spec/excel_exporter_spec.rb +149 -58
  45. data/spec/excel_generator_spec.rb +35 -44
  46. data/spec/excel_loader_spec.rb +196 -178
  47. data/spec/excel_spec.rb +8 -5
  48. data/spec/loader_base_spec.rb +47 -7
  49. data/spec/mapping_spec.rb +117 -0
  50. data/spec/method_dictionary_spec.rb +24 -11
  51. data/spec/method_mapper_spec.rb +5 -7
  52. data/spec/model_mapper_spec.rb +41 -0
  53. data/spec/paperclip_loader_spec.rb +3 -6
  54. data/spec/populator_spec.rb +48 -14
  55. data/spec/spec_helper.rb +85 -73
  56. data/spec/thor_spec.rb +40 -5
  57. metadata +93 -86
  58. data/lib/applications/excel_base.rb +0 -63
data/lib/loaders/excel_loader.rb

@@ -11,7 +11,7 @@
 # i.e pulls data from each column and sends to object.
 #
 require 'datashift/exceptions'
-
+require 'datashift/exceptions'
 
 module DataShift
 
@@ -21,6 +21,38 @@ module DataShift
 
   module ExcelLoading
 
+    include ExcelBase
+
+    attr_accessor :excel
+
+    # Currently struggling to determine the 'end' of data in a spreadsheet
+    # this reflects if current row had any data at all
+    attr_reader :contains_data
+
+    def start_excel( file_name, options = {} )
+
+      @excel = Excel.new
+
+      excel.open(file_name)
+
+      puts "\n\n\nLoading from Excel file: #{file_name}"
+      logger.info("\nStarting Load from Excel file: #{file_name}")
+
+      sheet_number = options[:sheet_number] || 0
+
+      @sheet = excel.worksheet( sheet_number )
+
+      parse_headers(@sheet, options[:header_row])
+
+      raise MissingHeadersError, "No headers found - Check Sheet #{@sheet} is complete and Row #{header_row_index} contains headers" if(excel_headers.empty?)
+
+      # Create a method_mapper which maps list of headers into suitable calls on the Active Record class
+      # For example if model has an attribute 'price' will map columns called Price or price or PRICE etc to this attribute
+      populate_method_mapper_from_headers(excel_headers, options )
+
+      reporter.reset
+    end
+
     # Options:
     #  [:dummy] : Perform a dummy run - attempt to load everything but then roll back
     #
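Note: the header scanning that previously lived inline in perform_excel_load is now delegated to the new ExcelBase module (parse_headers, excel_headers, header_row_index), with start_excel handling workbook setup before any rows are read. A minimal usage sketch, assuming a hypothetical Project ActiveRecord model and spreadsheet (neither is part of the gem):

    # Hypothetical example - Project and projects.xls are assumptions
    loader = DataShift::ExcelLoader.new(Project)

    # Opens the workbook, selects sheet 0, parses the header row (index 1 here)
    # and maps each heading onto an operator of Project
    loader.start_excel('projects.xls', sheet_number: 0, header_row: 1)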
@@ -37,173 +69,155 @@
     def perform_excel_load( file_name, options = {} )
 
       raise MissingHeadersError, "Minimum row for Headers is 0 - passed #{options[:header_row]}" if(options[:header_row] && options[:header_row].to_i < 0)
-
-      @excel = Excel.new
 
-      @excel.open(file_name)
-
-      puts "\n\n\nLoading from Excel file: #{file_name}"
+      start_excel(file_name, options)
 
-      sheet_number = options[:sheet_number] || 0
-
-      @sheet = @excel.worksheet( sheet_number )
+      begin
+        puts "Dummy Run - Changes will be rolled back" if options[:dummy]
 
-      header_row_index = options[:header_row] || 0
-      @header_row = @sheet.row(header_row_index)
+        load_object_class.transaction do
 
-      raise MissingHeadersError, "No headers found - Check Sheet #{@sheet} is complete and Row #{header_row_index} contains headers" unless(@header_row)
+          @sheet.each_with_index do |row, i|
 
-      @headers = []
+            current_row_idx = i
+            @current_row = row
 
-      # TODO - make more robust - currently end on first empty column
-      # There is no actual max columns in Excel .. you will run out of memory though at some point
-      (0..1024).each do |column|
-        cell = @header_row[column]
-        break unless cell
-        header = "#{cell.to_s}".strip
-        break if header.empty?
-        @headers << header
-      end
+            next if(current_row_idx == header_row_index)
 
-      raise MissingHeadersError, "No headers found - Check Sheet #{@sheet} is complete and Row #{header_row_index} contains headers" if(@headers.empty?)
-
-      # Create a method_mapper which maps list of headers into suitable calls on the Active Record class
-      # For example if model has an attribute 'price' will map columns called Price, price, PRICE etc to this attribute
-      populate_method_mapper_from_headers( @headers, options )
-
-      # currently pointless num_rows rubbish i.e inaccurate!
-      #logger.info "Excel Loader processing #{@sheet.num_rows} rows"
-
-      @reporter.reset
-
-      begin
-        puts "Dummy Run - Changes will be rolled back" if options[:dummy]
-
-        load_object_class.transaction do
-
-          @sheet.each_with_index do |row, i|
-
-            @current_row = row
-
-            next if(i == header_row_index)
-
             # Excel num_rows seems to return all 'visible' rows, which appears to be greater than the actual data rows
             # (TODO - write spec to process .xls with a huge number of rows)
             #
             # This is rubbish but currently manually detect when actual data ends, this isn't very smart but
             # got no better idea than ending once we hit the first completely empty row
-            break if @current_row.nil?
-
-            logger.info "Processing Row #{i} : #{@current_row}"
-
-            contains_data = false
-
+            break if(@current_row.nil? || @current_row.compact.empty?)
+
+            logger.info "Processing Row #{current_row_idx} : #{@current_row}"
+
+            @contains_data = false
+
             begin
-              # First assign any default values for columns not included in parsed_file
-              process_missing_columns_with_defaults
-
-              # TODO - Smart sorting of column processing order ....
-              # Does not currently ensure mandatory columns (for valid?) processed first but model needs saving
-              # before associations can be processed so user should ensure mandatory columns are prior to associations
-
-              # as part of this we also attempt to save early, for example before assigning to
-              # has_and_belongs_to associations which require the load_object has an id for the join table
-
-              # Iterate over method_details, working on data out of associated Excel column
-              @method_mapper.method_details.each do |method_detail|
-
-                next unless method_detail # TODO populate unmapped with a real MethodDetail that is 'null' and create is_nil
-
-                logger.info "Processing Column #{method_detail.column_index}"
-
-                value = @current_row[method_detail.column_index]
-
-                contains_data = true unless(value.nil? || value.to_s.empty?)
-
-                prepare_data(method_detail, value)
-
-                process()
-              end
-
+
+              process_excel_row(row)
+
+              # This is rubbish but currently have to manually detect when actual data ends,
+              # no other way to detect when we hit the first completely empty row
+              break unless(contains_data == true)
+
             rescue => e
-              @reporter.processed_object_count += 1
-
-              failure(@current_row, true)
-
-              if(verbose)
-                puts "Failed to process row [#{i}] (#{@current_row})"
-                puts e.inspect, e.backtrace
-              end
-
-              logger.error "Failed to process row [#{i}] (#{@current_row})"
-              logger.error e.backtrace
-
-              # don't forget to reset the load object
+              process_excel_failure(e, true)
+
+              # don't forget to reset the load object
              new_load_object
              next
            end
-
+
            break unless(contains_data == true)
 
            # currently here as we can only identify the end of a speadsheet by first empty row
            @reporter.processed_object_count += 1
-
+
            # TODO - make optional - all or nothing or carry on and dump out the exception list at end
-
-            unless(save)
-              failure
-              logger.error "Failed to save row [#{@current_row}]"
-              logger.error load_object.errors.inspect if(load_object)
-            else
-              logger.info "Row #{@current_row} succesfully SAVED : ID #{load_object.id}"
-              @reporter.add_loaded_object(@load_object)
-            end
-
+
+            save_and_report
+
            # don't forget to reset the object or we'll update rather than create
            new_load_object
 
-          end
-
+          end # all rows processed
+
          if(options[:dummy])
            puts "Excel loading stage complete - Dummy run so Rolling Back."
            raise ActiveRecord::Rollback # Don't actually create/upload to DB if we are doing dummy run
          end
-
+
        end # TRANSACTION N.B ActiveRecord::Rollback does not propagate outside of the containing transaction block
-
-      rescue => e
+
+      rescue => e
        puts "ERROR: Excel loading failed : #{e.inspect}"
        raise e
-      ensure
+      ensure
        report
      end
-
+
    end
-
+
+    def process_excel_failure( e, delete_object = true)
+      failure(@current_row, delete_object)
+
+      if(verbose)
+        puts "perform_excel_load failed in row [#{current_row_idx}] #{@current_row} - #{e.message} :"
+        puts e.backtrace
+      end
+
+      logger.error "perform_excel_load failed in row [#{current_row_idx}] #{@current_row} - #{e.message} :"
+      logger.error e.backtrace.join("\n")
+    end
+
+
    def value_at(row, column)
      @excel[row, column]
    end
-
+
+    def process_excel_row(row)
+
+      # First assign any default values for columns
+      process_defaults
+
+      # TODO - Smart sorting of column processing order ....
+      # Does not currently ensure mandatory columns (for valid?) processed first but model needs saving
+      # before associations can be processed so user should ensure mandatory columns are prior to associations
+
+      # as part of this we also attempt to save early, for example before assigning to
+      # has_and_belongs_to associations which require the load_object has an id for the join table
+
+      # Iterate over method_details, working on data out of associated Excel column
+      @method_mapper.method_details.each_with_index do |method_detail, i|
+
+        unless method_detail
+          logger.warn("No method_detail found for column (#{i})")
+          next # TODO populate unmapped with a real MethodDetail that is 'null' and create is_nil
+        end
+
+        logger.info "Processing Column #{method_detail.column_index} (#{method_detail.operator})"
+
+        value = row[method_detail.column_index]
+
+        @contains_data = true unless(value.nil? || value.to_s.empty?)
+
+        process(method_detail, value)
+      end
+
+    end
+
  end
 
 
  class ExcelLoader < LoaderBase
 
    include ExcelLoading
-
-    def initialize(klass, find_operators = true, object = nil, options = {})
-      super( klass, find_operators, object, options )
+
+    # Setup loading
+    #
+    # Options to drive building the method dictionary for a class, enabling headers to be mapped to operators on that class.
+    #
+    # Options
+    #  :reload : Force load of the method dictionary for object_class even if already loaded
+    #  :instance_methods : Include setter/delegate style instance methods for assignment, as well as AR columns
+    #  :verbose : Verbose logging and to STDOUT
+    #
+    def initialize(klass, object = nil, options = {})
+      super( klass, object, options )
      raise "Cannot load - failed to create a #{klass}" unless @load_object
    end
 
 
    def perform_load( file_name, options = {} )
-
+
      logger.info "Starting bulk load from Excel : #{file_name}"
-
+
      perform_excel_load( file_name, options )
 
-      puts "Excel loading stage complete - #{loaded_count} rows added."
+      puts "Excel loading stage complete - #{loaded_count} rows added."
    end
 
  end
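Note: the find_operators positional argument has been dropped from the loader constructors; the method dictionary is now always built unless already cached, with :reload forcing a rebuild. A hedged sketch of updating a 0.15.x call site, again assuming a hypothetical Project model:

    # 0.15.0 signature - find_operators passed positionally
    loader = DataShift::ExcelLoader.new(Project, true, nil, verbose: true)

    # 0.16.0 signature - options drive dictionary building instead
    loader = DataShift::ExcelLoader.new(Project, nil, verbose: true, reload: true)

    loader.perform_load('projects.xls', dummy: true) # dry run, changes rolled back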
data/lib/loaders/loader_base.rb

@@ -19,70 +19,74 @@ module DataShift
 
     include DataShift::Logging
     include DataShift::Querying
-
+
     attr_reader :headers
 
     attr_accessor :method_mapper
 
+    # The inbound row/line number
+    attr_accessor :current_row_idx
+
     attr_accessor :load_object_class, :load_object
 
     attr_accessor :reporter
     attr_accessor :populator
-
+
     attr_accessor :config, :verbose
 
+
     def options() return @config; end
-
+
 
     # Setup loading
     #
     # Options to drive building the method dictionary for a class, enabling headers to be mapped to operators on that class.
-    #
-    # find_operators [default = true] : Populate method dictionary with operators and method details
     #
     # Options
-    #
     #  :reload : Force load of the method dictionary for object_class even if already loaded
     #  :instance_methods : Include setter/delegate style instance methods for assignment, as well as AR columns
-    #  :verbose : Verboise logging and to STDOUT
+    #  :verbose : Verbose logging and to STDOUT
     #
-    def initialize(object_class, find_operators = true, object = nil, options = {})
+    def initialize(object_class, object = nil, options = {})
       @load_object_class = object_class
-
+
+      logger.info("Loading objects of type #{@load_object_class} (#{object}")
+
       @populator = if(options[:populator].is_a?(String))
-        ::Object.const_get(options[:populator]).new
-      elsif(options[:populator].is_a?(Class))
-        options[:populator].new
-      else
-        DataShift::Populator.new
-      end
-
+                     ::Object.const_get(options[:populator]).new
+                   elsif(options[:populator].is_a?(Class))
+                     options[:populator].new
+                   else
+                     DataShift::Populator.new
+                   end
+
       # Gather names of all possible 'setter' methods on AR class (instance variables and associations)
-      if((find_operators && !MethodDictionary::for?(object_class)) || options[:reload])
+      if( !MethodDictionary::for?(object_class) || options[:reload] )
         #puts "DEBUG Building Method Dictionary for class #{object_class}"
-
+
         meth_dict_opts = options.extract!(:reload, :instance_methods)
         DataShift::MethodDictionary.find_operators( @load_object_class, meth_dict_opts)
-
+
         # Create dictionary of data on all possible 'setter' methods which can be used to
         # populate or integrate an object of type @load_object_class
         DataShift::MethodDictionary.build_method_details(@load_object_class)
       end
-
+
       @method_mapper = DataShift::MethodMapper.new
       @config = options.dup # clone can cause issues like 'can't modify frozen hash'
 
       @verbose = @config[:verbose]
-
-      puts "Verbose Mode" if(verbose)
+
+      @current_row_idx = 0
+
       @headers = []
-
+
       @reporter = DataShift::Reporter.new
-
+
       reset(object)
     end
 
-
+
     # Based on filename call appropriate loading function
     # Currently supports :
     #   Excel/Open Office files saved as .xls
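Note: LoaderBase#initialize resolves options[:populator] from either a String or a Class, so row-to-object population can be customised per loader. A minimal sketch, assuming a hypothetical MyPopulator subclass:

    # Hypothetical subclass - override hooks such as prepare_data as required
    class MyPopulator < DataShift::Populator
    end

    # Both forms end up calling MyPopulator.new inside the constructor
    DataShift::ExcelLoader.new(Project, nil, populator: MyPopulator)
    DataShift::ExcelLoader.new(Project, nil, populator: 'MyPopulator')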
@@ -103,11 +107,11 @@
     def perform_load( file_name, options = {} )
 
       raise DataShift::BadFile, "Cannot load #{file_name} file not found." unless(File.exists?(file_name))
-
+
       logger.info("Perform Load Options:\n#{options.inspect}")
-
+
       ext = File.extname(file_name)
-
+
       # TODO - make more modular - these methods doing too much, for example move the object creation/reset
       # out of these perform... methods to make it easier to over ride that behaviour
       if(ext.casecmp('.xls') == 0)
@@ -120,9 +124,9 @@
     end
 
     def report
-      @reporter.report
+      @reporter.report
     end
-
+
     # Core API
     #
     # Given a list of free text column names from a file,
@@ -148,21 +152,21 @@
     #
     def populate_method_mapper_from_headers( headers, options = {} )
       @headers = headers
-
+
       mandatory = options[:mandatory] || []
-
+
       strict = (options[:strict] == true)
-
-      begin
+
+      begin
         @method_mapper.map_inbound_headers_to_methods( load_object_class, @headers, options )
       rescue => e
         puts e.inspect, e.backtrace
         logger.error("Failed to map header row to set of database operators : #{e.inspect}")
         raise MappingDefinitionError, "Failed to map header row to set of database operators"
       end
-
+
       unless(@method_mapper.missing_methods.empty?)
-        puts "WARNING: These headings couldn't be mapped to class #{load_object_class} :\n#{@method_mapper.missing_methods.inspect}"
+        logger.warn("Following headings couldn't be mapped to #{load_object_class} \n#{@method_mapper.missing_methods.inspect}")
         raise MappingDefinitionError, "Missing mappings for columns : #{@method_mapper.missing_methods.join(",")}" if(strict)
       end
 
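Note: unmapped headings are now logged as warnings rather than printed, and only raise MappingDefinitionError when :strict is set; :mandatory still raises MissingMandatoryError when named columns are absent. A hedged sketch of the relevant options, with an assumed column name:

    # 'title' is an assumed column name, used purely for illustration
    loader.perform_load('projects.xls',
                        mandatory: ['title'],  # raise unless these headings are present
                        strict: true)          # raise on any heading that cannot be mapped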
@@ -170,44 +174,55 @@
         @method_mapper.missing_mandatory(mandatory).each { |er| puts "ERROR: Mandatory column missing - expected column '#{er}'" }
         raise MissingMandatoryError, "Mandatory columns missing - please fix and retry."
       end
-
+
       @method_mapper
     end
 
 
-    # Process any defaults user has specified, for those columns that are not included in
-    # the incoming import format
-    def process_missing_columns_with_defaults()
-      inbound_ops = @method_mapper.operator_names
-      @populator.default_values.each do |dn, dv|
-        logger.debug "Processing default value #{dn} : #{dv}"
-        @populator.assignment(dn, @load_object, dv) unless(inbound_ops.include?(dn))
+    #TODO - Move code into Populator
+    # Process columns with a default value specified
+    def process_defaults()
+
+      @populator.default_values.each do |dname, dv|
+
+        method_detail = MethodDictionary.find_method_detail( load_object_class, dname )
+
+        if(method_detail)
+          logger.debug "Applying default value [#{dname}] on (#{method_detail.operator})"
+          @populator.prepare_and_assign(method_detail, load_object, dv)
+        else
+          logger.warn "No operator found for default [#{dname}] trying basic assignment"
+          begin
+            @populator.insistent_assignment(load_object, dv, dname)
+          rescue
+            logger.error "Badly specified default - could not set #{dname}(#{dv})"
+          end
+        end
       end
     end
-
+
     # Core API - Given a single free text column name from a file, search method mapper for
     # associated operator on base object class.
     #
     # If suitable association found, process row data and then assign to current load_object
     def find_and_process(column_name, data)
-
+
       puts "WARNING: MethodDictionary empty for class #{load_object_class}" unless(MethodDictionary.for?(load_object_class))
-
+
       method_detail = MethodDictionary.find_method_detail( load_object_class, column_name )
 
       if(method_detail)
-        prepare_data(method_detail, data)
-        process()
+        process(method_detail, data)
       else
         puts "No matching method found for column #{column_name}"
         @load_object.errors.add(:base, "No matching method found for column #{column_name}")
       end
     end
-
-
+
+
     # Any Config under key 'LoaderBase' is merged over existing options - taking precedence.
     #
-    # Any Config under a key equal to the full name of the Loader class (e.g DataShift::SpreeHelper::ImageLoader)
+    # Any Config under a key equal to the full name of the Loader class (e.g DataShift::SpreeEcom::ImageLoader)
     # is merged over existing options - taking precedence.
     #
     # Format :
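Note: process_defaults now resolves each default through the MethodDictionary and falls back to Populator#insistent_assignment when no operator matches, while find_and_process simply looks up the operator for a heading and delegates to the reworked process(method_detail, value). A minimal sketch of the single-column path, with an assumed column and value:

    # Hypothetical single-column usage - 'title' is an assumed Project column
    loader.find_and_process('title', 'Datashift demo')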
@@ -217,161 +232,190 @@
     #
     def configure_from(yaml_file)
 
-      data = YAML::load( File.open(yaml_file) )
-
-      logger.info("Read Datashift loading config: #{data.inspect}")
-
+      logger.info("Reading Datashift loader config from: #{yaml_file.inspect}")
+
+      data = YAML::load( ERB.new( IO.read(yaml_file) ).result )
+
+      logger.info("Read Datashift config: #{data.inspect}")
+
       if(data['LoaderBase'])
         @config.merge!(data['LoaderBase'])
       end
-
+
-      if(data[self.class.name])
+      if(data[self.class.name])
         @config.merge!(data[self.class.name])
       end
-
+
       @populator.configure_from(load_object_class, yaml_file)
       logger.info("Loader Options : #{@config.inspect}")
     end
-
-    # Set member variables to hold details and value.
-    #
-    # Check supplied value, validate it, and if required :
-    #   set to provided default value
-    #   prepend any provided prefixes
-    #   add any provided postfixes
-    def prepare_data(method_detail, value)
-      return @populator.prepare_data(method_detail, value)
-    end
-
-    # Return the find_by operator and the rest of the (row,columns) data
+
+
+    # Return the find_by (where) operator, if specified, otherwise use the heading operator.
+    # i.e where operator embedded in row ,takes precedence over operator in column heading
+    #
+    # Treat rest of the node as the value to use in the where clause e.g
     #   price:0.99
-    #
-    # Column headings can already contain the operator so possible that row only contains
+    #
+    # Column headings will be used, if the row only contains data e.g
     #   0.99
+    #
     # We leave it to caller to manage any other aspects or problems in 'rest'
     #
-    def get_find_operator_and_rest(inbound_data)
-
-      operator, rest = inbound_data.split(Delimiters::name_value_delim)
-
-      #puts "DEBUG inbound_data: #{inbound_data} => #{operator} , #{rest}"
-
+    def get_operator_and_data(inbound_data)
+
+      where_operator, data = inbound_data.split(Delimiters::name_value_delim)
+
+      md = @populator.current_method_detail
+
       # Find by operator embedded in row takes precedence over operator in column heading
-      if(@populator.current_method_detail.find_by_operator)
-        # row contains 0.99 so rest is effectively operator, and operator is in method details
-        if(rest.nil?)
-          rest = operator
-          operator = @populator.current_method_detail.find_by_operator
+      if((data.nil? || data.empty?) && md.find_by_operator)
+        if((where_operator.nil? || where_operator.empty?)) #colum completely empty - check for defaults
+          if(md.find_by_value)
+            data = md.find_by_value
+          else
+            data = Populator::header_default_data(md.operator)
+          end
+        else
+          data = where_operator
         end
+
+        # row contains single entry only so take operator from header via method details
+        where_operator = md.find_by_operator
       end
-
-      #puts "DEBUG: get_find_operator_and_rest: #{operator} => #{rest}"
-
-      return operator, rest
+
+      logger.debug("LoaderBase - get_operator_and_data - [#{where_operator}] - [#{data}]")
+
+      return where_operator, data
     end
-
+
     # Process a value string from a column.
     # Assigning value(s) to correct association on @load_object.
     # Method detail represents a column from a file and it's correlated AR associations.
     # Value string which may contain multiple values for a collection association.
     #
-    def process()
-
-      current_method_detail = @populator.current_method_detail
-      current_value = @populator.current_value
-
-      logger.info("Current value to assign : #{current_value}")
-
+    def process(method_detail, value)
+
+      current_method_detail = method_detail
+
+      current_value, current_attribute_hash = @populator.prepare_data(method_detail, value)
+
+      # TODO - Move ALL of this into Populator properly
       if(current_method_detail.operator_for(:has_many))
 
         if(current_method_detail.operator_class && current_value)
 
           # there are times when we need to save early, for example before assigning to
           # has_and_belongs_to associations which require the load_object has an id for the join table
-
+
           save_if_new
 
           # A single column can contain multiple associations delimited by special char
           # Size:large|Colour:red,green,blue => ['Size:large', 'Colour:red,green,blue']
           columns = current_value.to_s.split( Delimiters::multi_assoc_delim )
 
-          # Size:large|Colour:red,green,blue => generates find_by_size( 'large' ) and find_all_by_colour( ['red','green','blue'] )
+          # Size:large|Colour:red,green,blue =>
+          #   find_by_size( 'large' )
+          #   find_all_by_colour( ['red','green','blue'] )
 
           columns.each do |col_str|
-
-            find_operator, col_values = get_find_operator_and_rest( col_str )
-
+
+            find_operator, col_values = get_operator_and_data( col_str )
+
             raise "Cannot perform DB find by #{find_operator}. Expected format key:value" unless(find_operator && col_values)
-
+
             find_by_values = col_values.split(Delimiters::multi_value_delim)
-
+
             find_by_values << current_method_detail.find_by_value if(current_method_detail.find_by_value)
-
-            if(find_by_values.size > 1)
 
-              #RAILS 4 current_value = current_method_detail.operator_class.send("find_all_by_#{find_operator}", find_by_values )
-              current_value = current_method_detail.operator_class.where(find_operator => find_by_values)
+            found_values = []
 
-              unless(find_by_values.size == current_value.size)
-                found = current_value.collect {|f| f.send(find_operator) }
-                @load_object.errors.add( current_method_detail.operator, "Association with key(s) #{(find_by_values - found).inspect} NOT found")
-                puts "WARNING: Association #{current_method_detail.operator} with key(s) #{(find_by_values - found).inspect} NOT found - Not added."
-                next if(@current_value.empty?)
-              end
+            #if(find_by_values.size() == 1)
+            #  logger.info("Find or create #{current_method_detail.operator_class} with #{find_operator} = #{find_by_values.inspect}")
+            #  item = current_method_detail.operator_class.where(find_operator => find_by_values.first).first_or_create
+            #else
+            #  logger.info("Find #{current_method_detail.operator_class} with #{find_operator} = values #{find_by_values.inspect}")
+            #  current_method_detail.operator_class.where(find_operator => find_by_values).all
+            #end
 
-            else
+            operator_class = current_method_detail.operator_class
 
-              current_value = current_method_detail.operator_class.send("find_by_#{find_operator}", find_by_values )
+            logger.info("Find #{current_method_detail.operator_class} with #{find_operator} = #{find_by_values.inspect}")
 
-              unless(current_value)
-                @load_object.errors.add( current_method_detail.operator, "Association with key #{find_by_values} NOT found")
-                puts "WARNING: Association with key #{find_by_values} NOT found - Not added."
-                next
+            find_by_values.each do |v|
+              begin
+                found_values << operator_class.where(find_operator => v).first_or_create
+              rescue => e
+                logger.error(e.inspect)
+                # TODO some way to define if this is a fatal error or not ?
               end
+            end
+
+            logger.info("Scan result #{found_values.inspect}")
 
+            unless(find_by_values.size == found_values.size)
+              found = found_values.collect {|f| f.send(find_operator) }
+              @load_object.errors.add( current_method_detail.operator, "Association with key(s) #{(find_by_values - found).inspect} NOT found")
+              logger.error "Association [#{current_method_detail.operator}] with key(s) #{(find_by_values - found).inspect} NOT found - Not added."
+              next if(found_values.empty?)
             end
 
+            logger.info("Assigning #{found_values.inspect} (#{found_values.class})")
+
            # Lookup Assoc's Model done, now add the found value(s) to load model's collection
-            @populator.assign(current_method_detail, @load_object, current_value)
-          end
+            @populator.prepare_and_assign(current_method_detail, @load_object, found_values)
+          end # END HAS_MANY
        end
-      # END HAS_MANY
      else
        # Nice n simple straight assignment to a column variable
        #puts "INFO: LOADER BASE processing #{method_detail.name}"
-        @populator.assign(current_method_detail, @load_object, current_value)
+        @populator.assign(load_object)
      end
    end
-
-
+
+
    # Loading failed. Store a failed object and if requested roll back (destroy) the current load object
    # For use case where object saved early but subsequent required columns fail to process
    # so the load object is invalid
-
+
    def failure( object = @load_object, rollback = false)
      if(object)
        @reporter.add_failed_object(object)
-
-        object.destroy if(rollback && object.respond_to?('destroy') && !object.new_record?)
-
-        new_load_object # don't forget to reset the load object
+
+        if(rollback && object.respond_to?('destroy') && !object.new_record?)
+          klass = object.class
+          object.destroy
+          object = klass.new
+        end
+      end
+    end
+
+    def save_and_report
+      unless(save)
+        failure
+        logger.error "Failed to save row (#{current_row_idx}) - [#{@current_row}]"
+        logger.error load_object.errors.inspect if(load_object)
+      else
+        logger.info("Successfully SAVED Object with ID #{load_object.id} for Row #{@current_row}")
+        @reporter.add_loaded_object(@load_object)
+        @reporter.success_inbound_count += 1
      end
    end
 
    def save
      return unless( @load_object )
-
+
      puts "DEBUG: SAVING #{@load_object.class} : #{@load_object.inspect}" if(verbose)
      begin
        return @load_object.save
      rescue => e
-        failure
-        puts "Error saving #{@load_object.class} : #{e.inspect}"
-        logger.error e.backtrace
-        raise "Error in save whilst processing column #{@current_method_detail.name}" if(@config[:strict])
+        logger.error( "Save Error : #{e.inspect} on #{@load_object.class}")
+        logger.error(e.backtrace)
      end
-    end
-
+
+      false
+    end
+
    # Reset the loader, including database object to be populated, and load counts
    #
    def reset(object = nil)
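Note: configure_from now runs the file through ERB before YAML parsing, so loader config can embed Ruby. Keys merge over the loader's options: 'LoaderBase' applies to every loader, a class-name key only to that loader class. A hedged sketch, assuming a hypothetical loader_config.yml:

    # Contents of an assumed loader_config.yml - ERB is evaluated first:
    #
    #   LoaderBase:
    #     verbose: true
    #
    #   DataShift::ExcelLoader:
    #     dummy: <%= ENV['DUMMY_RUN'] ? true : false %>
    #
    loader.configure_from('loader_config.yml')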
@@ -379,7 +423,7 @@
       @reporter.reset
     end
 
-
+
     def new_load_object
       @load_object = @load_object_class.new
       @load_object
@@ -408,7 +452,7 @@
     def missing_mandatory_headers( mandatory_list )
       [ [*mandatory_list] - @headers].flatten
     end
-
+
     def find_or_new( klass, condition_hash = {} )
       @records[klass] = klass.find(:all, :conditions => condition_hash)
       if @records[klass].any?
@@ -419,14 +463,14 @@
     end
 
     protected
-
+
     # Take current column data and split into each association
     # Supported Syntax :
     #   assoc_find_name:value | assoc2_find_name:value | etc
     def get_each_assoc
-      @populator.current_value.to_s.split( Delimiters::multi_assoc_delim )
+      current_value = @populator.current_value.to_s.split( Delimiters::multi_assoc_delim )
     end
-
+
     private
 
     # This method usually called during processing to avoid errors with associations like
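Note: get_each_assoc splits a column on Delimiters::multi_assoc_delim, and each fragment is then split on the name/value delimiter by get_operator_and_data. A worked sketch of the delimiter syntax, using the defaults implied by the comments above ('|' between associations, ':' between key and value, ',' between values):

    'Size:large|Colour:red,green,blue'.split('|')
    # => ["Size:large", "Colour:red,green,blue"]

    'Colour:red,green,blue'.split(':')
    # => ["Colour", "red,green,blue"]  - where operator and its value list

    'red,green,blue'.split(',')
    # => ["red", "green", "blue"]      - each looked up via where(...).first_or_create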
@@ -436,14 +480,14 @@
     # TODO smart ordering of columns dynamically ourselves rather than relying on incoming data order
     def save_if_new
       return unless(load_object.new_record?)
-
-      if(load_object.valid?)
+
+      if(load_object.valid?)
         save
       else
-        puts "Cannot Save - Invalid #{load_object.class} - #{load_object.errors.full_messages}" if(verbose)
+        raise DataShift::SaveError.new("Cannot Save - Invalid #{load_object.class} Record - #{load_object.errors.full_messages}")
       end
     end
-
+
   end
 
 end