datashift 0.2.1 → 0.2.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (84) hide show
  1. data/.document +5 -5
  2. data/LICENSE.txt +26 -26
  3. data/README.markdown +326 -305
  4. data/README.rdoc +19 -19
  5. data/Rakefile +86 -93
  6. data/VERSION +1 -1
  7. data/datashift.gemspec +163 -152
  8. data/lib/applications/jruby/jexcel_file.rb +410 -408
  9. data/lib/applications/jruby/word.rb +79 -79
  10. data/lib/datashift.rb +183 -152
  11. data/lib/datashift/exceptions.rb +11 -11
  12. data/lib/datashift/file_definitions.rb +353 -353
  13. data/lib/datashift/mapping_file_definitions.rb +87 -87
  14. data/lib/datashift/method_detail.rb +293 -275
  15. data/lib/datashift/method_dictionary.rb +208 -209
  16. data/lib/datashift/method_mapper.rb +90 -90
  17. data/lib/datashift/model_mapper.rb +27 -0
  18. data/lib/exporters/csv_exporter.rb +36 -0
  19. data/lib/exporters/excel_exporter.rb +116 -0
  20. data/lib/exporters/exporter_base.rb +15 -0
  21. data/lib/generators/csv_generator.rb +36 -36
  22. data/lib/generators/excel_generator.rb +106 -122
  23. data/lib/generators/generator_base.rb +13 -13
  24. data/lib/helpers/core_ext/to_b.rb +24 -24
  25. data/lib/helpers/rake_utils.rb +42 -0
  26. data/lib/helpers/spree_helper.rb +194 -153
  27. data/lib/java/poi-3.7/LICENSE +507 -507
  28. data/lib/java/poi-3.7/NOTICE +21 -21
  29. data/lib/java/poi-3.7/RELEASE_NOTES.txt +115 -115
  30. data/lib/loaders/csv_loader.rb +98 -98
  31. data/lib/loaders/excel_loader.rb +155 -155
  32. data/lib/loaders/loader_base.rb +420 -420
  33. data/lib/loaders/spreadsheet_loader.rb +136 -136
  34. data/lib/loaders/spree/image_loader.rb +67 -63
  35. data/lib/loaders/spree/product_loader.rb +289 -248
  36. data/lib/thor/generate_excel.thor +54 -0
  37. data/sandbox/app/controllers/application_controller.rb +3 -0
  38. data/sandbox/config/application.rb +43 -0
  39. data/sandbox/config/database.yml +34 -0
  40. data/sandbox/config/environment.rb +7 -0
  41. data/sandbox/config/environments/development.rb +30 -0
  42. data/spec/csv_loader_spec.rb +30 -30
  43. data/spec/datashift_spec.rb +26 -26
  44. data/spec/db/migrate/20110803201325_create_test_bed.rb +85 -85
  45. data/spec/excel_exporter_spec.rb +78 -78
  46. data/spec/excel_generator_spec.rb +78 -78
  47. data/spec/excel_loader_spec.rb +223 -223
  48. data/spec/file_definitions.rb +141 -141
  49. data/spec/fixtures/ProjectsDefaults.yml +29 -29
  50. data/spec/fixtures/config/database.yml +27 -27
  51. data/spec/fixtures/datashift_Spree_db.sqlite +0 -0
  52. data/spec/fixtures/datashift_test_models_db.sqlite +0 -0
  53. data/spec/fixtures/negative/SpreeProdMiss1Mandatory.csv +4 -4
  54. data/spec/fixtures/negative/SpreeProdMissManyMandatory.csv +4 -4
  55. data/spec/fixtures/spree/SpreeProducts.csv +4 -4
  56. data/spec/fixtures/spree/SpreeProducts.xls +0 -0
  57. data/spec/fixtures/spree/SpreeProductsMultiColumn.csv +4 -4
  58. data/spec/fixtures/spree/SpreeProductsMultiColumn.xls +0 -0
  59. data/spec/fixtures/spree/SpreeProductsSimple.csv +4 -4
  60. data/spec/fixtures/spree/SpreeProductsWithImages.csv +4 -4
  61. data/spec/fixtures/spree/SpreeZoneExample.csv +5 -5
  62. data/spec/fixtures/test_model_defs.rb +57 -57
  63. data/spec/loader_spec.rb +120 -120
  64. data/spec/method_dictionary_spec.rb +242 -242
  65. data/spec/method_mapper_spec.rb +41 -41
  66. data/spec/spec_helper.rb +154 -116
  67. data/spec/spree_exporter_spec.rb +67 -0
  68. data/spec/spree_generator_spec.rb +77 -64
  69. data/spec/spree_loader_spec.rb +363 -324
  70. data/spec/spree_method_mapping_spec.rb +218 -214
  71. data/tasks/config/seed_fu_product_template.erb +15 -15
  72. data/tasks/config/tidy_config.txt +12 -12
  73. data/tasks/{excel_generator.rake → export/excel_generator.rake} +101 -78
  74. data/tasks/file_tasks.rake +36 -36
  75. data/tasks/import/csv.rake +50 -49
  76. data/tasks/import/excel.rake +74 -71
  77. data/tasks/spree/image_load.rake +108 -108
  78. data/tasks/spree/product_loader.rake +43 -43
  79. data/tasks/word_to_seedfu.rake +166 -166
  80. data/test/helper.rb +18 -18
  81. data/test/test_interact.rb +7 -7
  82. metadata +16 -8
  83. data/datashift-0.1.0.gem +0 -0
  84. data/tasks/db_tasks.rake +0 -66
@@ -1,156 +1,156 @@
1
- # Copyright:: (c) Autotelik Media Ltd 2011
2
- # Author :: Tom Statter
3
- # Date :: Aug 2011
4
- # License:: MIT
5
- #
6
- # Details:: Specific loader to support Excel files.
7
- # Note this only requires JRuby, Excel not required, nor Win OLE.
8
- #
9
- # Maps column headings to operations on the model.
10
- # Iterates over all the rows using mapped operations to assign row data to a database object,
11
- # i.e. pulls data from each column and sends it to the object.
12
- #
13
- require 'datashift/exceptions'
14
-
15
-
16
- module DataShift
17
-
18
- if(Guards::jruby?)
19
-
20
- require 'loaders/loader_base'
21
-
22
- require 'java'
23
- require 'jexcel_file'
24
-
25
- module ExcelLoading
26
-
27
- # Options:
28
- # [:header_row] : Default is 0. Use alternative row as header definition.
29
- # [:mandatory] : Array of mandatory column names
30
- # [:strict] : Raise exception when no mapping found for a column heading (non mandatory)
31
- # [:sheet_number]
32
-
33
- def perform_excel_load( file_name, options = {} )
34
-
35
- @mandatory = options[:mandatory] || []
36
-
37
- @excel = JExcelFile.new
38
-
39
- @excel.open(file_name)
40
-
41
- #if(options[:verbose])
42
- puts "\n\n\nLoading from Excel file: #{file_name}"
43
-
44
- sheet_number = options[:sheet_number] || 0
45
-
46
- @sheet = @excel.sheet( sheet_number )
47
-
48
- header_row_index = options[:header_row] || 0
49
- @header_row = @sheet.getRow(header_row_index)
50
-
51
- raise MissingHeadersError, "No headers found - Check Sheet #{@sheet} is complete and Row #{header_row_index} contains headers" unless(@header_row)
52
-
53
- @headers = []
54
-
55
- (0..JExcelFile::MAX_COLUMNS).each do |i|
56
- cell = @header_row.getCell(i)
57
- break unless cell
58
- header = "#{@excel.cell_value(cell).to_s}".strip
59
- break if header.empty?
60
- @headers << header
61
- end
62
-
63
- raise MissingHeadersError, "No headers found - Check Sheet #{@sheet} is complete and Row #{header_row_index} contains headers" if(@headers.empty?)
64
-
65
- # Create a method_mapper which maps list of headers into suitable calls on the Active Record class
66
- # For example if model has an attribute 'price' will map columns called Price, price, PRICE etc to this attribute
67
- map_headers_to_operators( @headers, options[:strict] , @mandatory )
68
-
69
- logger.info "Excel Loader prcoessing #{@excel.num_rows} rows"
70
- load_object_class.transaction do
71
- @loaded_objects = []
72
-
73
- (1..@excel.num_rows).collect do |row|
74
-
75
- # Excel num_rows seems to return all 'visible' rows, which appears to be greater than the actual data rows
76
- # (TODO - write spec to process .xls with a huge number of rows)
77
- #
78
- # This is rubbish but currently manually detect when actual data ends, this isn't very smart but
79
- # got no better idea than ending once we hit the first completely empty row
80
- break if @excel.sheet.getRow(row).nil?
81
-
82
- contains_data = false
83
-
84
- # TODO - Smart sorting of column processing order ....
85
- # Does not currently ensure mandatory columns (for valid?) processed first but model needs saving
86
- # before associations can be processed so user should ensure mandatory columns are prior to associations
87
-
88
- # as part of this we also attempt to save early, for example before assigning to
89
- # has_and_belongs_to_many associations, which require that the load_object has an id for the join table
90
-
91
- # Iterate over the columns method_mapper found in Excel,
92
- # pulling data out of associated column
93
- @method_mapper.method_details.each_with_index do |method_detail, col|
94
-
95
- value = value_at(row, col)
96
-
97
- contains_data = true unless(value.nil? || value.to_s.empty?)
98
-
99
- prepare_data(method_detail, value)
100
-
101
- process()
102
- end
103
-
104
- break unless(contains_data == true)
105
-
106
- # TODO - requirements to handle not valid ?
107
- # all or nothing or carry on and dump out the exception list at end
108
- #puts "DEBUG: FINAL SAVE #{load_object.inspect}"
109
- unless(save)
110
- failure
111
- logger.error "Failed to save row [#{row}]"
112
- logger.error load_object.errors.inspect
113
- else
114
- logger.info "Row #{row} succesfully SAVED : ID #{load_object.id}"
115
- end
116
-
117
- # don't forget to reset the object or we'll update rather than create
118
- new_load_object
119
-
120
- end
121
- end
122
- puts "Excel loading stage complete - #{loaded_objects.size} rows added."
123
- end
124
-
125
- def value_at(row, column)
126
- @excel.get_cell_value( @excel.sheet.getRow(row), column)
127
- end
128
- end
129
-
130
-
131
- class ExcelLoader < LoaderBase
132
-
133
- include ExcelLoading
134
-
135
- def initialize(klass, object = nil, options = {})
136
- super( klass, object, options )
137
- raise "Cannot load - failed to create a #{klass}" unless @load_object
138
- end
139
-
140
-
141
- def perform_load( file_name, options = {} )
142
- perform_excel_load( file_name, options )
143
-
144
- puts "Excel loading stage complete - #{loaded_objects.size} rows added."
145
- end
146
-
147
- end
148
-
149
- else
150
-
151
- module ExcelLoading
152
- end
153
-
154
- end
155
-
1
+ # Copyright:: (c) Autotelik Media Ltd 2011
2
+ # Author :: Tom Statter
3
+ # Date :: Aug 2011
4
+ # License:: MIT
5
+ #
6
+ # Details:: Specific loader to support Excel files.
7
+ # Note this only requires JRuby, Excel not required, nor Win OLE.
8
+ #
9
+ # Maps column headings to operations on the model.
10
+ # Iterates over all the rows using mapped operations to assign row data to a database object,
11
+ # i.e. pulls data from each column and sends it to the object.
12
+ #
13
+ require 'datashift/exceptions'
14
+
15
+
16
+ module DataShift
17
+
18
+ if(Guards::jruby?)
19
+
20
+ require 'loaders/loader_base'
21
+
22
+ require 'java'
23
+ require 'jexcel_file'
24
+
25
+ module ExcelLoading
26
+
27
+ # Options:
28
+ # [:header_row] : Default is 0. Use alternative row as header definition.
29
+ # [:mandatory] : Array of mandatory column names
30
+ # [:strict] : Raise exception when no mapping found for a column heading (non mandatory)
31
+ # [:sheet_number]
32
+
33
+ def perform_excel_load( file_name, options = {} )
34
+
35
+ @mandatory = options[:mandatory] || []
36
+
37
+ @excel = JExcelFile.new
38
+
39
+ @excel.open(file_name)
40
+
41
+ #if(options[:verbose])
42
+ puts "\n\n\nLoading from Excel file: #{file_name}"
43
+
44
+ sheet_number = options[:sheet_number] || 0
45
+
46
+ @sheet = @excel.sheet( sheet_number )
47
+
48
+ header_row_index = options[:header_row] || 0
49
+ @header_row = @sheet.getRow(header_row_index)
50
+
51
+ raise MissingHeadersError, "No headers found - Check Sheet #{@sheet} is complete and Row #{header_row_index} contains headers" unless(@header_row)
52
+
53
+ @headers = []
54
+
55
+ (0..JExcelFile::MAX_COLUMNS).each do |i|
56
+ cell = @header_row.getCell(i)
57
+ break unless cell
58
+ header = "#{@excel.cell_value(cell).to_s}".strip
59
+ break if header.empty?
60
+ @headers << header
61
+ end
62
+
63
+ raise MissingHeadersError, "No headers found - Check Sheet #{@sheet} is complete and Row #{header_row_index} contains headers" if(@headers.empty?)
64
+
65
+ # Create a method_mapper which maps list of headers into suitable calls on the Active Record class
66
+ # For example if model has an attribute 'price' will map columns called Price, price, PRICE etc to this attribute
67
+ map_headers_to_operators( @headers, options[:strict] , @mandatory )
68
+
69
+ logger.info "Excel Loader prcoessing #{@excel.num_rows} rows"
70
+ load_object_class.transaction do
71
+ @loaded_objects = []
72
+
73
+ (1..@excel.num_rows).collect do |row|
74
+
75
+ # Excel num_rows seems to return all 'visible' rows, which appears to be greater than the actual data rows
76
+ # (TODO - write spec to process .xls with a huge number of rows)
77
+ #
78
+ # This is rubbish but currently manually detect when actual data ends, this isn't very smart but
79
+ # got no better idea than ending once we hit the first completely empty row
80
+ break if @excel.sheet.getRow(row).nil?
81
+
82
+ contains_data = false
83
+
84
+ # TODO - Smart sorting of column processing order ....
85
+ # Does not currently ensure mandatory columns (for valid?) processed first but model needs saving
86
+ # before associations can be processed so user should ensure mandatory columns are prior to associations
87
+
88
+ # as part of this we also attempt to save early, for example before assigning to
89
+ # has_and_belongs_to_many associations, which require that the load_object has an id for the join table
90
+
91
+ # Iterate over the columns method_mapper found in Excel,
92
+ # pulling data out of associated column
93
+ @method_mapper.method_details.each_with_index do |method_detail, col|
94
+
95
+ value = value_at(row, col)
96
+
97
+ contains_data = true unless(value.nil? || value.to_s.empty?)
98
+
99
+ prepare_data(method_detail, value)
100
+
101
+ process()
102
+ end
103
+
104
+ break unless(contains_data == true)
105
+
106
+ # TODO - requirements to handle not valid ?
107
+ # all or nothing or carry on and dump out the exception list at end
108
+ #puts "DEBUG: FINAL SAVE #{load_object.inspect}"
109
+ unless(save)
110
+ failure
111
+ logger.error "Failed to save row [#{row}]"
112
+ logger.error load_object.errors.inspect
113
+ else
114
+ logger.info "Row #{row} succesfully SAVED : ID #{load_object.id}"
115
+ end
116
+
117
+ # don't forget to reset the object or we'll update rather than create
118
+ new_load_object
119
+
120
+ end
121
+ end
122
+ puts "Excel loading stage complete - #{loaded_objects.size} rows added."
123
+ end
124
+
125
+ def value_at(row, column)
126
+ @excel.get_cell_value( @excel.sheet.getRow(row), column)
127
+ end
128
+ end
129
+
130
+
131
+ class ExcelLoader < LoaderBase
132
+
133
+ include ExcelLoading
134
+
135
+ def initialize(klass, object = nil, options = {})
136
+ super( klass, object, options )
137
+ raise "Cannot load - failed to create a #{klass}" unless @load_object
138
+ end
139
+
140
+
141
+ def perform_load( file_name, options = {} )
142
+ perform_excel_load( file_name, options )
143
+
144
+ puts "Excel loading stage complete - #{loaded_objects.size} rows added."
145
+ end
146
+
147
+ end
148
+
149
+ else
150
+
151
+ module ExcelLoading
152
+ end
153
+
154
+ end
155
+
156
156
  end
@@ -1,421 +1,421 @@
1
- # Copyright:: (c) Autotelik Media Ltd 2011
2
- # Author :: Tom Statter
3
- # Date :: Aug 2010
4
- # License:: MIT
5
- #
6
- # Details:: Base class for loaders, providing a process hook which populates a model,
7
- # based on a method map and supplied value from a file - i.e a single column/row's string value.
8
- # Note that although a single column, the string can be formatted to contain multiple values.
9
- #
10
- # Tightly coupled with MethodMapper classes (in lib/engine) which contains full details of
11
- # a file's column and its correlated AR associations.
12
- #
13
- module DataShift
14
-
15
- require 'datashift/method_mapper'
16
-
17
- class LoaderBase
18
-
19
-
20
- include DataShift::Logging
21
-
22
- attr_reader :headers
23
-
24
- attr_accessor :method_mapper
25
-
26
- attr_accessor :load_object_class, :load_object
27
- attr_accessor :current_value, :current_method_detail
28
-
29
- attr_accessor :loaded_objects, :failed_objects
30
-
31
- attr_accessor :options
32
-
33
- # Support multiple associations being added to a base object to be specified in a single column.
34
- #
35
- # Entry represents the association to find via supplied name, value to use in the lookup.
36
- # Can contain multiple lookup name/value pairs, separated by multi_assoc_delim ( | )
37
- #
38
- # Default syntax :
39
- #
40
- # Name1:value1, value2|Name2:value1, value2, value3|Name3:value1, value2
41
- #
42
- # E.G.
43
- # Association Properties, has a column named Size, and another called Colour,
44
- # and this combination could be used to lookup multiple associations to add to the main model Jumper
45
- #
46
- # Size:small # => generates find_by_size( 'small' )
47
- # Size:large # => generates find_by_size( 'large' )
48
- # Colour:red,green,blue # => generates find_all_by_colour( ['red','green','blue'] )
49
- #
50
- # Size:small|Size:medium|Size:large
51
- # => Find 3 different associations, perform lookup via column called Size
52
- # => Jumper.properties << [ small, medium, large ]
53
- #
54
- def self.name_value_delim
55
- @name_value_delim ||= ':'
56
- @name_value_delim
57
- end
58
-
59
- def self.set_name_value_delim(x) @name_value_delim = x; end
60
- # TODO - support embedded object creation/update via hash (which hopefully we should be able to just forward to AR)
61
- #
62
- # |Category|
63
- # name:new{ :date => '20110102', :owner => 'blah'}
64
- #
65
-
66
-
67
- def self.multi_value_delim
68
- @multi_value_delim ||= ','
69
- @multi_value_delim
70
- end
71
-
72
- def self.set_multi_value_delim(x) @multi_value_delim = x; end
73
-
74
- # TODO - support multi embedded object creation/update via hash (which hopefully we should be able to just forward to AR)
75
- #
76
- # |Category|
77
- # name:new{ :a => 1, :b => 2}|name:medium{ :a => 6, :b => 34}|name:old{ :a => 12, :b => 67}
78
- #
79
- def self.multi_assoc_delim
80
- @multi_assoc_delim ||= '|'
81
- @multi_assoc_delim
82
- end
83
-
84
-
85
- def self.set_multi_assoc_delim(x) @multi_assoc_delim = x; end
86
-
87
-
88
- # Options
89
- # :instance_methods => true
90
-
91
- def initialize(object_class, object = nil, options = {})
92
- @load_object_class = object_class
93
-
94
- # Gather names of all possible 'setter' methods on AR class (instance variables and associations)
95
- DataShift::MethodDictionary.find_operators( @load_object_class, :reload => true, :instance_methods => options[:instance_methods] )
96
-
97
- # Create dictionary of data on all possible 'setter' methods which can be used to
98
- # populate or integrate an object of type @load_object_class
99
- DataShift::MethodDictionary.build_method_details(@load_object_class)
100
-
101
- @method_mapper = DataShift::MethodMapper.new
102
- @options = options.clone
103
- @headers = []
104
-
105
- @default_data_objects ||= {}
106
-
107
- @default_values = {}
108
- @override_values = {}
109
-
110
- @prefixes = {}
111
- @postfixes = {}
112
-
113
- reset(object)
114
- end
115
-
116
-
117
- # kinda the derived classes interface - best way in Ruby ?
118
- def perform_load( input, options = {} )
119
- raise "WARNING- ABSTRACT METHOD CALLED - Please implement perform_load()"
120
- end
121
-
122
-
123
- # Core API - Given a list of free text column names from a file,
124
- # map all headers to a method detail containing operator details.
125
- #
126
- # This is then available through @method_mapper.method_details.each
127
- #
128
- # Options:
129
- # strict : report any header values that can't be mapped as an error
130
- #
131
- def map_headers_to_operators( headers, strict, mandatory = [])
132
- @headers = headers
133
-
134
- method_details = @method_mapper.map_inbound_to_methods( load_object_class, @headers )
135
-
136
- unless(@method_mapper.missing_methods.empty?)
137
- puts "WARNING: Following column headings could not be mapped : #{@method_mapper.missing_methods.inspect}"
138
- raise MappingDefinitionError, "Missing mappings for columns : #{@method_mapper.missing_methods.join(",")}" if(strict)
139
- end
140
-
141
- unless(@method_mapper.contains_mandatory?(mandatory) )
142
- @method_mapper.missing_mandatory(mandatory).each { |e| puts "ERROR: Mandatory column missing - expected column '#{e}'" }
143
- raise MissingMandatoryError, "Mandatory columns missing - please fix and retry."
144
- end unless(mandatory.empty?)
145
- end
146
-
147
-
148
- # Core API - Given a single free text column name from a file, search method mapper for
149
- # associated operator on base object class.
150
- #
151
- # If suitable association found, process row data and then assign to current load_object
152
- def find_and_process(column_name, data)
153
- method_detail = MethodDictionary.find_method_detail( load_object_class, column_name )
154
-
155
- if(method_detail)
156
- prepare_data(method_detail, data)
157
- process()
158
- else
159
- @load_object.errors.add_base( "No matching method found for column #{column_name}")
160
- end
161
- end
162
-
163
-
164
- # Default values can be provided in YAML config file
165
- # Format :
166
- # Load Class
167
- # attribute: value
168
-
169
- def configure_from( yaml_file )
170
-
171
- data = YAML::load( File.open(yaml_file) )
172
-
173
-
174
- # TODO - MOVE DEFAULTS TO OWN MODULE
175
- # decorate the loading class with the defaults/overrides to manage itself
176
- # IDEAS .....
177
- #
178
- #unless(@default_data_objects[load_object_class])
179
- #
180
- # @default_data_objects[load_object_class] = load_object_class.new
181
-
182
- # default_data_object = @default_data_objects[load_object_class]
183
-
184
-
185
- # default_data_object.instance_eval do
186
- # def datashift_defaults=(hash)
187
- # @datashift_defaults = hash
188
- # end
189
- # def datashift_defaults
190
- # @datashift_defaults
191
- # end
192
- #end unless load_object_class.respond_to?(:datashift_defaults)
193
- #end
194
-
195
- #puts load_object_class.new.to_yaml
196
-
197
- puts data.inspect
198
-
199
- if(data[load_object_class.name])
200
-
201
- deflts = data[load_object_class.name]['datashift_defaults']
202
- @default_values.merge!(deflts) if deflts
203
-
204
- ovrides = data[load_object_class.name]['datashift_overrides']
205
- @override_values.merge!(ovrides) if ovrides
206
- end
207
-
208
- end
209
-
210
- # Set member variables to hold details and value.
211
- #
212
- # Check supplied value, validate it, and if required :
213
- # set to any provided default value
214
- # prepend or append with any provided extensions
215
- def prepare_data(method_detail, value)
216
-
217
- @current_value = value
218
-
219
- @current_method_detail = method_detail
220
-
221
- operator = method_detail.operator
222
-
223
- override_value(operator)
224
-
225
- if((value.nil? || value.to_s.empty?) && default_value(operator))
226
- @current_value = default_value(operator)
227
- end
228
-
229
- @current_value = "#{prefixes(operator)}#{@current_value}" if(prefixes(operator))
230
- @current_value = "#{@current_value}#{postfixes(operator)}" if(postfixes(operator))
231
-
232
- @current_value
233
- end
234
-
235
-
236
- # Process a value string from a column.
237
- # Assigning value(s) to correct association on @load_object.
238
- # Method detail represents a column from a file and its correlated AR associations.
239
- # Value string which may contain multiple values for a collection association.
240
- #
241
- def process()
242
-
243
- if(@current_method_detail.operator_for(:has_many))
244
-
245
- if(@current_method_detail.operator_class && @current_value)
246
-
247
- # there are times when we need to save early, for example before assigning to
248
- # has_and_belongs_to_many associations, which require that the load_object has an id for the join table
249
-
250
- save_if_new
251
-
252
- # A single column can contain multiple associations delimited by special char
253
- # Size:large|Colour:red,green,blue => ['Size:large', 'Colour:red,green,blue']
254
- columns = @current_value.to_s.split( LoaderBase::multi_assoc_delim)
255
-
256
- # Size:large|Colour:red,green,blue => generates find_by_size( 'large' ) and find_all_by_colour( ['red','green','blue'] )
257
-
258
- columns.each do |col_str|
259
-
260
- find_operator, col_values = "",""
261
-
262
- if(@current_method_detail.find_by_operator)
263
- find_operator, col_values = @current_method_detail.find_by_operator, col_str
264
- else
265
- find_operator, col_values = col_str.split(LoaderBase::name_value_delim)
266
- raise "No key to find #{@current_method_detail.operator} in DB. Expected format key:value" unless(col_values)
267
- end
268
-
269
- find_by_values = col_values.split(LoaderBase::multi_value_delim)
270
-
271
- if(find_by_values.size > 1)
272
-
273
- @current_value = @current_method_detail.operator_class.send("find_all_by_#{find_operator}", find_by_values )
274
-
275
- unless(find_by_values.size == @current_value.size)
276
- found = @current_value.collect {|f| f.send(find_operator) }
277
- @load_object.errors.add( @current_method_detail.operator, "Association with key(s) #{(find_by_values - found).inspect} NOT found")
278
- puts "WARNING: Association with key(s) #{(lookups - found).inspect} NOT found - Not added."
279
- next if(@current_value.empty?)
280
- end
281
-
282
- else
283
-
284
- @current_value = @current_method_detail.operator_class.send("find_by_#{find_operator}", find_by_values )
285
-
286
- unless(@current_value)
287
- @load_object.errors.add( @current_method_detail.operator, "Association with key #{find_by_values} NOT found")
288
- puts "WARNING: Association with key #{find_by_values} NOT found - Not added."
289
- next
290
- end
291
-
292
- end
293
-
294
- # Lookup Assoc's Model done, now add the found value(s) to load model's collection
295
- @current_method_detail.assign(@load_object, @current_value)
296
- end
297
- end
298
- # END HAS_MANY
299
- else
300
- # Nice n simple straight assignment to a column variable
301
- #puts "INFO: LOADER BASE processing #{method_detail.name}"
302
- @current_method_detail.assign(@load_object, @current_value)
303
- end
304
- end
305
-
306
- def failure
307
- @failed_objects << @load_object unless( !load_object.new_record? || @failed_objects.include?(@load_object))
308
- end
309
-
310
- def save
311
- #puts "DEBUG: SAVING #{load_object.class} : #{load_object.inspect}" #if(options[:verbose])
312
- begin
313
- result = @load_object.save
314
-
315
- @loaded_objects << @load_object unless(@loaded_objects.include?(@load_object))
316
-
317
- return result
318
- rescue => e
319
- failure
320
- puts "Error saving #{@load_object.class} : #{e.inspect}"
321
- logger.error e.backtrace
322
- raise "Error in save whilst processing column #{@current_method_detail.name}" if(@options[:strict])
323
- end
324
- end
325
-
326
- def self.default_object_for( klass )
327
- @default_data_objects ||= {}
328
- @default_data_objects[klass]
329
- end
330
-
331
- def set_default_value( name, value )
332
- @default_values[name] = value
333
- end
334
-
335
- def set_override_value( operator, value )
336
- @override_values[operator] = value
337
- end
338
-
339
- def default_value(name)
340
- @default_values[name]
341
- end
342
-
343
- def override_value( operator )
344
- @current_value = @override_values[operator] if(@override_values[operator])
345
- end
346
-
347
-
348
- def set_prefix( name, value )
349
- @prefixes[name] = value
350
- end
351
-
352
- def prefixes(name)
353
- @prefixes[name]
354
- end
355
-
356
- def set_postfix( name, value )
357
- @postfixes[name] = value
358
- end
359
-
360
- def postfixes(name)
361
- @postfixes[name]
362
- end
363
-
364
-
365
- # Reset the loader, including database object to be populated, and load counts
366
- #
367
- def reset(object = nil)
368
- @load_object = object || new_load_object
369
- @loaded_objects, @failed_objects = [],[]
370
- @current_value = nil
371
- end
372
-
373
-
374
- def new_load_object
375
- @load_object = @load_object_class.new
376
- @load_object
377
- end
378
-
379
- def abort_on_failure?
380
- @options[:abort_on_failure] == 'true'
381
- end
382
-
383
- def loaded_count
384
- @loaded_objects.size
385
- end
386
-
387
- def failed_count
388
- @failed_objects.size
389
- end
390
-
391
-
392
- # Check whether headers contains supplied list
393
- def headers_contain_mandatory?( mandatory_list )
394
- [ [*mandatory_list] - @headers].flatten.empty?
395
- end
396
-
397
-
398
- # Return the entries of the supplied list that are missing from headers
399
- def missing_mandatory_headers( mandatory_list )
400
- [ [*mandatory_list] - @headers].flatten
401
- end
402
-
403
- def find_or_new( klass, condition_hash = {} )
404
- @records[klass] = klass.find(:all, :conditions => condition_hash)
405
- if @records[klass].any?
406
- return @records[klass].first
407
- else
408
- return klass.new
409
- end
410
- end
411
-
412
- private
413
-
414
- def save_if_new
415
- #puts "SAVE", load_object.inspect
416
- save if(load_object.valid? && load_object.new_record?)
417
- end
418
-
419
- end
420
-
1
+ # Copyright:: (c) Autotelik Media Ltd 2011
2
+ # Author :: Tom Statter
3
+ # Date :: Aug 2010
4
+ # License:: MIT
5
+ #
6
+ # Details:: Base class for loaders, providing a process hook which populates a model,
7
+ # based on a method map and supplied value from a file - i.e a single column/row's string value.
8
+ # Note that although a single column, the string can be formatted to contain multiple values.
9
+ #
10
+ # Tightly coupled with MethodMapper classes (in lib/engine) which contains full details of
11
+ # a file's column and its correlated AR associations.
12
+ #
13
+ module DataShift
14
+
15
+ require 'datashift/method_mapper'
16
+
17
+ class LoaderBase
18
+
19
+
20
+ include DataShift::Logging
21
+
22
+ attr_reader :headers
23
+
24
+ attr_accessor :method_mapper
25
+
26
+ attr_accessor :load_object_class, :load_object
27
+ attr_accessor :current_value, :current_method_detail
28
+
29
+ attr_accessor :loaded_objects, :failed_objects
30
+
31
+ attr_accessor :options
32
+
33
+ # Support multiple associations being added to a base object to be specified in a single column.
34
+ #
35
+ # Entry represents the association to find via supplied name, value to use in the lookup.
36
+ # Can contain multiple lookup name/value pairs, separated by multi_assoc_delim ( | )
37
+ #
38
+ # Default syntax :
39
+ #
40
+ # Name1:value1, value2|Name2:value1, value2, value3|Name3:value1, value2
41
+ #
42
+ # E.G.
43
+ # Association Properties, has a column named Size, and another called Colour,
44
+ # and this combination could be used to lookup multiple associations to add to the main model Jumper
45
+ #
46
+ # Size:small # => generates find_by_size( 'small' )
47
+ # Size:large # => generates find_by_size( 'large' )
48
+ # Colour:red,green,blue # => generates find_all_by_colour( ['red','green','blue'] )
49
+ #
50
+ # Size:small|Size:medium|Size:large
51
+ # => Find 3 different associations, perform lookup via column called Size
52
+ # => Jumper.properties << [ small, medium, large ]
53
+ #
54
+ def self.name_value_delim
55
+ @name_value_delim ||= ':'
56
+ @name_value_delim
57
+ end
58
+
59
+ def self.set_name_value_delim(x) @name_value_delim = x; end
60
+ # TODO - support embedded object creation/update via hash (which hopefully we should be able to just forward to AR)
61
+ #
62
+ # |Category|
63
+ # name:new{ :date => '20110102', :owner => 'blah'}
64
+ #
65
+
66
+
67
+ def self.multi_value_delim
68
+ @multi_value_delim ||= ','
69
+ @multi_value_delim
70
+ end
71
+
72
+ def self.set_multi_value_delim(x) @multi_value_delim = x; end
73
+
74
+ # TODO - support multi embedded object creation/update via hash (which hopefully we should be able to just forward to AR)
75
+ #
76
+ # |Category|
77
+ # name:new{ :a => 1, :b => 2}|name:medium{ :a => 6, :b => 34}|name:old{ :a => 12, :b => 67}
78
+ #
79
+ def self.multi_assoc_delim
80
+ @multi_assoc_delim ||= '|'
81
+ @multi_assoc_delim
82
+ end
83
+
84
+
85
+ def self.set_multi_assoc_delim(x) @multi_assoc_delim = x; end
86
+
87
+
88
+ # Options
89
+ # :instance_methods => true
90
+
91
# Build a loader for object_class.
#
# object_class - class whose operators are collected in the MethodDictionary
# object       - optional existing instance to populate; passed on to reset
# options      - kept as a copy in @options; :instance_methods is forwarded
#                to MethodDictionary.find_operators
def initialize(object_class, object = nil, options = {})
  @load_object_class = object_class

  dictionary = DataShift::MethodDictionary

  # Gather names of all possible 'setter' methods on the AR class
  # (instance variables and associations)
  dictionary.find_operators(
    @load_object_class,
    :reload => true,
    :instance_methods => options[:instance_methods]
  )

  # Build the dictionary of details on every setter that can be used to
  # populate or integrate an object of type @load_object_class
  dictionary.build_method_details(@load_object_class)

  @method_mapper = DataShift::MethodMapper.new
  @options = options.clone
  @headers = []

  @default_data_objects ||= {}

  @default_values, @override_values = {}, {}
  @prefixes, @postfixes = {}, {}

  reset(object)
end
115
+
116
+
117
+ # kinda the derived classes interface - best way in Ruby ?
118
# Placeholder for the loading entry point that derived loaders implement.
# Always raises a RuntimeError; both arguments are ignored here.
def perform_load(input, options = {})
  raise RuntimeError, "WARNING- ABSTRACT METHOD CALLED - Please implement perform_load()"
end
121
+
122
+
123
+ # Core API - Given a list of free text column names from a file,
124
+ # map all headers to a method detail containing operator details.
125
+ #
126
+ # This is then available through @method_mapper.method_details.each
127
+ #
128
+ # Options:
129
+ # strict : report any header values that can't be mapped as an error
130
+ #
131
# headers   - column names; stored in @headers and handed to the method mapper
# strict    - when truthy, unmapped headers raise MappingDefinitionError
# mandatory - column names that must be present; a missing one raises
#             MissingMandatoryError. The check is skipped for an empty list.
#
# Returns nil.
def map_headers_to_operators(headers, strict, mandatory = [])
  @headers = headers

  @method_mapper.map_inbound_to_methods(load_object_class, @headers)

  unmapped = @method_mapper.missing_methods
  if !unmapped.empty?
    puts "WARNING: Following column headings could not be mapped : #{unmapped.inspect}"
    raise MappingDefinitionError, "Missing mappings for columns : #{unmapped.join(",")}" if strict
  end

  return nil if mandatory.empty?
  return nil if @method_mapper.contains_mandatory?(mandatory)

  @method_mapper.missing_mandatory(mandatory).each do |column|
    puts "ERROR: Mandatory column missing - expected column '#{column}'"
  end
  raise MissingMandatoryError, "Mandatory columns missing - please fix and retry."
end
146
+
147
+
148
+ # Core API - Given a single free text column name from a file, search method mapper for
149
+ # associated operator on base object class.
150
+ #
151
+ # If suitable association found, process row data and then assign to current load_object
152
# column_name - free text column heading to look up on load_object_class
# data        - raw cell value for that column
#
# When a method detail is found the value is prepared and processed.
# Otherwise an error is recorded against :base on @load_object.
def find_and_process(column_name, data)
  method_detail = MethodDictionary.find_method_detail( load_object_class, column_name )

  unless method_detail
    # NOTE(review): the original called errors.add_base, which does not appear
    # to exist on ActiveRecord/ActiveModel errors (the historical name was
    # add_to_base). add(:base, msg) is the portable form - confirm against
    # the Rails version in use.
    return @load_object.errors.add(:base, "No matching method found for column #{column_name}")
  end

  prepare_data(method_detail, data)
  process
end
162
+
163
+
164
+ # Default values can be provided in YAML config file
165
+ # Format :
166
+ # Load Class
167
+ # attribute: value
168
+
169
# yaml_file - path to a YAML file keyed by load class name, for example
#
#   Product:
#     datashift_defaults:
#       price: 10
#     datashift_overrides:
#       name: fixed
#
# Entries under datashift_defaults / datashift_overrides are merged into
# @default_values / @override_values. A file that is empty, or that has no
# section for load_object_class, leaves both hashes untouched.
def configure_from( yaml_file )
  # load_file opens and closes the file itself, so no handle is left open
  data = YAML.load_file(yaml_file)

  # TODO - MOVE DEFAULTS TO OWN MODULE
  # IDEA: decorate the loading class with the defaults/overrides so it can
  # manage them itself, e.g. keep one instance per class in
  # @default_data_objects and give it datashift_defaults accessors
  # through instance_eval.

  # An empty file does not parse to a Hash; nothing to configure in that case
  section = data.is_a?(Hash) ? data[load_object_class.name] : nil
  return unless section.is_a?(Hash)

  deflts = section['datashift_defaults']
  @default_values.merge!(deflts) if deflts

  ovrides = section['datashift_overrides']
  @override_values.merge!(ovrides) if ovrides
end
209
+
210
+ # Set member variables to hold details and value.
211
+ #
212
+ # Check supplied value, validate it, and if required :
213
+ # set to any provided default value
214
+ # prepend or append with any provided extensions
215
# method_detail - detail for the column being processed; its operator keys
#                 the override, default, prefix and postfix lookups
# value         - raw value read for that column
#
# Precedence: an override always wins; otherwise a blank (nil or empty)
# value is replaced by the default when one exists. Any prefix and postfix
# are then wrapped around the result.
#
# Stores the result in @current_value and returns it.
def prepare_data(method_detail, value)
  @current_value = value
  @current_method_detail = method_detail

  operator = method_detail.operator

  # override_value returns the applied override, or nil when there is none
  overridden = override_value(operator)

  # Previously a blank raw value let the default overwrite an override
  # that had just been applied; only fall back when nothing was overridden.
  if !overridden && (value.nil? || value.to_s.empty?)
    fallback = default_value(operator)
    @current_value = fallback if fallback
  end

  prefix = prefixes(operator)
  @current_value = "#{prefix}#{@current_value}" if prefix

  postfix = postfixes(operator)
  @current_value = "#{@current_value}#{postfix}" if postfix

  @current_value
end
234
+
235
+
236
+ # Process a value string from a column.
237
+ # Assigning value(s) to correct association on @load_object.
238
+ # Method detail represents a column from a file and its correlated AR associations.
239
+ # Value string which may contain multiple values for a collection association.
240
+ #
241
# Works on @current_method_detail and @current_value as set by prepare_data.
#
# For a has_many operator the value is split into association lookups
# (multi_assoc_delim), each of the form name:value[,value...] unless the
# method detail supplies its own find_by_operator. Records are fetched with
# the operator class's dynamic finders and assigned to @load_object.
# Lookups that find nothing are recorded in @load_object.errors and skipped.
#
# Any other operator is a straight assignment of @current_value.
def process
  unless @current_method_detail.operator_for(:has_many)
    # Nice n simple straight assignment to a column variable
    return @current_method_detail.assign(@load_object, @current_value)
  end

  return unless @current_method_detail.operator_class && @current_value

  # there are times when we need to save early, for example before assigning to
  # has_and_belongs_to associations which require the load_object has an id for the join table
  save_if_new

  # A single column can contain multiple associations delimited by special char
  # Size:large|Colour:red,green,blue => ['Size:large', 'Colour:red,green,blue']
  columns = @current_value.to_s.split(LoaderBase::multi_assoc_delim)

  columns.each do |col_str|
    if @current_method_detail.find_by_operator
      find_operator, col_values = @current_method_detail.find_by_operator, col_str
    else
      # Limit of 2 keeps a value that itself contains the delimiter intact
      # instead of silently dropping everything after the second delimiter.
      find_operator, col_values = col_str.split(LoaderBase::name_value_delim, 2)
      if col_values.nil? || col_values.empty?
        raise "No key to find #{@current_method_detail.operator} in DB. Expected format key:value"
      end
    end

    find_by_values = col_values.split(LoaderBase::multi_value_delim)

    if find_by_values.size > 1
      # Colour:red,green,blue => find_all_by_colour( ['red','green','blue'] )
      @current_value = @current_method_detail.operator_class.send("find_all_by_#{find_operator}", find_by_values)

      unless find_by_values.size == @current_value.size
        found = @current_value.collect { |f| f.send(find_operator) }
        missing = find_by_values - found
        @load_object.errors.add(@current_method_detail.operator, "Association with key(s) #{missing.inspect} NOT found")
        # was (lookups - found): 'lookups' is undefined and raised NameError
        puts "WARNING: Association with key(s) #{missing.inspect} NOT found - Not added."
        next if @current_value.empty?
      end
    else
      # Size:large => find_by_size( ['large'] )
      @current_value = @current_method_detail.operator_class.send("find_by_#{find_operator}", find_by_values)

      unless @current_value
        @load_object.errors.add(@current_method_detail.operator, "Association with key #{find_by_values} NOT found")
        puts "WARNING: Association with key #{find_by_values} NOT found - Not added."
        next
      end
    end

    # Lookup of the association's model done, now add the found value(s) to the load model's collection
    @current_method_detail.assign(@load_object, @current_value)
  end
end
305
+
306
# Record the current load object as failed. Only objects that are still
# new records are recorded, and each object at most once.
def failure
  @failed_objects << @load_object if load_object.new_record? && !@failed_objects.include?(@load_object)
end
309
+
310
# Save @load_object and add it to @loaded_objects (once).
#
# Returns whatever @load_object.save returned. If saving raises, the object
# is passed to failure, the error is printed and logged, and the error is
# re-raised as a RuntimeError only when @options[:strict] is set.
def save
  #puts "DEBUG: SAVING #{load_object.class} : #{load_object.inspect}" #if(options[:verbose])
  outcome = @load_object.save

  already_listed = @loaded_objects.include?(@load_object)
  @loaded_objects << @load_object unless already_listed

  outcome
rescue => e
  failure
  puts "Error saving #{@load_object.class} : #{e.inspect}"
  logger.error e.backtrace
  raise "Error in save whilst processing column #{@current_method_detail.name}" if(@options[:strict])
end
325
+
326
# Look up the default data object registered for klass, or nil.
#
# NOTE(review): this reads the class-level @default_data_objects, whereas
# initialize sets an instance-level variable of the same name - confirm
# which one is intended to hold the data.
def self.default_object_for( klass )
  (@default_data_objects ||= {})[klass]
end
330
+
331
# Register value as the default for name. Returns value.
def set_default_value( name, value )
  @default_values.store(name, value)
end
334
+
335
# Register value as the override for operator. Returns value.
def set_override_value( operator, value )
  @override_values.store(operator, value)
end
338
+
339
# Default registered for name, or nil when none was set.
def default_value(name)
  registered = @default_values[name]
  registered
end
342
+
343
# Replace @current_value with the override registered for operator, if any.
# Returns the override that was applied, or nil when there is none.
def override_value( operator )
  forced = @override_values[operator]
  @current_value = forced if forced
end
346
+
347
+
348
# Register value as the prefix for name. Returns value.
def set_prefix( name, value )
  @prefixes.store(name, value)
end
351
+
352
# Prefix registered for name, or nil when none was set.
def prefixes(name)
  registered = @prefixes[name]
  registered
end
355
+
356
# Register value as the postfix for name. Returns value.
def set_postfix( name, value )
  @postfixes.store(name, value)
end
359
+
360
# Postfix registered for name, or nil when none was set.
def postfixes(name)
  registered = @postfixes[name]
  registered
end
363
+
364
+
365
+ # Reset the loader, including database object to be populated, and load counts
366
+ #
367
# object - instance to populate next; when nil a fresh one is created via
#          new_load_object
#
# Also empties the loaded/failed lists and clears @current_value.
# Returns nil.
def reset(object = nil)
  @load_object = object || new_load_object
  @loaded_objects = []
  @failed_objects = []
  @current_value = nil
end
372
+
373
+
374
# Instantiate @load_object_class, make the instance the current load object
# and return it.
def new_load_object
  @load_object = @load_object_class.new
end
378
+
379
# True when the :abort_on_failure option is enabled.
#
# Accepts the string 'true' (as before) and now also boolean true, which
# the previous string-only comparison treated as disabled.
def abort_on_failure?
  @options[:abort_on_failure].to_s == 'true'
end
382
+
383
# Number of objects currently held in @loaded_objects.
def loaded_count
  @loaded_objects.length
end
386
+
387
# Number of objects currently held in @failed_objects.
def failed_count
  @failed_objects.length
end
390
+
391
+
392
+ # Check whether headers contains supplied list
393
# mandatory_list - a single name or a list of names
#
# True when every entry is present in @headers.
def headers_contain_mandatory?( mandatory_list )
  absent = (Array(mandatory_list) - @headers).flatten
  absent.empty?
end
396
+
397
+
398
+ # Return the entries of the supplied list that are missing from headers
399
# mandatory_list - a single name or a list of names
#
# Returns the entries that are not present in @headers (empty when all
# are present).
def missing_mandatory_headers( mandatory_list )
  (Array(mandatory_list) - @headers).flatten
end
402
+
403
# klass          - model class to search
# condition_hash - conditions passed to klass.find(:all, :conditions => ...)
#
# The matches are cached in @records[klass]. Returns the first match, or a
# new unsaved klass instance when nothing matched.
def find_or_new( klass, condition_hash = {} )
  # @records is not set up by initialize, so create it on first use
  # instead of failing on nil
  @records ||= {}

  @records[klass] = klass.find(:all, :conditions => condition_hash)

  @records[klass].first || klass.new
end
411
+
412
+ private
413
+
414
# Save the load object early, but only when it is valid and not yet
# persisted. Returns the result of save, or nil when nothing was saved.
def save_if_new
  #puts "SAVE", load_object.inspect
  return unless load_object.valid? && load_object.new_record?

  save
end
418
+
419
+ end
420
+
421
421
  end