datashift 0.1.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. data/.document +5 -5
  2. data/LICENSE.txt +26 -26
  3. data/README.markdown +305 -303
  4. data/README.rdoc +19 -19
  5. data/Rakefile +93 -93
  6. data/VERSION +1 -1
  7. data/datashift-0.1.0.gem +0 -0
  8. data/datashift.gemspec +152 -136
  9. data/lib/applications/jruby/jexcel_file.rb +408 -408
  10. data/lib/applications/jruby/word.rb +79 -79
  11. data/lib/datashift.rb +152 -152
  12. data/lib/datashift/exceptions.rb +11 -11
  13. data/lib/datashift/file_definitions.rb +353 -353
  14. data/lib/datashift/mapping_file_definitions.rb +87 -87
  15. data/lib/datashift/method_detail.rb +275 -275
  16. data/lib/datashift/method_dictionary.rb +209 -209
  17. data/lib/datashift/method_mapper.rb +90 -90
  18. data/lib/generators/csv_generator.rb +36 -36
  19. data/lib/generators/excel_generator.rb +122 -122
  20. data/lib/generators/generator_base.rb +13 -13
  21. data/lib/helpers/core_ext/to_b.rb +24 -24
  22. data/lib/helpers/spree_helper.rb +153 -155
  23. data/lib/java/poi-3.7/LICENSE +507 -507
  24. data/lib/java/poi-3.7/NOTICE +21 -21
  25. data/lib/java/poi-3.7/RELEASE_NOTES.txt +115 -115
  26. data/lib/loaders/csv_loader.rb +98 -98
  27. data/lib/loaders/excel_loader.rb +155 -155
  28. data/lib/loaders/loader_base.rb +420 -420
  29. data/lib/loaders/spreadsheet_loader.rb +136 -136
  30. data/lib/loaders/spree/image_loader.rb +63 -64
  31. data/lib/loaders/spree/product_loader.rb +248 -250
  32. data/public/spree/products/large/DEMO_001_ror_bag.jpeg +0 -0
  33. data/public/spree/products/large/DEMO_002_Powerstation.jpg +0 -0
  34. data/public/spree/products/large/DEMO_003_ror_mug.jpeg +0 -0
  35. data/public/spree/products/mini/DEMO_001_ror_bag.jpeg +0 -0
  36. data/public/spree/products/mini/DEMO_002_Powerstation.jpg +0 -0
  37. data/public/spree/products/mini/DEMO_003_ror_mug.jpeg +0 -0
  38. data/public/spree/products/original/DEMO_001_ror_bag.jpeg +0 -0
  39. data/public/spree/products/original/DEMO_002_Powerstation.jpg +0 -0
  40. data/public/spree/products/original/DEMO_003_ror_mug.jpeg +0 -0
  41. data/public/spree/products/product/DEMO_001_ror_bag.jpeg +0 -0
  42. data/public/spree/products/product/DEMO_002_Powerstation.jpg +0 -0
  43. data/public/spree/products/product/DEMO_003_ror_mug.jpeg +0 -0
  44. data/public/spree/products/small/DEMO_001_ror_bag.jpeg +0 -0
  45. data/public/spree/products/small/DEMO_002_Powerstation.jpg +0 -0
  46. data/public/spree/products/small/DEMO_003_ror_mug.jpeg +0 -0
  47. data/spec/csv_loader_spec.rb +30 -30
  48. data/spec/datashift_spec.rb +26 -26
  49. data/spec/db/migrate/20110803201325_create_test_bed.rb +85 -85
  50. data/spec/excel_exporter_spec.rb +78 -78
  51. data/spec/excel_generator_spec.rb +78 -78
  52. data/spec/excel_loader_spec.rb +223 -223
  53. data/spec/file_definitions.rb +141 -141
  54. data/spec/fixtures/ProjectsDefaults.yml +29 -29
  55. data/spec/fixtures/config/database.yml +27 -24
  56. data/spec/fixtures/datashift_Spree_db.sqlite +0 -0
  57. data/spec/fixtures/interact_models_db.sqlite +0 -0
  58. data/spec/fixtures/negative/SpreeProdMiss1Mandatory.csv +4 -4
  59. data/spec/fixtures/negative/SpreeProdMissManyMandatory.csv +4 -4
  60. data/spec/fixtures/spree/SpreeProducts.csv +4 -4
  61. data/spec/fixtures/spree/SpreeProductsMultiColumn.csv +4 -4
  62. data/spec/fixtures/spree/SpreeProductsSimple.csv +4 -4
  63. data/spec/fixtures/spree/SpreeProductsWithImages.csv +4 -0
  64. data/spec/fixtures/spree/SpreeZoneExample.csv +5 -5
  65. data/spec/fixtures/test_model_defs.rb +57 -57
  66. data/spec/loader_spec.rb +120 -120
  67. data/spec/method_dictionary_spec.rb +242 -242
  68. data/spec/method_mapper_spec.rb +41 -41
  69. data/spec/spec_helper.rb +116 -116
  70. data/spec/spree_generator_spec.rb +64 -64
  71. data/spec/spree_loader_spec.rb +324 -327
  72. data/spec/spree_method_mapping_spec.rb +214 -214
  73. data/tasks/config/seed_fu_product_template.erb +15 -15
  74. data/tasks/config/tidy_config.txt +12 -12
  75. data/tasks/db_tasks.rake +65 -65
  76. data/tasks/excel_generator.rake +78 -78
  77. data/tasks/file_tasks.rake +36 -36
  78. data/tasks/import/csv.rake +49 -49
  79. data/tasks/import/excel.rake +71 -71
  80. data/tasks/spree/image_load.rake +108 -108
  81. data/tasks/spree/product_loader.rake +43 -43
  82. data/tasks/word_to_seedfu.rake +166 -166
  83. data/test/helper.rb +18 -18
  84. data/test/test_interact.rb +7 -7
  85. metadata +22 -3
  86. data/spec/fixtures/interact_spree_db.sqlite +0 -0
@@ -1,156 +1,156 @@
1
- # Copyright:: (c) Autotelik Media Ltd 2011
2
- # Author :: Tom Statter
3
- # Date :: Aug 2011
4
- # License:: MIT
5
- #
6
- # Details:: Specific loader to support Excel files.
7
- # Note this only requires JRuby, Excel not required, nor Win OLE.
8
- #
9
- # Maps column headings to operations on the model.
10
- # Iterates over all the rows using mapped operations to assign row data to a database object,
11
- # i.e pulls data from each column and sends to object.
12
- #
13
- require 'datashift/exceptions'
14
-
15
-
16
- module DataShift
17
-
18
- if(Guards::jruby?)
19
-
20
- require 'loaders/loader_base'
21
-
22
- require 'java'
23
- require 'jexcel_file'
24
-
25
- module ExcelLoading
26
-
27
- # Options:
28
- # [:header_row] : Default is 0. Use alternative row as header definition.
29
- # [:mandatory] : Array of mandatory column names
30
- # [:strict] : Raise exception when no mapping found for a column heading (non mandatory)
31
- # [:sheet_number]
32
-
33
- def perform_excel_load( file_name, options = {} )
34
-
35
- @mandatory = options[:mandatory] || []
36
-
37
- @excel = JExcelFile.new
38
-
39
- @excel.open(file_name)
40
-
41
- #if(options[:verbose])
42
- puts "\n\n\nLoading from Excel file: #{file_name}"
43
-
44
- sheet_number = options[:sheet_number] || 0
45
-
46
- @sheet = @excel.sheet( sheet_number )
47
-
48
- header_row_index = options[:header_row] || 0
49
- @header_row = @sheet.getRow(header_row_index)
50
-
51
- raise MissingHeadersError, "No headers found - Check Sheet #{@sheet} is complete and Row #{header_row_index} contains headers" unless(@header_row)
52
-
53
- @headers = []
54
-
55
- (0..JExcelFile::MAX_COLUMNS).each do |i|
56
- cell = @header_row.getCell(i)
57
- break unless cell
58
- header = "#{@excel.cell_value(cell).to_s}".strip
59
- break if header.empty?
60
- @headers << header
61
- end
62
-
63
- raise MissingHeadersError, "No headers found - Check Sheet #{@sheet} is complete and Row #{header_row_index} contains headers" if(@headers.empty?)
64
-
65
- # Create a method_mapper which maps list of headers into suitable calls on the Active Record class
66
- # For example if model has an attribute 'price' will map columns called Price, price, PRICE etc to this attribute
67
- map_headers_to_operators( @headers, options[:strict] , @mandatory )
68
-
69
- logger.info "Excel Loader prcoessing #{@excel.num_rows} rows"
70
- load_object_class.transaction do
71
- @loaded_objects = []
72
-
73
- (1..@excel.num_rows).collect do |row|
74
-
75
- # Excel num_rows seems to return all 'visible' rows, which appears to be greater than the actual data rows
76
- # (TODO - write spec to process .xls with a huge number of rows)
77
- #
78
- # This is rubbish but currently manually detect when actual data ends, this isn't very smart but
79
- # got no better idea than ending once we hit the first completely empty row
80
- break if @excel.sheet.getRow(row).nil?
81
-
82
- contains_data = false
83
-
84
- # TODO - Smart sorting of column processing order ....
85
- # Does not currently ensure mandatory columns (for valid?) processed first but model needs saving
86
- # before associations can be processed so user should ensure mandatory columns are prior to associations
87
-
88
- # as part of this we also attempt to save early, for example before assigning to
89
- # has_and_belongs_to associations which require the load_object has an id for the join table
90
-
91
- # Iterate over the columns method_mapper found in Excel,
92
- # pulling data out of associated column
93
- @method_mapper.method_details.each_with_index do |method_detail, col|
94
-
95
- value = value_at(row, col)
96
-
97
- contains_data = true unless(value.nil? || value.to_s.empty?)
98
-
99
- prepare_data(method_detail, value)
100
-
101
- process()
102
- end
103
-
104
- break unless(contains_data == true)
105
-
106
- # TODO - requirements to handle not valid ?
107
- # all or nothing or carry on and dump out the exception list at end
108
- #puts "DEBUG: FINAL SAVE #{load_object.inspect}"
109
- unless(save)
110
- failure
111
- logger.error "Failed to save row [#{row}]"
112
- logger.error load_object.errors.inspect
113
- else
114
- logger.info "Row #{row} succesfully SAVED : ID #{load_object.id}"
115
- end
116
-
117
- # don't forget to reset the object or we'll update rather than create
118
- new_load_object
119
-
120
- end
121
- end
122
- puts "Excel loading stage complete - #{loaded_objects.size} rows added."
123
- end
124
-
125
- def value_at(row, column)
126
- @excel.get_cell_value( @excel.sheet.getRow(row), column)
127
- end
128
- end
129
-
130
-
131
- class ExcelLoader < LoaderBase
132
-
133
- include ExcelLoading
134
-
135
- def initialize(klass, object = nil, options = {})
136
- super( klass, object, options )
137
- raise "Cannot load - failed to create a #{klass}" unless @load_object
138
- end
139
-
140
-
141
- def perform_load( file_name, options = {} )
142
- perform_excel_load( file_name, options )
143
-
144
- puts "Excel loading stage complete - #{loaded_objects.size} rows added."
145
- end
146
-
147
- end
148
-
149
- else
150
-
151
- module ExcelLoading
152
- end
153
-
154
- end
155
-
1
+ # Copyright:: (c) Autotelik Media Ltd 2011
2
+ # Author :: Tom Statter
3
+ # Date :: Aug 2011
4
+ # License:: MIT
5
+ #
6
+ # Details:: Specific loader to support Excel files.
7
+ # Note this only requires JRuby, Excel not required, nor Win OLE.
8
+ #
9
+ # Maps column headings to operations on the model.
10
+ # Iterates over all the rows using mapped operations to assign row data to a database object,
11
+ # i.e pulls data from each column and sends to object.
12
+ #
13
+ require 'datashift/exceptions'
14
+
15
+
16
+ module DataShift
17
+
18
+ if(Guards::jruby?)
19
+
20
+ require 'loaders/loader_base'
21
+
22
+ require 'java'
23
+ require 'jexcel_file'
24
+
25
+ module ExcelLoading
26
+
27
+ # Options:
28
+ # [:header_row] : Default is 0. Use alternative row as header definition.
29
+ # [:mandatory] : Array of mandatory column names
30
+ # [:strict] : Raise exception when no mapping found for a column heading (non mandatory)
31
+ # [:sheet_number]
32
+
33
+ def perform_excel_load( file_name, options = {} )
34
+
35
+ @mandatory = options[:mandatory] || []
36
+
37
+ @excel = JExcelFile.new
38
+
39
+ @excel.open(file_name)
40
+
41
+ #if(options[:verbose])
42
+ puts "\n\n\nLoading from Excel file: #{file_name}"
43
+
44
+ sheet_number = options[:sheet_number] || 0
45
+
46
+ @sheet = @excel.sheet( sheet_number )
47
+
48
+ header_row_index = options[:header_row] || 0
49
+ @header_row = @sheet.getRow(header_row_index)
50
+
51
+ raise MissingHeadersError, "No headers found - Check Sheet #{@sheet} is complete and Row #{header_row_index} contains headers" unless(@header_row)
52
+
53
+ @headers = []
54
+
55
+ (0..JExcelFile::MAX_COLUMNS).each do |i|
56
+ cell = @header_row.getCell(i)
57
+ break unless cell
58
+ header = "#{@excel.cell_value(cell).to_s}".strip
59
+ break if header.empty?
60
+ @headers << header
61
+ end
62
+
63
+ raise MissingHeadersError, "No headers found - Check Sheet #{@sheet} is complete and Row #{header_row_index} contains headers" if(@headers.empty?)
64
+
65
+ # Create a method_mapper which maps list of headers into suitable calls on the Active Record class
66
+ # For example if model has an attribute 'price' will map columns called Price, price, PRICE etc to this attribute
67
+ map_headers_to_operators( @headers, options[:strict] , @mandatory )
68
+
69
+ logger.info "Excel Loader prcoessing #{@excel.num_rows} rows"
70
+ load_object_class.transaction do
71
+ @loaded_objects = []
72
+
73
+ (1..@excel.num_rows).collect do |row|
74
+
75
+ # Excel num_rows seems to return all 'visible' rows, which appears to be greater than the actual data rows
76
+ # (TODO - write spec to process .xls with a huge number of rows)
77
+ #
78
+ # This is rubbish but currently manually detect when actual data ends, this isn't very smart but
79
+ # got no better idea than ending once we hit the first completely empty row
80
+ break if @excel.sheet.getRow(row).nil?
81
+
82
+ contains_data = false
83
+
84
+ # TODO - Smart sorting of column processing order ....
85
+ # Does not currently ensure mandatory columns (for valid?) processed first but model needs saving
86
+ # before associations can be processed so user should ensure mandatory columns are prior to associations
87
+
88
+ # as part of this we also attempt to save early, for example before assigning to
89
+ # has_and_belongs_to associations which require the load_object has an id for the join table
90
+
91
+ # Iterate over the columns method_mapper found in Excel,
92
+ # pulling data out of associated column
93
+ @method_mapper.method_details.each_with_index do |method_detail, col|
94
+
95
+ value = value_at(row, col)
96
+
97
+ contains_data = true unless(value.nil? || value.to_s.empty?)
98
+
99
+ prepare_data(method_detail, value)
100
+
101
+ process()
102
+ end
103
+
104
+ break unless(contains_data == true)
105
+
106
+ # TODO - requirements to handle not valid ?
107
+ # all or nothing or carry on and dump out the exception list at end
108
+ #puts "DEBUG: FINAL SAVE #{load_object.inspect}"
109
+ unless(save)
110
+ failure
111
+ logger.error "Failed to save row [#{row}]"
112
+ logger.error load_object.errors.inspect
113
+ else
114
+ logger.info "Row #{row} succesfully SAVED : ID #{load_object.id}"
115
+ end
116
+
117
+ # don't forget to reset the object or we'll update rather than create
118
+ new_load_object
119
+
120
+ end
121
+ end
122
+ puts "Excel loading stage complete - #{loaded_objects.size} rows added."
123
+ end
124
+
125
+ def value_at(row, column)
126
+ @excel.get_cell_value( @excel.sheet.getRow(row), column)
127
+ end
128
+ end
129
+
130
+
131
+ class ExcelLoader < LoaderBase
132
+
133
+ include ExcelLoading
134
+
135
+ def initialize(klass, object = nil, options = {})
136
+ super( klass, object, options )
137
+ raise "Cannot load - failed to create a #{klass}" unless @load_object
138
+ end
139
+
140
+
141
+ def perform_load( file_name, options = {} )
142
+ perform_excel_load( file_name, options )
143
+
144
+ puts "Excel loading stage complete - #{loaded_objects.size} rows added."
145
+ end
146
+
147
+ end
148
+
149
+ else
150
+
151
+ module ExcelLoading
152
+ end
153
+
154
+ end
155
+
156
156
  end
@@ -1,421 +1,421 @@
1
- # Copyright:: (c) Autotelik Media Ltd 2011
2
- # Author :: Tom Statter
3
- # Date :: Aug 2010
4
- # License:: MIT
5
- #
6
- # Details:: Base class for loaders, providing a process hook which populates a model,
7
- # based on a method map and supplied value from a file - i.e a single column/row's string value.
8
- # Note that although a single column, the string can be formatted to contain multiple values.
9
- #
10
- # Tightly coupled with MethodMapper classes (in lib/engine) which contains full details of
11
- # a file's column and it's correlated AR associations.
12
- #
13
- module DataShift
14
-
15
- require 'datashift/method_mapper'
16
-
17
- class LoaderBase
18
-
19
-
20
- include DataShift::Logging
21
-
22
- attr_reader :headers
23
-
24
- attr_accessor :method_mapper
25
-
26
- attr_accessor :load_object_class, :load_object
27
- attr_accessor :current_value, :current_method_detail
28
-
29
- attr_accessor :loaded_objects, :failed_objects
30
-
31
- attr_accessor :options
32
-
33
- # Support multiple associations being added to a base object to be specified in a single column.
34
- #
35
- # Entry represents the association to find via supplied name, value to use in the lookup.
36
- # Can contain multiple lookup name/value pairs, separated by multi_assoc_delim ( | )
37
- #
38
- # Default syntax :
39
- #
40
- # Name1:value1, value2|Name2:value1, value2, value3|Name3:value1, value2
41
- #
42
- # E.G.
43
- # Association Properties, has a column named Size, and another called Colour,
44
- # and this combination could be used to lookup multiple associations to add to the main model Jumper
45
- #
46
- # Size:small # => generates find_by_size( 'small' )
47
- # Size:large # => generates find_by_size( 'large' )
48
- # Colour:red,green,blue # => generates find_all_by_colour( ['red','green','blue'] )
49
- #
50
- # Size:large|Size:medium|Size:large
51
- # => Find 3 different associations, perform lookup via column called Size
52
- # => Jumper.properties << [ small, medium, large ]
53
- #
54
- def self.name_value_delim
55
- @name_value_delim ||= ':'
56
- @name_value_delim
57
- end
58
-
59
- def self.set_name_value_delim(x) @name_value_delim = x; end
60
- # TODO - support embedded object creation/update via hash (which hopefully we should be able to just forward to AR)
61
- #
62
- # |Category|
63
- # name:new{ :date => '20110102', :owner = > 'blah'}
64
- #
65
-
66
-
67
- def self.multi_value_delim
68
- @multi_value_delim ||= ','
69
- @multi_value_delim
70
- end
71
-
72
- def self.set_multi_value_delim(x) @multi_value_delim = x; end
73
-
74
- # TODO - support multi embedded object creation/update via hash (which hopefully we should be able to just forward to AR)
75
- #
76
- # |Category|
77
- # name:new{ :a => 1, :b => 2}|name:medium{ :a => 6, :b => 34}|name:old{ :a => 12, :b => 67}
78
- #
79
- def self.multi_assoc_delim
80
- @multi_assoc_delim ||= '|'
81
- @multi_assoc_delim
82
- end
83
-
84
-
85
- def self.set_multi_assoc_delim(x) @multi_assoc_delim = x; end
86
-
87
-
88
- # Options
89
- # :instance_methods => true
90
-
91
- def initialize(object_class, object = nil, options = {})
92
- @load_object_class = object_class
93
-
94
- # Gather names of all possible 'setter' methods on AR class (instance variables and associations)
95
- DataShift::MethodDictionary.find_operators( @load_object_class, :reload => true, :instance_methods => options[:instance_methods] )
96
-
97
- # Create dictionary of data on all possible 'setter' methods which can be used to
98
- # populate or integrate an object of type @load_object_class
99
- DataShift::MethodDictionary.build_method_details(@load_object_class)
100
-
101
- @method_mapper = DataShift::MethodMapper.new
102
- @options = options.clone
103
- @headers = []
104
-
105
- @default_data_objects ||= {}
106
-
107
- @default_values = {}
108
- @override_values = {}
109
-
110
- @prefixes = {}
111
- @postfixes = {}
112
-
113
- reset(object)
114
- end
115
-
116
-
117
- # kinda the derived classes interface - best way in Ruby ?
118
- def perform_load( input, options = {} )
119
- raise "WARNING- ABSTRACT METHOD CALLED - Please implement perform_load()"
120
- end
121
-
122
-
123
- # Core API - Given a list of free text column names from a file,
124
- # map all headers to a method detail containing operator details.
125
- #
126
- # This is then available through @method_mapper.method_details.each
127
- #
128
- # Options:
129
- # strict : report any header values that can't be mapped as an error
130
- #
131
- def map_headers_to_operators( headers, strict, mandatory = [])
132
- @headers = headers
133
-
134
- method_details = @method_mapper.map_inbound_to_methods( load_object_class, @headers )
135
-
136
- unless(@method_mapper.missing_methods.empty?)
137
- puts "WARNING: Following column headings could not be mapped : #{@method_mapper.missing_methods.inspect}"
138
- raise MappingDefinitionError, "Missing mappings for columns : #{@method_mapper.missing_methods.join(",")}" if(strict)
139
- end
140
-
141
- unless(@method_mapper.contains_mandatory?(mandatory) )
142
- @method_mapper.missing_mandatory(mandatory).each { |e| puts "ERROR: Mandatory column missing - expected column '#{e}'" }
143
- raise MissingMandatoryError, "Mandatory columns missing - please fix and retry."
144
- end unless(mandatory.empty?)
145
- end
146
-
147
-
148
- # Core API - Given a single free text column name from a file, search method mapper for
149
- # associated operator on base object class.
150
- #
151
- # If suitable association found, process row data and then assign to current load_object
152
- def find_and_process(column_name, data)
153
- method_detail = MethodDictionary.find_method_detail( load_object_class, column_name )
154
-
155
- if(method_detail)
156
- prepare_data(method_detail, data)
157
- process()
158
- else
159
- @load_object.errors.add_base( "No matching method found for column #{column_name}")
160
- end
161
- end
162
-
163
-
164
- # Default values can be provided in YAML config file
165
- # Format :
166
- # Load Class
167
- # atttribute: value
168
-
169
- def configure_from( yaml_file )
170
-
171
- data = YAML::load( File.open(yaml_file) )
172
-
173
-
174
- # TODO - MOVE DEFAULTS TO OWN MODULE
175
- # decorate the loading class with the defaults/ove rides to manage itself
176
- # IDEAS .....
177
- #
178
- #unless(@default_data_objects[load_object_class])
179
- #
180
- # @default_data_objects[load_object_class] = load_object_class.new
181
-
182
- # default_data_object = @default_data_objects[load_object_class]
183
-
184
-
185
- # default_data_object.instance_eval do
186
- # def datashift_defaults=(hash)
187
- # @datashift_defaults = hash
188
- # end
189
- # def datashift_defaults
190
- # @datashift_defaults
191
- # end
192
- #end unless load_object_class.respond_to?(:datashift_defaults)
193
- #end
194
-
195
- #puts load_object_class.new.to_yaml
196
-
197
- puts data.inspect
198
-
199
- if(data[load_object_class.name])
200
-
201
- deflts = data[load_object_class.name]['datashift_defaults']
202
- @default_values.merge!(deflts) if deflts
203
-
204
- ovrides = data[load_object_class.name]['datashift_overrides']
205
- @override_values.merge!(ovrides) if ovrides
206
- end
207
-
208
- end
209
-
210
- # Set member variables to hold details and value.
211
- #
212
- # Check supplied value, validate it, and if required :
213
- # set to any provided default value
214
- # prepend or append with any provided extensions
215
- def prepare_data(method_detail, value)
216
-
217
- @current_value = value
218
-
219
- @current_method_detail = method_detail
220
-
221
- operator = method_detail.operator
222
-
223
- override_value(operator)
224
-
225
- if((value.nil? || value.to_s.empty?) && default_value(operator))
226
- @current_value = default_value(operator)
227
- end
228
-
229
- @current_value = "#{prefixes(operator)}#{@current_value}" if(prefixes(operator))
230
- @current_value = "#{@current_value}#{postfixes(operator)}" if(postfixes(operator))
231
-
232
- @current_value
233
- end
234
-
235
-
236
- # Process a value string from a column.
237
- # Assigning value(s) to correct association on @load_object.
238
- # Method detail represents a column from a file and it's correlated AR associations.
239
- # Value string which may contain multiple values for a collection association.
240
- #
241
- def process()
242
-
243
- if(@current_method_detail.operator_for(:has_many))
244
-
245
- if(@current_method_detail.operator_class && @current_value)
246
-
247
- # there are times when we need to save early, for example before assigning to
248
- # has_and_belongs_to associations which require the load_object has an id for the join table
249
-
250
- save_if_new
251
-
252
- # A single column can contain multiple associations delimited by special char
253
- # Size:large|Colour:red,green,blue => ['Size:large', 'Colour:red,green,blue']
254
- columns = @current_value.to_s.split( LoaderBase::multi_assoc_delim)
255
-
256
- # Size:large|Colour:red,green,blue => generates find_by_size( 'large' ) and find_all_by_colour( ['red','green','blue'] )
257
-
258
- columns.each do |col_str|
259
-
260
- find_operator, col_values = "",""
261
-
262
- if(@current_method_detail.find_by_operator)
263
- find_operator, col_values = @current_method_detail.find_by_operator, col_str
264
- else
265
- find_operator, col_values = col_str.split(LoaderBase::name_value_delim)
266
- raise "No key to find #{@current_method_detail.operator} in DB. Expected format key:value" unless(col_values)
267
- end
268
-
269
- find_by_values = col_values.split(LoaderBase::multi_value_delim)
270
-
271
- if(find_by_values.size > 1)
272
-
273
- @current_value = @current_method_detail.operator_class.send("find_all_by_#{find_operator}", find_by_values )
274
-
275
- unless(find_by_values.size == @current_value.size)
276
- found = @current_value.collect {|f| f.send(find_operator) }
277
- @load_object.errors.add( @current_method_detail.operator, "Association with key(s) #{(find_by_values - found).inspect} NOT found")
278
- puts "WARNING: Association with key(s) #{(lookups - found).inspect} NOT found - Not added."
279
- next if(@current_value.empty?)
280
- end
281
-
282
- else
283
-
284
- @current_value = @current_method_detail.operator_class.send("find_by_#{find_operator}", find_by_values )
285
-
286
- unless(@current_value)
287
- @load_object.errors.add( @current_method_detail.operator, "Association with key #{find_by_values} NOT found")
288
- puts "WARNING: Association with key #{find_by_values} NOT found - Not added."
289
- next
290
- end
291
-
292
- end
293
-
294
- # Lookup Assoc's Model done, now add the found value(s) to load model's collection
295
- @current_method_detail.assign(@load_object, @current_value)
296
- end
297
- end
298
- # END HAS_MANY
299
- else
300
- # Nice n simple straight assignment to a column variable
301
- #puts "INFO: LOADER BASE processing #{method_detail.name}"
302
- @current_method_detail.assign(@load_object, @current_value)
303
- end
304
- end
305
-
306
- def failure
307
- @failed_objects << @load_object unless( !load_object.new_record? || @failed_objects.include?(@load_object))
308
- end
309
-
310
- def save
311
- #puts "DEBUG: SAVING #{load_object.class} : #{load_object.inspect}" #if(options[:verbose])
312
- begin
313
- result = @load_object.save
314
-
315
- @loaded_objects << @load_object unless(@loaded_objects.include?(@load_object))
316
-
317
- return result
318
- rescue => e
319
- failure
320
- puts "Error saving #{@load_object.class} : #{e.inspect}"
321
- logger.error e.backtrace
322
- raise "Error in save whilst processing column #{@current_method_detail.name}" if(@options[:strict])
323
- end
324
- end
325
-
326
- def self.default_object_for( klass )
327
- @default_data_objects ||= {}
328
- @default_data_objects[klass]
329
- end
330
-
331
- def set_default_value( name, value )
332
- @default_values[name] = value
333
- end
334
-
335
- def set_override_value( operator, value )
336
- @override_values[operator] = value
337
- end
338
-
339
- def default_value(name)
340
- @default_values[name]
341
- end
342
-
343
- def override_value( operator )
344
- @current_value = @override_values[operator] if(@override_values[operator])
345
- end
346
-
347
-
348
- def set_prefix( name, value )
349
- @prefixes[name] = value
350
- end
351
-
352
- def prefixes(name)
353
- @prefixes[name]
354
- end
355
-
356
- def set_postfix( name, value )
357
- @postfixes[name] = value
358
- end
359
-
360
- def postfixes(name)
361
- @postfixes[name]
362
- end
363
-
364
-
365
- # Reset the loader, including database object to be populated, and load counts
366
- #
367
- def reset(object = nil)
368
- @load_object = object || new_load_object
369
- @loaded_objects, @failed_objects = [],[]
370
- @current_value = nil
371
- end
372
-
373
-
374
- def new_load_object
375
- @load_object = @load_object_class.new
376
- @load_object
377
- end
378
-
379
- def abort_on_failure?
380
- @options[:abort_on_failure] == 'true'
381
- end
382
-
383
- def loaded_count
384
- @loaded_objects.size
385
- end
386
-
387
- def failed_count
388
- @failed_objects.size
389
- end
390
-
391
-
392
- # Check whether headers contains supplied list
393
- def headers_contain_mandatory?( mandatory_list )
394
- [ [*mandatory_list] - @headers].flatten.empty?
395
- end
396
-
397
-
398
- # Check whether headers contains supplied list
399
- def missing_mandatory_headers( mandatory_list )
400
- [ [*mandatory_list] - @headers].flatten
401
- end
402
-
403
- def find_or_new( klass, condition_hash = {} )
404
- @records[klass] = klass.find(:all, :conditions => condition_hash)
405
- if @records[klass].any?
406
- return @records[klass].first
407
- else
408
- return klass.new
409
- end
410
- end
411
-
412
- private
413
-
414
- def save_if_new
415
- #puts "SAVE", load_object.inspect
416
- save if(load_object.valid? && load_object.new_record?)
417
- end
418
-
419
- end
420
-
1
+ # Copyright:: (c) Autotelik Media Ltd 2011
2
+ # Author :: Tom Statter
3
+ # Date :: Aug 2010
4
+ # License:: MIT
5
+ #
6
+ # Details:: Base class for loaders, providing a process hook which populates a model,
7
+ # based on a method map and supplied value from a file - i.e a single column/row's string value.
8
+ # Note that although a single column, the string can be formatted to contain multiple values.
9
+ #
10
+ # Tightly coupled with MethodMapper classes (in lib/engine) which contains full details of
11
+ # a file's column and it's correlated AR associations.
12
+ #
13
+ module DataShift
14
+
15
+ require 'datashift/method_mapper'
16
+
17
+ class LoaderBase
18
+
19
+
20
+ include DataShift::Logging
21
+
22
+ attr_reader :headers
23
+
24
+ attr_accessor :method_mapper
25
+
26
+ attr_accessor :load_object_class, :load_object
27
+ attr_accessor :current_value, :current_method_detail
28
+
29
+ attr_accessor :loaded_objects, :failed_objects
30
+
31
+ attr_accessor :options
32
+
33
+ # Support multiple associations being added to a base object to be specified in a single column.
34
+ #
35
+ # Entry represents the association to find via supplied name, value to use in the lookup.
36
+ # Can contain multiple lookup name/value pairs, separated by multi_assoc_delim ( | )
37
+ #
38
+ # Default syntax :
39
+ #
40
+ # Name1:value1, value2|Name2:value1, value2, value3|Name3:value1, value2
41
+ #
42
+ # E.G.
43
+ # Association Properties, has a column named Size, and another called Colour,
44
+ # and this combination could be used to lookup multiple associations to add to the main model Jumper
45
+ #
46
+ # Size:small # => generates find_by_size( 'small' )
47
+ # Size:large # => generates find_by_size( 'large' )
48
+ # Colour:red,green,blue # => generates find_all_by_colour( ['red','green','blue'] )
49
+ #
50
+ # Size:large|Size:medium|Size:large
51
+ # => Find 3 different associations, perform lookup via column called Size
52
+ # => Jumper.properties << [ small, medium, large ]
53
+ #
54
+ def self.name_value_delim
55
+ @name_value_delim ||= ':'
56
+ @name_value_delim
57
+ end
58
+
59
+ def self.set_name_value_delim(x) @name_value_delim = x; end
60
+ # TODO - support embedded object creation/update via hash (which hopefully we should be able to just forward to AR)
61
+ #
62
+ # |Category|
63
+ # name:new{ :date => '20110102', :owner = > 'blah'}
64
+ #
65
+
66
+
67
+ def self.multi_value_delim
68
+ @multi_value_delim ||= ','
69
+ @multi_value_delim
70
+ end
71
+
72
+ def self.set_multi_value_delim(x) @multi_value_delim = x; end
73
+
74
+ # TODO - support multi embedded object creation/update via hash (which hopefully we should be able to just forward to AR)
75
+ #
76
+ # |Category|
77
+ # name:new{ :a => 1, :b => 2}|name:medium{ :a => 6, :b => 34}|name:old{ :a => 12, :b => 67}
78
+ #
79
+ def self.multi_assoc_delim
80
+ @multi_assoc_delim ||= '|'
81
+ @multi_assoc_delim
82
+ end
83
+
84
+
85
# Replace the delimiter used between multiple association lookups.
#
# x - the new delimiter
#
# Returns the value just stored.
def self.set_multi_assoc_delim(x)
  @multi_assoc_delim = x
end
86
+
87
+
88
+ # Options
89
+ # :instance_methods => true
90
+
91
# Set up a loader for the given class.
#
# object_class - the class whose instances will be populated
# object       - optional existing instance, handed to reset; when nil,
#                reset obtains a new one
# options      - Hash of options; :instance_methods is forwarded to
#                MethodDictionary.find_operators. A clone is kept in @options,
#                so later changes to the caller's hash are not seen here.
def initialize(object_class, object = nil, options = {})
  @load_object_class = object_class

  # Gather names of all possible 'setter' methods on the class
  DataShift::MethodDictionary.find_operators(
    @load_object_class,
    :reload => true,
    :instance_methods => options[:instance_methods]
  )

  # Build the dictionary of details for those methods, used later to
  # populate or integrate an object of type @load_object_class
  DataShift::MethodDictionary.build_method_details(@load_object_class)

  @method_mapper = DataShift::MethodMapper.new
  @options = options.clone
  @headers = []

  @default_data_objects ||= {}

  @default_values = {}
  @override_values = {}

  @prefixes = {}
  @postfixes = {}

  reset(object)
end
115
+
116
+
117
+ # kinda the derived classes interface - best way in Ruby ?
118
# Placeholder for the load entry point that concrete loaders must supply.
# This base version does no work.
#
# input, options - accepted so overriding methods share one signature;
#                  neither is used here
#
# Raises RuntimeError on every call.
def perform_load(input, options = {})
  raise "WARNING- ABSTRACT METHOD CALLED - Please implement perform_load()"
end
121
+
122
+
123
+ # Core API - Given a list of free text column names from a file,
124
+ # map all headers to a method detail containing operator details.
125
+ #
126
+ # This is then available through @method_mapper.method_details.each
127
+ #
128
+ # Options:
129
+ # strict : report any header values that can't be mapped as an error
130
+ #
131
# Map free-text column headers onto method details via @method_mapper.
#
# headers   - list of column names read from the input; stored in @headers
# strict    - when truthy, any header that cannot be mapped is an error
# mandatory - list of column names that must be present (default: none)
#
# Unmapped headers are always reported on stdout.
#
# Returns nil.
# Raises MappingDefinitionError when strict and some headers are unmapped.
# Raises MissingMandatoryError when a mandatory column is missing.
def map_headers_to_operators(headers, strict, mandatory = [])
  @headers = headers

  @method_mapper.map_inbound_to_methods(load_object_class, @headers)

  unmapped = @method_mapper.missing_methods

  unless unmapped.empty?
    puts "WARNING: Following column headings could not be mapped : #{unmapped.inspect}"
    raise MappingDefinitionError, "Missing mappings for columns : #{unmapped.join(",")}" if strict
  end

  # Nothing further to check when no mandatory columns were requested
  return if mandatory.empty? || @method_mapper.contains_mandatory?(mandatory)

  @method_mapper.missing_mandatory(mandatory).each do |column|
    puts "ERROR: Mandatory column missing - expected column '#{column}'"
  end

  raise MissingMandatoryError, "Mandatory columns missing - please fix and retry."
end
146
+
147
+
148
+ # Core API - Given a single free text column name from a file, search method mapper for
149
+ # associated operator on base object class.
150
+ #
151
+ # If suitable association found, process row data and then assign to current load_object
152
# Look up the method detail for a single column name on the load class and,
# when one exists, prepare the supplied data and assign it to the current
# load object.
#
# column_name - free-text column name from the input
# data        - raw value for that column
#
# When no detail is found an error is recorded on the load object's :base.
# Uses errors.add(:base, ...) because the errors collection has no add_base
# method.
def find_and_process(column_name, data)
  method_detail = MethodDictionary.find_method_detail(load_object_class, column_name)

  if method_detail
    prepare_data(method_detail, data)
    process
  else
    @load_object.errors.add(:base, "No matching method found for column #{column_name}")
  end
end
162
+
163
+
164
+ # Default values can be provided in YAML config file
165
+ # Format :
166
+ # Load Class
167
+ # attribute: value
168
+
169
# Merge default and override values for the load class from a YAML file.
#
# Expected layout, keyed by the load class name:
#
#   ClassName:
#     datashift_defaults:
#       attribute: value
#     datashift_overrides:
#       attribute: value
#
# yaml_file - path to the YAML configuration file
#
# Entries under 'datashift_defaults' are merged into @default_values and
# entries under 'datashift_overrides' into @override_values. A file that is
# empty, or has no section for the load class, changes nothing.
#
# NOTE(review): the file is parsed with YAML.load; only point this at
# trusted configuration files.
#
# TODO - move defaults/overrides handling to its own module, possibly by
# decorating a per-class default object with the values.
def configure_from(yaml_file)
  # Block form closes the file handle as soon as parsing finishes
  data = File.open(yaml_file) { |io| YAML.load(io) }

  # An empty or scalar document does not parse to a Hash
  return unless data.is_a?(Hash)

  class_config = data[load_object_class.name]
  return unless class_config.is_a?(Hash)

  defaults = class_config['datashift_defaults']
  @default_values.merge!(defaults) if defaults

  overrides = class_config['datashift_overrides']
  @override_values.merge!(overrides) if overrides
end
209
+
210
+ # Set member variables to hold details and value.
211
+ #
212
+ # Check supplied value, validate it, and if required :
213
+ # set to any provided default value
214
+ # prepend or append with any provided extensions
215
# Record the method detail and value about to be processed, then adjust the
# value using any configured default, override, prefix and postfix for the
# detail's operator.
#
# method_detail - detail for the column being processed; must respond to operator
# value         - raw value read from the input
#
# Order of application:
#   1. a nil or empty value is replaced by the operator's default, if any
#   2. an override, if any, replaces whatever value is current
#   3. prefix and postfix, if any, are wrapped around the result
#
# The override is applied after the default so that, for an empty cell with
# both configured, the override wins rather than being clobbered by the default.
#
# Returns the prepared value, also stored in @current_value.
def prepare_data(method_detail, value)
  @current_value = value
  @current_method_detail = method_detail

  operator = method_detail.operator

  if value.nil? || value.to_s.empty?
    fallback = default_value(operator)
    @current_value = fallback if fallback
  end

  override_value(operator)

  prefix = prefixes(operator)
  @current_value = "#{prefix}#{@current_value}" if prefix

  postfix = postfixes(operator)
  @current_value = "#{@current_value}#{postfix}" if postfix

  @current_value
end
234
+
235
+
236
+ # Process a value string from a column.
237
+ # Assigning value(s) to correct association on @load_object.
238
+ # Method detail represents a column from a file and its correlated AR associations.
239
+ # Value string which may contain multiple values for a collection association.
240
+ #
241
# Assign @current_value to @load_object through @current_method_detail.
#
# For anything other than a has_many operator the value is assigned directly.
#
# For a has_many operator the value is treated as one or more association
# lookups separated by LoaderBase.multi_assoc_delim, for example
#
#   Size:large|Colour:red,green,blue
#
# Each lookup is "name:value[,value...]". The name selects the finder
# (find_by_<name> for one value, find_all_by_<name> for several) on the
# detail's operator_class. When the detail supplies find_by_operator, that is
# used as the name and the whole entry is taken as the value list.
# The records found are then assigned to the load object.
#
# Lookups that find nothing add an error to the load object, print a warning
# and are skipped. When only some of several values are found, the error and
# warning name the missing keys and the records that were found are still
# assigned.
#
# Nothing is done for a has_many operator when the detail has no
# operator_class or @current_value is nil/false.
#
# Raises RuntimeError when an entry has no name/value delimiter and the
# detail supplies no find_by_operator.
def process
  detail = @current_method_detail

  # Nice n simple straight assignment to a column variable
  return detail.assign(@load_object, @current_value) unless detail.operator_for(:has_many)

  return unless detail.operator_class && @current_value

  # There are times when we need to save early, for example before assigning
  # to has_and_belongs_to associations which require the load object to have
  # an id for the join table
  save_if_new

  # Size:large|Colour:red,green,blue => ['Size:large', 'Colour:red,green,blue']
  lookups = @current_value.to_s.split(LoaderBase::multi_assoc_delim)

  lookups.each do |col_str|
    if detail.find_by_operator
      find_operator, col_values = detail.find_by_operator, col_str
    else
      # Limit of 2 keeps values that themselves contain the delimiter intact
      find_operator, col_values = col_str.split(LoaderBase::name_value_delim, 2)
      raise "No key to find #{detail.operator} in DB. Expected format key:value" unless col_values
    end

    find_by_values = col_values.split(LoaderBase::multi_value_delim)

    if find_by_values.size > 1
      @current_value = detail.operator_class.send("find_all_by_#{find_operator}", find_by_values)

      unless find_by_values.size == @current_value.size
        # Compare as strings, since the lookup keys were parsed from text
        found = @current_value.collect { |record| record.send(find_operator).to_s }
        missing = find_by_values - found

        @load_object.errors.add(detail.operator, "Association with key(s) #{missing.inspect} NOT found")
        puts "WARNING: Association with key(s) #{missing.inspect} NOT found - Not added."
        next if @current_value.empty?
      end
    else
      @current_value = detail.operator_class.send("find_by_#{find_operator}", find_by_values)

      unless @current_value
        @load_object.errors.add(detail.operator, "Association with key #{find_by_values} NOT found")
        puts "WARNING: Association with key #{find_by_values} NOT found - Not added."
        next
      end
    end

    # Lookup done, now add the found value(s) to the load object's collection
    detail.assign(@load_object, @current_value)
  end
end
305
+
306
# Record the current load object as failed.
#
# Only objects that are still new records are recorded, and each object is
# recorded at most once.
#
# Returns @failed_objects when the object was added, nil otherwise.
def failure
  return unless load_object.new_record?
  return if @failed_objects.include?(@load_object)

  @failed_objects << @load_object
end
309
+
310
# Save the current load object and track the outcome.
#
# A successful save adds the object to @loaded_objects (once). A save that
# returns a falsy result is passed to failure instead of being counted as
# loaded, so loaded_count only reflects objects that were actually saved.
#
# An exception during save is passed to failure, reported on stdout and
# logged; it is re-raised as a RuntimeError only when @options[:strict] is set.
#
# Returns the result of the object's save, or nil when an exception was
# swallowed.
def save
  saved = @load_object.save

  if saved
    @loaded_objects << @load_object unless @loaded_objects.include?(@load_object)
  else
    failure
  end

  saved
rescue => e
  failure
  puts "Error saving #{@load_object.class} : #{e.inspect}"
  logger.error Array(e.backtrace).join("\n")

  # No column may have been processed yet, so guard the lookup
  column = @current_method_detail ? @current_method_detail.name : nil
  raise "Error in save whilst processing column #{column}" if @options[:strict]
end
325
+
326
# Look up the default data object registered for a class.
#
# klass - the class to look up
#
# This reads class-level state; the lookup table is created empty on first
# use.
# NOTE(review): nothing in this part of the file adds entries to the
# class-level table - confirm where it is populated.
#
# Returns the registered object, or nil when there is none.
def self.default_object_for(klass)
  (@default_data_objects ||= {})[klass]
end
330
+
331
# Register a default value, used by prepare_data when the incoming value
# is nil or empty.
#
# name  - key to register the default under (looked up by operator)
# value - the default to apply
#
# Returns the value just stored.
def set_default_value(name, value)
  @default_values[name] = value
end
334
+
335
# Register an override value for an operator; override_value applies it in
# place of the current value.
#
# operator - key to register the override under
# value    - the value to force
#
# Returns the value just stored.
def set_override_value(operator, value)
  @override_values[operator] = value
end
338
+
339
# Fetch the default value registered under a name.
#
# name - key the default was registered under
#
# Returns the default, or nil when none is registered.
def default_value(name)
  @default_values[name]
end
342
+
343
# Replace @current_value with the override registered for an operator.
#
# operator - key the override was registered under
#
# An override of false is honoured; only a missing (nil) override leaves
# @current_value untouched.
#
# Returns the override when one was applied, nil otherwise.
def override_value(operator)
  forced = @override_values[operator]
  @current_value = forced unless forced.nil?
end
346
+
347
+
348
+ def set_prefix( name, value )
349
+ @prefixes[name] = value
350
+ end
351
+
352
+ def prefixes(name)
353
+ @prefixes[name]
354
+ end
355
+
356
+ def set_postfix( name, value )
357
+ @postfixes[name] = value
358
+ end
359
+
360
+ def postfixes(name)
361
+ @postfixes[name]
362
+ end
363
+
364
+
365
+ # Reset the loader, including database object to be populated, and load counts
366
+ #
367
# Reset the loader: choose the object to populate and clear the lists of
# loaded and failed objects and the current value.
#
# object - optional instance to populate; when nil (or false) a new one is
#          obtained from new_load_object
#
# Returns nil.
def reset(object = nil)
  @load_object = object || new_load_object
  @loaded_objects = []
  @failed_objects = []
  @current_value = nil
end
372
+
373
+
374
# Instantiate a new object of the load class and make it the current
# load object.
#
# Returns the new instance.
def new_load_object
  @load_object = @load_object_class.new
end
378
+
379
# Whether the :abort_on_failure option is switched on.
#
# Accepts the String 'true' as well as the boolean true; comparing the
# option's string form means a boolean flag is no longer treated as off.
#
# Returns true or false.
def abort_on_failure?
  @options[:abort_on_failure].to_s == 'true'
end
382
+
383
# Number of objects currently held in @loaded_objects.
def loaded_count
  @loaded_objects.size
end
386
+
387
# Number of objects currently held in @failed_objects.
def failed_count
  @failed_objects.size
end
390
+
391
+
392
+ # Check whether headers contains supplied list
393
# Check whether every entry of the supplied list appears in @headers.
#
# mandatory_list - a single name or a list of names; nil is treated as empty
#
# Returns true when nothing is missing, false otherwise.
def headers_contain_mandatory?(mandatory_list)
  (Array(mandatory_list) - @headers).flatten.empty?
end
396
+
397
+
398
+ # Return the entries of the supplied list that are missing from the headers
399
# List the entries of the supplied list that do not appear in @headers.
#
# mandatory_list - a single name or a list of names; nil is treated as empty
#
# Returns an Array of the missing names (empty when nothing is missing).
def missing_mandatory_headers(mandatory_list)
  (Array(mandatory_list) - @headers).flatten
end
402
+
403
# Find the first record of a class matching the conditions, or build a new
# unsaved instance when there is none.
#
# klass          - class to query; must respond to find(:all, :conditions => ...)
#                  and new
# condition_hash - conditions passed to the finder (default: none)
#
# The full result of the query is cached in @records under the class. The
# cache is created on first use, since nothing else in this part of the file
# initialises it.
#
# Returns the first matching record, or a new instance of klass.
def find_or_new(klass, condition_hash = {})
  @records ||= {}
  @records[klass] = klass.find(:all, :conditions => condition_hash)

  @records[klass].first || klass.new
end
411
+
412
+ private
413
+
414
# Save the load object early, but only when it is valid and has not been
# saved before.
#
# Returns the result of save when it was called, nil otherwise.
def save_if_new
  save if load_object.valid? && load_object.new_record?
end
418
+
419
+ end
420
+
421
421
  end