datashift 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (85) hide show
  1. data/.document +5 -5
  2. data/Gemfile +28 -25
  3. data/LICENSE.txt +26 -26
  4. data/README.markdown +302 -285
  5. data/README.rdoc +19 -19
  6. data/Rakefile +93 -95
  7. data/VERSION +5 -5
  8. data/datashift.gemspec +162 -178
  9. data/lib/applications/jruby/jexcel_file.rb +396 -396
  10. data/lib/applications/jruby/word.rb +79 -79
  11. data/lib/datashift.rb +152 -113
  12. data/lib/datashift/exceptions.rb +11 -11
  13. data/lib/datashift/file_definitions.rb +353 -353
  14. data/lib/datashift/mapping_file_definitions.rb +87 -87
  15. data/lib/datashift/method_detail.rb +236 -236
  16. data/lib/datashift/method_mapper.rb +256 -256
  17. data/lib/generators/csv_generator.rb +36 -36
  18. data/lib/generators/excel_generator.rb +121 -121
  19. data/lib/generators/generator_base.rb +13 -13
  20. data/lib/helpers/core_ext/to_b.rb +24 -24
  21. data/lib/helpers/spree_helper.rb +131 -131
  22. data/lib/java/poi-3.7/LICENSE +507 -507
  23. data/lib/java/poi-3.7/NOTICE +21 -21
  24. data/lib/java/poi-3.7/RELEASE_NOTES.txt +115 -115
  25. data/lib/loaders/csv_loader.rb +98 -98
  26. data/lib/loaders/excel_loader.rb +154 -149
  27. data/lib/loaders/loader_base.rb +403 -331
  28. data/lib/loaders/spreadsheet_loader.rb +136 -136
  29. data/lib/loaders/spree/image_loader.rb +45 -45
  30. data/lib/loaders/spree/product_loader.rb +224 -224
  31. data/spec/csv_loader_spec.rb +30 -30
  32. data/spec/datashift_spec.rb +26 -26
  33. data/spec/db/migrate/20110803201325_create_test_bed.rb +85 -85
  34. data/spec/excel_generator_spec.rb +78 -78
  35. data/spec/excel_loader_spec.rb +204 -176
  36. data/spec/file_definitions.rb +141 -141
  37. data/spec/fixtures/.~lock.ProjectsSingleCategories.xls# +1 -0
  38. data/spec/fixtures/ProjectsDefaults.yml +29 -0
  39. data/spec/fixtures/config/database.yml +24 -24
  40. data/spec/fixtures/interact_models_db.sqlite +0 -0
  41. data/spec/fixtures/interact_spree_db.sqlite +0 -0
  42. data/spec/fixtures/negative/SpreeProdMiss1Mandatory.csv +4 -4
  43. data/spec/fixtures/negative/SpreeProdMissManyMandatory.csv +4 -4
  44. data/spec/fixtures/spree/SpreeProducts.csv +4 -4
  45. data/spec/fixtures/spree/SpreeProductsMultiColumn.csv +4 -4
  46. data/spec/fixtures/spree/SpreeProductsSimple.csv +4 -4
  47. data/spec/fixtures/spree/SpreeZoneExample.csv +5 -5
  48. data/spec/fixtures/test_model_defs.rb +57 -57
  49. data/spec/loader_spec.rb +120 -120
  50. data/spec/method_mapper_spec.rb +237 -237
  51. data/spec/spec_helper.rb +115 -115
  52. data/spec/spree_generator_spec.rb +64 -64
  53. data/spec/spree_loader_spec.rb +310 -310
  54. data/spec/spree_method_mapping_spec.rb +214 -214
  55. data/tasks/config/seed_fu_product_template.erb +15 -15
  56. data/tasks/config/tidy_config.txt +12 -12
  57. data/tasks/db_tasks.rake +65 -64
  58. data/tasks/excel_generator.rake +78 -78
  59. data/tasks/file_tasks.rake +36 -36
  60. data/tasks/import/csv.rake +49 -49
  61. data/tasks/import/excel.rake +71 -66
  62. data/tasks/spree/image_load.rake +108 -108
  63. data/tasks/spree/product_loader.rake +43 -43
  64. data/tasks/word_to_seedfu.rake +166 -166
  65. data/test/helper.rb +18 -18
  66. data/test/test_interact.rb +7 -7
  67. metadata +7 -38
  68. data/Gemfile.lock +0 -211
  69. data/bin/autospec +0 -16
  70. data/bin/convert_to_should_syntax +0 -16
  71. data/bin/erubis +0 -16
  72. data/bin/htmldiff +0 -16
  73. data/bin/jeweler +0 -16
  74. data/bin/ldiff +0 -16
  75. data/bin/nokogiri +0 -16
  76. data/bin/rackup +0 -16
  77. data/bin/rails +0 -16
  78. data/bin/rake +0 -16
  79. data/bin/rake2thor +0 -16
  80. data/bin/ri +0 -16
  81. data/bin/rspec +0 -16
  82. data/bin/spree +0 -16
  83. data/bin/thor +0 -16
  84. data/bin/tilt +0 -16
  85. data/bin/tt +0 -16
@@ -1,150 +1,155 @@
1
- # Copyright:: (c) Autotelik Media Ltd 2011
2
- # Author :: Tom Statter
3
- # Date :: Aug 2011
4
- # License:: MIT
5
- #
6
- # Details:: Specific loader to support Excel files.
7
- # Note this only requires JRuby, Excel not required, nor Win OLE.
8
- #
9
- # Maps column headings to operations on the model.
10
- # Iterates over all the rows using mapped operations to assign row data to a database object,
11
- # i.e pulls data from each column and sends to object.
12
- #
13
- require 'datashift/exceptions'
14
-
15
-
16
- module DataShift
17
-
18
- if(Guards::jruby?)
19
-
20
- require 'loaders/loader_base'
21
-
22
- require 'java'
23
- require 'jexcel_file'
24
-
25
- module ExcelLoading
26
-
27
- # Options:
28
- # [:header_row] : Default is 0. Use alternative row as header definition.
29
- # [:mandatory] : Array of mandatory column names
30
- # [:strict] : Raise exception when no mapping found for a column heading (non mandatory)
31
- # [:sheet_number]
32
-
33
- def perform_excel_load( file_name, options = {} )
34
-
35
- @mandatory = options[:mandatory] || []
36
-
37
- @excel = JExcelFile.new
38
-
39
- @excel.open(file_name)
40
-
41
- #if(options[:verbose])
42
- puts "\n\n\nLoading from Excel file: #{file_name}"
43
-
44
- sheet_number = options[:sheet_number] || 0
45
-
46
- @sheet = @excel.sheet( sheet_number )
47
-
48
- header_row_index = options[:header_row] || 0
49
- @header_row = @sheet.getRow(header_row_index)
50
-
51
- raise MissingHeadersError, "No headers found - Check Sheet #{@sheet} is complete and Row #{header_row_index} contains headers" unless(@header_row)
52
-
53
- @headers = []
54
-
55
- (0..JExcelFile::MAX_COLUMNS).each do |i|
56
- cell = @header_row.getCell(i)
57
- break unless cell
58
- header = "#{@excel.cell_value(cell).to_s}".strip
59
- break if header.empty?
60
- @headers << header
61
- end
62
-
63
- raise MissingHeadersError, "No headers found - Check Sheet #{@sheet} is complete and Row #{header_row_index} contains headers" if(@headers.empty?)
64
-
65
- # Create a method_mapper which maps list of headers into suitable calls on the Active Record class
66
- map_headers_to_operators( @headers, options[:strict] , @mandatory )
67
-
68
- load_object_class.transaction do
69
- @loaded_objects = []
70
-
71
- (1..@excel.num_rows).collect do |row|
72
-
73
- # Excel num_rows seems to return all 'visible' rows, which appears to be greater than the actual data rows
74
- # (TODO - write spec to process .xls with a huge number of rows)
75
- #
76
- # This is rubbish but currently manually detect when actual data ends, this isn't very smart but
77
- # got no better idea than ending once we hit the first completely empty row
78
- break if @excel.sheet.getRow(row).nil?
79
-
80
- contains_data = false
81
-
82
- # TODO - Smart sorting of column processing order ....
83
- # Does not currently ensure mandatory columns (for valid?) processed first but model needs saving
84
- # before associations can be processed so user should ensure mandatory columns are prior to associations
85
-
86
- # as part of this we also attempt to save early, for example before assigning to
87
- # has_and_belongs_to associations which require the load_object has an id for the join table
88
-
89
- # Iterate over the columns method_mapper found in Excel,
90
- # pulling data out of associated column
91
- @method_mapper.method_details.each_with_index do |method_detail, col|
92
-
93
- value = value_at(row, col)
94
-
95
- contains_data = true unless(value.nil? || value.to_s.empty?)
96
-
97
- #puts "DEBUG: Excel process METHOD :#{method_detail.inspect}", value.inspect
98
- prepare_data(method_detail, value)
99
-
100
- process()
101
- end
102
-
103
- break unless(contains_data == true)
104
-
105
- # TODO - requirements to handle not valid ?
106
- # all or nothing or carry on and dump out the exception list at end
107
- #puts "DEBUG: FINAL SAVE #{load_object.inspect}"
108
- save
109
- #puts "DEBUG: SAVED #{load_object.inspect}"
110
-
111
- # don't forget to reset the object or we'll update rather than create
112
- new_load_object
113
-
114
- end
115
- end
116
- puts "Excel loading stage complete - #{loaded_objects.size} rows added."
117
- end
118
-
119
- def value_at(row, column)
120
- @excel.get_cell_value( @excel.sheet.getRow(row), column)
121
- end
122
- end
123
-
124
-
125
- class ExcelLoader < LoaderBase
126
-
127
- include ExcelLoading
128
-
129
- def initialize(klass, object = nil, options = {})
130
- super( klass, object, options )
131
- raise "Cannot load - failed to create a #{klass}" unless @load_object
132
- end
133
-
134
-
135
- def perform_load( file_name, options = {} )
136
- perform_excel_load( file_name, options )
137
-
138
- puts "Excel loading stage complete - #{loaded_objects.size} rows added."
139
- end
140
-
141
- end
142
-
143
- else
144
-
145
- module ExcelLoading
146
- end
147
-
148
- end
149
-
1
+ # Copyright:: (c) Autotelik Media Ltd 2011
2
+ # Author :: Tom Statter
3
+ # Date :: Aug 2011
4
+ # License:: MIT
5
+ #
6
+ # Details:: Specific loader to support Excel files.
7
+ # Note this only requires JRuby, Excel not required, nor Win OLE.
8
+ #
9
+ # Maps column headings to operations on the model.
10
+ # Iterates over all the rows using mapped operations to assign row data to a database object,
11
+ # i.e pulls data from each column and sends to object.
12
+ #
13
+ require 'datashift/exceptions'
14
+
15
+
16
+ module DataShift
17
+
18
+ if(Guards::jruby?)
19
+
20
+ require 'loaders/loader_base'
21
+
22
+ require 'java'
23
+ require 'jexcel_file'
24
+
25
+ module ExcelLoading
26
+
27
+ # Options:
28
+ # [:header_row] : Default is 0. Use alternative row as header definition.
29
+ # [:mandatory] : Array of mandatory column names
30
+ # [:strict] : Raise exception when no mapping found for a column heading (non mandatory)
31
+ # [:sheet_number]
32
+
33
+ def perform_excel_load( file_name, options = {} )
34
+
35
+ @mandatory = options[:mandatory] || []
36
+
37
+ @excel = JExcelFile.new
38
+
39
+ @excel.open(file_name)
40
+
41
+ #if(options[:verbose])
42
+ puts "\n\n\nLoading from Excel file: #{file_name}"
43
+
44
+ sheet_number = options[:sheet_number] || 0
45
+
46
+ @sheet = @excel.sheet( sheet_number )
47
+
48
+ header_row_index = options[:header_row] || 0
49
+ @header_row = @sheet.getRow(header_row_index)
50
+
51
+ raise MissingHeadersError, "No headers found - Check Sheet #{@sheet} is complete and Row #{header_row_index} contains headers" unless(@header_row)
52
+
53
+ @headers = []
54
+
55
+ (0..JExcelFile::MAX_COLUMNS).each do |i|
56
+ cell = @header_row.getCell(i)
57
+ break unless cell
58
+ header = "#{@excel.cell_value(cell).to_s}".strip
59
+ break if header.empty?
60
+ @headers << header
61
+ end
62
+
63
+ raise MissingHeadersError, "No headers found - Check Sheet #{@sheet} is complete and Row #{header_row_index} contains headers" if(@headers.empty?)
64
+
65
+ # Create a method_mapper which maps list of headers into suitable calls on the Active Record class
66
+ map_headers_to_operators( @headers, options[:strict] , @mandatory )
67
+
68
+ logger.info "Excel Loader prcoessing #{@excel.num_rows} rows"
69
+ load_object_class.transaction do
70
+ @loaded_objects = []
71
+
72
+ (1..@excel.num_rows).collect do |row|
73
+
74
+ # Excel num_rows seems to return all 'visible' rows, which appears to be greater than the actual data rows
75
+ # (TODO - write spec to process .xls with a huge number of rows)
76
+ #
77
+ # This is rubbish but currently manually detect when actual data ends, this isn't very smart but
78
+ # got no better idea than ending once we hit the first completely empty row
79
+ break if @excel.sheet.getRow(row).nil?
80
+
81
+ contains_data = false
82
+
83
+ # TODO - Smart sorting of column processing order ....
84
+ # Does not currently ensure mandatory columns (for valid?) processed first but model needs saving
85
+ # before associations can be processed so user should ensure mandatory columns are prior to associations
86
+
87
+ # as part of this we also attempt to save early, for example before assigning to
88
+ # has_and_belongs_to_many associations, which require that the load_object has an id for the join table
89
+
90
+ # Iterate over the columns method_mapper found in Excel,
91
+ # pulling data out of associated column
92
+ @method_mapper.method_details.each_with_index do |method_detail, col|
93
+
94
+ value = value_at(row, col)
95
+
96
+ contains_data = true unless(value.nil? || value.to_s.empty?)
97
+
98
+ prepare_data(method_detail, value)
99
+
100
+ process()
101
+ end
102
+
103
+ break unless(contains_data == true)
104
+
105
+ # TODO - requirements to handle not valid ?
106
+ # all or nothing or carry on and dump out the exception list at end
107
+ #puts "DEBUG: FINAL SAVE #{load_object.inspect}"
108
+ unless(save)
109
+ failure
110
+ logger.error "Failed to save row [#{row}]"
111
+ logger.error load_object.errors.inspect
112
+ else
113
+ logger.info "Row #{row} succesfully SAVED : ID #{load_object.id}"
114
+ end
115
+
116
+ # don't forget to reset the object or we'll update rather than create
117
+ new_load_object
118
+
119
+ end
120
+ end
121
+ puts "Excel loading stage complete - #{loaded_objects.size} rows added."
122
+ end
123
+
124
+ def value_at(row, column)
125
+ @excel.get_cell_value( @excel.sheet.getRow(row), column)
126
+ end
127
+ end
128
+
129
+
130
+ class ExcelLoader < LoaderBase
131
+
132
+ include ExcelLoading
133
+
134
+ def initialize(klass, object = nil, options = {})
135
+ super( klass, object, options )
136
+ raise "Cannot load - failed to create a #{klass}" unless @load_object
137
+ end
138
+
139
+
140
+ def perform_load( file_name, options = {} )
141
+ perform_excel_load( file_name, options )
142
+
143
+ puts "Excel loading stage complete - #{loaded_objects.size} rows added."
144
+ end
145
+
146
+ end
147
+
148
+ else
149
+
150
+ module ExcelLoading
151
+ end
152
+
153
+ end
154
+
150
155
  end
@@ -1,332 +1,404 @@
1
- # Copyright:: (c) Autotelik Media Ltd 2011
2
- # Author :: Tom Statter
3
- # Date :: Aug 2010
4
- # License:: MIT
5
- #
6
- # Details:: Base class for loaders, providing a process hook which populates a model,
7
- # based on a method map and supplied value from a file - i.e a single column/row's string value.
8
- # Note that although a single column, the string can be formatted to contain multiple values.
9
- #
10
- # Tightly coupled with MethodMapper classes (in lib/engine) which contains full details of
11
- # a file's column and it's correlated AR associations.
12
- #
13
- module DataShift
14
-
15
- require 'datashift/method_mapper'
16
-
17
- class LoaderBase
18
-
19
- attr_reader :headers
20
-
21
- attr_accessor :method_mapper
22
-
23
- attr_accessor :load_object_class, :load_object
24
- attr_accessor :current_value, :current_method_detail
25
-
26
- attr_accessor :loaded_objects, :failed_objects
27
-
28
- attr_accessor :options
29
-
30
- # Support multiple associations being added to a base object to be specified in a single column.
31
- #
32
- # Entry represents the association to find via supplied name, value to use in the lookup.
33
- # Can contain multiple lookup name/value pairs, separated by multi_assoc_delim ( | )
34
- #
35
- # Default syntax :
36
- #
37
- # Name1:value1, value2|Name2:value1, value2, value3|Name3:value1, value2
38
- #
39
- # E.G.
40
- # Association Properties, has a column named Size, and another called Colour,
41
- # and this combination could be used to lookup multiple associations to add to the main model Jumper
42
- #
43
- # Size:small # => generates find_by_size( 'small' )
44
- # Size:large # => generates find_by_size( 'large' )
45
- # Colour:red,green,blue # => generates find_all_by_colour( ['red','green','blue'] )
46
- #
47
- # Size:large|Size:medium|Size:large
48
- # => Find 3 different associations, perform lookup via column called Size
49
- # => Jumper.properties << [ small, medium, large ]
50
- #
51
- def self.name_value_delim
52
- @name_value_delim ||= ':'
53
- @name_value_delim
54
- end
55
-
56
- def self.set_name_value_delim(x) @name_value_delim = x; end
57
- # TODO - support embedded object creation/update via hash (which hopefully we should be able to just forward to AR)
58
- #
59
- # |Category|
60
- # name:new{ :date => '20110102', :owner = > 'blah'}
61
- #
62
-
63
-
64
- def self.multi_value_delim
65
- @multi_value_delim ||= ','
66
- @multi_value_delim
67
- end
68
-
69
- def self.set_multi_value_delim(x) @multi_value_delim = x; end
70
-
71
- # TODO - support multi embedded object creation/update via hash (which hopefully we should be able to just forward to AR)
72
- #
73
- # |Category|
74
- # name:new{ :a => 1, :b => 2}|name:medium{ :a => 6, :b => 34}|name:old{ :a => 12, :b => 67}
75
- #
76
- def self.multi_assoc_delim
77
- @multi_assoc_delim ||= '|'
78
- @multi_assoc_delim
79
- end
80
-
81
- def self.set_multi_assoc_delim(x) @multi_assoc_delim = x; end
82
-
83
- # Options
84
- # :instance_methods => true
85
-
86
- def initialize(object_class, object = nil, options = {})
87
- @load_object_class = object_class
88
-
89
- # Gather list of all possible 'setter' methods on AR class (instance variables and associations)
90
- DataShift::MethodMapper.find_operators( @load_object_class, :reload => true, :instance_methods => options[:instance_methods] )
91
-
92
- @method_mapper = DataShift::MethodMapper.new
93
- @options = options.clone
94
- @headers = []
95
-
96
- @default_values = {}
97
- @prefixes = {}
98
- @postfixes = {}
99
-
100
- reset(object)
101
- end
102
-
103
-
104
- # kinda the derived classes interface - best way in Ruby ?
105
- def perform_load( input, options = {} )
106
- raise "WARNING- ABSTRACT METHOD CALLED - Please implement perform_load()"
107
- end
108
-
109
-
110
- # Core API - Given a list of free text column names from a file, map all headers to
111
- # method mapper's operator list.
112
- # Options:
113
- # strict : report any header values that can't be mapped as an error
114
- #
115
- def map_headers_to_operators( headers, strict, mandatory = [])
116
- @headers = headers
117
-
118
- @method_mapper.populate_methods( load_object_class, @headers )
119
-
120
- unless(@method_mapper.missing_methods.empty?)
121
- puts "WARNING: Following column headings could not be mapped : #{@method_mapper.missing_methods.inspect}"
122
- raise MappingDefinitionError, "Missing mappings for columns : #{@method_mapper.missing_methods.join(",")}" if(strict)
123
- end
124
-
125
- unless(@method_mapper.contains_mandatory?(mandatory) )
126
- @method_mapper.missing_mandatory(mandatory).each { |e| puts "ERROR: Mandatory column missing - expected column '#{e}'" }
127
- raise MissingMandatoryError, "Mandatory columns missing - please fix and retry."
128
- end unless(mandatory.empty?)
129
- end
130
-
131
-
132
- # Core API - Given a free text column name from a file, search method mapper for
133
- # associated operator on base object class.
134
- #
135
- # If suitable association found, process row data and then assign to current load_object
136
- def find_and_process(column_name, data)
137
- method_detail = MethodMapper.find_method_detail( load_object_class, column_name )
138
-
139
- if(method_detail)
140
- prepare_data(method_detail, data)
141
- process()
142
- else
143
- @load_object.errors.add_base( "No matching method found for column #{column_name}")
144
- end
145
- end
146
-
147
-
148
- # Set member variables to hold detsails and value.
149
- #
150
- # Check supplied value, validate it, and if required :
151
- # set to any provided default value
152
- # prepend or append with any provided extensions
153
- def prepare_data(method_detail, value)
154
-
155
- @current_value = value
156
-
157
- @current_method_detail = method_detail
158
-
159
- operator = method_detail.operator
160
-
161
- if(default_value(operator) && (value.nil? || value.to_s.empty?))
162
- @current_value = default_value(operator)
163
- end
164
-
165
- @current_value = "#{prefixes(operator)}#{@current_value}" if(prefixes(operator))
166
- @current_value = "#{@current_value}#{postfixes(operator)}" if(postfixes(operator))
167
-
168
- @current_value
169
- end
170
-
171
-
172
- # Process a value string from a column.
173
- # Assigning value(s) to correct association on @load_object.
174
- # Method detail represents a column from a file and it's correlated AR associations.
175
- # Value string which may contain multiple values for a collection association.
176
- #
177
- def process()
178
-
179
- if(@current_method_detail.operator_for(:has_many))
180
-
181
- if(@current_method_detail.operator_class && @current_value)
182
-
183
- # there are times when we need to save early, for example before assigning to
184
- # has_and_belongs_to associations which require the load_object has an id for the join table
185
-
186
- save_if_new
187
-
188
- # A single column can contain multiple associations delimited by special char
189
- columns = @current_value.to_s.split( LoaderBase::multi_assoc_delim)
190
-
191
- # Size:large|Colour:red,green,blue => generates find_by_size( 'large' ) and find_all_by_colour( ['red','green','blue'] )
192
-
193
- columns.each do |assoc|
194
- operator, values = assoc.split(LoaderBase::name_value_delim)
195
-
196
- lookups = values.split(LoaderBase::multi_value_delim)
197
-
198
- if(lookups.size > 1)
199
-
200
- @current_value = @current_method_detail.operator_class.send("find_all_by_#{operator}", lookups )
201
-
202
- unless(lookups.size == @current_value.size)
203
- found = @current_value.collect {|f| f.send(operator) }
204
- @load_object.errors.add( method_detail.operator, "Association with key(s) #{(lookups - found).inspect} NOT found")
205
- puts "WARNING: Association with key(s) #{(lookups - found).inspect} NOT found - Not added."
206
- next if(@current_value.empty?)
207
- end
208
-
209
- else
210
-
211
- @current_value = @current_method_detail.operator_class.send("find_by_#{operator}", lookups )
212
-
213
- unless(@current_value)
214
- @load_object.errors.add( @current_method_detail.operator, "Association with key #{lookups} NOT found")
215
- puts "WARNING: Association with key #{lookups} NOT found - Not added."
216
- next
217
- end
218
-
219
- end
220
-
221
- # Lookup Assoc's Model done, now add the found value(s) to load model's collection
222
- @current_method_detail.assign(@load_object, @current_value)
223
- end
224
- end
225
- # END HAS_MANY
226
- else
227
- # Nice n simple straight assignment to a column variable
228
- #puts "INFO: LOADER BASE processing #{method_detail.name}"
229
- @current_method_detail.assign(@load_object, @current_value)
230
- end
231
- end
232
-
233
- def save
234
- #puts "DEBUG: SAVING #{load_object.class} : #{load_object.inspect}" #if(options[:verbose])
235
- begin
236
- result = @load_object.save
237
- #puts "DEBUG: SAVED [#{result.inspect}]"
238
- #puts "SAVED 2. #{load_object.errors.methods.inspect}"
239
- #puts "SAVED 3. #{load_object.errors.full_messages.inspect}"
240
- @loaded_objects << @load_object unless(@loaded_objects.include?(@load_object))
241
-
242
- return result
243
- rescue => e
244
- @failed_objects << @load_object unless( !load_object.new_record? || @failed_objects.include?(@load_object))
245
- puts "Error saving #{@load_object.class} : #{e.inspect}"
246
- puts e.backtrace
247
- raise "Error in save whilst processing column #{@current_method_detail.name}" if(@options[:strict])
248
- end
249
- end
250
-
251
- def set_default_value( name, value )
252
- @default_values[name] = value
253
- end
254
-
255
- def default_value(name)
256
- @default_values[name]
257
- end
258
-
259
- def set_prefix( name, value )
260
- @prefixes[name] = value
261
- end
262
-
263
- def prefixes(name)
264
- @prefixes[name]
265
- end
266
-
267
- def set_postfix( name, value )
268
- @postfixes[name] = value
269
- end
270
-
271
- def postfixes(name)
272
- @postfixes[name]
273
- end
274
-
275
-
276
- # Reset the loader, including database object to be populated, and load counts
277
- #
278
- def reset(object = nil)
279
- @load_object = object || new_load_object
280
- @loaded_objects, @failed_objects = [],[]
281
- @current_value = nil
282
- end
283
-
284
-
285
- def new_load_object
286
- @load_object = @load_object_class.new
287
- @load_object
288
- end
289
-
290
- def abort_on_failure?
291
- @options[:abort_on_failure] == 'true'
292
- end
293
-
294
- def loaded_count
295
- @loaded_objects.size
296
- end
297
-
298
- def failed_count
299
- @failed_objects.size
300
- end
301
-
302
-
303
- # Check whether headers contains supplied list
304
- def headers_contain_mandatory?( mandatory_list )
305
- [ [*mandatory_list] - @headers].flatten.empty?
306
- end
307
-
308
-
309
- # Check whether headers contains supplied list
310
- def missing_mandatory_headers( mandatory_list )
311
- [ [*mandatory_list] - @headers].flatten
312
- end
313
-
314
- def find_or_new( klass, condition_hash = {} )
315
- @records[klass] = klass.find(:all, :conditions => condition_hash)
316
- if @records[klass].any?
317
- return @records[klass].first
318
- else
319
- return klass.new
320
- end
321
- end
322
-
323
- private
324
-
325
- def save_if_new
326
- #puts "SAVE", load_object.inspect
327
- save if(load_object.valid? && load_object.new_record?)
328
- end
329
-
330
- end
331
-
1
+ # Copyright:: (c) Autotelik Media Ltd 2011
2
+ # Author :: Tom Statter
3
+ # Date :: Aug 2010
4
+ # License:: MIT
5
+ #
6
+ # Details:: Base class for loaders, providing a process hook which populates a model,
7
+ # based on a method map and supplied value from a file - i.e a single column/row's string value.
8
+ # Note that although a single column, the string can be formatted to contain multiple values.
9
+ #
10
+ # Tightly coupled with MethodMapper classes (in lib/engine) which contains full details of
11
+ # a file's column and its correlated AR associations.
12
+ #
13
+ module DataShift
14
+
15
+ require 'datashift/method_mapper'
16
+
17
+ class LoaderBase
18
+
19
+
20
+ include DataShift::Logging
21
+
22
+ attr_reader :headers
23
+
24
+ attr_accessor :method_mapper
25
+
26
+ attr_accessor :load_object_class, :load_object
27
+ attr_accessor :current_value, :current_method_detail
28
+
29
+ attr_accessor :loaded_objects, :failed_objects
30
+
31
+ attr_accessor :options
32
+
33
+ # Support multiple associations being added to a base object to be specified in a single column.
34
+ #
35
+ # Entry represents the association to find via supplied name, value to use in the lookup.
36
+ # Can contain multiple lookup name/value pairs, separated by multi_assoc_delim ( | )
37
+ #
38
+ # Default syntax :
39
+ #
40
+ # Name1:value1, value2|Name2:value1, value2, value3|Name3:value1, value2
41
+ #
42
+ # E.G.
43
+ # Association Properties, has a column named Size, and another called Colour,
44
+ # and this combination could be used to lookup multiple associations to add to the main model Jumper
45
+ #
46
+ # Size:small # => generates find_by_size( 'small' )
47
+ # Size:large # => generates find_by_size( 'large' )
48
+ # Colour:red,green,blue # => generates find_all_by_colour( ['red','green','blue'] )
49
+ #
50
+ # Size:small|Size:medium|Size:large
51
+ # => Find 3 different associations, perform lookup via column called Size
52
+ # => Jumper.properties << [ small, medium, large ]
53
+ #
54
+ def self.name_value_delim
55
+ @name_value_delim ||= ':'
56
+ @name_value_delim
57
+ end
58
+
59
+ def self.set_name_value_delim(x) @name_value_delim = x; end
60
+ # TODO - support embedded object creation/update via hash (which hopefully we should be able to just forward to AR)
61
+ #
62
+ # |Category|
63
+ # name:new{ :date => '20110102', :owner => 'blah'}
64
+ #
65
+
66
+
67
+ def self.multi_value_delim
68
+ @multi_value_delim ||= ','
69
+ @multi_value_delim
70
+ end
71
+
72
+ def self.set_multi_value_delim(x) @multi_value_delim = x; end
73
+
74
+ # TODO - support multi embedded object creation/update via hash (which hopefully we should be able to just forward to AR)
75
+ #
76
+ # |Category|
77
+ # name:new{ :a => 1, :b => 2}|name:medium{ :a => 6, :b => 34}|name:old{ :a => 12, :b => 67}
78
+ #
79
+ def self.multi_assoc_delim
80
+ @multi_assoc_delim ||= '|'
81
+ @multi_assoc_delim
82
+ end
83
+
84
+
85
+ def self.set_multi_assoc_delim(x) @multi_assoc_delim = x; end
86
+
87
+ # Options
88
+ # :instance_methods => true
89
+
90
+ def initialize(object_class, object = nil, options = {})
91
+ @load_object_class = object_class
92
+
93
+ # Gather list of all possible 'setter' methods on AR class (instance variables and associations)
94
+ DataShift::MethodMapper.find_operators( @load_object_class, :reload => true, :instance_methods => options[:instance_methods] )
95
+
96
+ @method_mapper = DataShift::MethodMapper.new
97
+ @options = options.clone
98
+ @headers = []
99
+
100
+ @default_data_objects ||= {}
101
+
102
+ @default_values = {}
103
+ @override_values = {}
104
+
105
+ @prefixes = {}
106
+ @postfixes = {}
107
+
108
+ reset(object)
109
+ end
110
+
111
+
112
+ # kinda the derived classes interface - best way in Ruby ?
113
+ def perform_load( input, options = {} )
114
+ raise "WARNING- ABSTRACT METHOD CALLED - Please implement perform_load()"
115
+ end
116
+
117
+
118
+ # Core API - Given a list of free text column names from a file, map all headers to
119
+ # method mapper's operator list.
120
+ # Options:
121
+ # strict : report any header values that can't be mapped as an error
122
+ #
123
+ def map_headers_to_operators( headers, strict, mandatory = [])
124
+ @headers = headers
125
+
126
+ @method_mapper.populate_methods( load_object_class, @headers )
127
+
128
+ unless(@method_mapper.missing_methods.empty?)
129
+ puts "WARNING: Following column headings could not be mapped : #{@method_mapper.missing_methods.inspect}"
130
+ raise MappingDefinitionError, "Missing mappings for columns : #{@method_mapper.missing_methods.join(",")}" if(strict)
131
+ end
132
+
133
+ unless(@method_mapper.contains_mandatory?(mandatory) )
134
+ @method_mapper.missing_mandatory(mandatory).each { |e| puts "ERROR: Mandatory column missing - expected column '#{e}'" }
135
+ raise MissingMandatoryError, "Mandatory columns missing - please fix and retry."
136
+ end unless(mandatory.empty?)
137
+ end
138
+
139
+
140
+ # Core API - Given a free text column name from a file, search method mapper for
141
+ # associated operator on base object class.
142
+ #
143
+ # If suitable association found, process row data and then assign to current load_object
144
+ def find_and_process(column_name, data)
145
+ method_detail = MethodMapper.find_method_detail( load_object_class, column_name )
146
+
147
+ if(method_detail)
148
+ prepare_data(method_detail, data)
149
+ process()
150
+ else
151
+ @load_object.errors.add_base( "No matching method found for column #{column_name}")
152
+ end
153
+ end
154
+
155
+
156
+ # Default values can be provided in YAML config file
157
+ # Format :
158
+ # Load Class
159
+ # attribute: value
160
+
161
+ def configure_from( yaml_file )
162
+
163
+ data = YAML::load( File.open(yaml_file) )
164
+
165
+
166
+ # TODO - MOVE DEFAULTS TO OWN MODULE
167
+ # decorate the loading class with the defaults/overrides to manage itself
168
+ # IDEAS .....
169
+ #
170
+ #unless(@default_data_objects[load_object_class])
171
+ #
172
+ # @default_data_objects[load_object_class] = load_object_class.new
173
+
174
+ # default_data_object = @default_data_objects[load_object_class]
175
+
176
+
177
+ # default_data_object.instance_eval do
178
+ # def datashift_defaults=(hash)
179
+ # @datashift_defaults = hash
180
+ # end
181
+ # def datashift_defaults
182
+ # @datashift_defaults
183
+ # end
184
+ #end unless load_object_class.respond_to?(:datashift_defaults)
185
+ #end
186
+
187
+ #puts load_object_class.new.to_yaml
188
+
189
+ puts data.inspect
190
+
191
+ if(data[load_object_class.name])
192
+
193
+ deflts = data[load_object_class.name]['datashift_defaults']
194
+ @default_values.merge!(deflts) if deflts
195
+
196
+ ovrides = data[load_object_class.name]['datashift_overrides']
197
+ @override_values.merge!(ovrides) if ovrides
198
+ end
199
+
200
+ end
201
+
202
+ # Set member variables to hold details and value.
203
+ #
204
+ # Check supplied value, validate it, and if required :
205
+ # set to any provided default value
206
+ # prepend or append with any provided extensions
207
def prepare_data(method_detail, value)
  # Remember what is being processed; process() works purely from these members
  @current_method_detail = method_detail
  @current_value = value

  op = method_detail.operator

  # Any configured override replaces the supplied value
  override_value(op)

  # A blank incoming value falls back to the configured default, if there is one.
  # This runs after the override, so the default is what ends up in @current_value
  # when the incoming value is blank and both are configured.
  incoming_blank = value.nil? || value.to_s.empty?
  @current_value = default_value(op) if(incoming_blank && default_value(op))

  # Decorate with any configured prefix / postfix (this turns the value into a String)
  leading = prefixes(op)
  @current_value = "#{leading}#{@current_value}" if leading

  trailing = postfixes(op)
  @current_value = "#{@current_value}#{trailing}" if trailing

  @current_value
end
226
+
227
+
228
+ # Process a value string from a column.
229
+ # Assigning value(s) to correct association on @load_object.
230
+ # Method detail represents a column from a file and its correlated AR associations.
231
+ # Value string which may contain multiple values for a collection association.
232
+ #
233
def process()
  # Works from @current_method_detail and @current_value, as set by prepare_data.
  #
  # has_many operators : the value is parsed as one or more 'name<delim>values' entries,
  # each looked up on the operator class via a dynamic finder and assigned to @load_object.
  # Lookups that fail are recorded on @load_object.errors and skipped.
  #
  # Anything else : the value is assigned directly.

  if(@current_method_detail.operator_for(:has_many))

    if(@current_method_detail.operator_class && @current_value)

      # there are times when we need to save early, for example before assigning to
      # has_and_belongs_to associations which require the load_object has an id for the join table

      save_if_new

      # A single column can contain multiple associations delimited by special char
      columns = @current_value.to_s.split( LoaderBase::multi_assoc_delim)

      # Size:large|Colour:red,green,blue   => generates find_by_size( 'large' ) and find_all_by_colour( ['red','green','blue'] )

      columns.each do |assoc|
        operator, values = assoc.split(LoaderBase::name_value_delim)

        # Entry had no name/value delimiter (or was empty) so there is nothing to look up.
        # Report it like any other failed lookup rather than crashing on nil.
        if(values.nil?)
          @load_object.errors.add( @current_method_detail.operator, "No lookup value(s) supplied for association key #{operator}")
          puts "WARNING: No lookup value(s) supplied for association key #{operator} - Not added."
          next
        end

        lookups = values.split(LoaderBase::multi_value_delim)

        if(lookups.size > 1)

          @current_value = @current_method_detail.operator_class.send("find_all_by_#{operator}", lookups )

          unless(lookups.size == @current_value.size)
            found = @current_value.collect {|f| f.send(operator) }
            # Use the stored method detail - there is no local 'method_detail' in this method
            @load_object.errors.add( @current_method_detail.operator, "Association with key(s) #{(lookups - found).inspect} NOT found")
            puts "WARNING: Association with key(s) #{(lookups - found).inspect} NOT found - Not added."
            next if(@current_value.empty?)
          end

        else

          @current_value = @current_method_detail.operator_class.send("find_by_#{operator}", lookups )

          unless(@current_value)
            @load_object.errors.add( @current_method_detail.operator, "Association with key #{lookups} NOT found")
            puts "WARNING: Association with key #{lookups} NOT found - Not added."
            next
          end

        end

        # Lookup Assoc's Model done, now add the found value(s) to load model's collection
        @current_method_detail.assign(@load_object, @current_value)
      end
    end
    # END HAS_MANY
  else
    # Nice n simple straight assignment to a column variable
    @current_method_detail.assign(@load_object, @current_value)
  end
end
288
+
289
# Record the current load object as failed.
# Only unsaved (new_record?) objects are recorded, and each object at most once.
def failure
  return unless load_object.new_record?

  @failed_objects << @load_object unless @failed_objects.include?(@load_object)
end
292
+
293
# Save the current load object, returning whatever its save returns.
#
# The object is added (once) to @loaded_objects whenever save returns without raising.
# If anything raises, failure is called, the error is printed and logged, and the
# method returns nil - unless @options[:strict] is set, in which case a RuntimeError
# naming the current column is raised.
def save
  #puts "DEBUG: SAVING #{load_object.class} : #{load_object.inspect}" #if(options[:verbose])
  outcome = @load_object.save

  @loaded_objects.push(@load_object) unless @loaded_objects.include?(@load_object)

  outcome
rescue => e
  failure
  puts "Error saving #{@load_object.class} : #{e.inspect}"
  logger.error e.backtrace
  raise "Error in save whilst processing column #{@current_method_detail.name}" if(@options[:strict])
end
308
+
309
# Class level lookup of the default data object registered for klass.
# The backing hash is created on first use; returns nil when nothing is registered.
def self.default_object_for(klass)
  (@default_data_objects ||= {})[klass]
end
313
+
314
# Register the default value stored under name in @default_values.
def set_default_value(name, value)
  @default_values.store(name, value)
end
317
+
318
# Register the override value stored under operator in @override_values.
def set_override_value(operator, value)
  @override_values.store(operator, value)
end
321
+
322
# Default value registered under name, or nil if none.
def default_value(name)
  @default_values[name]
end
325
+
326
# Replace @current_value with the override registered for operator, when one exists.
# Returns the override applied, or nil when @current_value was left alone.
def override_value(operator)
  forced = @override_values[operator]
  @current_value = forced if forced
end
329
+
330
+
331
# Register the prefix stored under name in @prefixes.
def set_prefix(name, value)
  @prefixes.store(name, value)
end
334
+
335
# Prefix registered under name, or nil if none.
def prefixes(name)
  @prefixes[name]
end
338
+
339
# Register the postfix stored under name in @postfixes.
def set_postfix(name, value)
  @postfixes.store(name, value)
end
342
+
343
# Postfix registered under name, or nil if none.
def postfixes(name)
  @postfixes[name]
end
346
+
347
+
348
+ # Reset the loader, including database object to be populated, and load counts
349
+ #
350
def reset(object = nil)
  # Populate the supplied object, or a fresh one when none is given
  @load_object = object || new_load_object

  # Start the loaded / failed tallies from scratch
  @loaded_objects = []
  @failed_objects = []

  @current_value = nil
end
355
+
356
+
357
# Instantiate @load_object_class, store the instance as @load_object and return it.
def new_load_object
  @load_object = @load_object_class.new
end
361
+
362
# True when the :abort_on_failure option is switched on.
#
# Accepts the String 'true' as well as the boolean true, by comparing the
# option's to_s form. Anything else, including a missing option, is false.
def abort_on_failure?
  @options[:abort_on_failure].to_s == 'true'
end
365
+
366
# Number of entries currently held in @loaded_objects.
def loaded_count
  @loaded_objects.length
end
369
+
370
# Number of entries currently held in @failed_objects.
def failed_count
  @failed_objects.length
end
373
+
374
+
375
+ # Check whether headers contains supplied list
376
def headers_contain_mandatory?( mandatory_list )
  # Accepts a single header or a list; true when nothing is left after removing @headers
  absent = [*mandatory_list] - @headers
  absent.flatten.empty?
end
379
+
380
+
381
+ # Return the entries of the supplied list that are not present in headers
382
def missing_mandatory_headers( mandatory_list )
  # Accepts a single header or a list; returns whatever is left after removing @headers
  absent = [*mandatory_list] - @headers
  absent.flatten
end
385
+
386
# Look up all klass records matching condition_hash, remembering the result set
# in @records[klass]. Returns the first match, or an unsaved klass.new when
# nothing matched.
def find_or_new(klass, condition_hash = {})
  matches = klass.find(:all, :conditions => condition_hash)
  @records[klass] = matches

  matches.any? ? matches.first : klass.new
end
394
+
395
+ private
396
+
397
# Save the load object early, but only when it is both valid and not yet persisted.
# Returns the result of save, or nil when no save was attempted.
def save_if_new
  #puts "SAVE", load_object.inspect
  return unless load_object.valid? && load_object.new_record?

  save
end
401
+
402
+ end
403
+
332
404
  end