datashift 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. data/.document +5 -5
  2. data/Gemfile +28 -25
  3. data/LICENSE.txt +26 -26
  4. data/README.markdown +302 -285
  5. data/README.rdoc +19 -19
  6. data/Rakefile +93 -95
  7. data/VERSION +5 -5
  8. data/datashift.gemspec +162 -178
  9. data/lib/applications/jruby/jexcel_file.rb +396 -396
  10. data/lib/applications/jruby/word.rb +79 -79
  11. data/lib/datashift.rb +152 -113
  12. data/lib/datashift/exceptions.rb +11 -11
  13. data/lib/datashift/file_definitions.rb +353 -353
  14. data/lib/datashift/mapping_file_definitions.rb +87 -87
  15. data/lib/datashift/method_detail.rb +236 -236
  16. data/lib/datashift/method_mapper.rb +256 -256
  17. data/lib/generators/csv_generator.rb +36 -36
  18. data/lib/generators/excel_generator.rb +121 -121
  19. data/lib/generators/generator_base.rb +13 -13
  20. data/lib/helpers/core_ext/to_b.rb +24 -24
  21. data/lib/helpers/spree_helper.rb +131 -131
  22. data/lib/java/poi-3.7/LICENSE +507 -507
  23. data/lib/java/poi-3.7/NOTICE +21 -21
  24. data/lib/java/poi-3.7/RELEASE_NOTES.txt +115 -115
  25. data/lib/loaders/csv_loader.rb +98 -98
  26. data/lib/loaders/excel_loader.rb +154 -149
  27. data/lib/loaders/loader_base.rb +403 -331
  28. data/lib/loaders/spreadsheet_loader.rb +136 -136
  29. data/lib/loaders/spree/image_loader.rb +45 -45
  30. data/lib/loaders/spree/product_loader.rb +224 -224
  31. data/spec/csv_loader_spec.rb +30 -30
  32. data/spec/datashift_spec.rb +26 -26
  33. data/spec/db/migrate/20110803201325_create_test_bed.rb +85 -85
  34. data/spec/excel_generator_spec.rb +78 -78
  35. data/spec/excel_loader_spec.rb +204 -176
  36. data/spec/file_definitions.rb +141 -141
  37. data/spec/fixtures/.~lock.ProjectsSingleCategories.xls# +1 -0
  38. data/spec/fixtures/ProjectsDefaults.yml +29 -0
  39. data/spec/fixtures/config/database.yml +24 -24
  40. data/spec/fixtures/interact_models_db.sqlite +0 -0
  41. data/spec/fixtures/interact_spree_db.sqlite +0 -0
  42. data/spec/fixtures/negative/SpreeProdMiss1Mandatory.csv +4 -4
  43. data/spec/fixtures/negative/SpreeProdMissManyMandatory.csv +4 -4
  44. data/spec/fixtures/spree/SpreeProducts.csv +4 -4
  45. data/spec/fixtures/spree/SpreeProductsMultiColumn.csv +4 -4
  46. data/spec/fixtures/spree/SpreeProductsSimple.csv +4 -4
  47. data/spec/fixtures/spree/SpreeZoneExample.csv +5 -5
  48. data/spec/fixtures/test_model_defs.rb +57 -57
  49. data/spec/loader_spec.rb +120 -120
  50. data/spec/method_mapper_spec.rb +237 -237
  51. data/spec/spec_helper.rb +115 -115
  52. data/spec/spree_generator_spec.rb +64 -64
  53. data/spec/spree_loader_spec.rb +310 -310
  54. data/spec/spree_method_mapping_spec.rb +214 -214
  55. data/tasks/config/seed_fu_product_template.erb +15 -15
  56. data/tasks/config/tidy_config.txt +12 -12
  57. data/tasks/db_tasks.rake +65 -64
  58. data/tasks/excel_generator.rake +78 -78
  59. data/tasks/file_tasks.rake +36 -36
  60. data/tasks/import/csv.rake +49 -49
  61. data/tasks/import/excel.rake +71 -66
  62. data/tasks/spree/image_load.rake +108 -108
  63. data/tasks/spree/product_loader.rake +43 -43
  64. data/tasks/word_to_seedfu.rake +166 -166
  65. data/test/helper.rb +18 -18
  66. data/test/test_interact.rb +7 -7
  67. metadata +7 -38
  68. data/Gemfile.lock +0 -211
  69. data/bin/autospec +0 -16
  70. data/bin/convert_to_should_syntax +0 -16
  71. data/bin/erubis +0 -16
  72. data/bin/htmldiff +0 -16
  73. data/bin/jeweler +0 -16
  74. data/bin/ldiff +0 -16
  75. data/bin/nokogiri +0 -16
  76. data/bin/rackup +0 -16
  77. data/bin/rails +0 -16
  78. data/bin/rake +0 -16
  79. data/bin/rake2thor +0 -16
  80. data/bin/ri +0 -16
  81. data/bin/rspec +0 -16
  82. data/bin/spree +0 -16
  83. data/bin/thor +0 -16
  84. data/bin/tilt +0 -16
  85. data/bin/tt +0 -16
@@ -1,150 +1,155 @@
1
- # Copyright:: (c) Autotelik Media Ltd 2011
2
- # Author :: Tom Statter
3
- # Date :: Aug 2011
4
- # License:: MIT
5
- #
6
- # Details:: Specific loader to support Excel files.
7
- # Note this only requires JRuby, Excel not required, nor Win OLE.
8
- #
9
- # Maps column headings to operations on the model.
10
- # Iterates over all the rows using mapped operations to assign row data to a database object,
11
- # i.e pulls data from each column and sends to object.
12
- #
13
- require 'datashift/exceptions'
14
-
15
-
16
- module DataShift
17
-
18
- if(Guards::jruby?)
19
-
20
- require 'loaders/loader_base'
21
-
22
- require 'java'
23
- require 'jexcel_file'
24
-
25
- module ExcelLoading
26
-
27
- # Options:
28
- # [:header_row] : Default is 0. Use alternative row as header definition.
29
- # [:mandatory] : Array of mandatory column names
30
- # [:strict] : Raise exception when no mapping found for a column heading (non mandatory)
31
- # [:sheet_number]
32
-
33
- def perform_excel_load( file_name, options = {} )
34
-
35
- @mandatory = options[:mandatory] || []
36
-
37
- @excel = JExcelFile.new
38
-
39
- @excel.open(file_name)
40
-
41
- #if(options[:verbose])
42
- puts "\n\n\nLoading from Excel file: #{file_name}"
43
-
44
- sheet_number = options[:sheet_number] || 0
45
-
46
- @sheet = @excel.sheet( sheet_number )
47
-
48
- header_row_index = options[:header_row] || 0
49
- @header_row = @sheet.getRow(header_row_index)
50
-
51
- raise MissingHeadersError, "No headers found - Check Sheet #{@sheet} is complete and Row #{header_row_index} contains headers" unless(@header_row)
52
-
53
- @headers = []
54
-
55
- (0..JExcelFile::MAX_COLUMNS).each do |i|
56
- cell = @header_row.getCell(i)
57
- break unless cell
58
- header = "#{@excel.cell_value(cell).to_s}".strip
59
- break if header.empty?
60
- @headers << header
61
- end
62
-
63
- raise MissingHeadersError, "No headers found - Check Sheet #{@sheet} is complete and Row #{header_row_index} contains headers" if(@headers.empty?)
64
-
65
- # Create a method_mapper which maps list of headers into suitable calls on the Active Record class
66
- map_headers_to_operators( @headers, options[:strict] , @mandatory )
67
-
68
- load_object_class.transaction do
69
- @loaded_objects = []
70
-
71
- (1..@excel.num_rows).collect do |row|
72
-
73
- # Excel num_rows seems to return all 'visible' rows, which appears to be greater than the actual data rows
74
- # (TODO - write spec to process .xls with a huge number of rows)
75
- #
76
- # This is rubbish but currently manually detect when actual data ends, this isn't very smart but
77
- # got no better idea than ending once we hit the first completely empty row
78
- break if @excel.sheet.getRow(row).nil?
79
-
80
- contains_data = false
81
-
82
- # TODO - Smart sorting of column processing order ....
83
- # Does not currently ensure mandatory columns (for valid?) processed first but model needs saving
84
- # before associations can be processed so user should ensure mandatory columns are prior to associations
85
-
86
- # as part of this we also attempt to save early, for example before assigning to
87
- # has_and_belongs_to associations which require the load_object has an id for the join table
88
-
89
- # Iterate over the columns method_mapper found in Excel,
90
- # pulling data out of associated column
91
- @method_mapper.method_details.each_with_index do |method_detail, col|
92
-
93
- value = value_at(row, col)
94
-
95
- contains_data = true unless(value.nil? || value.to_s.empty?)
96
-
97
- #puts "DEBUG: Excel process METHOD :#{method_detail.inspect}", value.inspect
98
- prepare_data(method_detail, value)
99
-
100
- process()
101
- end
102
-
103
- break unless(contains_data == true)
104
-
105
- # TODO - requirements to handle not valid ?
106
- # all or nothing or carry on and dump out the exception list at end
107
- #puts "DEBUG: FINAL SAVE #{load_object.inspect}"
108
- save
109
- #puts "DEBUG: SAVED #{load_object.inspect}"
110
-
111
- # don't forget to reset the object or we'll update rather than create
112
- new_load_object
113
-
114
- end
115
- end
116
- puts "Excel loading stage complete - #{loaded_objects.size} rows added."
117
- end
118
-
119
- def value_at(row, column)
120
- @excel.get_cell_value( @excel.sheet.getRow(row), column)
121
- end
122
- end
123
-
124
-
125
- class ExcelLoader < LoaderBase
126
-
127
- include ExcelLoading
128
-
129
- def initialize(klass, object = nil, options = {})
130
- super( klass, object, options )
131
- raise "Cannot load - failed to create a #{klass}" unless @load_object
132
- end
133
-
134
-
135
- def perform_load( file_name, options = {} )
136
- perform_excel_load( file_name, options )
137
-
138
- puts "Excel loading stage complete - #{loaded_objects.size} rows added."
139
- end
140
-
141
- end
142
-
143
- else
144
-
145
- module ExcelLoading
146
- end
147
-
148
- end
149
-
1
+ # Copyright:: (c) Autotelik Media Ltd 2011
2
+ # Author :: Tom Statter
3
+ # Date :: Aug 2011
4
+ # License:: MIT
5
+ #
6
+ # Details:: Specific loader to support Excel files.
7
+ # Note this only requires JRuby, Excel not required, nor Win OLE.
8
+ #
9
+ # Maps column headings to operations on the model.
10
+ # Iterates over all the rows using mapped operations to assign row data to a database object,
11
+ # i.e pulls data from each column and sends to object.
12
+ #
13
+ require 'datashift/exceptions'
14
+
15
+
16
+ module DataShift
17
+
18
+ if(Guards::jruby?)
19
+
20
+ require 'loaders/loader_base'
21
+
22
+ require 'java'
23
+ require 'jexcel_file'
24
+
25
+ module ExcelLoading
26
+
27
+ # Options:
28
+ # [:header_row] : Default is 0. Use alternative row as header definition.
29
+ # [:mandatory] : Array of mandatory column names
30
+ # [:strict] : Raise exception when no mapping found for a column heading (non mandatory)
31
+ # [:sheet_number]
32
+
33
+ def perform_excel_load( file_name, options = {} )
34
+
35
+ @mandatory = options[:mandatory] || []
36
+
37
+ @excel = JExcelFile.new
38
+
39
+ @excel.open(file_name)
40
+
41
+ #if(options[:verbose])
42
+ puts "\n\n\nLoading from Excel file: #{file_name}"
43
+
44
+ sheet_number = options[:sheet_number] || 0
45
+
46
+ @sheet = @excel.sheet( sheet_number )
47
+
48
+ header_row_index = options[:header_row] || 0
49
+ @header_row = @sheet.getRow(header_row_index)
50
+
51
+ raise MissingHeadersError, "No headers found - Check Sheet #{@sheet} is complete and Row #{header_row_index} contains headers" unless(@header_row)
52
+
53
+ @headers = []
54
+
55
+ (0..JExcelFile::MAX_COLUMNS).each do |i|
56
+ cell = @header_row.getCell(i)
57
+ break unless cell
58
+ header = "#{@excel.cell_value(cell).to_s}".strip
59
+ break if header.empty?
60
+ @headers << header
61
+ end
62
+
63
+ raise MissingHeadersError, "No headers found - Check Sheet #{@sheet} is complete and Row #{header_row_index} contains headers" if(@headers.empty?)
64
+
65
+ # Create a method_mapper which maps list of headers into suitable calls on the Active Record class
66
+ map_headers_to_operators( @headers, options[:strict] , @mandatory )
67
+
68
+ logger.info "Excel Loader prcoessing #{@excel.num_rows} rows"
69
+ load_object_class.transaction do
70
+ @loaded_objects = []
71
+
72
+ (1..@excel.num_rows).collect do |row|
73
+
74
+ # Excel num_rows seems to return all 'visible' rows, which appears to be greater than the actual data rows
75
+ # (TODO - write spec to process .xls with a huge number of rows)
76
+ #
77
+ # This is rubbish but currently manually detect when actual data ends, this isn't very smart but
78
+ # got no better idea than ending once we hit the first completely empty row
79
+ break if @excel.sheet.getRow(row).nil?
80
+
81
+ contains_data = false
82
+
83
+ # TODO - Smart sorting of column processing order ....
84
+ # Does not currently ensure mandatory columns (for valid?) processed first but model needs saving
85
+ # before associations can be processed so user should ensure mandatory columns are prior to associations
86
+
87
+ # as part of this we also attempt to save early, for example before assigning to
88
+ # has_and_belongs_to associations which require the load_object has an id for the join table
89
+
90
+ # Iterate over the columns method_mapper found in Excel,
91
+ # pulling data out of associated column
92
+ @method_mapper.method_details.each_with_index do |method_detail, col|
93
+
94
+ value = value_at(row, col)
95
+
96
+ contains_data = true unless(value.nil? || value.to_s.empty?)
97
+
98
+ prepare_data(method_detail, value)
99
+
100
+ process()
101
+ end
102
+
103
+ break unless(contains_data == true)
104
+
105
+ # TODO - requirements to handle not valid ?
106
+ # all or nothing or carry on and dump out the exception list at end
107
+ #puts "DEBUG: FINAL SAVE #{load_object.inspect}"
108
+ unless(save)
109
+ failure
110
+ logger.error "Failed to save row [#{row}]"
111
+ logger.error load_object.errors.inspect
112
+ else
113
+ logger.info "Row #{row} succesfully SAVED : ID #{load_object.id}"
114
+ end
115
+
116
+ # don't forget to reset the object or we'll update rather than create
117
+ new_load_object
118
+
119
+ end
120
+ end
121
+ puts "Excel loading stage complete - #{loaded_objects.size} rows added."
122
+ end
123
+
124
+ def value_at(row, column)
125
+ @excel.get_cell_value( @excel.sheet.getRow(row), column)
126
+ end
127
+ end
128
+
129
+
130
+ class ExcelLoader < LoaderBase
131
+
132
+ include ExcelLoading
133
+
134
+ def initialize(klass, object = nil, options = {})
135
+ super( klass, object, options )
136
+ raise "Cannot load - failed to create a #{klass}" unless @load_object
137
+ end
138
+
139
+
140
+ def perform_load( file_name, options = {} )
141
+ perform_excel_load( file_name, options )
142
+
143
+ puts "Excel loading stage complete - #{loaded_objects.size} rows added."
144
+ end
145
+
146
+ end
147
+
148
+ else
149
+
150
+ module ExcelLoading
151
+ end
152
+
153
+ end
154
+
150
155
  end
@@ -1,332 +1,404 @@
1
- # Copyright:: (c) Autotelik Media Ltd 2011
2
- # Author :: Tom Statter
3
- # Date :: Aug 2010
4
- # License:: MIT
5
- #
6
- # Details:: Base class for loaders, providing a process hook which populates a model,
7
- # based on a method map and supplied value from a file - i.e a single column/row's string value.
8
- # Note that although a single column, the string can be formatted to contain multiple values.
9
- #
10
- # Tightly coupled with MethodMapper classes (in lib/engine) which contains full details of
11
- # a file's column and it's correlated AR associations.
12
- #
13
- module DataShift
14
-
15
- require 'datashift/method_mapper'
16
-
17
- class LoaderBase
18
-
19
- attr_reader :headers
20
-
21
- attr_accessor :method_mapper
22
-
23
- attr_accessor :load_object_class, :load_object
24
- attr_accessor :current_value, :current_method_detail
25
-
26
- attr_accessor :loaded_objects, :failed_objects
27
-
28
- attr_accessor :options
29
-
30
- # Support multiple associations being added to a base object to be specified in a single column.
31
- #
32
- # Entry represents the association to find via supplied name, value to use in the lookup.
33
- # Can contain multiple lookup name/value pairs, separated by multi_assoc_delim ( | )
34
- #
35
- # Default syntax :
36
- #
37
- # Name1:value1, value2|Name2:value1, value2, value3|Name3:value1, value2
38
- #
39
- # E.G.
40
- # Association Properties, has a column named Size, and another called Colour,
41
- # and this combination could be used to lookup multiple associations to add to the main model Jumper
42
- #
43
- # Size:small # => generates find_by_size( 'small' )
44
- # Size:large # => generates find_by_size( 'large' )
45
- # Colour:red,green,blue # => generates find_all_by_colour( ['red','green','blue'] )
46
- #
47
- # Size:large|Size:medium|Size:large
48
- # => Find 3 different associations, perform lookup via column called Size
49
- # => Jumper.properties << [ small, medium, large ]
50
- #
51
- def self.name_value_delim
52
- @name_value_delim ||= ':'
53
- @name_value_delim
54
- end
55
-
56
- def self.set_name_value_delim(x) @name_value_delim = x; end
57
- # TODO - support embedded object creation/update via hash (which hopefully we should be able to just forward to AR)
58
- #
59
- # |Category|
60
- # name:new{ :date => '20110102', :owner = > 'blah'}
61
- #
62
-
63
-
64
- def self.multi_value_delim
65
- @multi_value_delim ||= ','
66
- @multi_value_delim
67
- end
68
-
69
- def self.set_multi_value_delim(x) @multi_value_delim = x; end
70
-
71
- # TODO - support multi embedded object creation/update via hash (which hopefully we should be able to just forward to AR)
72
- #
73
- # |Category|
74
- # name:new{ :a => 1, :b => 2}|name:medium{ :a => 6, :b => 34}|name:old{ :a => 12, :b => 67}
75
- #
76
- def self.multi_assoc_delim
77
- @multi_assoc_delim ||= '|'
78
- @multi_assoc_delim
79
- end
80
-
81
- def self.set_multi_assoc_delim(x) @multi_assoc_delim = x; end
82
-
83
- # Options
84
- # :instance_methods => true
85
-
86
- def initialize(object_class, object = nil, options = {})
87
- @load_object_class = object_class
88
-
89
- # Gather list of all possible 'setter' methods on AR class (instance variables and associations)
90
- DataShift::MethodMapper.find_operators( @load_object_class, :reload => true, :instance_methods => options[:instance_methods] )
91
-
92
- @method_mapper = DataShift::MethodMapper.new
93
- @options = options.clone
94
- @headers = []
95
-
96
- @default_values = {}
97
- @prefixes = {}
98
- @postfixes = {}
99
-
100
- reset(object)
101
- end
102
-
103
-
104
- # kinda the derived classes interface - best way in Ruby ?
105
- def perform_load( input, options = {} )
106
- raise "WARNING- ABSTRACT METHOD CALLED - Please implement perform_load()"
107
- end
108
-
109
-
110
- # Core API - Given a list of free text column names from a file, map all headers to
111
- # method mapper's operator list.
112
- # Options:
113
- # strict : report any header values that can't be mapped as an error
114
- #
115
- def map_headers_to_operators( headers, strict, mandatory = [])
116
- @headers = headers
117
-
118
- @method_mapper.populate_methods( load_object_class, @headers )
119
-
120
- unless(@method_mapper.missing_methods.empty?)
121
- puts "WARNING: Following column headings could not be mapped : #{@method_mapper.missing_methods.inspect}"
122
- raise MappingDefinitionError, "Missing mappings for columns : #{@method_mapper.missing_methods.join(",")}" if(strict)
123
- end
124
-
125
- unless(@method_mapper.contains_mandatory?(mandatory) )
126
- @method_mapper.missing_mandatory(mandatory).each { |e| puts "ERROR: Mandatory column missing - expected column '#{e}'" }
127
- raise MissingMandatoryError, "Mandatory columns missing - please fix and retry."
128
- end unless(mandatory.empty?)
129
- end
130
-
131
-
132
- # Core API - Given a free text column name from a file, search method mapper for
133
- # associated operator on base object class.
134
- #
135
- # If suitable association found, process row data and then assign to current load_object
136
- def find_and_process(column_name, data)
137
- method_detail = MethodMapper.find_method_detail( load_object_class, column_name )
138
-
139
- if(method_detail)
140
- prepare_data(method_detail, data)
141
- process()
142
- else
143
- @load_object.errors.add_base( "No matching method found for column #{column_name}")
144
- end
145
- end
146
-
147
-
148
- # Set member variables to hold detsails and value.
149
- #
150
- # Check supplied value, validate it, and if required :
151
- # set to any provided default value
152
- # prepend or append with any provided extensions
153
- def prepare_data(method_detail, value)
154
-
155
- @current_value = value
156
-
157
- @current_method_detail = method_detail
158
-
159
- operator = method_detail.operator
160
-
161
- if(default_value(operator) && (value.nil? || value.to_s.empty?))
162
- @current_value = default_value(operator)
163
- end
164
-
165
- @current_value = "#{prefixes(operator)}#{@current_value}" if(prefixes(operator))
166
- @current_value = "#{@current_value}#{postfixes(operator)}" if(postfixes(operator))
167
-
168
- @current_value
169
- end
170
-
171
-
172
- # Process a value string from a column.
173
- # Assigning value(s) to correct association on @load_object.
174
- # Method detail represents a column from a file and it's correlated AR associations.
175
- # Value string which may contain multiple values for a collection association.
176
- #
177
- def process()
178
-
179
- if(@current_method_detail.operator_for(:has_many))
180
-
181
- if(@current_method_detail.operator_class && @current_value)
182
-
183
- # there are times when we need to save early, for example before assigning to
184
- # has_and_belongs_to associations which require the load_object has an id for the join table
185
-
186
- save_if_new
187
-
188
- # A single column can contain multiple associations delimited by special char
189
- columns = @current_value.to_s.split( LoaderBase::multi_assoc_delim)
190
-
191
- # Size:large|Colour:red,green,blue => generates find_by_size( 'large' ) and find_all_by_colour( ['red','green','blue'] )
192
-
193
- columns.each do |assoc|
194
- operator, values = assoc.split(LoaderBase::name_value_delim)
195
-
196
- lookups = values.split(LoaderBase::multi_value_delim)
197
-
198
- if(lookups.size > 1)
199
-
200
- @current_value = @current_method_detail.operator_class.send("find_all_by_#{operator}", lookups )
201
-
202
- unless(lookups.size == @current_value.size)
203
- found = @current_value.collect {|f| f.send(operator) }
204
- @load_object.errors.add( method_detail.operator, "Association with key(s) #{(lookups - found).inspect} NOT found")
205
- puts "WARNING: Association with key(s) #{(lookups - found).inspect} NOT found - Not added."
206
- next if(@current_value.empty?)
207
- end
208
-
209
- else
210
-
211
- @current_value = @current_method_detail.operator_class.send("find_by_#{operator}", lookups )
212
-
213
- unless(@current_value)
214
- @load_object.errors.add( @current_method_detail.operator, "Association with key #{lookups} NOT found")
215
- puts "WARNING: Association with key #{lookups} NOT found - Not added."
216
- next
217
- end
218
-
219
- end
220
-
221
- # Lookup Assoc's Model done, now add the found value(s) to load model's collection
222
- @current_method_detail.assign(@load_object, @current_value)
223
- end
224
- end
225
- # END HAS_MANY
226
- else
227
- # Nice n simple straight assignment to a column variable
228
- #puts "INFO: LOADER BASE processing #{method_detail.name}"
229
- @current_method_detail.assign(@load_object, @current_value)
230
- end
231
- end
232
-
233
- def save
234
- #puts "DEBUG: SAVING #{load_object.class} : #{load_object.inspect}" #if(options[:verbose])
235
- begin
236
- result = @load_object.save
237
- #puts "DEBUG: SAVED [#{result.inspect}]"
238
- #puts "SAVED 2. #{load_object.errors.methods.inspect}"
239
- #puts "SAVED 3. #{load_object.errors.full_messages.inspect}"
240
- @loaded_objects << @load_object unless(@loaded_objects.include?(@load_object))
241
-
242
- return result
243
- rescue => e
244
- @failed_objects << @load_object unless( !load_object.new_record? || @failed_objects.include?(@load_object))
245
- puts "Error saving #{@load_object.class} : #{e.inspect}"
246
- puts e.backtrace
247
- raise "Error in save whilst processing column #{@current_method_detail.name}" if(@options[:strict])
248
- end
249
- end
250
-
251
- def set_default_value( name, value )
252
- @default_values[name] = value
253
- end
254
-
255
- def default_value(name)
256
- @default_values[name]
257
- end
258
-
259
- def set_prefix( name, value )
260
- @prefixes[name] = value
261
- end
262
-
263
- def prefixes(name)
264
- @prefixes[name]
265
- end
266
-
267
- def set_postfix( name, value )
268
- @postfixes[name] = value
269
- end
270
-
271
- def postfixes(name)
272
- @postfixes[name]
273
- end
274
-
275
-
276
- # Reset the loader, including database object to be populated, and load counts
277
- #
278
- def reset(object = nil)
279
- @load_object = object || new_load_object
280
- @loaded_objects, @failed_objects = [],[]
281
- @current_value = nil
282
- end
283
-
284
-
285
- def new_load_object
286
- @load_object = @load_object_class.new
287
- @load_object
288
- end
289
-
290
- def abort_on_failure?
291
- @options[:abort_on_failure] == 'true'
292
- end
293
-
294
- def loaded_count
295
- @loaded_objects.size
296
- end
297
-
298
- def failed_count
299
- @failed_objects.size
300
- end
301
-
302
-
303
- # Check whether headers contains supplied list
304
- def headers_contain_mandatory?( mandatory_list )
305
- [ [*mandatory_list] - @headers].flatten.empty?
306
- end
307
-
308
-
309
- # Check whether headers contains supplied list
310
- def missing_mandatory_headers( mandatory_list )
311
- [ [*mandatory_list] - @headers].flatten
312
- end
313
-
314
- def find_or_new( klass, condition_hash = {} )
315
- @records[klass] = klass.find(:all, :conditions => condition_hash)
316
- if @records[klass].any?
317
- return @records[klass].first
318
- else
319
- return klass.new
320
- end
321
- end
322
-
323
- private
324
-
325
- def save_if_new
326
- #puts "SAVE", load_object.inspect
327
- save if(load_object.valid? && load_object.new_record?)
328
- end
329
-
330
- end
331
-
1
+ # Copyright:: (c) Autotelik Media Ltd 2011
2
+ # Author :: Tom Statter
3
+ # Date :: Aug 2010
4
+ # License:: MIT
5
+ #
6
+ # Details:: Base class for loaders, providing a process hook which populates a model,
7
+ # based on a method map and supplied value from a file - i.e a single column/row's string value.
8
+ # Note that although a single column, the string can be formatted to contain multiple values.
9
+ #
10
+ # Tightly coupled with MethodMapper classes (in lib/engine) which contains full details of
11
+ # a file's column and its correlated AR associations.
12
+ #
13
+ module DataShift
14
+
15
+ require 'datashift/method_mapper'
16
+
17
+ class LoaderBase
18
+
19
+
20
+ include DataShift::Logging
21
+
22
+ attr_reader :headers
23
+
24
+ attr_accessor :method_mapper
25
+
26
+ attr_accessor :load_object_class, :load_object
27
+ attr_accessor :current_value, :current_method_detail
28
+
29
+ attr_accessor :loaded_objects, :failed_objects
30
+
31
+ attr_accessor :options
32
+
33
+ # Support multiple associations being added to a base object to be specified in a single column.
34
+ #
35
+ # Entry represents the association to find via supplied name, value to use in the lookup.
36
+ # Can contain multiple lookup name/value pairs, separated by multi_assoc_delim ( | )
37
+ #
38
+ # Default syntax :
39
+ #
40
+ # Name1:value1, value2|Name2:value1, value2, value3|Name3:value1, value2
41
+ #
42
+ # E.G.
43
+ # Association Properties, has a column named Size, and another called Colour,
44
+ # and this combination could be used to lookup multiple associations to add to the main model Jumper
45
+ #
46
+ # Size:small # => generates find_by_size( 'small' )
47
+ # Size:large # => generates find_by_size( 'large' )
48
+ # Colour:red,green,blue # => generates find_all_by_colour( ['red','green','blue'] )
49
+ #
50
+ # Size:small|Size:medium|Size:large
51
+ # => Find 3 different associations, perform lookup via column called Size
52
+ # => Jumper.properties << [ small, medium, large ]
53
+ #
54
+ def self.name_value_delim
55
+ @name_value_delim ||= ':'
56
+ @name_value_delim
57
+ end
58
+
59
+ def self.set_name_value_delim(x) @name_value_delim = x; end
60
+ # TODO - support embedded object creation/update via hash (which hopefully we should be able to just forward to AR)
61
+ #
62
+ # |Category|
63
+ # name:new{ :date => '20110102', :owner = > 'blah'}
64
+ #
65
+
66
+
67
+ def self.multi_value_delim
68
+ @multi_value_delim ||= ','
69
+ @multi_value_delim
70
+ end
71
+
72
+ def self.set_multi_value_delim(x) @multi_value_delim = x; end
73
+
74
+ # TODO - support multi embedded object creation/update via hash (which hopefully we should be able to just forward to AR)
75
+ #
76
+ # |Category|
77
+ # name:new{ :a => 1, :b => 2}|name:medium{ :a => 6, :b => 34}|name:old{ :a => 12, :b => 67}
78
+ #
79
+ def self.multi_assoc_delim
80
+ @multi_assoc_delim ||= '|'
81
+ @multi_assoc_delim
82
+ end
83
+
84
+
85
+ def self.set_multi_assoc_delim(x) @multi_assoc_delim = x; end
86
+
87
+ # Options
88
+ # :instance_methods => true
89
+
90
+ def initialize(object_class, object = nil, options = {})
91
+ @load_object_class = object_class
92
+
93
+ # Gather list of all possible 'setter' methods on AR class (instance variables and associations)
94
+ DataShift::MethodMapper.find_operators( @load_object_class, :reload => true, :instance_methods => options[:instance_methods] )
95
+
96
+ @method_mapper = DataShift::MethodMapper.new
97
+ @options = options.clone
98
+ @headers = []
99
+
100
+ @default_data_objects ||= {}
101
+
102
+ @default_values = {}
103
+ @override_values = {}
104
+
105
+ @prefixes = {}
106
+ @postfixes = {}
107
+
108
+ reset(object)
109
+ end
110
+
111
+
112
+ # kinda the derived classes interface - best way in Ruby ?
113
+ def perform_load( input, options = {} )
114
+ raise "WARNING- ABSTRACT METHOD CALLED - Please implement perform_load()"
115
+ end
116
+
117
+
118
+ # Core API - Given a list of free text column names from a file, map all headers to
119
+ # method mapper's operator list.
120
+ # Options:
121
+ # strict : report any header values that can't be mapped as an error
122
+ #
123
+ def map_headers_to_operators( headers, strict, mandatory = [])
124
+ @headers = headers
125
+
126
+ @method_mapper.populate_methods( load_object_class, @headers )
127
+
128
+ unless(@method_mapper.missing_methods.empty?)
129
+ puts "WARNING: Following column headings could not be mapped : #{@method_mapper.missing_methods.inspect}"
130
+ raise MappingDefinitionError, "Missing mappings for columns : #{@method_mapper.missing_methods.join(",")}" if(strict)
131
+ end
132
+
133
+ unless(@method_mapper.contains_mandatory?(mandatory) )
134
+ @method_mapper.missing_mandatory(mandatory).each { |e| puts "ERROR: Mandatory column missing - expected column '#{e}'" }
135
+ raise MissingMandatoryError, "Mandatory columns missing - please fix and retry."
136
+ end unless(mandatory.empty?)
137
+ end
138
+
139
+
140
+ # Core API - Given a free text column name from a file, search method mapper for
141
+ # associated operator on base object class.
142
+ #
143
+ # If suitable association found, process row data and then assign to current load_object
144
+ def find_and_process(column_name, data)
145
+ method_detail = MethodMapper.find_method_detail( load_object_class, column_name )
146
+
147
+ if(method_detail)
148
+ prepare_data(method_detail, data)
149
+ process()
150
+ else
151
+ @load_object.errors.add_base( "No matching method found for column #{column_name}")
152
+ end
153
+ end
154
+
155
+
156
+ # Default values can be provided in YAML config file
157
+ # Format :
158
+ # Load Class
159
+ # attribute: value
160
+
161
+ def configure_from( yaml_file )
162
+
163
+ data = YAML::load( File.open(yaml_file) )
164
+
165
+
166
+ # TODO - MOVE DEFAULTS TO OWN MODULE
167
+ # decorate the loading class with the defaults/overrides to manage itself
168
+ # IDEAS .....
169
+ #
170
+ #unless(@default_data_objects[load_object_class])
171
+ #
172
+ # @default_data_objects[load_object_class] = load_object_class.new
173
+
174
+ # default_data_object = @default_data_objects[load_object_class]
175
+
176
+
177
+ # default_data_object.instance_eval do
178
+ # def datashift_defaults=(hash)
179
+ # @datashift_defaults = hash
180
+ # end
181
+ # def datashift_defaults
182
+ # @datashift_defaults
183
+ # end
184
+ #end unless load_object_class.respond_to?(:datashift_defaults)
185
+ #end
186
+
187
+ #puts load_object_class.new.to_yaml
188
+
189
+ puts data.inspect
190
+
191
+ if(data[load_object_class.name])
192
+
193
+ deflts = data[load_object_class.name]['datashift_defaults']
194
+ @default_values.merge!(deflts) if deflts
195
+
196
+ ovrides = data[load_object_class.name]['datashift_overrides']
197
+ @override_values.merge!(ovrides) if ovrides
198
+ end
199
+
200
+ end
201
+
202
# Remember the method detail and value for the column being processed.
#
# The supplied value is then adjusted, in this order :
#   - replaced by any override registered for the operator
#   - replaced by any default registered for the operator, when the supplied value is nil or empty
#   - prepended with any registered prefix
#   - appended with any registered postfix
#
# Returns the resulting @current_value.
def prepare_data(method_detail, value)
  @current_method_detail = method_detail
  @current_value = value

  op = method_detail.operator

  override_value(op)

  if value.nil? || value.to_s.empty?
    fallback = default_value(op)
    @current_value = fallback if fallback
  end

  leading = prefixes(op)
  @current_value = "#{leading}#{@current_value}" if leading

  trailing = postfixes(op)
  @current_value = "#{@current_value}#{trailing}" if trailing

  @current_value
end
226
+
227
+
228
+ # Process a value string from a column.
229
+ # Assigning value(s) to correct association on @load_object.
230
+ # Method detail represents a column from a file and it's correlated AR associations.
231
+ # Value string which may contain multiple values for a collection association.
232
+ #
233
+ def process()
234
+
235
+ if(@current_method_detail.operator_for(:has_many))
236
+
237
+ if(@current_method_detail.operator_class && @current_value)
238
+
239
+ # there are times when we need to save early, for example before assigning to
240
+ # has_and_belongs_to associations which require the load_object has an id for the join table
241
+
242
+ save_if_new
243
+
244
+ # A single column can contain multiple associations delimited by special char
245
+ columns = @current_value.to_s.split( LoaderBase::multi_assoc_delim)
246
+
247
+ # Size:large|Colour:red,green,blue => generates find_by_size( 'large' ) and find_all_by_colour( ['red','green','blue'] )
248
+
249
+ columns.each do |assoc|
250
+ operator, values = assoc.split(LoaderBase::name_value_delim)
251
+
252
+ lookups = values.split(LoaderBase::multi_value_delim)
253
+
254
+ if(lookups.size > 1)
255
+
256
+ @current_value = @current_method_detail.operator_class.send("find_all_by_#{operator}", lookups )
257
+
258
+ unless(lookups.size == @current_value.size)
259
+ found = @current_value.collect {|f| f.send(operator) }
260
+ @load_object.errors.add( method_detail.operator, "Association with key(s) #{(lookups - found).inspect} NOT found")
261
+ puts "WARNING: Association with key(s) #{(lookups - found).inspect} NOT found - Not added."
262
+ next if(@current_value.empty?)
263
+ end
264
+
265
+ else
266
+
267
+ @current_value = @current_method_detail.operator_class.send("find_by_#{operator}", lookups )
268
+
269
+ unless(@current_value)
270
+ @load_object.errors.add( @current_method_detail.operator, "Association with key #{lookups} NOT found")
271
+ puts "WARNING: Association with key #{lookups} NOT found - Not added."
272
+ next
273
+ end
274
+
275
+ end
276
+
277
+ # Lookup Assoc's Model done, now add the found value(s) to load model's collection
278
+ @current_method_detail.assign(@load_object, @current_value)
279
+ end
280
+ end
281
+ # END HAS_MANY
282
+ else
283
+ # Nice n simple straight assignment to a column variable
284
+ #puts "INFO: LOADER BASE processing #{method_detail.name}"
285
+ @current_method_detail.assign(@load_object, @current_value)
286
+ end
287
+ end
288
+
289
# Add the current load object to @failed_objects, but only while it is still
# a new record and has not been added already.
def failure
  return unless load_object.new_record?
  return if @failed_objects.include?(@load_object)

  @failed_objects << @load_object
end
292
+
293
# Save the current load object and add it to @loaded_objects (once only).
#
# Returns the result of the object's own save.
#
# If saving raises, the object is passed to failure, the error is printed and
# its backtrace logged. With the :strict option set a RuntimeError naming the
# current column is then raised; otherwise nil is returned.
def save
  outcome = @load_object.save

  already_tracked = @loaded_objects.include?(@load_object)
  @loaded_objects << @load_object unless already_tracked

  outcome
rescue => e
  failure
  puts "Error saving #{@load_object.class} : #{e.inspect}"
  logger.error e.backtrace
  if @options[:strict]
    raise "Error in save whilst processing column #{@current_method_detail.name}"
  end
end
308
+
309
# Look up the entry held for klass in the class level @default_data_objects
# hash, creating the (empty) hash on first use.
def self.default_object_for(klass)
  (@default_data_objects ||= {})[klass]
end
313
+
314
# Register value as the default stored under name in @default_values.
def set_default_value(name, value)
  @default_values.store(name, value)
end
317
+
318
# Register value as the override stored under operator in @override_values.
def set_override_value(operator, value)
  @override_values.store(operator, value)
end
321
+
322
# Fetch the default registered under name, nil when none has been set.
def default_value(name)
  defaults = @default_values
  defaults[name]
end
325
+
326
# Replace @current_value with the override registered for operator, if any.
# @current_value is left alone when no override is registered.
def override_value(operator)
  forced = @override_values[operator]
  @current_value = forced if forced
end
329
+
330
+
331
# Register value as the prefix stored under name in @prefixes.
def set_prefix(name, value)
  @prefixes.store(name, value)
end
334
+
335
# Fetch the prefix registered under name, nil when none has been set.
def prefixes(name)
  registered = @prefixes
  registered[name]
end
338
+
339
# Register value as the postfix stored under name in @postfixes.
def set_postfix(name, value)
  @postfixes.store(name, value)
end
342
+
343
# Fetch the postfix registered under name, nil when none has been set.
def postfixes(name)
  registered = @postfixes
  registered[name]
end
346
+
347
+
348
# Reset the loader : the object to be populated becomes the supplied object
# (or a fresh one from new_load_object when none is given), the loaded and
# failed object lists are emptied and the current value is cleared.
def reset(object = nil)
  @load_object = object || new_load_object

  @loaded_objects = []
  @failed_objects = []

  @current_value = nil
end
355
+
356
+
357
# Instantiate @load_object_class, keep the instance as the current
# @load_object and return it.
def new_load_object
  @load_object = @load_object_class.new
end
361
+
362
# Whether the :abort_on_failure option has been switched on.
#
# The option is accepted as the String 'true' (e.g. when passed through from
# a command line) or as the boolean true; anything else counts as off.
def abort_on_failure?
  @options[:abort_on_failure].to_s == 'true'
end
365
+
366
# Number of objects currently held in @loaded_objects.
def loaded_count
  @loaded_objects.length
end
369
+
370
# Number of objects currently held in @failed_objects.
def failed_count
  @failed_objects.length
end
373
+
374
+
375
# Check whether @headers contains every entry of the supplied list.
# A single value may be supplied instead of a list.
def headers_contain_mandatory?( mandatory_list )
  absent = [*mandatory_list] - @headers
  absent.flatten.empty?
end
379
+
380
+
381
# List the entries of the supplied list that are not present in @headers.
# A single value may be supplied instead of a list; the result is always a flat Array.
def missing_mandatory_headers( mandatory_list )
  absent = [*mandatory_list] - @headers
  absent.flatten
end
385
+
386
# Find all records of klass matching condition_hash, remembering them in
# @records under klass.
#
# Returns the first match when there is one, otherwise a new klass instance.
def find_or_new( klass, condition_hash = {} )
  matches = klass.find(:all, :conditions => condition_hash)
  @records[klass] = matches

  matches.any? ? matches.first : klass.new
end
394
+
395
+ private
396
+
397
# Save the load object early, but only when it is valid and has not been
# saved before. Returns the result of save, or nil when no save was attempted.
def save_if_new
  return unless load_object.valid? && load_object.new_record?

  save
end
401
+
402
+ end
403
+
332
404
  end