datashift 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (108) hide show
  1. data/.document +5 -0
  2. data/Gemfile +25 -0
  3. data/Gemfile.lock +211 -0
  4. data/LICENSE.txt +27 -0
  5. data/README.markdown +286 -0
  6. data/README.rdoc +19 -0
  7. data/Rakefile +96 -0
  8. data/VERSION +5 -0
  9. data/bin/autospec +16 -0
  10. data/bin/convert_to_should_syntax +16 -0
  11. data/bin/erubis +16 -0
  12. data/bin/htmldiff +16 -0
  13. data/bin/jeweler +16 -0
  14. data/bin/ldiff +16 -0
  15. data/bin/nokogiri +16 -0
  16. data/bin/rackup +16 -0
  17. data/bin/rails +16 -0
  18. data/bin/rake +16 -0
  19. data/bin/rake2thor +16 -0
  20. data/bin/ri +16 -0
  21. data/bin/rspec +16 -0
  22. data/bin/spree +16 -0
  23. data/bin/thor +16 -0
  24. data/bin/tilt +16 -0
  25. data/bin/tt +16 -0
  26. data/datashift.gemspec +178 -0
  27. data/lib/applications/jruby/jexcel_file.rb +397 -0
  28. data/lib/applications/jruby/word.rb +79 -0
  29. data/lib/datashift.rb +114 -0
  30. data/lib/datashift/exceptions.rb +12 -0
  31. data/lib/datashift/file_definitions.rb +353 -0
  32. data/lib/datashift/mapping_file_definitions.rb +88 -0
  33. data/lib/datashift/method_detail.rb +237 -0
  34. data/lib/datashift/method_mapper.rb +257 -0
  35. data/lib/generators/csv_generator.rb +36 -0
  36. data/lib/generators/excel_generator.rb +122 -0
  37. data/lib/generators/generator_base.rb +14 -0
  38. data/lib/helpers/core_ext/to_b.rb +24 -0
  39. data/lib/helpers/spree_helper.rb +131 -0
  40. data/lib/java/poi-3.7/._poi-3.7-20101029.jar5645100390082102460.tmp +0 -0
  41. data/lib/java/poi-3.7/LICENSE +507 -0
  42. data/lib/java/poi-3.7/NOTICE +21 -0
  43. data/lib/java/poi-3.7/RELEASE_NOTES.txt +115 -0
  44. data/lib/java/poi-3.7/lib/commons-logging-1.1.jar +0 -0
  45. data/lib/java/poi-3.7/lib/junit-3.8.1.jar +0 -0
  46. data/lib/java/poi-3.7/lib/log4j-1.2.13.jar +0 -0
  47. data/lib/java/poi-3.7/ooxml-lib/dom4j-1.6.1.jar +0 -0
  48. data/lib/java/poi-3.7/ooxml-lib/geronimo-stax-api_1.0_spec-1.0.jar +0 -0
  49. data/lib/java/poi-3.7/ooxml-lib/xmlbeans-2.3.0.jar +0 -0
  50. data/lib/java/poi-3.7/poi-3.7-20101029.jar +0 -0
  51. data/lib/java/poi-3.7/poi-examples-3.7-20101029.jar +0 -0
  52. data/lib/java/poi-3.7/poi-ooxml-3.7-20101029.jar +0 -0
  53. data/lib/java/poi-3.7/poi-ooxml-schemas-3.7-20101029.jar +0 -0
  54. data/lib/java/poi-3.7/poi-scratchpad-3.7-20101029.jar +0 -0
  55. data/lib/loaders/csv_loader.rb +99 -0
  56. data/lib/loaders/excel_loader.rb +150 -0
  57. data/lib/loaders/loader_base.rb +332 -0
  58. data/lib/loaders/spreadsheet_loader.rb +137 -0
  59. data/lib/loaders/spree/image_loader.rb +46 -0
  60. data/lib/loaders/spree/product_loader.rb +225 -0
  61. data/spec/csv_loader_spec.rb +31 -0
  62. data/spec/datashift_spec.rb +27 -0
  63. data/spec/db/migrate/20110803201325_create_test_bed.rb +85 -0
  64. data/spec/excel_generator_spec.rb +79 -0
  65. data/spec/excel_loader_spec.rb +177 -0
  66. data/spec/file_definitions.rb +141 -0
  67. data/spec/fixtures/BadAssociationName.xls +0 -0
  68. data/spec/fixtures/DemoNegativeTesting.xls +0 -0
  69. data/spec/fixtures/ProjectsMultiCategories.xls +0 -0
  70. data/spec/fixtures/ProjectsSingleCategories.xls +0 -0
  71. data/spec/fixtures/SimpleProjects.xls +0 -0
  72. data/spec/fixtures/config/database.yml +25 -0
  73. data/spec/fixtures/interact_models_db.sqlite +0 -0
  74. data/spec/fixtures/interact_spree_db.sqlite +0 -0
  75. data/spec/fixtures/negative/SpreeProdMiss1Mandatory.csv +4 -0
  76. data/spec/fixtures/negative/SpreeProdMiss1Mandatory.xls +0 -0
  77. data/spec/fixtures/negative/SpreeProdMissManyMandatory.csv +4 -0
  78. data/spec/fixtures/negative/SpreeProdMissManyMandatory.xls +0 -0
  79. data/spec/fixtures/simple_export_spec.xls +0 -0
  80. data/spec/fixtures/simple_template_spec.xls +0 -0
  81. data/spec/fixtures/spree/SpreeProducts.csv +4 -0
  82. data/spec/fixtures/spree/SpreeProducts.xls +0 -0
  83. data/spec/fixtures/spree/SpreeProductsMultiColumn.csv +4 -0
  84. data/spec/fixtures/spree/SpreeProductsMultiColumn.xls +0 -0
  85. data/spec/fixtures/spree/SpreeProductsSimple.csv +4 -0
  86. data/spec/fixtures/spree/SpreeProductsSimple.xls +0 -0
  87. data/spec/fixtures/spree/SpreeZoneExample.csv +5 -0
  88. data/spec/fixtures/spree/SpreeZoneExample.xls +0 -0
  89. data/spec/fixtures/test_model_defs.rb +57 -0
  90. data/spec/loader_spec.rb +121 -0
  91. data/spec/method_mapper_spec.rb +238 -0
  92. data/spec/spec_helper.rb +116 -0
  93. data/spec/spree_generator_spec.rb +65 -0
  94. data/spec/spree_loader_spec.rb +311 -0
  95. data/spec/spree_method_mapping_spec.rb +215 -0
  96. data/tasks/config/seed_fu_product_template.erb +15 -0
  97. data/tasks/config/tidy_config.txt +13 -0
  98. data/tasks/db_tasks.rake +65 -0
  99. data/tasks/excel_generator.rake +79 -0
  100. data/tasks/file_tasks.rake +37 -0
  101. data/tasks/import/csv.rake +50 -0
  102. data/tasks/import/excel.rake +67 -0
  103. data/tasks/spree/image_load.rake +109 -0
  104. data/tasks/spree/product_loader.rake +44 -0
  105. data/tasks/word_to_seedfu.rake +167 -0
  106. data/test/helper.rb +18 -0
  107. data/test/test_interact.rb +7 -0
  108. metadata +301 -0
data/lib/datashift.rb ADDED
@@ -0,0 +1,114 @@
1
+ # Copyright:: (c) Autotelik Media Ltd 2011
2
+ # Author :: Tom Statter
3
+ # Date :: Aug 2010
4
+ # License:: TBD. Free, Open Source. MIT ?
5
+ #
6
+ # Details:: Active Record Loader
7
+ #
8
+ require 'rbconfig'
9
+
module DataShift

  # Predicates describing the Ruby interpreter and the host operating system.
  # Every predicate returns a strict true or false.
  module Guards

    # True when RUBY_PLATFORM is "java" (i.e. running under JRuby).
    def self.jruby?
      RUBY_PLATFORM == "java"
    end

    # True when RbConfig reports a darwin target OS.
    def self.mac?
      target_os_matches?(/darwin/i)
    end

    # True when RbConfig reports a linux target OS.
    def self.linux?
      target_os_matches?(/linux/i)
    end

    # True when RbConfig reports an mswin or mingw target OS.
    def self.windows?
      target_os_matches?(/mswin|mingw/i)
    end

    # Matches RbConfig's 'target_os' entry against pattern. The double negation
    # turns the Integer/nil result of =~ into true/false so that all predicates
    # in this module agree with jruby? on their return type.
    def self.target_os_matches?(pattern)
      !!(RbConfig::CONFIG['target_os'] =~ pattern)
    end
    private_class_method :target_os_matches?

  end

  if Guards.jruby?
    require 'java'

    # NOTE(review): because this sits inside `module DataShift`, it defines
    # DataShift::Object rather than reopening the global ::Object, so
    # add_to_classpath is only available on DataShift::Object instances.
    # Left unchanged as the callers are not visible here - confirm whether
    # `class ::Object` was intended.
    class Object
      # Appends a path (given relative to the gem's lib directory, with either
      # kind of slash) to the JRuby $CLASSPATH.
      def add_to_classpath(path)
        $CLASSPATH << File.join(DataShift.root_path, 'lib', path.gsub("\\", "/"))
      end
    end
  end

  # Returns the "x.y.z" version string found in the VERSION file at the gem
  # root, or nil when the file contains no such string. The result is memoised
  # once found.
  #
  # The file is resolved against root_path so the lookup no longer depends on
  # the current working directory.
  #
  # Raises Errno::ENOENT (from File.read) when the VERSION file is missing.
  def self.gem_version
    @gem_version ||= extract_version(File.read(File.join(root_path, 'VERSION')))
  end

  # Returns the first dotted "digits.digits.digits" sequence in text, or nil.
  # The dots are escaped and there is no greedy prefix, so "1.2.345" is
  # returned whole rather than truncated to "2.345".
  def self.extract_version(text)
    text[/\d+\.\d+\.\d+/]
  end
  private_class_method :extract_version

  # Name of the gem.
  def self.gem_name
    "datashift"
  end

  # Absolute path of the directory one level above this file's directory.
  def self.root_path
    File.expand_path("#{File.dirname(__FILE__)}/..")
  end

  # Absolute path of the 'lib' directory under root_path.
  def self.library_path
    File.expand_path("#{File.dirname(__FILE__)}/../lib")
  end

  # Puts lib and every directory beneath it on the load path, then requires
  # each *.rb file found directly inside lib/datashift, lib/loaders and
  # lib/helpers.
  #
  # Glob results are sorted so the require order is the same on every platform
  # and Ruby version, and a directory already on the load path is not added a
  # second time when this method is called again.
  def self.require_libraries

    loader_libs = %w{ lib }

    # Base search paths - these will be searched recursively
    loader_paths = loader_libs.collect { |l| File.join(root_path, l) }

    # Any directory found under a base path is added to the load path
    loader_paths.each do |base|
      candidates = [base] + Dir[File.join(base, '**', '**')].sort
      candidates.each do |dir|
        $:.unshift(dir) if File.directory?(dir) && !$:.include?(dir)
      end
    end

    require_libs = %w{ datashift loaders helpers }

    require_libs.each do |base|
      Dir[File.join(library_path, base, '*.rb')].sort.each do |rb|
        require rb unless File.directory?(rb)
      end
    end

  end

  # Loads every *.rake file found under root_path/tasks.
  def self.load_tasks
    # Long parameter lists so ensure rake -T produces nice wide output
    ENV['RAKE_COLUMNS'] = '180'
    base = File.join(root_path, 'tasks', '**')
    Dir["#{base}/*.rake"].sort.each { |ext| load ext }
  end

  require 'logger'

  # Directory holding the log file: a 'logs' directory beside this file.
  def self.logdir
    @logdir ||= File.dirname(__FILE__) + '/logs'
  end

  # Memoised Logger writing to datashift.log inside logdir.
  #
  # The directory is created on first use when it is missing; Logger.new
  # does not create missing parent directories itself.
  def self.logger
    @logger ||= begin
      Dir.mkdir(logdir) unless File.directory?(logdir)
      Logger.new(File.join(logdir, 'datashift.log'))
    end
  end

end
113
+
114
+ DataShift::require_libraries
@@ -0,0 +1,12 @@
# Error classes for the DataShift namespace. All of them derive directly from
# StandardError and add no behaviour of their own; the places that raise them
# are not part of this file.
module DataShift

  class BadRuby < StandardError
  end

  class UnsupportedFileType < StandardError
  end

  class MappingDefinitionError < StandardError
  end

  class MissingHeadersError < StandardError
  end

  class MissingMandatoryError < StandardError
  end

end
@@ -0,0 +1,353 @@
1
+ # Copyright:: (c) Autotelik Media Ltd 2011
2
+ # Author :: Tom Statter
3
+ # Date :: Jan 2011
4
+ # License:: MIT
5
+ #
6
+ # Details:: This module acts as helpers for defining input/output file formats as classes.
7
+ #
8
+ # It provides a simple interface to define a file structure - field by field.
9
+ #
10
+ # By defining the structure, following methods and attributes are mixed in :
11
+ #
12
+ # An attribute, with accessor for each field/column.
13
+ # Parse a line, assigning values to each attribute.
14
+ # Parse an instance of that file line by line, accepts a block in which data can be processed.
15
+ # Method to split a file by field.
16
+ # Method to perform replace operations on a file by field and value.
17
+ #
18
+ # Either delimited or a fixed width definition can be created via macro-like class methods :
19
+ #
20
+ # create_field_definition [field_list]
21
+ #
22
+ # create_fixed_definition {field => range }
23
+ #
24
+ # Member attributes, with getters and setters, can be added for each field defined above via class method :
25
+ #
26
+ # create_field_attr_accessors
27
+ #
28
+ # USAGE :
29
+ #
30
+ # Create a class that contains definition of a file.
31
+ #
32
+ # class ExampleFixedWith < FileDefinitionBase
33
+ # create_fixed_definition(:name => (0..7), :value => (8..15), :ccy => (16..18), :dr_or_cr => (19..19) )
34
+ #
35
+ # create_field_attr_accessors
36
+ # end
37
+ #
38
+ # class ExampleCSV < FileDefinitionBase
39
+ # create_field_definition %w{abc def ghi jkl}
40
+ #
41
+ # create_field_attr_accessors
42
+ # end
43
+ #
44
+ # Any instance can then be used to parse the defined file type, with each field or column value
45
+ # being assigned automatically to the associated instance variable.
46
+ #
47
+ # line = '1,2,3,4'
48
+ # x = ExampleCSV.new( line )
49
+ #
50
+ # assert x.respond_to? :jkl
51
+ # assert_equal x.abc, '1'
52
+ # assert_equal x.jkl.to_i, 4
53
+ #
# Mixin for classes that describe the layout of a delimited or fixed width
# text file. Including it extends the class with ClassMethods (the
# definition macros and whole-file helpers) and gives instances the per-line
# state and parsing behaviour.
#
# NOTE(review): split_on, split_on_write and the file_set_field* helpers call
# `log`, and split_on_write calls RecsBase.write; neither is defined in this
# module, so the including class (or something it inherits) must provide them.
module FileDefinitions

  include Enumerable

  # Record key; starts life as an empty String. Presumably filled in by an
  # optional generate_key hook on the including class - TODO confirm.
  attr_accessor :key

  # The raw line handed to the most recent parse call.
  attr_accessor :current_line

  # Set the delimiter to use when splitting a line - can be either a String, or a Regexp
  attr_writer :field_delim

  # line - optional line to parse straight away.
  def initialize(line = nil)
    @key = String.new
    parse(line) unless line.nil?
  end

  # Hook : gives the including class the definition macros and records it
  # in FileDefinitions.subclasses.
  def self.included(base)
    base.extend(ClassMethods)
    subclasses << base
  end

  # Every class that has included this module so far.
  def self.subclasses
    @subclasses ||= []
  end

  # Return the field delimiter used when splitting a line (',' unless set)
  def field_delim
    @field_delim || ','
  end

  # Parse each line of a file based on the field definition, yielding self for
  # each successive line. The SAME object is re-parsed and yielded every time,
  # so keep copies of values rather than the yielded object.
  #
  # Lines are parsed as read, including their line terminator. The file is
  # closed once iteration finishes or is abandoned.
  def each(file)
    File.foreach(file) do |line|
      parse(line)
      yield self
    end
  end

  # Current value of every defined field, in field_definition order.
  def fields
    @fields = self.class.field_definition.collect { |f| instance_variable_get("@#{f}") }
  end

  # The field values joined with ',' (always a comma, whatever field_delim is).
  def to_s
    fields.join(',')
  end

  # Instance level convenience for the class level file_set_field.
  #
  # Open and parse a file, replacing a value in the specified field(s).
  # Does not update the file itself. Does not write a new output file.
  #
  # Returns two arrays : the updated lines, and the parsed objects.
  def file_set_field(file_name, field, old_value, new_value, regex = nil)
    self.class.file_set_field(file_name, field, old_value, new_value, regex)
  end

  module ClassMethods

    # Define a delimited layout : stores the list of field names and generates
    # the instance method parse(line).
    #
    #   e.g  create_field_definition %w{ trade_id drOrCr ccy costCentre postingDate amount }
    #
    # Names may be given as separate arguments or as one array; they are
    # flattened and split on whitespace. A field list already present (for
    # example one built with add_field) is kept.
    #
    # The generated parse splits the line on field_delim and assigns values to
    # fields by position. Columns beyond the defined fields are ignored, and
    # fields with no column on the line are set to nil, so an object that is
    # parsed repeatedly never keeps values from an earlier line. Optional
    # before_parse, after_parse and generate_key hooks are called when the
    # object responds to them.
    def create_field_definition(*fields)
      @field_definition ||= fields.flatten.join(' ').split

      # Class level reader for the list stored above
      (class << self; self; end).send(:attr_reader, :field_definition)

      define_method(:parse) do |line|
        @current_line = line
        before_parse if respond_to? :before_parse
        values = @current_line.split(field_delim)
        (self.class.field_definition || []).each_with_index do |field, i|
          instance_variable_set("@#{field}", values[i])
        end
        after_parse if respond_to? :after_parse
        generate_key if respond_to? :generate_key
      end
    end

    # Append a single field name to the definition, optionally (by default)
    # creating an attr_accessor for it.
    #
    # NOTE(review): this does not generate the field_definition reader or
    # parse; those only come from the create_*_definition macros.
    def add_field(field, add_accessor = true)
      @field_definition ||= []
      @field_definition << field.to_s
      attr_accessor field if add_accessor
    end

    # Define a fixed width layout from a Hash of field name => Range of
    # character positions, and generate the instance method parse(line).
    #
    #   e.g  create_fixed_definition(:name => (0..7), :value => (8..15))
    #
    # Creates the class level readers fixed_definition (name => range, names
    # as Strings) and field_definition (the names). The generated parse
    # assigns @current_line[range] to each field and calls the same optional
    # hooks as the delimited version.
    #
    # Raises ArgumentError unless a Hash is supplied.
    def create_fixed_definition(field_range_map)
      raise ArgumentError, 'Please supply hash to create_fixed_definition' unless field_range_map.is_a?(Hash)

      keys       = field_range_map.keys.collect(&:to_s)
      string_map = Hash[keys.zip(field_range_map.values)]

      # The readers initialise lazily from the values captured here, so a
      # subclass of the defining class gets its own copy on first use.
      metaclass = (class << self; self; end)
      metaclass.send(:define_method, :fixed_definition) { @fixed_definition ||= string_map.dup }
      metaclass.send(:define_method, :field_definition) { @field_definition ||= keys.dup }

      define_method(:parse) do |line|
        @current_line = line
        before_parse if respond_to? :before_parse
        self.class.fixed_definition.each do |key, range|
          instance_variable_set("@#{key}", @current_line[range])
        end
        after_parse if respond_to? :after_parse
        generate_key if respond_to? :generate_key
      end
    end

    # Create accessors for each field
    def create_field_attr_accessors
      field_definition.each { |f| attr_accessor f }
    end


    ###############################
    # PARSING + FILE MANIPULATION #
    ###############################

    # Parse a complete file and return an array with one new object per line.
    #
    # Options:
    #
    #   :limit => maximum number of lines to read
    #
    # Lines are parsed as read, including their line terminator.
    def parse_file(file, options = {})
      limit = options[:limit]
      lines = []
      File.foreach(file) do |line|
        break if limit && lines.size >= limit
        lines << new(line)
      end
      lines
    end

    # Split a file, whose field definition is represented by self,
    # into separate streams, based on the values of one of its fields.
    #
    # Writes the results via RecsBase.write, one file per stream and named
    # "<field>_<value>.csv", to the directory given by output_path
    # ('.' when nil).
    #
    # Options (also passed through to split_on):
    #
    #   :keys   => Also write "keys_<field>_<value>.csv" files holding the key of each object
    #
    #   :filter => Optional regular expression source; only field values matching it are kept.
    #              For example to split by ccy but only include certain ccys
    #                 :filter => 'GBP|USD'
    #
    def split_on_write(file_name, field, output_path, options = {})

      path = output_path || '.'

      filtered = split_on(file_name, field, options)

      unless filtered.empty?
        log :info, "Writing seperate streams to #{path}"

        if options.key?(:keys)
          filtered.each do |strm, objects|
            RecsBase::write({"keys_#{field}_#{strm}.csv" => objects.collect(&:key).join("\n")}, path)
          end
        end

        filtered.each do |strm, objects|
          RecsBase::write({"#{field}_#{strm}.csv" => objects.collect(&:current_line).join("\n")}, path)
        end
      end
    end

    # Split a file, whose field definition is represented by self,
    # into separate streams, based on one of its fields.
    #
    # Returns a Hash of field value => array of parsed objects. The objects
    # are returned rather than the raw text as they carry the parsed fields;
    # the raw line is available from each object's current_line.
    #
    # Every line is read with its line terminator removed, including a final
    # line that has no terminator. Lines where the field is nil (such as blank
    # lines) are skipped. When the field is not defined on this class a
    # warning is logged and an empty Hash is returned.
    #
    # Options:
    #
    #   :filter => Optional regular expression source; only field values matching it are kept.
    #                 :filter => 'GBP|USD|EUR'
    #
    #   :sort   => When set, each stream is sorted in place; this relies on
    #              the objects implementing <=>.
    #
    def split_on(file_name, field, options = {})

      regex = options[:filter] ? Regexp.new(options[:filter]) : nil

      log :debug, "Using REGEX: #{regex.inspect}" if regex

      filtered = {}

      if new.respond_to?(field)

        log :info, "Splitting on #{field}"

        File.foreach(file_name) do |line|
          # chomp (not chomp!) : chomp! returns nil when there is nothing to
          # strip, which used to drop a final line with no terminator
          record = new(line.chomp)

          value = record.send(field.to_sym) # the actual field value from the specified field column
          next if value.nil?

          (filtered[value] ||= []) << record if regex.nil? || value.match(regex)
        end
      else
        log :warn, "Field [#{field}] not defined for file definition #{name}"
      end

      filtered.each_value { |records| records.sort! } if options[:sort]

      filtered
    end

    # Open and parse a file, replacing values in the specified field(s)
    # according to value_map (old value => new value).
    # Does not update the file itself. Does not write a new output file.
    #
    # Returns :
    #   1) full collection of updated lines, each with its original line terminator
    #   2) collection of file def objects, with updated values.
    #
    # A blank line is passed through unchanged and paired with an unparsed object.
    #
    # fields is either an array of field names or a comma separated list.
    #
    # A field is replaced when value_map holds a (non nil) entry for its
    # current value.
    #
    # NOTE(review): the regex branch of the original could never run, so its
    # intended meaning is uncertain. As implemented here, a value with no
    # exact entry that matches regex is replaced with the value of the first
    # value_map entry whose key also matches regex; with no such key the field
    # is left alone. Confirm this is what callers want.
    #
    # Raises ArgumentError when a field is not defined on this class.
    def file_set_field_by_map(file_name, fields, value_map, regex = nil)
      pattern      = regex && Regexp.new(regex)
      fallback_key = pattern && value_map.keys.detect { |k| k.to_s =~ pattern }
      fallback     = fallback_key && value_map[fallback_key]

      replace_field_values(file_name, fields) do |old_value|
        value_map[old_value] || (fallback if old_value && old_value.to_s =~ pattern)
      end
    end

    # Open and parse a file, replacing old_value with new_value in the
    # specified field(s). Does not update the file itself. Does not write a
    # new output file.
    #
    # A field is replaced when it equals old_value or, when regex is supplied,
    # when it matches regex.
    #
    # Returns the same pair as file_set_field_by_map and accepts fields in the
    # same forms.
    def file_set_field(file_name, field, old_value, new_value, regex = nil)
      pattern = regex && Regexp.new(regex)

      replace_field_values(file_name, field) do |current|
        new_value if current == old_value || (pattern && current && current.to_s =~ pattern)
      end
    end

    private

    # Shared driver for the file_set_field* methods.
    #
    # Validates the field names, then parses file_name line by line, yielding
    # the current value of each requested field. A truthy block result
    # replaces the field; nil or false leaves it untouched.
    #
    # Returns the pair [lines, objects].
    def replace_field_values(file_name, fields)
      attribs = fields.is_a?(Array) ? fields : fields.to_s.split(',')
      attribs = attribs.collect { |a| a.to_s.strip }

      probe = new
      attribs.each do |attrib|
        raise ArgumentError, "Field: #{attrib} is not a field on #{name}" unless probe.respond_to?(attrib)
      end

      log :info, "#{name} - updating field(s) #{fields} in #{file_name}"

      lines, objects = [], []

      File.foreach(file_name) do |line|
        body = line.chomp

        if body.empty?
          lines << line
          objects << new
          next
        end

        # Parse without the terminator so the last field can match exactly;
        # the terminator is put back on the output line below.
        record = new(body)

        attribs.each do |attrib|
          ivar = "@#{attrib}"
          replacement = yield(record.instance_variable_get(ivar))
          record.instance_variable_set(ivar, replacement) if replacement
        end

        objects << record
        lines << record.to_s + line[body.length..-1]
      end

      return lines, objects
    end

  end # END class methods

end