ar_loader 0.0.6 → 0.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. data/LICENSE +9 -9
  2. data/README.markdown +268 -221
  3. data/Rakefile +76 -76
  4. data/lib/VERSION +1 -1
  5. data/lib/ar_loader.rb +87 -66
  6. data/lib/ar_loader/exceptions.rb +2 -0
  7. data/lib/{engine → ar_loader}/file_definitions.rb +353 -353
  8. data/lib/{engine → ar_loader}/mapping_file_definitions.rb +87 -87
  9. data/lib/ar_loader/method_detail.rb +257 -0
  10. data/lib/ar_loader/method_mapper.rb +213 -0
  11. data/lib/helpers/jruby/jexcel_file.rb +187 -0
  12. data/lib/{engine → helpers/jruby}/word.rb +79 -70
  13. data/lib/helpers/spree_helper.rb +85 -0
  14. data/lib/loaders/csv_loader.rb +87 -0
  15. data/lib/loaders/excel_loader.rb +132 -0
  16. data/lib/loaders/loader_base.rb +205 -73
  17. data/lib/loaders/spree/image_loader.rb +45 -41
  18. data/lib/loaders/spree/product_loader.rb +140 -91
  19. data/lib/to_b.rb +24 -24
  20. data/spec/csv_loader_spec.rb +27 -0
  21. data/spec/database.yml +19 -6
  22. data/spec/db/migrate/20110803201325_create_test_bed.rb +78 -0
  23. data/spec/excel_loader_spec.rb +113 -98
  24. data/spec/fixtures/BadAssociationName.xls +0 -0
  25. data/spec/fixtures/DemoNegativeTesting.xls +0 -0
  26. data/spec/fixtures/DemoTestModelAssoc.xls +0 -0
  27. data/spec/fixtures/ProjectsMultiCategories.xls +0 -0
  28. data/spec/fixtures/SimpleProjects.xls +0 -0
  29. data/spec/fixtures/SpreeProducts.xls +0 -0
  30. data/spec/fixtures/SpreeZoneExample.csv +5 -0
  31. data/spec/fixtures/SpreeZoneExample.xls +0 -0
  32. data/spec/loader_spec.rb +116 -0
  33. data/spec/logs/test.log +5000 -0
  34. data/spec/method_mapper_spec.rb +222 -0
  35. data/spec/models.rb +55 -0
  36. data/spec/spec_helper.rb +85 -18
  37. data/spec/spree_loader_spec.rb +223 -157
  38. data/tasks/config/seed_fu_product_template.erb +15 -15
  39. data/tasks/config/tidy_config.txt +12 -12
  40. data/tasks/db_tasks.rake +64 -64
  41. data/tasks/excel_loader.rake +63 -113
  42. data/tasks/file_tasks.rake +36 -37
  43. data/tasks/loader.rake +45 -0
  44. data/tasks/spree/image_load.rake +108 -107
  45. data/tasks/spree/product_loader.rake +49 -107
  46. data/tasks/word_to_seedfu.rake +166 -166
  47. metadata +66 -61
  48. data/lib/engine/jruby/jexcel_file.rb +0 -182
  49. data/lib/engine/jruby/method_mapper_excel.rb +0 -44
  50. data/lib/engine/method_detail.rb +0 -140
  51. data/lib/engine/method_mapper.rb +0 -157
  52. data/lib/engine/method_mapper_csv.rb +0 -28
  53. data/spec/db/migrate/20110803201325_create_testbed.rb +0 -25
data/Rakefile CHANGED
@@ -1,76 +1,76 @@
1
- require 'rubygems'
2
- require 'rake'
3
- require 'rake/clean'
4
- require 'rake/gempackagetask'
5
- require 'rake/rdoctask'
6
- require 'rake/testtask'
7
- require "lib/ar_loader"
8
-
9
- # Copyright:: (c) Autotelik Media Ltd 2011
10
- # Author :: Tom Statter
11
- # Date :: Aug 2010
12
- #
13
- # License:: MIT - Free, OpenSource
14
- #
15
- # Details:: Gem::Specification for Active Record Loader gem.
16
- #
17
- # Specifically enabled for uploading Spree products but easily
18
- # extended to any AR model.
19
- #
20
- # Currently support direct access to Excel Spreedsheets via JRuby
21
- #
22
- # TODO - Switch for non JRuby Rubies, enable load via CSV file instead of Excel.
23
- #
24
- ArLoader::require_tasks
25
-
26
- spec = Gem::Specification.new do |s|
27
- s.name = ArLoader.gem_name
28
- s.version = ArLoader.gem_version
29
- s.has_rdoc = true
30
- s.extra_rdoc_files = ['README.markdown', 'LICENSE']
31
- s.summary = 'File based loader for Active Record models'
32
- s.description = 'A file based loader for Active Record models. Seed database directly from Excel/CSV. Includes rake support for Spree'
33
- s.author = 'thomas statter'
34
- s.email = 'rubygems@autotelik.co.uk'
35
- s.date = DateTime.now.strftime("%Y-%m-%d")
36
- s.homepage = %q{http://www.autotelik.co.uk}
37
-
38
- # s.executables = ['your_executable_here']
39
- s.files = %w(LICENSE README.markdown Rakefile) + Dir.glob("{lib,spec,tasks}/**/*")
40
- s.require_path = "lib"
41
- s.bindir = "bin"
42
- end
43
-
44
- Rake::GemPackageTask.new(spec) do |p|
45
- p.gem_spec = spec
46
- p.need_tar = true
47
- p.need_zip = true
48
- end
49
-
50
- Rake::RDocTask.new do |rdoc|
51
- files =['README.markdown', 'LICENSE', 'lib/**/*.rb']
52
- rdoc.rdoc_files.add(files)
53
- rdoc.main = "README.markdown" # page to start on
54
- rdoc.title = "ARLoader Docs"
55
- rdoc.rdoc_dir = 'doc/rdoc' # rdoc output folder
56
- rdoc.options << '--line-numbers'
57
- end
58
-
59
- Rake::TestTask.new do |t|
60
- t.test_files = FileList['test/**/*.rb']
61
- end
62
-
63
- # Add in our own Tasks
64
-
65
- # Long parameter lists so ensure rake -T produces nice wide output
66
- ENV['RAKE_COLUMNS'] = '180'
67
-
68
- desc 'Build gem and install in one step'
69
- task :pik_install, :needs => [:gem] do |t, args|
70
-
71
- puts "Installing version #{ArLoader.gem_version}"
72
-
73
- gem = "#{ArLoader.gem_name}-#{ArLoader.gem_version}.gem"
74
- cmd = "pik gem install --no-ri --no-rdoc pkg\\#{gem}"
75
- system(cmd)
76
- end
1
+ require 'rubygems'
2
+ require 'rake'
3
+ require 'rake/clean'
4
+ require 'rake/gempackagetask'
5
+ require 'rake/rdoctask'
6
+ require 'rake/testtask'
7
+ require "lib/ar_loader"
8
+
9
+ # Copyright:: (c) Autotelik Media Ltd 2011
10
+ # Author :: Tom Statter
11
+ # Date :: Aug 2010
12
+ #
13
+ # License:: MIT - Free, OpenSource
14
+ #
15
+ # Details:: Gem::Specification for Active Record Loader gem.
16
+ #
17
+ # Specifically enabled for uploading Spree products but easily
18
+ # extended to any AR model.
19
+ #
20
+ # Currently support direct access to Excel Spreedsheets via JRuby
21
+ #
22
+ # TODO - Switch for non JRuby Rubies, enable load via CSV file instead of Excel.
23
+ #
24
+ ArLoader::load_tasks
25
+
26
+ spec = Gem::Specification.new do |s|
27
+ s.name = ArLoader.gem_name
28
+ s.version = ArLoader.gem_version
29
+ s.has_rdoc = true
30
+ s.extra_rdoc_files = ['README.markdown', 'LICENSE']
31
+ s.summary = 'File based loader for Active Record models'
32
+ s.description = 'A file based loader for Active Record models. Seed database directly from Excel/CSV. Includes rake support for Spree'
33
+ s.author = 'thomas statter'
34
+ s.email = 'rubygems@autotelik.co.uk'
35
+ s.date = DateTime.now.strftime("%Y-%m-%d")
36
+ s.homepage = %q{http://www.autotelik.co.uk}
37
+
38
+ # s.executables = ['your_executable_here']
39
+ s.files = %w(LICENSE README.markdown Rakefile) + Dir.glob("{lib,spec,tasks}/**/*")
40
+ s.require_path = "lib"
41
+ s.bindir = "bin"
42
+ end
43
+
44
+ Rake::GemPackageTask.new(spec) do |p|
45
+ p.gem_spec = spec
46
+ p.need_tar = true
47
+ p.need_zip = true
48
+ end
49
+
50
+ Rake::RDocTask.new do |rdoc|
51
+ files =['README.markdown', 'LICENSE', 'lib/**/*.rb']
52
+ rdoc.rdoc_files.add(files)
53
+ rdoc.main = "README.markdown" # page to start on
54
+ rdoc.title = "ARLoader Docs"
55
+ rdoc.rdoc_dir = 'doc/rdoc' # rdoc output folder
56
+ rdoc.options << '--line-numbers'
57
+ end
58
+
59
+ Rake::TestTask.new do |t|
60
+ t.test_files = FileList['test/**/*.rb']
61
+ end
62
+
63
+ # Add in our own Tasks
64
+
65
+ # Long parameter lists so ensure rake -T produces nice wide output
66
+ ENV['RAKE_COLUMNS'] = '180'
67
+
68
+ desc 'Build gem and install in one step'
69
+ task :pik_install, :needs => [:gem] do |t, args|
70
+
71
+ puts "Installing version #{ArLoader.gem_version}"
72
+
73
+ gem = "#{ArLoader.gem_name}-#{ArLoader.gem_version}.gem"
74
+ cmd = "pik gem install --no-ri --no-rdoc pkg\\#{gem}"
75
+ system(cmd)
76
+ end
data/lib/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.6
1
+ 0.0.8
data/lib/ar_loader.rb CHANGED
@@ -1,66 +1,87 @@
1
- # Copyright:: (c) Autotelik Media Ltd 2011
2
- # Author :: Tom Statter
3
- # Date :: Aug 2010
4
- # License:: TBD. Free, Open Source. MIT ?
5
- #
6
- # Details:: Active Record Loader
7
- #
8
- require 'active_record'
9
-
10
- module ArLoader
11
-
12
- def self.gem_version
13
- @gem_version ||= File.read( File.join( root_path, 'lib', 'VERSION') ).chomp
14
- @gem_version
15
- end
16
-
17
- def self.gem_name
18
- "ar_loader"
19
- end
20
-
21
- def self.root_path
22
- File.expand_path("#{File.dirname(__FILE__)}/..")
23
- end
24
-
25
- def self.require_libraries
26
-
27
- loader_libs = %w{ lib }
28
-
29
- # Base search paths - these will be searched recursively and any xxx.rake files autoimported
30
- loader_paths = []
31
-
32
- loader_libs.each {|l| loader_paths << File.join(root_path(), l) }
33
-
34
- # Define require search paths, any dir in here will be added to LOAD_PATH
35
-
36
- loader_paths.each do |base|
37
- $:.unshift base if File.directory?(base)
38
- Dir[File.join(base, '**', '**')].each do |p|
39
- if File.directory? p
40
- $:.unshift p
41
- end
42
- end
43
- end
44
-
45
- require__libs = %w{ loaders engine }
46
-
47
- require__libs.each do |base|
48
- Dir[File.join('lib', base, '*.rb')].each do |rb|
49
- unless File.directory? rb
50
- require rb
51
- end
52
- end
53
- end
54
-
55
- end
56
-
57
- def self.require_tasks
58
- # Long parameter lists so ensure rake -T produces nice wide output
59
- ENV['RAKE_COLUMNS'] = '180'
60
- base = File.join(root_path, 'tasks', '**')
61
- Dir["#{base}/*.rake"].sort.each { |ext| load ext }
62
- end
63
-
64
- end
65
-
66
- ArLoader::require_libraries
1
+ # Copyright:: (c) Autotelik Media Ltd 2011
2
+ # Author :: Tom Statter
3
+ # Date :: Aug 2010
4
+ # License:: TBD. Free, Open Source. MIT ?
5
+ #
6
+ # Details:: Active Record Loader
7
+ #
8
+ require 'active_record'
9
+ require 'rbconfig'
10
+
11
+ module Guards
12
+
13
+ def self.jruby?
14
+ return RUBY_PLATFORM == "java"
15
+ end
16
+ def self.mac?
17
+ RbConfig::CONFIG['target_os'] =~ /darwin/i
18
+ end
19
+
20
+ def self.linux?
21
+ RbConfig::CONFIG['target_os'] =~ /linux/i
22
+ end
23
+
24
+ def self.windows?
25
+ RbConfig::CONFIG['target_os'] =~ /mswin|mingw/i
26
+ end
27
+
28
+ end
29
+
30
+ module ArLoader
31
+
32
+ def self.gem_version
33
+ @gem_version ||= File.read( File.join( root_path, 'lib', 'VERSION') ).chomp
34
+ @gem_version
35
+ end
36
+
37
+ def self.gem_name
38
+ "ar_loader"
39
+ end
40
+
41
+ def self.root_path
42
+ File.expand_path("#{File.dirname(__FILE__)}/..")
43
+ end
44
+
45
+
46
+ def self.require_libraries
47
+
48
+ loader_libs = %w{ lib }
49
+
50
+ # Base search paths - these will be searched recursively
51
+ loader_paths = []
52
+
53
+ loader_libs.each {|l| loader_paths << File.join(root_path(), l) }
54
+
55
+ # Define require search paths, any dir in here will be added to LOAD_PATH
56
+
57
+ loader_paths.each do |base|
58
+ $:.unshift base if File.directory?(base)
59
+ Dir[File.join(base, '**', '**')].each do |p|
60
+ if File.directory? p
61
+ $:.unshift p
62
+ end
63
+ end
64
+ end
65
+
66
+ require_libs = %w{ ar_loader loaders helpers }
67
+
68
+ require_libs.each do |base|
69
+ Dir[File.join('lib', base, '*.rb')].each do |rb|
70
+ unless File.directory? rb
71
+ require rb
72
+ end
73
+ end
74
+ end
75
+
76
+ end
77
+
78
+ def self.load_tasks
79
+ # Long parameter lists so ensure rake -T produces nice wide output
80
+ ENV['RAKE_COLUMNS'] = '180'
81
+ base = File.join(root_path, 'tasks', '**')
82
+ Dir["#{base}/*.rake"].sort.each { |ext| load ext }
83
+ end
84
+
85
+ end
86
+
87
+ ArLoader::require_libraries
data/lib/ar_loader/exceptions.rb ADDED
@@ -0,0 +1,2 @@
1
+ class MappingDefinitionError < StandardError
2
+ end
data/lib/{engine → ar_loader}/file_definitions.rb RENAMED
@@ -1,353 +1,353 @@
1
- # Copyright:: (c) Autotelik Media Ltd 2011
2
- # Author :: Tom Statter
3
- # Date :: Jan 2011
4
- # License:: MIT
5
- #
6
- # Details:: This module acts as helpers for defining input/output file formats as classes.
7
- #
8
- # It provides a simple interface to define a file structure - field by field.
9
- #
10
- # By defining the structure, following methods and attributes are mixed in :
11
- #
12
- # An attribute, with accessor for each field/column.
13
- # Parse a line, assigning values to each attribute.
14
- # Parse an instance of that file line by line, accepts a block in which data can be processed.
15
- # Method to split a file by field.
16
- # Method to perform replace operations on a file by field and value.
17
- #
18
- # Either delimited or a fixed width definition can be created via macro-like class methods :
19
- #
20
- # create_field_definition [field_list]
21
- #
22
- # create_fixed_definition {field => range }
23
- #
24
- # Member attributes, with getters and setters, can be added for each field defined above via class method :
25
- #
26
- # create_field_attr_accessors
27
- #
28
- # USAGE :
29
- #
30
- # Create a class that contains definition of a file.
31
- #
32
- # class ExampleFixedWith < FileDefinitionBase
33
- # create_fixed_definition(:name => (0..7), :value => (8..15), :ccy => (16..18), :dr_or_cr => (19..19) )
34
- #
35
- # create_field_attr_accessors
36
- # end
37
- #
38
- # class ExampleCSV < FileDefinitionBase
39
- # create_field_definition %w{abc def ghi jkl}
40
- #
41
- # create_field_attr_accessors
42
- # end
43
- #
44
- # Any instance can then be used to parse the defined file type, with each field or column value
45
- # being assigned automatically to the associated instance variable.
46
- #
47
- # line = '1,2,3,4'
48
- # x = ExampleCSV.new( line )
49
- #
50
- # assert x.responds_to? :jkl
51
- # assert_equal x.abc, '1'
52
- # assert_equal x.jkl.to_i, 4
53
- #
54
- module FileDefinitions
55
-
56
- include Enumerable
57
-
58
- attr_accessor :key
59
- attr_accessor :current_line
60
-
61
- # Set the delimiter to use when splitting a line - can be either a String, or a Regexp
62
- attr_writer :field_delim
63
-
64
- def initialize( line = nil )
65
- @key = String.new
66
- parse(line) unless line.nil?
67
- end
68
-
69
- def self.included(base)
70
- base.extend(ClassMethods)
71
- subclasses << base
72
- end
73
-
74
- def self.subclasses
75
- @subclasses ||=[]
76
- end
77
-
78
-
79
- # Return the field delimiter used when splitting a line
80
- def field_delim
81
- @field_delim || ','
82
- end
83
-
84
- # Parse each line of a file based on the field definition, yields self for each successive line
85
- #
86
- def each( file )
87
- File::new(file).each_line do |line|
88
- parse( line )
89
- yield self
90
- end
91
- end
92
-
93
- def fields
94
- @fields = self.class.field_definition.collect {|f| instance_variable_get "@#{f}" }
95
- @fields
96
- end
97
-
98
- def to_s
99
- fields.join(',')
100
- end
101
-
102
- module ClassMethods
103
-
104
- # Helper to generate methods to store and return the complete list of fields
105
- # in this File definition (also creates member @field_definition) and parse a line.
106
- #
107
- # e.g create_field_definition %w{ trade_id drOrCr ccy costCentre postingDate amount }
108
- #
109
- def create_field_definition( *fields )
110
- instance_eval <<-end_eval
111
- @field_definition ||= %w{ #{fields.join(' ')} }
112
- def field_definition
113
- @field_definition
114
- end
115
- end_eval
116
-
117
- class_eval <<-end_eval
118
- def parse( line )
119
- @current_line = line
120
- before_parse if respond_to? :before_parse
121
- @current_line.split(field_delim()).each_with_index {|x, i| instance_variable_set(\"@\#{self.class.field_definition[i]}\", x) }
122
- after_parse if respond_to? :after_parse
123
- generate_key if respond_to? :generate_key
124
- end
125
- end_eval
126
- end
127
-
128
- def add_field(field, add_accessor = true)
129
- @field_definition ||= []
130
- @field_definition << field.to_s
131
- attr_accessor field if(add_accessor)
132
- end
133
-
134
-
135
- # Helper to generate methods that return the complete list of fixed width fields
136
- # and associated ranges in this File definition, and parse a line.
137
- # e.g create_field_definition %w{ trade_id drOrCr ccy costCentre postingDate amount }
138
- #
139
- def create_fixed_definition( field_range_map )
140
- raise ArgumentError.new('Please supply hash to create_fixed_definition') unless field_range_map.is_a? Hash
141
-
142
- keys = field_range_map.keys.collect(&:to_s)
143
- string_map = Hash[*keys.zip(field_range_map.values).flatten]
144
-
145
- instance_eval <<-end_eval
146
- def fixed_definition
147
- @fixed_definition ||= #{string_map.inspect}
148
- @fixed_definition
149
- end
150
- end_eval
151
-
152
- instance_eval <<-end_eval
153
- def field_definition
154
- @field_definition ||= %w{ #{keys.join(' ')} }
155
- @field_definition
156
- end
157
- end_eval
158
-
159
- class_eval <<-end_eval
160
- def parse( line )
161
- @current_line = line
162
- before_parse if respond_to? :before_parse
163
- self.class.fixed_definition.each do |key, range|
164
- instance_variable_set(\"@\#{key}\", @current_line[range])
165
- end
166
- after_parse if respond_to? :after_parse
167
- generate_key if respond_to? :generate_key
168
- end
169
- end_eval
170
-
171
- end
172
-
173
- # Create accessors for each field
174
- def create_field_attr_accessors
175
- self.field_definition.each {|f| attr_accessor f}
176
- end
177
-
178
-
179
- ###############################
180
- # PARSING + FILE MANIPULATION #
181
- ###############################
182
-
183
- # Parse a complete file and return array of self, one per line
184
- def parse_file( file, options = {} )
185
- limit = options[:limit]
186
- count = 0
187
- lines = []
188
- File::new(file).each_line do |line|
189
- break if limit && ((count += 1) > limit)
190
- lines << self.new( line )
191
- end
192
- lines
193
- end
194
-
195
-
196
-
197
- # Split a file, whose field definition is represented by self,
198
- # into seperate streams, based on the values of one if it's fields.
199
- #
200
- # Writes the results, one file per split stream, to directory specified by output_path
201
- #
202
- # Options:
203
- #
204
- # :keys => Also write split files of the key fields
205
- #
206
- # :filter => Optional Regular Expression to act as filter be applid to the field.
207
- # For example split by Ccy but filter to only include certain ccys pass
208
- # filter => '[GBP|USD]'
209
- #
210
- def split_on_write( file_name, field, output_path, options = {} )
211
-
212
- path = output_path || '.'
213
-
214
- filtered = split_on( file_name, field, options )
215
-
216
- unless filtered.empty?
217
- log :info, "Writing seperate streams to #{path}"
218
-
219
- filtered.each { |strm, objects| RecsBase::write( {"keys_#{field}_#{strm}.csv" => objects.collect(&:key).join("\n")}, path) } if(options.key?(:keys))
220
-
221
- filtered.each { |strm, objects| RecsBase::write( {"#{field}_#{strm}.csv" => objects.collect(&:current_line).join("\n")}, path) }
222
- end
223
- end
224
-
225
- # Split a file, whose field definition is represented by self,
226
- # into seperate streams, based on one if it's fields.
227
- #
228
- # Returns a map of Field value => File def object
229
- #
230
- # We return the File Def object as this is now enriched, e.g with key fields, compared to the raw file.
231
- #
232
- # Users can get at the raw line simply by calling the line() method on File Def object
233
- #
234
- # Options:
235
- #
236
- # :output_path => directory to write the individual streams files to
237
- #
238
- # :filter => Optional Regular Expression to act as filter be applid to the field.
239
- # For example split by Ccy but filter to only include certain ccys pass
240
- # filter => 'GBP|USD|EUR'
241
- #
242
- def split_on( file_name, field, options = {} )
243
-
244
- regex = options[:filter] ? Regexp.new(options[:filter]) : nil
245
-
246
- log :debug, "Using REGEX: #{regex.inspect}" if regex
247
-
248
- filtered = {}
249
-
250
- if( self.new.respond_to?(field) )
251
-
252
- log :info, "Splitting on #{field}"
253
-
254
- File.open( file_name ) do |t|
255
- t.each do |line|
256
- next unless(line && line.chomp!)
257
- x = self.new(line)
258
-
259
- value = x.send( field.to_sym ) # the actual field value from the specified field column
260
- next if value.nil?
261
-
262
- if( regex.nil? || value.match(regex) )
263
- filtered[value] ? filtered[value] << x : filtered[value] = [x]
264
- end
265
- end
266
- end
267
- else
268
- log :warn, "Field [#{field}] nor defined for file definition #{self.class.name}"
269
- end
270
-
271
- if( options[:sort])
272
- filtered.values.each( &:sort )
273
- return filtered
274
- end
275
- return filtered
276
- end
277
-
278
- # Open and parse a file, replacing a value in the specfied field.
279
- # Does not update the file itself. Does not write a new output file.
280
- #
281
- # Returns :
282
- # 1) full collection of updated lines
283
- # 2) collection of file def objects (self), with updated value.
284
- #
285
- # Finds values matching old_value in given map
286
- #
287
- # Replaces matches with new_value in map.
288
- #
289
- # Accepts more than one field, if files is either and array of strings
290
- # or comma seperated list of fields.
291
- #
292
- def file_set_field_by_map( file_name, fields, value_map, regex = nil )
293
-
294
- lines, objects = [],[]
295
-
296
- if fields.is_a?(Array)
297
- attribs = fields
298
- else
299
- attribs = "#{fields}".split(',')
300
- end
301
-
302
- attribs.collect! do |attrib|
303
- raise BadConfigError.new("Field: #{attrib} is not a field on #{self.class.name}") unless self.new.respond_to?(attrib)
304
- end
305
-
306
- log :info, "#{self.class.name} - updating field(s) #{fields} in #{file_name}"
307
-
308
- File.open( file_name ) do |t|
309
- t.each do |line|
310
- if line.chomp.empty?
311
- lines << line
312
- objects << self.new
313
- next
314
- end
315
- x = self.new(line)
316
-
317
- attribs.each do |a|
318
- old_value = x.instance_variable_get( "@#{a}" )
319
- x.instance_variable_set( "@#{a}", value_map[old_value] ) if value_map[old_value] || (regex && old_value.keys.detect {|k| k.match(regx) })
320
- end
321
-
322
- objects << x
323
- lines << x.to_s
324
- end
325
- end
326
-
327
- return lines, objects
328
- end
329
- end # END class methods
330
-
331
- # Open and parse a file, replacing a value in the specfied field.
332
- # Does not update the file itself. Does not write a new output file.
333
- #
334
- # Returns :
335
- # 1) full collection of updated lines
336
- # 2) collection of file def objects (self), with updated value.
337
- #
338
- # Finds values matching old_value, and also accepts an optional regex for more powerful
339
- # matching strategies of values on the specfified field.
340
- #
341
- # Replaces matches with new_value.
342
- #
343
- # Accepts more than one field, if files is either and array of strings
344
- # or comma seperated list of fields.
345
- #
346
- def file_set_field( file_name, field, old_value, new_value, regex = nil )
347
-
348
- map = {old_value => new_value}
349
-
350
- return file_set_field_by_map(file_name, field, map, regex)
351
- end
352
-
353
- end
1
+ # Copyright:: (c) Autotelik Media Ltd 2011
2
+ # Author :: Tom Statter
3
+ # Date :: Jan 2011
4
+ # License:: MIT
5
+ #
6
+ # Details:: This module acts as helpers for defining input/output file formats as classes.
7
+ #
8
+ # It provides a simple interface to define a file structure - field by field.
9
+ #
10
+ # By defining the structure, following methods and attributes are mixed in :
11
+ #
12
+ # An attribute, with accessor for each field/column.
13
+ # Parse a line, assigning values to each attribute.
14
+ # Parse an instance of that file line by line, accepts a block in which data can be processed.
15
+ # Method to split a file by field.
16
+ # Method to perform replace operations on a file by field and value.
17
+ #
18
+ # Either delimited or a fixed width definition can be created via macro-like class methods :
19
+ #
20
+ # create_field_definition [field_list]
21
+ #
22
+ # create_fixed_definition {field => range }
23
+ #
24
+ # Member attributes, with getters and setters, can be added for each field defined above via class method :
25
+ #
26
+ # create_field_attr_accessors
27
+ #
28
+ # USAGE :
29
+ #
30
+ # Create a class that contains definition of a file.
31
+ #
32
+ # class ExampleFixedWith < FileDefinitionBase
33
+ # create_fixed_definition(:name => (0..7), :value => (8..15), :ccy => (16..18), :dr_or_cr => (19..19) )
34
+ #
35
+ # create_field_attr_accessors
36
+ # end
37
+ #
38
+ # class ExampleCSV < FileDefinitionBase
39
+ # create_field_definition %w{abc def ghi jkl}
40
+ #
41
+ # create_field_attr_accessors
42
+ # end
43
+ #
44
+ # Any instance can then be used to parse the defined file type, with each field or column value
45
+ # being assigned automatically to the associated instance variable.
46
+ #
47
+ # line = '1,2,3,4'
48
+ # x = ExampleCSV.new( line )
49
+ #
50
+ # assert x.responds_to? :jkl
51
+ # assert_equal x.abc, '1'
52
+ # assert_equal x.jkl.to_i, 4
53
+ #
54
+ module FileDefinitions
55
+
56
+ include Enumerable
57
+
58
+ attr_accessor :key
59
+ attr_accessor :current_line
60
+
61
+ # Set the delimiter to use when splitting a line - can be either a String, or a Regexp
62
+ attr_writer :field_delim
63
+
64
+ def initialize( line = nil )
65
+ @key = String.new
66
+ parse(line) unless line.nil?
67
+ end
68
+
69
+ def self.included(base)
70
+ base.extend(ClassMethods)
71
+ subclasses << base
72
+ end
73
+
74
+ def self.subclasses
75
+ @subclasses ||=[]
76
+ end
77
+
78
+
79
+ # Return the field delimiter used when splitting a line
80
+ def field_delim
81
+ @field_delim || ','
82
+ end
83
+
84
+ # Parse each line of a file based on the field definition, yields self for each successive line
85
+ #
86
+ def each( file )
87
+ File::new(file).each_line do |line|
88
+ parse( line )
89
+ yield self
90
+ end
91
+ end
92
+
93
+ def fields
94
+ @fields = self.class.field_definition.collect {|f| instance_variable_get "@#{f}" }
95
+ @fields
96
+ end
97
+
98
+ def to_s
99
+ fields.join(',')
100
+ end
101
+
102
+ module ClassMethods
103
+
104
+ # Helper to generate methods to store and return the complete list of fields
105
+ # in this File definition (also creates member @field_definition) and parse a line.
106
+ #
107
+ # e.g create_field_definition %w{ trade_id drOrCr ccy costCentre postingDate amount }
108
+ #
109
+ def create_field_definition( *fields )
110
+ instance_eval <<-end_eval
111
+ @field_definition ||= %w{ #{fields.join(' ')} }
112
+ def field_definition
113
+ @field_definition
114
+ end
115
+ end_eval
116
+
117
+ class_eval <<-end_eval
118
+ def parse( line )
119
+ @current_line = line
120
+ before_parse if respond_to? :before_parse
121
+ @current_line.split(field_delim()).each_with_index {|x, i| instance_variable_set(\"@\#{self.class.field_definition[i]}\", x) }
122
+ after_parse if respond_to? :after_parse
123
+ generate_key if respond_to? :generate_key
124
+ end
125
+ end_eval
126
+ end
127
+
128
+ def add_field(field, add_accessor = true)
129
+ @field_definition ||= []
130
+ @field_definition << field.to_s
131
+ attr_accessor field if(add_accessor)
132
+ end
133
+
134
+
135
+ # Helper to generate methods that return the complete list of fixed width fields
136
+ # and associated ranges in this File definition, and parse a line.
137
+ # e.g create_field_definition %w{ trade_id drOrCr ccy costCentre postingDate amount }
138
+ #
139
+ def create_fixed_definition( field_range_map )
140
+ raise ArgumentError.new('Please supply hash to create_fixed_definition') unless field_range_map.is_a? Hash
141
+
142
+ keys = field_range_map.keys.collect(&:to_s)
143
+ string_map = Hash[*keys.zip(field_range_map.values).flatten]
144
+
145
+ instance_eval <<-end_eval
146
+ def fixed_definition
147
+ @fixed_definition ||= #{string_map.inspect}
148
+ @fixed_definition
149
+ end
150
+ end_eval
151
+
152
+ instance_eval <<-end_eval
153
+ def field_definition
154
+ @field_definition ||= %w{ #{keys.join(' ')} }
155
+ @field_definition
156
+ end
157
+ end_eval
158
+
159
+ class_eval <<-end_eval
160
+ def parse( line )
161
+ @current_line = line
162
+ before_parse if respond_to? :before_parse
163
+ self.class.fixed_definition.each do |key, range|
164
+ instance_variable_set(\"@\#{key}\", @current_line[range])
165
+ end
166
+ after_parse if respond_to? :after_parse
167
+ generate_key if respond_to? :generate_key
168
+ end
169
+ end_eval
170
+
171
+ end
172
+
173
+ # Create accessors for each field
174
+ def create_field_attr_accessors
175
+ self.field_definition.each {|f| attr_accessor f}
176
+ end
177
+
178
+
179
+ ###############################
180
+ # PARSING + FILE MANIPULATION #
181
+ ###############################
182
+
183
+ # Parse a complete file and return array of self, one per line
184
+ def parse_file( file, options = {} )
185
+ limit = options[:limit]
186
+ count = 0
187
+ lines = []
188
+ File::new(file).each_line do |line|
189
+ break if limit && ((count += 1) > limit)
190
+ lines << self.new( line )
191
+ end
192
+ lines
193
+ end
194
+
195
+
196
+
197
+ # Split a file, whose field definition is represented by self,
198
+ # into seperate streams, based on the values of one if it's fields.
199
+ #
200
+ # Writes the results, one file per split stream, to directory specified by output_path
201
+ #
202
+ # Options:
203
+ #
204
+ # :keys => Also write split files of the key fields
205
+ #
206
+ # :filter => Optional Regular Expression to act as filter be applid to the field.
207
+ # For example split by Ccy but filter to only include certain ccys pass
208
+ # filter => '[GBP|USD]'
209
+ #
210
+ def split_on_write( file_name, field, output_path, options = {} )
211
+
212
+ path = output_path || '.'
213
+
214
+ filtered = split_on( file_name, field, options )
215
+
216
+ unless filtered.empty?
217
+ log :info, "Writing seperate streams to #{path}"
218
+
219
+ filtered.each { |strm, objects| RecsBase::write( {"keys_#{field}_#{strm}.csv" => objects.collect(&:key).join("\n")}, path) } if(options.key?(:keys))
220
+
221
+ filtered.each { |strm, objects| RecsBase::write( {"#{field}_#{strm}.csv" => objects.collect(&:current_line).join("\n")}, path) }
222
+ end
223
+ end
224
+
225
+ # Split a file, whose field definition is represented by self,
226
+ # into seperate streams, based on one if it's fields.
227
+ #
228
+ # Returns a map of Field value => File def object
229
+ #
230
+ # We return the File Def object as this is now enriched, e.g with key fields, compared to the raw file.
231
+ #
232
+ # Users can get at the raw line simply by calling the line() method on File Def object
233
+ #
234
+ # Options:
235
+ #
236
+ # :output_path => directory to write the individual streams files to
237
+ #
238
+ # :filter => Optional Regular Expression to act as filter be applid to the field.
239
+ # For example split by Ccy but filter to only include certain ccys pass
240
+ # filter => 'GBP|USD|EUR'
241
+ #
242
+ def split_on( file_name, field, options = {} )
243
+
244
+ regex = options[:filter] ? Regexp.new(options[:filter]) : nil
245
+
246
+ log :debug, "Using REGEX: #{regex.inspect}" if regex
247
+
248
+ filtered = {}
249
+
250
+ if( self.new.respond_to?(field) )
251
+
252
+ log :info, "Splitting on #{field}"
253
+
254
+ File.open( file_name ) do |t|
255
+ t.each do |line|
256
+ next unless(line && line.chomp!)
257
+ x = self.new(line)
258
+
259
+ value = x.send( field.to_sym ) # the actual field value from the specified field column
260
+ next if value.nil?
261
+
262
+ if( regex.nil? || value.match(regex) )
263
+ filtered[value] ? filtered[value] << x : filtered[value] = [x]
264
+ end
265
+ end
266
+ end
267
+ else
268
+ log :warn, "Field [#{field}] nor defined for file definition #{self.class.name}"
269
+ end
270
+
271
+ if( options[:sort])
272
+ filtered.values.each( &:sort )
273
+ return filtered
274
+ end
275
+ return filtered
276
+ end
277
+
278
+ # Open and parse a file, replacing a value in the specfied field.
279
+ # Does not update the file itself. Does not write a new output file.
280
+ #
281
+ # Returns :
282
+ # 1) full collection of updated lines
283
+ # 2) collection of file def objects (self), with updated value.
284
+ #
285
+ # Finds values matching old_value in given map
286
+ #
287
+ # Replaces matches with new_value in map.
288
+ #
289
+ # Accepts more than one field, if files is either and array of strings
290
+ # or comma seperated list of fields.
291
+ #
292
+ def file_set_field_by_map( file_name, fields, value_map, regex = nil )
293
+
294
+ lines, objects = [],[]
295
+
296
+ if fields.is_a?(Array)
297
+ attribs = fields
298
+ else
299
+ attribs = "#{fields}".split(',')
300
+ end
301
+
302
+ attribs.collect! do |attrib|
303
+ raise BadConfigError.new("Field: #{attrib} is not a field on #{self.class.name}") unless self.new.respond_to?(attrib)
304
+ end
305
+
306
+ log :info, "#{self.class.name} - updating field(s) #{fields} in #{file_name}"
307
+
308
+ File.open( file_name ) do |t|
309
+ t.each do |line|
310
+ if line.chomp.empty?
311
+ lines << line
312
+ objects << self.new
313
+ next
314
+ end
315
+ x = self.new(line)
316
+
317
+ attribs.each do |a|
318
+ old_value = x.instance_variable_get( "@#{a}" )
319
+ x.instance_variable_set( "@#{a}", value_map[old_value] ) if value_map[old_value] || (regex && old_value.keys.detect {|k| k.match(regx) })
320
+ end
321
+
322
+ objects << x
323
+ lines << x.to_s
324
+ end
325
+ end
326
+
327
+ return lines, objects
328
+ end
329
+ end # END class methods
330
+
331
+ # Open and parse a file, replacing a value in the specfied field.
332
+ # Does not update the file itself. Does not write a new output file.
333
+ #
334
+ # Returns :
335
+ # 1) full collection of updated lines
336
+ # 2) collection of file def objects (self), with updated value.
337
+ #
338
+ # Finds values matching old_value, and also accepts an optional regex for more powerful
339
+ # matching strategies of values on the specfified field.
340
+ #
341
+ # Replaces matches with new_value.
342
+ #
343
+ # Accepts more than one field, if files is either and array of strings
344
+ # or comma seperated list of fields.
345
+ #
346
+ def file_set_field( file_name, field, old_value, new_value, regex = nil )
347
+
348
+ map = {old_value => new_value}
349
+
350
+ return file_set_field_by_map(file_name, field, map, regex)
351
+ end
352
+
353
+ end