ar_loader 0.0.6 → 0.0.8

Sign up to get free protection for your applications and to get access to all the features.
Files changed (53) hide show
  1. data/LICENSE +9 -9
  2. data/README.markdown +268 -221
  3. data/Rakefile +76 -76
  4. data/lib/VERSION +1 -1
  5. data/lib/ar_loader.rb +87 -66
  6. data/lib/ar_loader/exceptions.rb +2 -0
  7. data/lib/{engine → ar_loader}/file_definitions.rb +353 -353
  8. data/lib/{engine → ar_loader}/mapping_file_definitions.rb +87 -87
  9. data/lib/ar_loader/method_detail.rb +257 -0
  10. data/lib/ar_loader/method_mapper.rb +213 -0
  11. data/lib/helpers/jruby/jexcel_file.rb +187 -0
  12. data/lib/{engine → helpers/jruby}/word.rb +79 -70
  13. data/lib/helpers/spree_helper.rb +85 -0
  14. data/lib/loaders/csv_loader.rb +87 -0
  15. data/lib/loaders/excel_loader.rb +132 -0
  16. data/lib/loaders/loader_base.rb +205 -73
  17. data/lib/loaders/spree/image_loader.rb +45 -41
  18. data/lib/loaders/spree/product_loader.rb +140 -91
  19. data/lib/to_b.rb +24 -24
  20. data/spec/csv_loader_spec.rb +27 -0
  21. data/spec/database.yml +19 -6
  22. data/spec/db/migrate/20110803201325_create_test_bed.rb +78 -0
  23. data/spec/excel_loader_spec.rb +113 -98
  24. data/spec/fixtures/BadAssociationName.xls +0 -0
  25. data/spec/fixtures/DemoNegativeTesting.xls +0 -0
  26. data/spec/fixtures/DemoTestModelAssoc.xls +0 -0
  27. data/spec/fixtures/ProjectsMultiCategories.xls +0 -0
  28. data/spec/fixtures/SimpleProjects.xls +0 -0
  29. data/spec/fixtures/SpreeProducts.xls +0 -0
  30. data/spec/fixtures/SpreeZoneExample.csv +5 -0
  31. data/spec/fixtures/SpreeZoneExample.xls +0 -0
  32. data/spec/loader_spec.rb +116 -0
  33. data/spec/logs/test.log +5000 -0
  34. data/spec/method_mapper_spec.rb +222 -0
  35. data/spec/models.rb +55 -0
  36. data/spec/spec_helper.rb +85 -18
  37. data/spec/spree_loader_spec.rb +223 -157
  38. data/tasks/config/seed_fu_product_template.erb +15 -15
  39. data/tasks/config/tidy_config.txt +12 -12
  40. data/tasks/db_tasks.rake +64 -64
  41. data/tasks/excel_loader.rake +63 -113
  42. data/tasks/file_tasks.rake +36 -37
  43. data/tasks/loader.rake +45 -0
  44. data/tasks/spree/image_load.rake +108 -107
  45. data/tasks/spree/product_loader.rake +49 -107
  46. data/tasks/word_to_seedfu.rake +166 -166
  47. metadata +66 -61
  48. data/lib/engine/jruby/jexcel_file.rb +0 -182
  49. data/lib/engine/jruby/method_mapper_excel.rb +0 -44
  50. data/lib/engine/method_detail.rb +0 -140
  51. data/lib/engine/method_mapper.rb +0 -157
  52. data/lib/engine/method_mapper_csv.rb +0 -28
  53. data/spec/db/migrate/20110803201325_create_testbed.rb +0 -25
data/Rakefile CHANGED
@@ -1,76 +1,76 @@
1
- require 'rubygems'
2
- require 'rake'
3
- require 'rake/clean'
4
- require 'rake/gempackagetask'
5
- require 'rake/rdoctask'
6
- require 'rake/testtask'
7
- require "lib/ar_loader"
8
-
9
- # Copyright:: (c) Autotelik Media Ltd 2011
10
- # Author :: Tom Statter
11
- # Date :: Aug 2010
12
- #
13
- # License:: MIT - Free, OpenSource
14
- #
15
- # Details:: Gem::Specification for Active Record Loader gem.
16
- #
17
- # Specifically enabled for uploading Spree products but easily
18
- # extended to any AR model.
19
- #
20
- # Currently support direct access to Excel Spreedsheets via JRuby
21
- #
22
- # TODO - Switch for non JRuby Rubies, enable load via CSV file instead of Excel.
23
- #
24
- ArLoader::require_tasks
25
-
26
- spec = Gem::Specification.new do |s|
27
- s.name = ArLoader.gem_name
28
- s.version = ArLoader.gem_version
29
- s.has_rdoc = true
30
- s.extra_rdoc_files = ['README.markdown', 'LICENSE']
31
- s.summary = 'File based loader for Active Record models'
32
- s.description = 'A file based loader for Active Record models. Seed database directly from Excel/CSV. Includes rake support for Spree'
33
- s.author = 'thomas statter'
34
- s.email = 'rubygems@autotelik.co.uk'
35
- s.date = DateTime.now.strftime("%Y-%m-%d")
36
- s.homepage = %q{http://www.autotelik.co.uk}
37
-
38
- # s.executables = ['your_executable_here']
39
- s.files = %w(LICENSE README.markdown Rakefile) + Dir.glob("{lib,spec,tasks}/**/*")
40
- s.require_path = "lib"
41
- s.bindir = "bin"
42
- end
43
-
44
- Rake::GemPackageTask.new(spec) do |p|
45
- p.gem_spec = spec
46
- p.need_tar = true
47
- p.need_zip = true
48
- end
49
-
50
- Rake::RDocTask.new do |rdoc|
51
- files =['README.markdown', 'LICENSE', 'lib/**/*.rb']
52
- rdoc.rdoc_files.add(files)
53
- rdoc.main = "README.markdown" # page to start on
54
- rdoc.title = "ARLoader Docs"
55
- rdoc.rdoc_dir = 'doc/rdoc' # rdoc output folder
56
- rdoc.options << '--line-numbers'
57
- end
58
-
59
- Rake::TestTask.new do |t|
60
- t.test_files = FileList['test/**/*.rb']
61
- end
62
-
63
- # Add in our own Tasks
64
-
65
- # Long parameter lists so ensure rake -T produces nice wide output
66
- ENV['RAKE_COLUMNS'] = '180'
67
-
68
- desc 'Build gem and install in one step'
69
- task :pik_install, :needs => [:gem] do |t, args|
70
-
71
- puts "Installing version #{ArLoader.gem_version}"
72
-
73
- gem = "#{ArLoader.gem_name}-#{ArLoader.gem_version}.gem"
74
- cmd = "pik gem install --no-ri --no-rdoc pkg\\#{gem}"
75
- system(cmd)
76
- end
1
+ require 'rubygems'
2
+ require 'rake'
3
+ require 'rake/clean'
4
+ require 'rake/gempackagetask'
5
+ require 'rake/rdoctask'
6
+ require 'rake/testtask'
7
+ require "lib/ar_loader"
8
+
9
+ # Copyright:: (c) Autotelik Media Ltd 2011
10
+ # Author :: Tom Statter
11
+ # Date :: Aug 2010
12
+ #
13
+ # License:: MIT - Free, OpenSource
14
+ #
15
+ # Details:: Gem::Specification for Active Record Loader gem.
16
+ #
17
+ # Specifically enabled for uploading Spree products but easily
18
+ # extended to any AR model.
19
+ #
20
+ # Currently support direct access to Excel Spreedsheets via JRuby
21
+ #
22
+ # TODO - Switch for non JRuby Rubies, enable load via CSV file instead of Excel.
23
+ #
24
+ ArLoader::load_tasks
25
+
26
+ spec = Gem::Specification.new do |s|
27
+ s.name = ArLoader.gem_name
28
+ s.version = ArLoader.gem_version
29
+ s.has_rdoc = true
30
+ s.extra_rdoc_files = ['README.markdown', 'LICENSE']
31
+ s.summary = 'File based loader for Active Record models'
32
+ s.description = 'A file based loader for Active Record models. Seed database directly from Excel/CSV. Includes rake support for Spree'
33
+ s.author = 'thomas statter'
34
+ s.email = 'rubygems@autotelik.co.uk'
35
+ s.date = DateTime.now.strftime("%Y-%m-%d")
36
+ s.homepage = %q{http://www.autotelik.co.uk}
37
+
38
+ # s.executables = ['your_executable_here']
39
+ s.files = %w(LICENSE README.markdown Rakefile) + Dir.glob("{lib,spec,tasks}/**/*")
40
+ s.require_path = "lib"
41
+ s.bindir = "bin"
42
+ end
43
+
44
+ Rake::GemPackageTask.new(spec) do |p|
45
+ p.gem_spec = spec
46
+ p.need_tar = true
47
+ p.need_zip = true
48
+ end
49
+
50
+ Rake::RDocTask.new do |rdoc|
51
+ files =['README.markdown', 'LICENSE', 'lib/**/*.rb']
52
+ rdoc.rdoc_files.add(files)
53
+ rdoc.main = "README.markdown" # page to start on
54
+ rdoc.title = "ARLoader Docs"
55
+ rdoc.rdoc_dir = 'doc/rdoc' # rdoc output folder
56
+ rdoc.options << '--line-numbers'
57
+ end
58
+
59
+ Rake::TestTask.new do |t|
60
+ t.test_files = FileList['test/**/*.rb']
61
+ end
62
+
63
+ # Add in our own Tasks
64
+
65
+ # Long parameter lists so ensure rake -T produces nice wide output
66
+ ENV['RAKE_COLUMNS'] = '180'
67
+
68
+ desc 'Build gem and install in one step'
69
+ task :pik_install, :needs => [:gem] do |t, args|
70
+
71
+ puts "Installing version #{ArLoader.gem_version}"
72
+
73
+ gem = "#{ArLoader.gem_name}-#{ArLoader.gem_version}.gem"
74
+ cmd = "pik gem install --no-ri --no-rdoc pkg\\#{gem}"
75
+ system(cmd)
76
+ end
data/lib/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.6
1
+ 0.0.8
data/lib/ar_loader.rb CHANGED
@@ -1,66 +1,87 @@
1
- # Copyright:: (c) Autotelik Media Ltd 2011
2
- # Author :: Tom Statter
3
- # Date :: Aug 2010
4
- # License:: TBD. Free, Open Source. MIT ?
5
- #
6
- # Details:: Active Record Loader
7
- #
8
- require 'active_record'
9
-
10
- module ArLoader
11
-
12
- def self.gem_version
13
- @gem_version ||= File.read( File.join( root_path, 'lib', 'VERSION') ).chomp
14
- @gem_version
15
- end
16
-
17
- def self.gem_name
18
- "ar_loader"
19
- end
20
-
21
- def self.root_path
22
- File.expand_path("#{File.dirname(__FILE__)}/..")
23
- end
24
-
25
- def self.require_libraries
26
-
27
- loader_libs = %w{ lib }
28
-
29
- # Base search paths - these will be searched recursively and any xxx.rake files autoimported
30
- loader_paths = []
31
-
32
- loader_libs.each {|l| loader_paths << File.join(root_path(), l) }
33
-
34
- # Define require search paths, any dir in here will be added to LOAD_PATH
35
-
36
- loader_paths.each do |base|
37
- $:.unshift base if File.directory?(base)
38
- Dir[File.join(base, '**', '**')].each do |p|
39
- if File.directory? p
40
- $:.unshift p
41
- end
42
- end
43
- end
44
-
45
- require__libs = %w{ loaders engine }
46
-
47
- require__libs.each do |base|
48
- Dir[File.join('lib', base, '*.rb')].each do |rb|
49
- unless File.directory? rb
50
- require rb
51
- end
52
- end
53
- end
54
-
55
- end
56
-
57
- def self.require_tasks
58
- # Long parameter lists so ensure rake -T produces nice wide output
59
- ENV['RAKE_COLUMNS'] = '180'
60
- base = File.join(root_path, 'tasks', '**')
61
- Dir["#{base}/*.rake"].sort.each { |ext| load ext }
62
- end
63
-
64
- end
65
-
66
- ArLoader::require_libraries
1
+ # Copyright:: (c) Autotelik Media Ltd 2011
2
+ # Author :: Tom Statter
3
+ # Date :: Aug 2010
4
+ # License:: TBD. Free, Open Source. MIT ?
5
+ #
6
+ # Details:: Active Record Loader
7
+ #
8
+ require 'active_record'
9
+ require 'rbconfig'
10
+
11
+ module Guards
12
+
13
+ def self.jruby?
14
+ return RUBY_PLATFORM == "java"
15
+ end
16
+ def self.mac?
17
+ RbConfig::CONFIG['target_os'] =~ /darwin/i
18
+ end
19
+
20
+ def self.linux?
21
+ RbConfig::CONFIG['target_os'] =~ /linux/i
22
+ end
23
+
24
+ def self.windows?
25
+ RbConfig::CONFIG['target_os'] =~ /mswin|mingw/i
26
+ end
27
+
28
+ end
29
+
30
+ module ArLoader
31
+
32
+ def self.gem_version
33
+ @gem_version ||= File.read( File.join( root_path, 'lib', 'VERSION') ).chomp
34
+ @gem_version
35
+ end
36
+
37
+ def self.gem_name
38
+ "ar_loader"
39
+ end
40
+
41
+ def self.root_path
42
+ File.expand_path("#{File.dirname(__FILE__)}/..")
43
+ end
44
+
45
+
46
+ def self.require_libraries
47
+
48
+ loader_libs = %w{ lib }
49
+
50
+ # Base search paths - these will be searched recursively
51
+ loader_paths = []
52
+
53
+ loader_libs.each {|l| loader_paths << File.join(root_path(), l) }
54
+
55
+ # Define require search paths, any dir in here will be added to LOAD_PATH
56
+
57
+ loader_paths.each do |base|
58
+ $:.unshift base if File.directory?(base)
59
+ Dir[File.join(base, '**', '**')].each do |p|
60
+ if File.directory? p
61
+ $:.unshift p
62
+ end
63
+ end
64
+ end
65
+
66
+ require_libs = %w{ ar_loader loaders helpers }
67
+
68
+ require_libs.each do |base|
69
+ Dir[File.join('lib', base, '*.rb')].each do |rb|
70
+ unless File.directory? rb
71
+ require rb
72
+ end
73
+ end
74
+ end
75
+
76
+ end
77
+
78
+ def self.load_tasks
79
+ # Long parameter lists so ensure rake -T produces nice wide output
80
+ ENV['RAKE_COLUMNS'] = '180'
81
+ base = File.join(root_path, 'tasks', '**')
82
+ Dir["#{base}/*.rake"].sort.each { |ext| load ext }
83
+ end
84
+
85
+ end
86
+
87
+ ArLoader::require_libraries
@@ -0,0 +1,2 @@
1
+ class MappingDefinitionError < StandardError
2
+ end
@@ -1,353 +1,353 @@
1
- # Copyright:: (c) Autotelik Media Ltd 2011
2
- # Author :: Tom Statter
3
- # Date :: Jan 2011
4
- # License:: MIT
5
- #
6
- # Details:: This module acts as helpers for defining input/output file formats as classes.
7
- #
8
- # It provides a simple interface to define a file structure - field by field.
9
- #
10
- # By defining the structure, following methods and attributes are mixed in :
11
- #
12
- # An attribute, with accessor for each field/column.
13
- # Parse a line, assigning values to each attribute.
14
- # Parse an instance of that file line by line, accepts a block in which data can be processed.
15
- # Method to split a file by field.
16
- # Method to perform replace operations on a file by field and value.
17
- #
18
- # Either delimited or a fixed width definition can be created via macro-like class methods :
19
- #
20
- # create_field_definition [field_list]
21
- #
22
- # create_fixed_definition {field => range }
23
- #
24
- # Member attributes, with getters and setters, can be added for each field defined above via class method :
25
- #
26
- # create_field_attr_accessors
27
- #
28
- # USAGE :
29
- #
30
- # Create a class that contains definition of a file.
31
- #
32
- # class ExampleFixedWith < FileDefinitionBase
33
- # create_fixed_definition(:name => (0..7), :value => (8..15), :ccy => (16..18), :dr_or_cr => (19..19) )
34
- #
35
- # create_field_attr_accessors
36
- # end
37
- #
38
- # class ExampleCSV < FileDefinitionBase
39
- # create_field_definition %w{abc def ghi jkl}
40
- #
41
- # create_field_attr_accessors
42
- # end
43
- #
44
- # Any instance can then be used to parse the defined file type, with each field or column value
45
- # being assigned automatically to the associated instance variable.
46
- #
47
- # line = '1,2,3,4'
48
- # x = ExampleCSV.new( line )
49
- #
50
- # assert x.responds_to? :jkl
51
- # assert_equal x.abc, '1'
52
- # assert_equal x.jkl.to_i, 4
53
- #
54
- module FileDefinitions
55
-
56
- include Enumerable
57
-
58
- attr_accessor :key
59
- attr_accessor :current_line
60
-
61
- # Set the delimiter to use when splitting a line - can be either a String, or a Regexp
62
- attr_writer :field_delim
63
-
64
- def initialize( line = nil )
65
- @key = String.new
66
- parse(line) unless line.nil?
67
- end
68
-
69
- def self.included(base)
70
- base.extend(ClassMethods)
71
- subclasses << base
72
- end
73
-
74
- def self.subclasses
75
- @subclasses ||=[]
76
- end
77
-
78
-
79
- # Return the field delimiter used when splitting a line
80
- def field_delim
81
- @field_delim || ','
82
- end
83
-
84
- # Parse each line of a file based on the field definition, yields self for each successive line
85
- #
86
- def each( file )
87
- File::new(file).each_line do |line|
88
- parse( line )
89
- yield self
90
- end
91
- end
92
-
93
- def fields
94
- @fields = self.class.field_definition.collect {|f| instance_variable_get "@#{f}" }
95
- @fields
96
- end
97
-
98
- def to_s
99
- fields.join(',')
100
- end
101
-
102
- module ClassMethods
103
-
104
- # Helper to generate methods to store and return the complete list of fields
105
- # in this File definition (also creates member @field_definition) and parse a line.
106
- #
107
- # e.g create_field_definition %w{ trade_id drOrCr ccy costCentre postingDate amount }
108
- #
109
- def create_field_definition( *fields )
110
- instance_eval <<-end_eval
111
- @field_definition ||= %w{ #{fields.join(' ')} }
112
- def field_definition
113
- @field_definition
114
- end
115
- end_eval
116
-
117
- class_eval <<-end_eval
118
- def parse( line )
119
- @current_line = line
120
- before_parse if respond_to? :before_parse
121
- @current_line.split(field_delim()).each_with_index {|x, i| instance_variable_set(\"@\#{self.class.field_definition[i]}\", x) }
122
- after_parse if respond_to? :after_parse
123
- generate_key if respond_to? :generate_key
124
- end
125
- end_eval
126
- end
127
-
128
- def add_field(field, add_accessor = true)
129
- @field_definition ||= []
130
- @field_definition << field.to_s
131
- attr_accessor field if(add_accessor)
132
- end
133
-
134
-
135
- # Helper to generate methods that return the complete list of fixed width fields
136
- # and associated ranges in this File definition, and parse a line.
137
- # e.g create_field_definition %w{ trade_id drOrCr ccy costCentre postingDate amount }
138
- #
139
- def create_fixed_definition( field_range_map )
140
- raise ArgumentError.new('Please supply hash to create_fixed_definition') unless field_range_map.is_a? Hash
141
-
142
- keys = field_range_map.keys.collect(&:to_s)
143
- string_map = Hash[*keys.zip(field_range_map.values).flatten]
144
-
145
- instance_eval <<-end_eval
146
- def fixed_definition
147
- @fixed_definition ||= #{string_map.inspect}
148
- @fixed_definition
149
- end
150
- end_eval
151
-
152
- instance_eval <<-end_eval
153
- def field_definition
154
- @field_definition ||= %w{ #{keys.join(' ')} }
155
- @field_definition
156
- end
157
- end_eval
158
-
159
- class_eval <<-end_eval
160
- def parse( line )
161
- @current_line = line
162
- before_parse if respond_to? :before_parse
163
- self.class.fixed_definition.each do |key, range|
164
- instance_variable_set(\"@\#{key}\", @current_line[range])
165
- end
166
- after_parse if respond_to? :after_parse
167
- generate_key if respond_to? :generate_key
168
- end
169
- end_eval
170
-
171
- end
172
-
173
- # Create accessors for each field
174
- def create_field_attr_accessors
175
- self.field_definition.each {|f| attr_accessor f}
176
- end
177
-
178
-
179
- ###############################
180
- # PARSING + FILE MANIPULATION #
181
- ###############################
182
-
183
- # Parse a complete file and return array of self, one per line
184
- def parse_file( file, options = {} )
185
- limit = options[:limit]
186
- count = 0
187
- lines = []
188
- File::new(file).each_line do |line|
189
- break if limit && ((count += 1) > limit)
190
- lines << self.new( line )
191
- end
192
- lines
193
- end
194
-
195
-
196
-
197
- # Split a file, whose field definition is represented by self,
198
- # into seperate streams, based on the values of one if it's fields.
199
- #
200
- # Writes the results, one file per split stream, to directory specified by output_path
201
- #
202
- # Options:
203
- #
204
- # :keys => Also write split files of the key fields
205
- #
206
- # :filter => Optional Regular Expression to act as filter be applid to the field.
207
- # For example split by Ccy but filter to only include certain ccys pass
208
- # filter => '[GBP|USD]'
209
- #
210
- def split_on_write( file_name, field, output_path, options = {} )
211
-
212
- path = output_path || '.'
213
-
214
- filtered = split_on( file_name, field, options )
215
-
216
- unless filtered.empty?
217
- log :info, "Writing seperate streams to #{path}"
218
-
219
- filtered.each { |strm, objects| RecsBase::write( {"keys_#{field}_#{strm}.csv" => objects.collect(&:key).join("\n")}, path) } if(options.key?(:keys))
220
-
221
- filtered.each { |strm, objects| RecsBase::write( {"#{field}_#{strm}.csv" => objects.collect(&:current_line).join("\n")}, path) }
222
- end
223
- end
224
-
225
- # Split a file, whose field definition is represented by self,
226
- # into seperate streams, based on one if it's fields.
227
- #
228
- # Returns a map of Field value => File def object
229
- #
230
- # We return the File Def object as this is now enriched, e.g with key fields, compared to the raw file.
231
- #
232
- # Users can get at the raw line simply by calling the line() method on File Def object
233
- #
234
- # Options:
235
- #
236
- # :output_path => directory to write the individual streams files to
237
- #
238
- # :filter => Optional Regular Expression to act as filter be applid to the field.
239
- # For example split by Ccy but filter to only include certain ccys pass
240
- # filter => 'GBP|USD|EUR'
241
- #
242
- def split_on( file_name, field, options = {} )
243
-
244
- regex = options[:filter] ? Regexp.new(options[:filter]) : nil
245
-
246
- log :debug, "Using REGEX: #{regex.inspect}" if regex
247
-
248
- filtered = {}
249
-
250
- if( self.new.respond_to?(field) )
251
-
252
- log :info, "Splitting on #{field}"
253
-
254
- File.open( file_name ) do |t|
255
- t.each do |line|
256
- next unless(line && line.chomp!)
257
- x = self.new(line)
258
-
259
- value = x.send( field.to_sym ) # the actual field value from the specified field column
260
- next if value.nil?
261
-
262
- if( regex.nil? || value.match(regex) )
263
- filtered[value] ? filtered[value] << x : filtered[value] = [x]
264
- end
265
- end
266
- end
267
- else
268
- log :warn, "Field [#{field}] nor defined for file definition #{self.class.name}"
269
- end
270
-
271
- if( options[:sort])
272
- filtered.values.each( &:sort )
273
- return filtered
274
- end
275
- return filtered
276
- end
277
-
278
- # Open and parse a file, replacing a value in the specfied field.
279
- # Does not update the file itself. Does not write a new output file.
280
- #
281
- # Returns :
282
- # 1) full collection of updated lines
283
- # 2) collection of file def objects (self), with updated value.
284
- #
285
- # Finds values matching old_value in given map
286
- #
287
- # Replaces matches with new_value in map.
288
- #
289
- # Accepts more than one field, if files is either and array of strings
290
- # or comma seperated list of fields.
291
- #
292
- def file_set_field_by_map( file_name, fields, value_map, regex = nil )
293
-
294
- lines, objects = [],[]
295
-
296
- if fields.is_a?(Array)
297
- attribs = fields
298
- else
299
- attribs = "#{fields}".split(',')
300
- end
301
-
302
- attribs.collect! do |attrib|
303
- raise BadConfigError.new("Field: #{attrib} is not a field on #{self.class.name}") unless self.new.respond_to?(attrib)
304
- end
305
-
306
- log :info, "#{self.class.name} - updating field(s) #{fields} in #{file_name}"
307
-
308
- File.open( file_name ) do |t|
309
- t.each do |line|
310
- if line.chomp.empty?
311
- lines << line
312
- objects << self.new
313
- next
314
- end
315
- x = self.new(line)
316
-
317
- attribs.each do |a|
318
- old_value = x.instance_variable_get( "@#{a}" )
319
- x.instance_variable_set( "@#{a}", value_map[old_value] ) if value_map[old_value] || (regex && old_value.keys.detect {|k| k.match(regx) })
320
- end
321
-
322
- objects << x
323
- lines << x.to_s
324
- end
325
- end
326
-
327
- return lines, objects
328
- end
329
- end # END class methods
330
-
331
- # Open and parse a file, replacing a value in the specfied field.
332
- # Does not update the file itself. Does not write a new output file.
333
- #
334
- # Returns :
335
- # 1) full collection of updated lines
336
- # 2) collection of file def objects (self), with updated value.
337
- #
338
- # Finds values matching old_value, and also accepts an optional regex for more powerful
339
- # matching strategies of values on the specfified field.
340
- #
341
- # Replaces matches with new_value.
342
- #
343
- # Accepts more than one field, if files is either and array of strings
344
- # or comma seperated list of fields.
345
- #
346
- def file_set_field( file_name, field, old_value, new_value, regex = nil )
347
-
348
- map = {old_value => new_value}
349
-
350
- return file_set_field_by_map(file_name, field, map, regex)
351
- end
352
-
353
- end
1
+ # Copyright:: (c) Autotelik Media Ltd 2011
2
+ # Author :: Tom Statter
3
+ # Date :: Jan 2011
4
+ # License:: MIT
5
+ #
6
+ # Details:: This module acts as helpers for defining input/output file formats as classes.
7
+ #
8
+ # It provides a simple interface to define a file structure - field by field.
9
+ #
10
+ # By defining the structure, following methods and attributes are mixed in :
11
+ #
12
+ # An attribute, with accessor for each field/column.
13
+ # Parse a line, assigning values to each attribute.
14
+ # Parse an instance of that file line by line, accepts a block in which data can be processed.
15
+ # Method to split a file by field.
16
+ # Method to perform replace operations on a file by field and value.
17
+ #
18
+ # Either delimited or a fixed width definition can be created via macro-like class methods :
19
+ #
20
+ # create_field_definition [field_list]
21
+ #
22
+ # create_fixed_definition {field => range }
23
+ #
24
+ # Member attributes, with getters and setters, can be added for each field defined above via class method :
25
+ #
26
+ # create_field_attr_accessors
27
+ #
28
+ # USAGE :
29
+ #
30
+ # Create a class that contains definition of a file.
31
+ #
32
+ # class ExampleFixedWith < FileDefinitionBase
33
+ # create_fixed_definition(:name => (0..7), :value => (8..15), :ccy => (16..18), :dr_or_cr => (19..19) )
34
+ #
35
+ # create_field_attr_accessors
36
+ # end
37
+ #
38
+ # class ExampleCSV < FileDefinitionBase
39
+ # create_field_definition %w{abc def ghi jkl}
40
+ #
41
+ # create_field_attr_accessors
42
+ # end
43
+ #
44
+ # Any instance can then be used to parse the defined file type, with each field or column value
45
+ # being assigned automatically to the associated instance variable.
46
+ #
47
+ # line = '1,2,3,4'
48
+ # x = ExampleCSV.new( line )
49
+ #
50
+ # assert x.responds_to? :jkl
51
+ # assert_equal x.abc, '1'
52
+ # assert_equal x.jkl.to_i, 4
53
+ #
54
+ module FileDefinitions
55
+
56
+ include Enumerable
57
+
58
+ attr_accessor :key
59
+ attr_accessor :current_line
60
+
61
+ # Set the delimiter to use when splitting a line - can be either a String, or a Regexp
62
+ attr_writer :field_delim
63
+
64
+ def initialize( line = nil )
65
+ @key = String.new
66
+ parse(line) unless line.nil?
67
+ end
68
+
69
+ def self.included(base)
70
+ base.extend(ClassMethods)
71
+ subclasses << base
72
+ end
73
+
74
+ def self.subclasses
75
+ @subclasses ||=[]
76
+ end
77
+
78
+
79
+ # Return the field delimiter used when splitting a line
80
+ def field_delim
81
+ @field_delim || ','
82
+ end
83
+
84
+ # Parse each line of a file based on the field definition, yields self for each successive line
85
+ #
86
+ def each( file )
87
+ File::new(file).each_line do |line|
88
+ parse( line )
89
+ yield self
90
+ end
91
+ end
92
+
93
+ def fields
94
+ @fields = self.class.field_definition.collect {|f| instance_variable_get "@#{f}" }
95
+ @fields
96
+ end
97
+
98
+ def to_s
99
+ fields.join(',')
100
+ end
101
+
102
+ module ClassMethods
103
+
104
+ # Helper to generate methods to store and return the complete list of fields
105
+ # in this File definition (also creates member @field_definition) and parse a line.
106
+ #
107
+ # e.g create_field_definition %w{ trade_id drOrCr ccy costCentre postingDate amount }
108
+ #
109
+ def create_field_definition( *fields )
110
+ instance_eval <<-end_eval
111
+ @field_definition ||= %w{ #{fields.join(' ')} }
112
+ def field_definition
113
+ @field_definition
114
+ end
115
+ end_eval
116
+
117
+ class_eval <<-end_eval
118
+ def parse( line )
119
+ @current_line = line
120
+ before_parse if respond_to? :before_parse
121
+ @current_line.split(field_delim()).each_with_index {|x, i| instance_variable_set(\"@\#{self.class.field_definition[i]}\", x) }
122
+ after_parse if respond_to? :after_parse
123
+ generate_key if respond_to? :generate_key
124
+ end
125
+ end_eval
126
+ end
127
+
128
+ def add_field(field, add_accessor = true)
129
+ @field_definition ||= []
130
+ @field_definition << field.to_s
131
+ attr_accessor field if(add_accessor)
132
+ end
133
+
134
+
135
+ # Helper to generate methods that return the complete list of fixed width fields
136
+ # and associated ranges in this File definition, and parse a line.
137
+ # e.g create_field_definition %w{ trade_id drOrCr ccy costCentre postingDate amount }
138
+ #
139
+ def create_fixed_definition( field_range_map )
140
+ raise ArgumentError.new('Please supply hash to create_fixed_definition') unless field_range_map.is_a? Hash
141
+
142
+ keys = field_range_map.keys.collect(&:to_s)
143
+ string_map = Hash[*keys.zip(field_range_map.values).flatten]
144
+
145
+ instance_eval <<-end_eval
146
+ def fixed_definition
147
+ @fixed_definition ||= #{string_map.inspect}
148
+ @fixed_definition
149
+ end
150
+ end_eval
151
+
152
+ instance_eval <<-end_eval
153
+ def field_definition
154
+ @field_definition ||= %w{ #{keys.join(' ')} }
155
+ @field_definition
156
+ end
157
+ end_eval
158
+
159
+ class_eval <<-end_eval
160
+ def parse( line )
161
+ @current_line = line
162
+ before_parse if respond_to? :before_parse
163
+ self.class.fixed_definition.each do |key, range|
164
+ instance_variable_set(\"@\#{key}\", @current_line[range])
165
+ end
166
+ after_parse if respond_to? :after_parse
167
+ generate_key if respond_to? :generate_key
168
+ end
169
+ end_eval
170
+
171
+ end
172
+
173
+ # Create accessors for each field
174
+ def create_field_attr_accessors
175
+ self.field_definition.each {|f| attr_accessor f}
176
+ end
177
+
178
+
179
+ ###############################
180
+ # PARSING + FILE MANIPULATION #
181
+ ###############################
182
+
183
+ # Parse a complete file and return array of self, one per line
184
+ def parse_file( file, options = {} )
185
+ limit = options[:limit]
186
+ count = 0
187
+ lines = []
188
+ File::new(file).each_line do |line|
189
+ break if limit && ((count += 1) > limit)
190
+ lines << self.new( line )
191
+ end
192
+ lines
193
+ end
194
+
195
+
196
+
197
+ # Split a file, whose field definition is represented by self,
198
+ # into seperate streams, based on the values of one if it's fields.
199
+ #
200
+ # Writes the results, one file per split stream, to directory specified by output_path
201
+ #
202
+ # Options:
203
+ #
204
+ # :keys => Also write split files of the key fields
205
+ #
206
+ # :filter => Optional Regular Expression to act as filter be applid to the field.
207
+ # For example split by Ccy but filter to only include certain ccys pass
208
+ # filter => '[GBP|USD]'
209
+ #
210
+ def split_on_write( file_name, field, output_path, options = {} )
211
+
212
+ path = output_path || '.'
213
+
214
+ filtered = split_on( file_name, field, options )
215
+
216
+ unless filtered.empty?
217
+ log :info, "Writing seperate streams to #{path}"
218
+
219
+ filtered.each { |strm, objects| RecsBase::write( {"keys_#{field}_#{strm}.csv" => objects.collect(&:key).join("\n")}, path) } if(options.key?(:keys))
220
+
221
+ filtered.each { |strm, objects| RecsBase::write( {"#{field}_#{strm}.csv" => objects.collect(&:current_line).join("\n")}, path) }
222
+ end
223
+ end
224
+
225
+ # Split a file, whose field definition is represented by self,
226
+ # into seperate streams, based on one if it's fields.
227
+ #
228
+ # Returns a map of Field value => File def object
229
+ #
230
+ # We return the File Def object as this is now enriched, e.g with key fields, compared to the raw file.
231
+ #
232
+ # Users can get at the raw line simply by calling the line() method on File Def object
233
+ #
234
+ # Options:
235
+ #
236
+ # :output_path => directory to write the individual streams files to
237
+ #
238
+ # :filter => Optional Regular Expression to act as filter be applid to the field.
239
+ # For example split by Ccy but filter to only include certain ccys pass
240
+ # filter => 'GBP|USD|EUR'
241
+ #
242
+ def split_on( file_name, field, options = {} )
243
+
244
+ regex = options[:filter] ? Regexp.new(options[:filter]) : nil
245
+
246
+ log :debug, "Using REGEX: #{regex.inspect}" if regex
247
+
248
+ filtered = {}
249
+
250
+ if( self.new.respond_to?(field) )
251
+
252
+ log :info, "Splitting on #{field}"
253
+
254
+ File.open( file_name ) do |t|
255
+ t.each do |line|
256
+ next unless(line && line.chomp!)
257
+ x = self.new(line)
258
+
259
+ value = x.send( field.to_sym ) # the actual field value from the specified field column
260
+ next if value.nil?
261
+
262
+ if( regex.nil? || value.match(regex) )
263
+ filtered[value] ? filtered[value] << x : filtered[value] = [x]
264
+ end
265
+ end
266
+ end
267
+ else
268
+ log :warn, "Field [#{field}] nor defined for file definition #{self.class.name}"
269
+ end
270
+
271
+ if( options[:sort])
272
+ filtered.values.each( &:sort )
273
+ return filtered
274
+ end
275
+ return filtered
276
+ end
277
+
278
+ # Open and parse a file, replacing a value in the specfied field.
279
+ # Does not update the file itself. Does not write a new output file.
280
+ #
281
+ # Returns :
282
+ # 1) full collection of updated lines
283
+ # 2) collection of file def objects (self), with updated value.
284
+ #
285
+ # Finds values matching old_value in given map
286
+ #
287
+ # Replaces matches with new_value in map.
288
+ #
289
+ # Accepts more than one field, if files is either and array of strings
290
+ # or comma seperated list of fields.
291
+ #
292
+ def file_set_field_by_map( file_name, fields, value_map, regex = nil )
293
+
294
+ lines, objects = [],[]
295
+
296
+ if fields.is_a?(Array)
297
+ attribs = fields
298
+ else
299
+ attribs = "#{fields}".split(',')
300
+ end
301
+
302
+ attribs.collect! do |attrib|
303
+ raise BadConfigError.new("Field: #{attrib} is not a field on #{self.class.name}") unless self.new.respond_to?(attrib)
304
+ end
305
+
306
+ log :info, "#{self.class.name} - updating field(s) #{fields} in #{file_name}"
307
+
308
+ File.open( file_name ) do |t|
309
+ t.each do |line|
310
+ if line.chomp.empty?
311
+ lines << line
312
+ objects << self.new
313
+ next
314
+ end
315
+ x = self.new(line)
316
+
317
+ attribs.each do |a|
318
+ old_value = x.instance_variable_get( "@#{a}" )
319
+ x.instance_variable_set( "@#{a}", value_map[old_value] ) if value_map[old_value] || (regex && old_value.keys.detect {|k| k.match(regx) })
320
+ end
321
+
322
+ objects << x
323
+ lines << x.to_s
324
+ end
325
+ end
326
+
327
+ return lines, objects
328
+ end
329
+ end # END class methods
330
+
331
+ # Open and parse a file, replacing a value in the specfied field.
332
+ # Does not update the file itself. Does not write a new output file.
333
+ #
334
+ # Returns :
335
+ # 1) full collection of updated lines
336
+ # 2) collection of file def objects (self), with updated value.
337
+ #
338
+ # Finds values matching old_value, and also accepts an optional regex for more powerful
339
+ # matching strategies of values on the specfified field.
340
+ #
341
+ # Replaces matches with new_value.
342
+ #
343
+ # Accepts more than one field, if files is either and array of strings
344
+ # or comma seperated list of fields.
345
+ #
346
+ def file_set_field( file_name, field, old_value, new_value, regex = nil )
347
+
348
+ map = {old_value => new_value}
349
+
350
+ return file_set_field_by_map(file_name, field, map, regex)
351
+ end
352
+
353
+ end