ar_loader 0.0.4 → 0.0.6

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,353 +1,353 @@
1
- # Copyright:: (c) Autotelik Media Ltd 2011
2
- # Author :: Tom Statter
3
- # Date :: Jan 2011
4
- # License:: MIT
5
- #
6
- # Details:: This module acts as helpers for defining input/output file formats as classes.
7
- #
8
- # It provides a simple interface to define a file structure - field by field.
9
- #
10
- # By defining the structure, following methods and attributes are mixed in :
11
- #
12
- # An attribute, with accessor for each field/column.
13
- # Parse a line, assigning values to each attribute.
14
- # Parse an instance of that file line by line, accepts a block in which data can be processed.
15
- # Method to split a file by field.
16
- # Method to perform replace operations on a file by field and value.
17
- #
18
- # Either delimited or a fixed width definition can be created via macro-like class methods :
19
- #
20
- # create_field_definition [field_list]
21
- #
22
- # create_fixed_definition {field => range }
23
- #
24
- # Member attributes, with getters and setters, can be added for each field defined above via class method :
25
- #
26
- # create_field_attr_accessors
27
- #
28
- # USAGE :
29
- #
30
- # Create a class that contains definition of a file.
31
- #
32
- # class ExampleFixedWith < FileDefinitionBase
33
- # create_fixed_definition(:name => (0..7), :value => (8..15), :ccy => (16..18), :dr_or_cr => (19..19) )
34
- #
35
- # create_field_attr_accessors
36
- # end
37
- #
38
- # class ExampleCSV < FileDefinitionBase
39
- # create_field_definition %w{abc def ghi jkl}
40
- #
41
- # create_field_attr_accessors
42
- # end
43
- #
44
- # Any instance can then be used to parse the defined file type, with each field or column value
45
- # being assigned automatically to the associated instance variable.
46
- #
47
- # line = '1,2,3,4'
48
- # x = ExampleCSV.new( line )
49
- #
50
- # assert x.respond_to? :jkl
51
- # assert_equal x.abc, '1'
52
- # assert_equal x.jkl.to_i, 4
53
- #
54
# Mixin that turns a class into a description of a delimited or fixed-width
# file format.
#
# Including the module extends the class with ClassMethods (the
# create_*_definition macros plus the whole-file helpers) and records the class
# in FileDefinitions.subclasses. An instance holds one parsed line at a time.
#
# The class-level helpers call +log+, +RecsBase+ and +BadConfigError+. None of
# these is defined in this module; the including application must supply them.
module FileDefinitions

  # NOTE(review): #each below requires a file argument, so Enumerable methods
  # that invoke #each without arguments cannot work as-is - confirm whether
  # anything relies on this include.
  include Enumerable

  attr_accessor :key
  attr_accessor :current_line

  # Set the delimiter to use when splitting a line - can be either a String, or a Regexp
  attr_writer :field_delim

  # line:: optional raw line; when given it is parsed immediately.
  def initialize(line = nil)
    @key = String.new
    parse(line) unless line.nil?
  end

  # Hook run on +include+: adds the class-level API and registers the class.
  def self.included(base)
    base.extend(ClassMethods)
    subclasses << base
  end

  # Every class that has included this module so far.
  def self.subclasses
    @subclasses ||= []
  end

  # Return the field delimiter used when splitting a line (defaults to ',')
  def field_delim
    @field_delim || ','
  end

  # Parse each line of +file+ based on the field definition, yielding self
  # after each successive line has been parsed into this instance.
  #
  # The file is opened in block form so the handle is closed even if the
  # caller's block raises.
  def each(file)
    File.open(file) do |io|
      io.each_line do |line|
        parse(line)
        yield self
      end
    end
  end

  # Current value of every defined field, in definition order.
  def fields
    @fields = self.class.field_definition.map { |f| instance_variable_get("@#{f}") }
  end

  # Field values joined with a comma (always ',', independent of field_delim).
  def to_s
    fields.join(',')
  end

  # Convenience wrapper so an instance can call the class-level
  # ClassMethods#file_set_field. Previously this instance method called the
  # class-only file_set_field_by_map directly and so could never succeed.
  def file_set_field(file_name, field, old_value, new_value, regex = nil)
    self.class.file_set_field(file_name, field, old_value, new_value, regex)
  end

  module ClassMethods

    # Helper to generate methods to store and return the complete list of fields
    # in this File definition (also creates member @field_definition) and parse a line.
    #
    # e.g create_field_definition %w{ trade_id drOrCr ccy costCentre postingDate amount }
    #
    # Accepts either a list of names or a single array of names. The methods
    # are built with define_method rather than string eval, so field names are
    # never interpolated into evaluated source.
    def create_field_definition(*fields)
      # join flattens nested arrays; split mirrors the whitespace splitting of
      # the %w{} literal this replaces.
      @field_definition ||= fields.join(' ').split

      define_singleton_method(:field_definition) { @field_definition }

      define_method(:parse) do |line|
        @current_line = line
        before_parse if respond_to? :before_parse
        names = self.class.field_definition
        @current_line.split(field_delim).each_with_index do |value, index|
          field = names[index]
          # Columns beyond the defined fields have no attribute to land in;
          # ignore them instead of failing on the invalid ivar name "@".
          break if field.nil?
          instance_variable_set("@#{field}", value)
        end
        after_parse if respond_to? :after_parse
        generate_key if respond_to? :generate_key
      end
    end

    # Append a single field to the definition, optionally with an accessor.
    def add_field(field, add_accessor = true)
      @field_definition ||= []
      @field_definition << field.to_s
      attr_accessor field if add_accessor
    end

    # Helper to generate methods that return the complete list of fixed width fields
    # and associated ranges in this File definition, and parse a line.
    #
    # e.g create_fixed_definition(:name => (0..7), :value => (8..15))
    #
    # Raises ArgumentError unless given a Hash of field => range.
    def create_fixed_definition(field_range_map)
      raise ArgumentError.new('Please supply hash to create_fixed_definition') unless field_range_map.is_a? Hash

      keys = field_range_map.keys.map(&:to_s)
      string_map = Hash[keys.zip(field_range_map.values)]

      # Both readers stay lazy (||=) so a value assigned earlier, e.g. through
      # add_field, still takes precedence as it did before.
      define_singleton_method(:fixed_definition) { @fixed_definition ||= string_map }
      define_singleton_method(:field_definition) { @field_definition ||= keys.dup }

      define_method(:parse) do |line|
        @current_line = line
        before_parse if respond_to? :before_parse
        self.class.fixed_definition.each do |key, range|
          instance_variable_set("@#{key}", @current_line[range])
        end
        after_parse if respond_to? :after_parse
        generate_key if respond_to? :generate_key
      end
    end

    # Create accessors for each field
    def create_field_attr_accessors
      field_definition.each { |f| attr_accessor f }
    end

    ###############################
    # PARSING + FILE MANIPULATION #
    ###############################

    # Parse a complete file and return an array of instances, one per line.
    #
    # Options:
    #   :limit => maximum number of lines to read
    def parse_file(file, options = {})
      limit = options[:limit]
      lines = []
      File.open(file) do |io|
        io.each_line do |line|
          break if limit && lines.size >= limit
          lines << new(line)
        end
      end
      lines
    end

    # Split a file, whose field definition is represented by self,
    # into separate streams, based on the values of one of its fields.
    #
    # Writes the results, one file per split stream, to the directory specified
    # by output_path (defaults to '.'), via RecsBase::write.
    #
    # Options:
    #
    #   :keys   => Also write split files of the key fields
    #
    #   :filter => Optional Regular Expression applied as a filter to the field.
    #              For example split by Ccy but only include certain ccys:
    #                :filter => 'GBP|USD'
    #
    def split_on_write(file_name, field, output_path, options = {})
      path = output_path || '.'

      filtered = split_on(file_name, field, options)
      return if filtered.empty?

      log :info, "Writing seperate streams to #{path}"

      if options.key?(:keys)
        filtered.each { |strm, objects| RecsBase::write({ "keys_#{field}_#{strm}.csv" => objects.collect(&:key).join("\n") }, path) }
      end

      filtered.each { |strm, objects| RecsBase::write({ "#{field}_#{strm}.csv" => objects.collect(&:current_line).join("\n") }, path) }
    end

    # Split a file, whose field definition is represented by self,
    # into separate streams, based on one of its fields.
    #
    # Returns a map of field value => array of File def objects. The objects
    # are returned rather than raw lines because they are enriched (e.g. with
    # key fields); the raw line is available through current_line.
    #
    # Options:
    #
    #   :filter => Optional Regular Expression applied as a filter to the field.
    #              For example split by Ccy but only include certain ccys:
    #                :filter => 'GBP|USD|EUR'
    #
    #   :sort   => sort each stream in place (the objects must implement <=>)
    #
    def split_on(file_name, field, options = {})
      regex = options[:filter] ? Regexp.new(options[:filter]) : nil

      log :debug, "Using REGEX: #{regex.inspect}" if regex

      filtered = {}

      if new.respond_to?(field)
        log :info, "Splitting on #{field}"

        File.open(file_name) do |io|
          io.each_line do |raw|
            # chomp (not chomp!): the bang form returns nil when there is no
            # newline to strip, which used to drop an unterminated last line.
            record = new(raw.chomp)

            value = record.send(field.to_sym) # the actual field value from the specified field column
            next if value.nil?

            (filtered[value] ||= []) << record if regex.nil? || value.match(regex)
          end
        end
      else
        # self is already the class here; self.class.name would print "Class".
        log :warn, "Field [#{field}] nor defined for file definition #{name}"
      end

      # sort! so the ordering is actually kept; plain sort discarded its result.
      filtered.each_value(&:sort!) if options[:sort]

      filtered
    end

    # Open and parse a file, replacing values in the specified field(s).
    # Does not update the file itself. Does not write a new output file.
    #
    # Returns :
    #   1) full collection of updated lines
    #   2) collection of file def objects (self), with updated value.
    #
    # A field whose current value is a key of value_map is set to the mapped
    # value.
    #
    # NOTE(review): the original regex branch could never execute (it used an
    # undefined local and called Hash methods on a String). It is implemented
    # here as: a value with no exact mapping that matches +regex+ is replaced
    # with the first value in value_map - which is new_value when called via
    # file_set_field. Confirm this is the intended meaning for larger maps.
    #
    # NOTE(review): lines are parsed without stripping the newline, so the last
    # column keeps its trailing "\n" and will not equal a bare key - confirm.
    #
    # Accepts more than one field, if fields is either an array of names
    # or a comma separated list of fields. The caller's array is not modified.
    #
    # Raises BadConfigError when a named field is not defined on this class.
    def file_set_field_by_map(file_name, fields, value_map, regex = nil)
      lines, objects = [], []

      attribs = fields.is_a?(Array) ? fields : fields.to_s.split(',')
      attribs = attribs.map { |a| a.to_s.strip }

      # Validate with each: the previous collect! replaced every name with nil.
      probe = new
      attribs.each do |attrib|
        raise BadConfigError.new("Field: #{attrib} is not a field on #{name}") unless probe.respond_to?(attrib)
      end

      log :info, "#{name} - updating field(s) #{fields} in #{file_name}"

      File.open(file_name) do |io|
        io.each_line do |line|
          # Blank lines are passed through untouched, paired with an empty object.
          if line.chomp.empty?
            lines << line
            objects << new
            next
          end

          record = new(line)

          attribs.each do |attrib|
            ivar = "@#{attrib}"
            old_value = record.instance_variable_get(ivar)
            replacement = value_map[old_value]
            if !replacement && regex && old_value && old_value.match(regex)
              replacement = value_map.values.first
            end
            record.instance_variable_set(ivar, replacement) if replacement
          end

          objects << record
          lines << record.to_s
        end
      end

      return lines, objects
    end

    # Open and parse a file, replacing a value in the specified field.
    # Does not update the file itself. Does not write a new output file.
    #
    # Returns :
    #   1) full collection of updated lines
    #   2) collection of file def objects (self), with updated value.
    #
    # Finds values equal to old_value, and also accepts an optional regex for
    # more powerful matching of values on the specified field.
    #
    # Replaces matches with new_value.
    #
    # Accepts more than one field, if field is either an array of names
    # or a comma separated list of fields.
    def file_set_field(file_name, field, old_value, new_value, regex = nil)
      file_set_field_by_map(file_name, field, { old_value => new_value }, regex)
    end
  end # END class methods

end
1
+ # Copyright:: (c) Autotelik Media Ltd 2011
2
+ # Author :: Tom Statter
3
+ # Date :: Jan 2011
4
+ # License:: MIT
5
+ #
6
+ # Details:: This module acts as helpers for defining input/output file formats as classes.
7
+ #
8
+ # It provides a simple interface to define a file structure - field by field.
9
+ #
10
+ # By defining the structure, following methods and attributes are mixed in :
11
+ #
12
+ # An attribute, with accessor for each field/column.
13
+ # Parse a line, assigning values to each attribute.
14
+ # Parse an instance of that file line by line, accepts a block in which data can be processed.
15
+ # Method to split a file by field.
16
+ # Method to perform replace operations on a file by field and value.
17
+ #
18
+ # Either delimited or a fixed width definition can be created via macro-like class methods :
19
+ #
20
+ # create_field_definition [field_list]
21
+ #
22
+ # create_fixed_definition {field => range }
23
+ #
24
+ # Member attributes, with getters and setters, can be added for each field defined above via class method :
25
+ #
26
+ # create_field_attr_accessors
27
+ #
28
+ # USAGE :
29
+ #
30
+ # Create a class that contains definition of a file.
31
+ #
32
+ # class ExampleFixedWith < FileDefinitionBase
33
+ # create_fixed_definition(:name => (0..7), :value => (8..15), :ccy => (16..18), :dr_or_cr => (19..19) )
34
+ #
35
+ # create_field_attr_accessors
36
+ # end
37
+ #
38
+ # class ExampleCSV < FileDefinitionBase
39
+ # create_field_definition %w{abc def ghi jkl}
40
+ #
41
+ # create_field_attr_accessors
42
+ # end
43
+ #
44
+ # Any instance can then be used to parse the defined file type, with each field or column value
45
+ # being assigned automatically to the associated instance variable.
46
+ #
47
+ # line = '1,2,3,4'
48
+ # x = ExampleCSV.new( line )
49
+ #
50
+ # assert x.respond_to? :jkl
51
+ # assert_equal x.abc, '1'
52
+ # assert_equal x.jkl.to_i, 4
53
+ #
54
# Mixin that turns a class into a description of a delimited or fixed-width
# file format.
#
# Including the module extends the class with ClassMethods (the
# create_*_definition macros plus the whole-file helpers) and records the class
# in FileDefinitions.subclasses. An instance holds one parsed line at a time.
#
# The class-level helpers call +log+, +RecsBase+ and +BadConfigError+. None of
# these is defined in this module; the including application must supply them.
module FileDefinitions

  # NOTE(review): #each below requires a file argument, so Enumerable methods
  # that invoke #each without arguments cannot work as-is - confirm whether
  # anything relies on this include.
  include Enumerable

  attr_accessor :key
  attr_accessor :current_line

  # Set the delimiter to use when splitting a line - can be either a String, or a Regexp
  attr_writer :field_delim

  # line:: optional raw line; when given it is parsed immediately.
  def initialize(line = nil)
    @key = String.new
    parse(line) unless line.nil?
  end

  # Hook run on +include+: adds the class-level API and registers the class.
  def self.included(base)
    base.extend(ClassMethods)
    subclasses << base
  end

  # Every class that has included this module so far.
  def self.subclasses
    @subclasses ||= []
  end

  # Return the field delimiter used when splitting a line (defaults to ',')
  def field_delim
    @field_delim || ','
  end

  # Parse each line of +file+ based on the field definition, yielding self
  # after each successive line has been parsed into this instance.
  #
  # The file is opened in block form so the handle is closed even if the
  # caller's block raises.
  def each(file)
    File.open(file) do |io|
      io.each_line do |line|
        parse(line)
        yield self
      end
    end
  end

  # Current value of every defined field, in definition order.
  def fields
    @fields = self.class.field_definition.map { |f| instance_variable_get("@#{f}") }
  end

  # Field values joined with a comma (always ',', independent of field_delim).
  def to_s
    fields.join(',')
  end

  # Convenience wrapper so an instance can call the class-level
  # ClassMethods#file_set_field. Previously this instance method called the
  # class-only file_set_field_by_map directly and so could never succeed.
  def file_set_field(file_name, field, old_value, new_value, regex = nil)
    self.class.file_set_field(file_name, field, old_value, new_value, regex)
  end

  module ClassMethods

    # Helper to generate methods to store and return the complete list of fields
    # in this File definition (also creates member @field_definition) and parse a line.
    #
    # e.g create_field_definition %w{ trade_id drOrCr ccy costCentre postingDate amount }
    #
    # Accepts either a list of names or a single array of names. The methods
    # are built with define_method rather than string eval, so field names are
    # never interpolated into evaluated source.
    def create_field_definition(*fields)
      # join flattens nested arrays; split mirrors the whitespace splitting of
      # the %w{} literal this replaces.
      @field_definition ||= fields.join(' ').split

      define_singleton_method(:field_definition) { @field_definition }

      define_method(:parse) do |line|
        @current_line = line
        before_parse if respond_to? :before_parse
        names = self.class.field_definition
        @current_line.split(field_delim).each_with_index do |value, index|
          field = names[index]
          # Columns beyond the defined fields have no attribute to land in;
          # ignore them instead of failing on the invalid ivar name "@".
          break if field.nil?
          instance_variable_set("@#{field}", value)
        end
        after_parse if respond_to? :after_parse
        generate_key if respond_to? :generate_key
      end
    end

    # Append a single field to the definition, optionally with an accessor.
    def add_field(field, add_accessor = true)
      @field_definition ||= []
      @field_definition << field.to_s
      attr_accessor field if add_accessor
    end

    # Helper to generate methods that return the complete list of fixed width fields
    # and associated ranges in this File definition, and parse a line.
    #
    # e.g create_fixed_definition(:name => (0..7), :value => (8..15))
    #
    # Raises ArgumentError unless given a Hash of field => range.
    def create_fixed_definition(field_range_map)
      raise ArgumentError.new('Please supply hash to create_fixed_definition') unless field_range_map.is_a? Hash

      keys = field_range_map.keys.map(&:to_s)
      string_map = Hash[keys.zip(field_range_map.values)]

      # Both readers stay lazy (||=) so a value assigned earlier, e.g. through
      # add_field, still takes precedence as it did before.
      define_singleton_method(:fixed_definition) { @fixed_definition ||= string_map }
      define_singleton_method(:field_definition) { @field_definition ||= keys.dup }

      define_method(:parse) do |line|
        @current_line = line
        before_parse if respond_to? :before_parse
        self.class.fixed_definition.each do |key, range|
          instance_variable_set("@#{key}", @current_line[range])
        end
        after_parse if respond_to? :after_parse
        generate_key if respond_to? :generate_key
      end
    end

    # Create accessors for each field
    def create_field_attr_accessors
      field_definition.each { |f| attr_accessor f }
    end

    ###############################
    # PARSING + FILE MANIPULATION #
    ###############################

    # Parse a complete file and return an array of instances, one per line.
    #
    # Options:
    #   :limit => maximum number of lines to read
    def parse_file(file, options = {})
      limit = options[:limit]
      lines = []
      File.open(file) do |io|
        io.each_line do |line|
          break if limit && lines.size >= limit
          lines << new(line)
        end
      end
      lines
    end

    # Split a file, whose field definition is represented by self,
    # into separate streams, based on the values of one of its fields.
    #
    # Writes the results, one file per split stream, to the directory specified
    # by output_path (defaults to '.'), via RecsBase::write.
    #
    # Options:
    #
    #   :keys   => Also write split files of the key fields
    #
    #   :filter => Optional Regular Expression applied as a filter to the field.
    #              For example split by Ccy but only include certain ccys:
    #                :filter => 'GBP|USD'
    #
    def split_on_write(file_name, field, output_path, options = {})
      path = output_path || '.'

      filtered = split_on(file_name, field, options)
      return if filtered.empty?

      log :info, "Writing seperate streams to #{path}"

      if options.key?(:keys)
        filtered.each { |strm, objects| RecsBase::write({ "keys_#{field}_#{strm}.csv" => objects.collect(&:key).join("\n") }, path) }
      end

      filtered.each { |strm, objects| RecsBase::write({ "#{field}_#{strm}.csv" => objects.collect(&:current_line).join("\n") }, path) }
    end

    # Split a file, whose field definition is represented by self,
    # into separate streams, based on one of its fields.
    #
    # Returns a map of field value => array of File def objects. The objects
    # are returned rather than raw lines because they are enriched (e.g. with
    # key fields); the raw line is available through current_line.
    #
    # Options:
    #
    #   :filter => Optional Regular Expression applied as a filter to the field.
    #              For example split by Ccy but only include certain ccys:
    #                :filter => 'GBP|USD|EUR'
    #
    #   :sort   => sort each stream in place (the objects must implement <=>)
    #
    def split_on(file_name, field, options = {})
      regex = options[:filter] ? Regexp.new(options[:filter]) : nil

      log :debug, "Using REGEX: #{regex.inspect}" if regex

      filtered = {}

      if new.respond_to?(field)
        log :info, "Splitting on #{field}"

        File.open(file_name) do |io|
          io.each_line do |raw|
            # chomp (not chomp!): the bang form returns nil when there is no
            # newline to strip, which used to drop an unterminated last line.
            record = new(raw.chomp)

            value = record.send(field.to_sym) # the actual field value from the specified field column
            next if value.nil?

            (filtered[value] ||= []) << record if regex.nil? || value.match(regex)
          end
        end
      else
        # self is already the class here; self.class.name would print "Class".
        log :warn, "Field [#{field}] nor defined for file definition #{name}"
      end

      # sort! so the ordering is actually kept; plain sort discarded its result.
      filtered.each_value(&:sort!) if options[:sort]

      filtered
    end

    # Open and parse a file, replacing values in the specified field(s).
    # Does not update the file itself. Does not write a new output file.
    #
    # Returns :
    #   1) full collection of updated lines
    #   2) collection of file def objects (self), with updated value.
    #
    # A field whose current value is a key of value_map is set to the mapped
    # value.
    #
    # NOTE(review): the original regex branch could never execute (it used an
    # undefined local and called Hash methods on a String). It is implemented
    # here as: a value with no exact mapping that matches +regex+ is replaced
    # with the first value in value_map - which is new_value when called via
    # file_set_field. Confirm this is the intended meaning for larger maps.
    #
    # NOTE(review): lines are parsed without stripping the newline, so the last
    # column keeps its trailing "\n" and will not equal a bare key - confirm.
    #
    # Accepts more than one field, if fields is either an array of names
    # or a comma separated list of fields. The caller's array is not modified.
    #
    # Raises BadConfigError when a named field is not defined on this class.
    def file_set_field_by_map(file_name, fields, value_map, regex = nil)
      lines, objects = [], []

      attribs = fields.is_a?(Array) ? fields : fields.to_s.split(',')
      attribs = attribs.map { |a| a.to_s.strip }

      # Validate with each: the previous collect! replaced every name with nil.
      probe = new
      attribs.each do |attrib|
        raise BadConfigError.new("Field: #{attrib} is not a field on #{name}") unless probe.respond_to?(attrib)
      end

      log :info, "#{name} - updating field(s) #{fields} in #{file_name}"

      File.open(file_name) do |io|
        io.each_line do |line|
          # Blank lines are passed through untouched, paired with an empty object.
          if line.chomp.empty?
            lines << line
            objects << new
            next
          end

          record = new(line)

          attribs.each do |attrib|
            ivar = "@#{attrib}"
            old_value = record.instance_variable_get(ivar)
            replacement = value_map[old_value]
            if !replacement && regex && old_value && old_value.match(regex)
              replacement = value_map.values.first
            end
            record.instance_variable_set(ivar, replacement) if replacement
          end

          objects << record
          lines << record.to_s
        end
      end

      return lines, objects
    end

    # Open and parse a file, replacing a value in the specified field.
    # Does not update the file itself. Does not write a new output file.
    #
    # Returns :
    #   1) full collection of updated lines
    #   2) collection of file def objects (self), with updated value.
    #
    # Finds values equal to old_value, and also accepts an optional regex for
    # more powerful matching of values on the specified field.
    #
    # Replaces matches with new_value.
    #
    # Accepts more than one field, if field is either an array of names
    # or a comma separated list of fields.
    def file_set_field(file_name, field, old_value, new_value, regex = nil)
      file_set_field_by_map(file_name, field, { old_value => new_value }, regex)
    end
  end # END class methods

end