datashift 0.2.1 → 0.2.2
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -5
- data/LICENSE.txt +26 -26
- data/README.markdown +326 -305
- data/README.rdoc +19 -19
- data/Rakefile +86 -93
- data/VERSION +1 -1
- data/datashift.gemspec +163 -152
- data/lib/applications/jruby/jexcel_file.rb +410 -408
- data/lib/applications/jruby/word.rb +79 -79
- data/lib/datashift.rb +183 -152
- data/lib/datashift/exceptions.rb +11 -11
- data/lib/datashift/file_definitions.rb +353 -353
- data/lib/datashift/mapping_file_definitions.rb +87 -87
- data/lib/datashift/method_detail.rb +293 -275
- data/lib/datashift/method_dictionary.rb +208 -209
- data/lib/datashift/method_mapper.rb +90 -90
- data/lib/datashift/model_mapper.rb +27 -0
- data/lib/exporters/csv_exporter.rb +36 -0
- data/lib/exporters/excel_exporter.rb +116 -0
- data/lib/exporters/exporter_base.rb +15 -0
- data/lib/generators/csv_generator.rb +36 -36
- data/lib/generators/excel_generator.rb +106 -122
- data/lib/generators/generator_base.rb +13 -13
- data/lib/helpers/core_ext/to_b.rb +24 -24
- data/lib/helpers/rake_utils.rb +42 -0
- data/lib/helpers/spree_helper.rb +194 -153
- data/lib/java/poi-3.7/LICENSE +507 -507
- data/lib/java/poi-3.7/NOTICE +21 -21
- data/lib/java/poi-3.7/RELEASE_NOTES.txt +115 -115
- data/lib/loaders/csv_loader.rb +98 -98
- data/lib/loaders/excel_loader.rb +155 -155
- data/lib/loaders/loader_base.rb +420 -420
- data/lib/loaders/spreadsheet_loader.rb +136 -136
- data/lib/loaders/spree/image_loader.rb +67 -63
- data/lib/loaders/spree/product_loader.rb +289 -248
- data/lib/thor/generate_excel.thor +54 -0
- data/sandbox/app/controllers/application_controller.rb +3 -0
- data/sandbox/config/application.rb +43 -0
- data/sandbox/config/database.yml +34 -0
- data/sandbox/config/environment.rb +7 -0
- data/sandbox/config/environments/development.rb +30 -0
- data/spec/csv_loader_spec.rb +30 -30
- data/spec/datashift_spec.rb +26 -26
- data/spec/db/migrate/20110803201325_create_test_bed.rb +85 -85
- data/spec/excel_exporter_spec.rb +78 -78
- data/spec/excel_generator_spec.rb +78 -78
- data/spec/excel_loader_spec.rb +223 -223
- data/spec/file_definitions.rb +141 -141
- data/spec/fixtures/ProjectsDefaults.yml +29 -29
- data/spec/fixtures/config/database.yml +27 -27
- data/spec/fixtures/datashift_Spree_db.sqlite +0 -0
- data/spec/fixtures/datashift_test_models_db.sqlite +0 -0
- data/spec/fixtures/negative/SpreeProdMiss1Mandatory.csv +4 -4
- data/spec/fixtures/negative/SpreeProdMissManyMandatory.csv +4 -4
- data/spec/fixtures/spree/SpreeProducts.csv +4 -4
- data/spec/fixtures/spree/SpreeProducts.xls +0 -0
- data/spec/fixtures/spree/SpreeProductsMultiColumn.csv +4 -4
- data/spec/fixtures/spree/SpreeProductsMultiColumn.xls +0 -0
- data/spec/fixtures/spree/SpreeProductsSimple.csv +4 -4
- data/spec/fixtures/spree/SpreeProductsWithImages.csv +4 -4
- data/spec/fixtures/spree/SpreeZoneExample.csv +5 -5
- data/spec/fixtures/test_model_defs.rb +57 -57
- data/spec/loader_spec.rb +120 -120
- data/spec/method_dictionary_spec.rb +242 -242
- data/spec/method_mapper_spec.rb +41 -41
- data/spec/spec_helper.rb +154 -116
- data/spec/spree_exporter_spec.rb +67 -0
- data/spec/spree_generator_spec.rb +77 -64
- data/spec/spree_loader_spec.rb +363 -324
- data/spec/spree_method_mapping_spec.rb +218 -214
- data/tasks/config/seed_fu_product_template.erb +15 -15
- data/tasks/config/tidy_config.txt +12 -12
- data/tasks/{excel_generator.rake → export/excel_generator.rake} +101 -78
- data/tasks/file_tasks.rake +36 -36
- data/tasks/import/csv.rake +50 -49
- data/tasks/import/excel.rake +74 -71
- data/tasks/spree/image_load.rake +108 -108
- data/tasks/spree/product_loader.rake +43 -43
- data/tasks/word_to_seedfu.rake +166 -166
- data/test/helper.rb +18 -18
- data/test/test_interact.rb +7 -7
- metadata +16 -8
- data/datashift-0.1.0.gem +0 -0
- data/tasks/db_tasks.rake +0 -66
data/lib/datashift/exceptions.rb
CHANGED
@@ -1,12 +1,12 @@
|
|
1
|
-
module DataShift
|
2
|
-
|
3
|
-
class BadRuby < StandardError; end
|
4
|
-
|
5
|
-
class UnsupportedFileType < StandardError; end
|
6
|
-
|
7
|
-
class MappingDefinitionError < StandardError; end
|
8
|
-
|
9
|
-
class MissingHeadersError < StandardError; end
|
10
|
-
class MissingMandatoryError < StandardError; end
|
11
|
-
|
1
|
+
module DataShift
|
2
|
+
|
3
|
+
class BadRuby < StandardError; end
|
4
|
+
|
5
|
+
class UnsupportedFileType < StandardError; end
|
6
|
+
|
7
|
+
class MappingDefinitionError < StandardError; end
|
8
|
+
|
9
|
+
class MissingHeadersError < StandardError; end
|
10
|
+
class MissingMandatoryError < StandardError; end
|
11
|
+
|
12
12
|
end
|
@@ -1,353 +1,353 @@
|
|
1
|
-
# Copyright:: (c) Autotelik Media Ltd 2011
|
2
|
-
# Author :: Tom Statter
|
3
|
-
# Date :: Jan 2011
|
4
|
-
# License:: MIT
|
5
|
-
#
|
6
|
-
# Details:: This module acts as helpers for defining input/output file formats as classes.
|
7
|
-
#
|
8
|
-
# It provides a simple interface to define a file structure - field by field.
|
9
|
-
#
|
10
|
-
# By defining the structure, following methods and attributes are mixed in :
|
11
|
-
#
|
12
|
-
# An attribute, with accessor for each field/column.
|
13
|
-
# Parse a line, assigning values to each attribute.
|
14
|
-
# Parse an instance of that file line by line, accepts a block in which data can be processed.
|
15
|
-
# Method to split a file by field.
|
16
|
-
# Method to perform replace operations on a file by field and value.
|
17
|
-
#
|
18
|
-
# Either delimited or a fixed width definition can be created via macro-like class methods :
|
19
|
-
#
|
20
|
-
# create_field_definition [field_list]
|
21
|
-
#
|
22
|
-
# create_fixed_definition {field => range }
|
23
|
-
#
|
24
|
-
# Member attributes, with getters and setters, can be added for each field defined above via class method :
|
25
|
-
#
|
26
|
-
# create_field_attr_accessors
|
27
|
-
#
|
28
|
-
# USAGE :
|
29
|
-
#
|
30
|
-
# Create a class that contains definition of a file.
|
31
|
-
#
|
32
|
-
# class ExampleFixedWith < FileDefinitionBase
|
33
|
-
# create_fixed_definition(:name => (0..7), :value => (8..15), :ccy => (16..18), :dr_or_cr => (19..19) )
|
34
|
-
#
|
35
|
-
# create_field_attr_accessors
|
36
|
-
# end
|
37
|
-
#
|
38
|
-
# class ExampleCSV < FileDefinitionBase
|
39
|
-
# create_field_definition %w{abc def ghi jkl}
|
40
|
-
#
|
41
|
-
# create_field_attr_accessors
|
42
|
-
# end
|
43
|
-
#
|
44
|
-
# Any instance can then be used to parse the defined file type, with each field or column value
|
45
|
-
# being assigned automatically to the associated instance variable.
|
46
|
-
#
|
47
|
-
# line = '1,2,3,4'
|
48
|
-
# x = ExampleCSV.new( line )
|
49
|
-
#
|
50
|
-
# assert x.responds_to? :jkl
|
51
|
-
# assert_equal x.abc, '1'
|
52
|
-
# assert_equal x.jkl.to_i, 4
|
53
|
-
#
|
54
|
-
module FileDefinitions
|
55
|
-
|
56
|
-
include Enumerable
|
57
|
-
|
58
|
-
attr_accessor :key
|
59
|
-
attr_accessor :current_line
|
60
|
-
|
61
|
-
# Set the delimiter to use when splitting a line - can be either a String, or a Regexp
|
62
|
-
attr_writer :field_delim
|
63
|
-
|
64
|
-
def initialize( line = nil )
|
65
|
-
@key = String.new
|
66
|
-
parse(line) unless line.nil?
|
67
|
-
end
|
68
|
-
|
69
|
-
def self.included(base)
|
70
|
-
base.extend(ClassMethods)
|
71
|
-
subclasses << base
|
72
|
-
end
|
73
|
-
|
74
|
-
def self.subclasses
|
75
|
-
@subclasses ||=[]
|
76
|
-
end
|
77
|
-
|
78
|
-
|
79
|
-
# Return the field delimiter used when splitting a line
|
80
|
-
def field_delim
|
81
|
-
@field_delim || ','
|
82
|
-
end
|
83
|
-
|
84
|
-
# Parse each line of a file based on the field definition, yields self for each successive line
|
85
|
-
#
|
86
|
-
def each( file )
|
87
|
-
File::new(file).each_line do |line|
|
88
|
-
parse( line )
|
89
|
-
yield self
|
90
|
-
end
|
91
|
-
end
|
92
|
-
|
93
|
-
def fields
|
94
|
-
@fields = self.class.field_definition.collect {|f| instance_variable_get "@#{f}" }
|
95
|
-
@fields
|
96
|
-
end
|
97
|
-
|
98
|
-
def to_s
|
99
|
-
fields.join(',')
|
100
|
-
end
|
101
|
-
|
102
|
-
module ClassMethods
|
103
|
-
|
104
|
-
# Helper to generate methods to store and return the complete list of fields
|
105
|
-
# in this File definition (also creates member @field_definition) and parse a line.
|
106
|
-
#
|
107
|
-
# e.g create_field_definition %w{ trade_id drOrCr ccy costCentre postingDate amount }
|
108
|
-
#
|
109
|
-
def create_field_definition( *fields )
|
110
|
-
instance_eval <<-end_eval
|
111
|
-
@field_definition ||= %w{ #{fields.join(' ')} }
|
112
|
-
def field_definition
|
113
|
-
@field_definition
|
114
|
-
end
|
115
|
-
end_eval
|
116
|
-
|
117
|
-
class_eval <<-end_eval
|
118
|
-
def parse( line )
|
119
|
-
@current_line = line
|
120
|
-
before_parse if respond_to? :before_parse
|
121
|
-
@current_line.split(field_delim()).each_with_index {|x, i| instance_variable_set(\"@\#{self.class.field_definition[i]}\", x) }
|
122
|
-
after_parse if respond_to? :after_parse
|
123
|
-
generate_key if respond_to? :generate_key
|
124
|
-
end
|
125
|
-
end_eval
|
126
|
-
end
|
127
|
-
|
128
|
-
def add_field(field, add_accessor = true)
|
129
|
-
@field_definition ||= []
|
130
|
-
@field_definition << field.to_s
|
131
|
-
attr_accessor field if(add_accessor)
|
132
|
-
end
|
133
|
-
|
134
|
-
|
135
|
-
# Helper to generate methods that return the complete list of fixed width fields
|
136
|
-
# and associated ranges in this File definition, and parse a line.
|
137
|
-
# e.g create_field_definition %w{ trade_id drOrCr ccy costCentre postingDate amount }
|
138
|
-
#
|
139
|
-
def create_fixed_definition( field_range_map )
|
140
|
-
raise ArgumentError.new('Please supply hash to create_fixed_definition') unless field_range_map.is_a? Hash
|
141
|
-
|
142
|
-
keys = field_range_map.keys.collect(&:to_s)
|
143
|
-
string_map = Hash[*keys.zip(field_range_map.values).flatten]
|
144
|
-
|
145
|
-
instance_eval <<-end_eval
|
146
|
-
def fixed_definition
|
147
|
-
@fixed_definition ||= #{string_map.inspect}
|
148
|
-
@fixed_definition
|
149
|
-
end
|
150
|
-
end_eval
|
151
|
-
|
152
|
-
instance_eval <<-end_eval
|
153
|
-
def field_definition
|
154
|
-
@field_definition ||= %w{ #{keys.join(' ')} }
|
155
|
-
@field_definition
|
156
|
-
end
|
157
|
-
end_eval
|
158
|
-
|
159
|
-
class_eval <<-end_eval
|
160
|
-
def parse( line )
|
161
|
-
@current_line = line
|
162
|
-
before_parse if respond_to? :before_parse
|
163
|
-
self.class.fixed_definition.each do |key, range|
|
164
|
-
instance_variable_set(\"@\#{key}\", @current_line[range])
|
165
|
-
end
|
166
|
-
after_parse if respond_to? :after_parse
|
167
|
-
generate_key if respond_to? :generate_key
|
168
|
-
end
|
169
|
-
end_eval
|
170
|
-
|
171
|
-
end
|
172
|
-
|
173
|
-
# Create accessors for each field
|
174
|
-
def create_field_attr_accessors
|
175
|
-
self.field_definition.each {|f| attr_accessor f}
|
176
|
-
end
|
177
|
-
|
178
|
-
|
179
|
-
###############################
|
180
|
-
# PARSING + FILE MANIPULATION #
|
181
|
-
###############################
|
182
|
-
|
183
|
-
# Parse a complete file and return array of self, one per line
|
184
|
-
def parse_file( file, options = {} )
|
185
|
-
limit = options[:limit]
|
186
|
-
count = 0
|
187
|
-
lines = []
|
188
|
-
File::new(file).each_line do |line|
|
189
|
-
break if limit && ((count += 1) > limit)
|
190
|
-
lines << self.new( line )
|
191
|
-
end
|
192
|
-
lines
|
193
|
-
end
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
# Split a file, whose field definition is represented by self,
|
198
|
-
# into seperate streams, based on the values of one if it's fields.
|
199
|
-
#
|
200
|
-
# Writes the results, one file per split stream, to directory specified by output_path
|
201
|
-
#
|
202
|
-
# Options:
|
203
|
-
#
|
204
|
-
# :keys => Also write split files of the key fields
|
205
|
-
#
|
206
|
-
# :filter => Optional Regular Expression to act as filter be applid to the field.
|
207
|
-
# For example split by Ccy but filter to only include certain ccys pass
|
208
|
-
# filter => '[GBP|USD]'
|
209
|
-
#
|
210
|
-
def split_on_write( file_name, field, output_path, options = {} )
|
211
|
-
|
212
|
-
path = output_path || '.'
|
213
|
-
|
214
|
-
filtered = split_on( file_name, field, options )
|
215
|
-
|
216
|
-
unless filtered.empty?
|
217
|
-
log :info, "Writing seperate streams to #{path}"
|
218
|
-
|
219
|
-
filtered.each { |strm, objects| RecsBase::write( {"keys_#{field}_#{strm}.csv" => objects.collect(&:key).join("\n")}, path) } if(options.key?(:keys))
|
220
|
-
|
221
|
-
filtered.each { |strm, objects| RecsBase::write( {"#{field}_#{strm}.csv" => objects.collect(&:current_line).join("\n")}, path) }
|
222
|
-
end
|
223
|
-
end
|
224
|
-
|
225
|
-
# Split a file, whose field definition is represented by self,
|
226
|
-
# into seperate streams, based on one if it's fields.
|
227
|
-
#
|
228
|
-
# Returns a map of Field value => File def object
|
229
|
-
#
|
230
|
-
# We return the File Def object as this is now enriched, e.g with key fields, compared to the raw file.
|
231
|
-
#
|
232
|
-
# Users can get at the raw line simply by calling the line() method on File Def object
|
233
|
-
#
|
234
|
-
# Options:
|
235
|
-
#
|
236
|
-
# :output_path => directory to write the individual streams files to
|
237
|
-
#
|
238
|
-
# :filter => Optional Regular Expression to act as filter be applid to the field.
|
239
|
-
# For example split by Ccy but filter to only include certain ccys pass
|
240
|
-
# filter => 'GBP|USD|EUR'
|
241
|
-
#
|
242
|
-
def split_on( file_name, field, options = {} )
|
243
|
-
|
244
|
-
regex = options[:filter] ? Regexp.new(options[:filter]) : nil
|
245
|
-
|
246
|
-
log :debug, "Using REGEX: #{regex.inspect}" if regex
|
247
|
-
|
248
|
-
filtered = {}
|
249
|
-
|
250
|
-
if( self.new.respond_to?(field) )
|
251
|
-
|
252
|
-
log :info, "Splitting on #{field}"
|
253
|
-
|
254
|
-
File.open( file_name ) do |t|
|
255
|
-
t.each do |line|
|
256
|
-
next unless(line && line.chomp!)
|
257
|
-
x = self.new(line)
|
258
|
-
|
259
|
-
value = x.send( field.to_sym ) # the actual field value from the specified field column
|
260
|
-
next if value.nil?
|
261
|
-
|
262
|
-
if( regex.nil? || value.match(regex) )
|
263
|
-
filtered[value] ? filtered[value] << x : filtered[value] = [x]
|
264
|
-
end
|
265
|
-
end
|
266
|
-
end
|
267
|
-
else
|
268
|
-
log :warn, "Field [#{field}] nor defined for file definition #{self.class.name}"
|
269
|
-
end
|
270
|
-
|
271
|
-
if( options[:sort])
|
272
|
-
filtered.values.each( &:sort )
|
273
|
-
return filtered
|
274
|
-
end
|
275
|
-
return filtered
|
276
|
-
end
|
277
|
-
|
278
|
-
# Open and parse a file, replacing a value in the specfied field.
|
279
|
-
# Does not update the file itself. Does not write a new output file.
|
280
|
-
#
|
281
|
-
# Returns :
|
282
|
-
# 1) full collection of updated lines
|
283
|
-
# 2) collection of file def objects (self), with updated value.
|
284
|
-
#
|
285
|
-
# Finds values matching old_value in given map
|
286
|
-
#
|
287
|
-
# Replaces matches with new_value in map.
|
288
|
-
#
|
289
|
-
# Accepts more than one field, if files is either and array of strings
|
290
|
-
# or comma seperated list of fields.
|
291
|
-
#
|
292
|
-
def file_set_field_by_map( file_name, fields, value_map, regex = nil )
|
293
|
-
|
294
|
-
lines, objects = [],[]
|
295
|
-
|
296
|
-
if fields.is_a?(Array)
|
297
|
-
attribs = fields
|
298
|
-
else
|
299
|
-
attribs = "#{fields}".split(',')
|
300
|
-
end
|
301
|
-
|
302
|
-
attribs.collect! do |attrib|
|
303
|
-
raise ArgumentError.new("Field: #{attrib} is not a field on #{self.class.name}") unless self.new.respond_to?(attrib)
|
304
|
-
end
|
305
|
-
|
306
|
-
log :info, "#{self.class.name} - updating field(s) #{fields} in #{file_name}"
|
307
|
-
|
308
|
-
File.open( file_name ) do |t|
|
309
|
-
t.each do |line|
|
310
|
-
if line.chomp.empty?
|
311
|
-
lines << line
|
312
|
-
objects << self.new
|
313
|
-
next
|
314
|
-
end
|
315
|
-
x = self.new(line)
|
316
|
-
|
317
|
-
attribs.each do |a|
|
318
|
-
old_value = x.instance_variable_get( "@#{a}" )
|
319
|
-
x.instance_variable_set( "@#{a}", value_map[old_value] ) if value_map[old_value] || (regex && old_value.keys.detect {|k| k.match(regx) })
|
320
|
-
end
|
321
|
-
|
322
|
-
objects << x
|
323
|
-
lines << x.to_s
|
324
|
-
end
|
325
|
-
end
|
326
|
-
|
327
|
-
return lines, objects
|
328
|
-
end
|
329
|
-
end # END class methods
|
330
|
-
|
331
|
-
# Open and parse a file, replacing a value in the specfied field.
|
332
|
-
# Does not update the file itself. Does not write a new output file.
|
333
|
-
#
|
334
|
-
# Returns :
|
335
|
-
# 1) full collection of updated lines
|
336
|
-
# 2) collection of file def objects (self), with updated value.
|
337
|
-
#
|
338
|
-
# Finds values matching old_value, and also accepts an optional regex for more powerful
|
339
|
-
# matching strategies of values on the specfified field.
|
340
|
-
#
|
341
|
-
# Replaces matches with new_value.
|
342
|
-
#
|
343
|
-
# Accepts more than one field, if files is either and array of strings
|
344
|
-
# or comma seperated list of fields.
|
345
|
-
#
|
346
|
-
def file_set_field( file_name, field, old_value, new_value, regex = nil )
|
347
|
-
|
348
|
-
map = {old_value => new_value}
|
349
|
-
|
350
|
-
return file_set_field_by_map(file_name, field, map, regex)
|
351
|
-
end
|
352
|
-
|
353
|
-
end
|
1
|
+
# Copyright:: (c) Autotelik Media Ltd 2011
|
2
|
+
# Author :: Tom Statter
|
3
|
+
# Date :: Jan 2011
|
4
|
+
# License:: MIT
|
5
|
+
#
|
6
|
+
# Details:: This module acts as helpers for defining input/output file formats as classes.
|
7
|
+
#
|
8
|
+
# It provides a simple interface to define a file structure - field by field.
|
9
|
+
#
|
10
|
+
# By defining the structure, following methods and attributes are mixed in :
|
11
|
+
#
|
12
|
+
# An attribute, with accessor for each field/column.
|
13
|
+
# Parse a line, assigning values to each attribute.
|
14
|
+
# Parse an instance of that file line by line, accepts a block in which data can be processed.
|
15
|
+
# Method to split a file by field.
|
16
|
+
# Method to perform replace operations on a file by field and value.
|
17
|
+
#
|
18
|
+
# Either delimited or a fixed width definition can be created via macro-like class methods :
|
19
|
+
#
|
20
|
+
# create_field_definition [field_list]
|
21
|
+
#
|
22
|
+
# create_fixed_definition {field => range }
|
23
|
+
#
|
24
|
+
# Member attributes, with getters and setters, can be added for each field defined above via class method :
|
25
|
+
#
|
26
|
+
# create_field_attr_accessors
|
27
|
+
#
|
28
|
+
# USAGE :
|
29
|
+
#
|
30
|
+
# Create a class that contains definition of a file.
|
31
|
+
#
|
32
|
+
# class ExampleFixedWith < FileDefinitionBase
|
33
|
+
# create_fixed_definition(:name => (0..7), :value => (8..15), :ccy => (16..18), :dr_or_cr => (19..19) )
|
34
|
+
#
|
35
|
+
# create_field_attr_accessors
|
36
|
+
# end
|
37
|
+
#
|
38
|
+
# class ExampleCSV < FileDefinitionBase
|
39
|
+
# create_field_definition %w{abc def ghi jkl}
|
40
|
+
#
|
41
|
+
# create_field_attr_accessors
|
42
|
+
# end
|
43
|
+
#
|
44
|
+
# Any instance can then be used to parse the defined file type, with each field or column value
|
45
|
+
# being assigned automatically to the associated instance variable.
|
46
|
+
#
|
47
|
+
# line = '1,2,3,4'
|
48
|
+
# x = ExampleCSV.new( line )
|
49
|
+
#
|
50
|
+
# assert x.respond_to? :jkl
|
51
|
+
# assert_equal x.abc, '1'
|
52
|
+
# assert_equal x.jkl.to_i, 4
|
53
|
+
#
|
54
|
+
module FileDefinitions
|
55
|
+
|
56
|
+
include Enumerable
|
57
|
+
|
58
|
+
attr_accessor :key
|
59
|
+
attr_accessor :current_line
|
60
|
+
|
61
|
+
# Set the delimiter to use when splitting a line - can be either a String, or a Regexp
|
62
|
+
attr_writer :field_delim
|
63
|
+
|
64
|
+
def initialize( line = nil )
|
65
|
+
@key = String.new
|
66
|
+
parse(line) unless line.nil?
|
67
|
+
end
|
68
|
+
|
69
|
+
def self.included(base)
|
70
|
+
base.extend(ClassMethods)
|
71
|
+
subclasses << base
|
72
|
+
end
|
73
|
+
|
74
|
+
def self.subclasses
|
75
|
+
@subclasses ||=[]
|
76
|
+
end
|
77
|
+
|
78
|
+
|
79
|
+
# Return the field delimiter used when splitting a line
|
80
|
+
def field_delim
|
81
|
+
@field_delim || ','
|
82
|
+
end
|
83
|
+
|
84
|
+
# Parse each line of a file based on the field definition, yields self for each successive line
|
85
|
+
#
|
86
|
+
def each( file )
|
87
|
+
File::new(file).each_line do |line|
|
88
|
+
parse( line )
|
89
|
+
yield self
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
def fields
|
94
|
+
@fields = self.class.field_definition.collect {|f| instance_variable_get "@#{f}" }
|
95
|
+
@fields
|
96
|
+
end
|
97
|
+
|
98
|
+
def to_s
|
99
|
+
fields.join(',')
|
100
|
+
end
|
101
|
+
|
102
|
+
module ClassMethods
|
103
|
+
|
104
|
+
# Helper to generate methods to store and return the complete list of fields
|
105
|
+
# in this File definition (also creates member @field_definition) and parse a line.
|
106
|
+
#
|
107
|
+
# e.g create_field_definition %w{ trade_id drOrCr ccy costCentre postingDate amount }
|
108
|
+
#
|
109
|
+
def create_field_definition( *fields )
|
110
|
+
instance_eval <<-end_eval
|
111
|
+
@field_definition ||= %w{ #{fields.join(' ')} }
|
112
|
+
def field_definition
|
113
|
+
@field_definition
|
114
|
+
end
|
115
|
+
end_eval
|
116
|
+
|
117
|
+
class_eval <<-end_eval
|
118
|
+
def parse( line )
|
119
|
+
@current_line = line
|
120
|
+
before_parse if respond_to? :before_parse
|
121
|
+
@current_line.split(field_delim()).each_with_index {|x, i| instance_variable_set(\"@\#{self.class.field_definition[i]}\", x) }
|
122
|
+
after_parse if respond_to? :after_parse
|
123
|
+
generate_key if respond_to? :generate_key
|
124
|
+
end
|
125
|
+
end_eval
|
126
|
+
end
|
127
|
+
|
128
|
+
def add_field(field, add_accessor = true)
|
129
|
+
@field_definition ||= []
|
130
|
+
@field_definition << field.to_s
|
131
|
+
attr_accessor field if(add_accessor)
|
132
|
+
end
|
133
|
+
|
134
|
+
|
135
|
+
# Helper to generate methods that return the complete list of fixed width fields
|
136
|
+
# and associated ranges in this File definition, and parse a line.
|
137
|
+
# e.g create_fixed_definition( :name => (0..7), :value => (8..15), :ccy => (16..18) )
|
138
|
+
#
|
139
|
+
def create_fixed_definition( field_range_map )
|
140
|
+
raise ArgumentError.new('Please supply hash to create_fixed_definition') unless field_range_map.is_a? Hash
|
141
|
+
|
142
|
+
keys = field_range_map.keys.collect(&:to_s)
|
143
|
+
string_map = Hash[*keys.zip(field_range_map.values).flatten]
|
144
|
+
|
145
|
+
instance_eval <<-end_eval
|
146
|
+
def fixed_definition
|
147
|
+
@fixed_definition ||= #{string_map.inspect}
|
148
|
+
@fixed_definition
|
149
|
+
end
|
150
|
+
end_eval
|
151
|
+
|
152
|
+
instance_eval <<-end_eval
|
153
|
+
def field_definition
|
154
|
+
@field_definition ||= %w{ #{keys.join(' ')} }
|
155
|
+
@field_definition
|
156
|
+
end
|
157
|
+
end_eval
|
158
|
+
|
159
|
+
class_eval <<-end_eval
|
160
|
+
def parse( line )
|
161
|
+
@current_line = line
|
162
|
+
before_parse if respond_to? :before_parse
|
163
|
+
self.class.fixed_definition.each do |key, range|
|
164
|
+
instance_variable_set(\"@\#{key}\", @current_line[range])
|
165
|
+
end
|
166
|
+
after_parse if respond_to? :after_parse
|
167
|
+
generate_key if respond_to? :generate_key
|
168
|
+
end
|
169
|
+
end_eval
|
170
|
+
|
171
|
+
end
|
172
|
+
|
173
|
+
# Create accessors for each field
|
174
|
+
def create_field_attr_accessors
|
175
|
+
self.field_definition.each {|f| attr_accessor f}
|
176
|
+
end
|
177
|
+
|
178
|
+
|
179
|
+
###############################
|
180
|
+
# PARSING + FILE MANIPULATION #
|
181
|
+
###############################
|
182
|
+
|
183
|
+
# Parse a complete file and return array of self, one per line
|
184
|
+
def parse_file( file, options = {} )
|
185
|
+
limit = options[:limit]
|
186
|
+
count = 0
|
187
|
+
lines = []
|
188
|
+
File::new(file).each_line do |line|
|
189
|
+
break if limit && ((count += 1) > limit)
|
190
|
+
lines << self.new( line )
|
191
|
+
end
|
192
|
+
lines
|
193
|
+
end
|
194
|
+
|
195
|
+
|
196
|
+
|
197
|
+
# Split a file, whose field definition is represented by self,
|
198
|
+
# into separate streams, based on the values of one of its fields.
|
199
|
+
#
|
200
|
+
# Writes the results, one file per split stream, to directory specified by output_path
|
201
|
+
#
|
202
|
+
# Options:
|
203
|
+
#
|
204
|
+
# :keys => Also write split files of the key fields
|
205
|
+
#
|
206
|
+
# :filter => Optional Regular Expression to act as a filter applied to the field.
|
207
|
+
# For example split by Ccy but filter to only include certain ccys pass
|
208
|
+
# filter => 'GBP|USD'
|
209
|
+
#
|
210
|
+
def split_on_write( file_name, field, output_path, options = {} )
|
211
|
+
|
212
|
+
path = output_path || '.'
|
213
|
+
|
214
|
+
filtered = split_on( file_name, field, options )
|
215
|
+
|
216
|
+
unless filtered.empty?
|
217
|
+
log :info, "Writing seperate streams to #{path}"
|
218
|
+
|
219
|
+
filtered.each { |strm, objects| RecsBase::write( {"keys_#{field}_#{strm}.csv" => objects.collect(&:key).join("\n")}, path) } if(options.key?(:keys))
|
220
|
+
|
221
|
+
filtered.each { |strm, objects| RecsBase::write( {"#{field}_#{strm}.csv" => objects.collect(&:current_line).join("\n")}, path) }
|
222
|
+
end
|
223
|
+
end
|
224
|
+
|
225
|
+
# Split a file, whose field definition is represented by self,
|
226
|
+
# into separate streams, based on one of its fields.
|
227
|
+
#
|
228
|
+
# Returns a map of Field value => Array of File def objects
|
229
|
+
#
|
230
|
+
# We return the File Def object as this is now enriched, e.g with key fields, compared to the raw file.
|
231
|
+
#
|
232
|
+
# Users can get at the raw line simply by calling current_line on the File Def object
|
233
|
+
#
|
234
|
+
# Options:
|
235
|
+
#
|
236
|
+
# :output_path => directory to write the individual streams files to
|
237
|
+
#
|
238
|
+
# :filter => Optional Regular Expression to act as a filter applied to the field.
|
239
|
+
# For example split by Ccy but filter to only include certain ccys pass
|
240
|
+
# filter => 'GBP|USD|EUR'
|
241
|
+
#
|
242
|
+
def split_on( file_name, field, options = {} )
|
243
|
+
|
244
|
+
regex = options[:filter] ? Regexp.new(options[:filter]) : nil
|
245
|
+
|
246
|
+
log :debug, "Using REGEX: #{regex.inspect}" if regex
|
247
|
+
|
248
|
+
filtered = {}
|
249
|
+
|
250
|
+
if( self.new.respond_to?(field) )
|
251
|
+
|
252
|
+
log :info, "Splitting on #{field}"
|
253
|
+
|
254
|
+
File.open( file_name ) do |t|
|
255
|
+
t.each do |line|
|
256
|
+
next unless(line && line.chomp!)
|
257
|
+
x = self.new(line)
|
258
|
+
|
259
|
+
value = x.send( field.to_sym ) # the actual field value from the specified field column
|
260
|
+
next if value.nil?
|
261
|
+
|
262
|
+
if( regex.nil? || value.match(regex) )
|
263
|
+
filtered[value] ? filtered[value] << x : filtered[value] = [x]
|
264
|
+
end
|
265
|
+
end
|
266
|
+
end
|
267
|
+
else
|
268
|
+
log :warn, "Field [#{field}] nor defined for file definition #{self.class.name}"
|
269
|
+
end
|
270
|
+
|
271
|
+
if( options[:sort])
|
272
|
+
filtered.values.each( &:sort )
|
273
|
+
return filtered
|
274
|
+
end
|
275
|
+
return filtered
|
276
|
+
end
|
277
|
+
|
278
|
+
# Open and parse a file, replacing a value in the specified field.
|
279
|
+
# Does not update the file itself. Does not write a new output file.
|
280
|
+
#
|
281
|
+
# Returns :
|
282
|
+
# 1) full collection of updated lines
|
283
|
+
# 2) collection of file def objects (self), with updated value.
|
284
|
+
#
|
285
|
+
# Finds values matching old_value in given map
|
286
|
+
#
|
287
|
+
# Replaces matches with new_value in map.
|
288
|
+
#
|
289
|
+
# Accepts more than one field, if fields is either an array of strings
|
290
|
+
# or comma separated list of fields.
|
291
|
+
#
|
292
|
+
def file_set_field_by_map( file_name, fields, value_map, regex = nil )
|
293
|
+
|
294
|
+
lines, objects = [],[]
|
295
|
+
|
296
|
+
if fields.is_a?(Array)
|
297
|
+
attribs = fields
|
298
|
+
else
|
299
|
+
attribs = "#{fields}".split(',')
|
300
|
+
end
|
301
|
+
|
302
|
+
attribs.collect! do |attrib|
|
303
|
+
raise ArgumentError.new("Field: #{attrib} is not a field on #{self.class.name}") unless self.new.respond_to?(attrib)
|
304
|
+
end
|
305
|
+
|
306
|
+
log :info, "#{self.class.name} - updating field(s) #{fields} in #{file_name}"
|
307
|
+
|
308
|
+
File.open( file_name ) do |t|
|
309
|
+
t.each do |line|
|
310
|
+
if line.chomp.empty?
|
311
|
+
lines << line
|
312
|
+
objects << self.new
|
313
|
+
next
|
314
|
+
end
|
315
|
+
x = self.new(line)
|
316
|
+
|
317
|
+
attribs.each do |a|
|
318
|
+
old_value = x.instance_variable_get( "@#{a}" )
|
319
|
+
x.instance_variable_set( "@#{a}", value_map[old_value] ) if value_map[old_value] || (regex && old_value.keys.detect {|k| k.match(regx) })
|
320
|
+
end
|
321
|
+
|
322
|
+
objects << x
|
323
|
+
lines << x.to_s
|
324
|
+
end
|
325
|
+
end
|
326
|
+
|
327
|
+
return lines, objects
|
328
|
+
end
|
329
|
+
end # END class methods
|
330
|
+
|
331
|
+
# Open and parse a file, replacing a value in the specified field.
|
332
|
+
# Does not update the file itself. Does not write a new output file.
|
333
|
+
#
|
334
|
+
# Returns :
|
335
|
+
# 1) full collection of updated lines
|
336
|
+
# 2) collection of file def objects (self), with updated value.
|
337
|
+
#
|
338
|
+
# Finds values matching old_value, and also accepts an optional regex for more powerful
|
339
|
+
# matching strategies of values on the specified field.
|
340
|
+
#
|
341
|
+
# Replaces matches with new_value.
|
342
|
+
#
|
343
|
+
# Accepts more than one field, if field is either an array of strings
|
344
|
+
# or comma separated list of fields.
|
345
|
+
#
|
346
|
+
def file_set_field( file_name, field, old_value, new_value, regex = nil )
|
347
|
+
|
348
|
+
map = {old_value => new_value}
|
349
|
+
|
350
|
+
return file_set_field_by_map(file_name, field, map, regex)
|
351
|
+
end
|
352
|
+
|
353
|
+
end
|