flat_filer 0.0.17

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. data/lib/core_extensions.rb +20 -0
  2. data/lib/flat_file.rb +530 -0
  3. metadata +54 -0
@@ -0,0 +1,20 @@
1
+ class Object
2
+ # An object is blank if it's nil, false, empty, zero, or a string of only whitespace or only zeros.
3
+ # For example, "", " ", '0000', nil, [], 0, 0.0 and {} are blank.
4
+ #
5
+ # This simplifies
6
+ # if !address.nil? && !address.empty?
7
+ # to
8
+ # if !address.blank?
9
+ def blank?
10
+ if respond_to?(:empty?) && respond_to?(:strip)
11
+ empty? or strip.empty? or gsub('0', '').empty?
12
+ elsif respond_to?(:empty?)
13
+ empty?
14
+ elsif respond_to?(:zero?)
15
+ zero?
16
+ else
17
+ !self
18
+ end
19
+ end
20
+ end
data/lib/flat_file.rb ADDED
@@ -0,0 +1,530 @@
1
+ # A class to help parse and dump flat files
2
+ #
3
+ # This class provides an easy method of dealing with fixed
4
+ # field width flat files.
5
+ #
6
+ # For example a flat file containing information about people that
7
+ # looks like this:
8
+ # 10 20 30
9
+ # 012345678901234567890123456789
10
+ # Walt Whitman 18190531
11
+ # Linus Torvalds 19691228
12
+ #
13
+ # class People < FlatFile
14
+ # add_field :first_name, :width => 10, :filter => :trim
15
+ # add_field :last_name, :width => 10, :filter => :trim
16
+ # add_field :birthday, :width => 8, :filter => lambda { |v| Date.parse(v) }
17
+ # pad :auto_name, :width => 2
18
+ #
19
+ # def self.trim(v)
20
+ # v.strip
21
+ # end
22
+ #
23
+ # p = People.new
24
+ # p.each_record(open('somefile.dat')) do |person|
25
+ # puts "First Name: #{ person.first_name }"
26
+ # puts "Last Name : #{ person.last_name}"
27
+ # puts "Birthday : #{ person.birthday}"
28
+ #
29
+ # puts person.to_s
30
+ # end
31
+ #
32
+ #
33
+ #
34
+ # An alternative method for adding fields is to pass a block to the
35
+ # add_field method. The name is optional, but needs to be set either
36
+ # by passing the name parameter, or in the block that's passed. When
37
+ # you pass a block the first parameter is the FieldDef for the field
38
+ # being constructed for this field.
39
+ #
40
+ # class People < FlatFile
41
+ # add_field { |fd|
42
+ # fd.name = :first_name
43
+ # fd.width = 10
44
+ # fd.add_filter { |v| v.strip }
45
+ # fd.add_formatter { |v| v.strip }
46
+ # .
47
+ # .
48
+ # }
49
+ # end
50
+ #
51
+ # Filters and Formatters
52
+ #
53
+ # Filters touch data on its way in to the flat filer
54
+ # via each_record or create_record.
55
+ #
56
+ # Formatters are used when a record is converted into a
57
+ # string using to_s.
58
+ #
59
+ # Structurally, filters and formatters can be lambdas, code
60
+ # blocks, or symbols referencing methods.
61
+ #
62
+ # There's an expectation on the part of formatters of the
63
+ # type of a field value. This means that the programmer
64
+ # needs to set the value of a field as a type that the formatter
65
+ # won't bork on.
66
+ #
67
+ # A good argument can be made to change filtering to happen any
68
+ # time a field value is assigned. I've decided to not take this
69
+ # route because it'll make writing filters more complex.
70
+ #
71
+ # An example of this might be a date field. If you've built up
72
+ # a date field where a string read from a file is marshalled into
73
+ # a Date object. If you assign a string to that field and then
74
+ # attempt to export to a file you may run into problems. This is
75
+ # because your formatters may not be resilient enough to handle
76
+ # unexpected types.
77
+ #
78
+ # Until we build this into the system, write resilient formatters
79
+ # OR take risks. Practically speaking, if your system is stable
80
+ # with respect to input/output you're probably going to be fine.
81
+ #
82
+ # If the filter were run every time a field value is assigned
83
+ # to a record, then the filter will need to check the value being
84
+ # passed to it and then make a filtering decision based on that.
85
+ # This seemed pretty unattractive to me. So it's expected that
86
+ # when creating records with new_record, that you assign field
87
+ # values in the format that the formatter expects them to be in.
88
+ #
89
+ # Essentially, robustness needed to be in either the filter or the formatter;
90
+ # due to laziness, I chose the formatter.
91
+ #
92
+ # Generally this is just anything that can have to_s called
93
+ # on it, but if the filter does anything special, be cognizant
94
+ # of that when assigning values into fields.
95
+ #
96
+ # Class Organization
97
+ #
98
+ # add_field and pad add FieldDef objects to an array. This
99
+ # array represents the fields in a record. Each FieldDef object contains
100
+ # information about the field such as its name, and how to filter
101
+ # and format the field.
102
+ #
103
+ # add_field also adds to a variable that holds a pack format. This is
104
+ # how the records are parsed and assembled.
105
+ require File.dirname(__FILE__) + "/../lib/core_extensions"
106
+ class FlatFile
107
+
108
+ class FlatFileException < Exception; end
109
+ class ShortRecordError < FlatFileException; end
110
+ class LongRecordError < FlatFileException; end
111
+ class RecordLengthError < FlatFileException; end
112
+
113
+ # A field definition tracks information that's necessary for
114
+ # FlatFile to process a particular field. This is typically
115
+ # added to a subclass of FlatFile like so:
116
+ #
117
+ # class SomeFile < FlatFile
118
+ # add_field :some_field_name, :width => 35
119
+ # end
120
+ #
121
+ class FieldDef
122
+ attr :name, true
123
+ attr :width, true
124
+ attr :filters, true
125
+ attr :formatters, true
126
+ attr :file_klass, true
127
+ attr :padding, true
128
+ attr :map_in_proc, true
129
+ attr :aggressive, true
130
+
131
+ # Create a new FieldDef, having name and width. klass is a reference to the FlatFile
132
+ # subclass that contains this field definition. This reference is needed when calling
133
+ # filters if they are specified using a symbol.
134
+ #
135
+ # Options can be :padding (if present and a true value, field is marked as a pad field),
136
+ # :width, specify the field width, :formatter, specify a formatter, :filter, specify a
137
+ # filter.
138
+ def initialize(name=null,options={},klass={})
139
+ @name = name
140
+ @width = 10
141
+ @filters = Array.new
142
+ @formatters = Array.new
143
+ @file_klass = klass
144
+ @padding = options.delete(:padding)
145
+
146
+ add_filter(options[:filter]) if options.has_key?(:filter)
147
+ add_formatter(options[:formatter]) if options.has_key?(:formatter)
148
+ @map_in_proc = options[:map_in_proc]
149
+ @width = options[:width] if options.has_key?(:width)
150
+ @aggressive = options[:aggressive] || false
151
+ end
152
+
153
+ # Will return true if the field is a padding field. Padding fields are ignored
154
+ # when doing various things. For example, when you're populating an ActiveRecord
155
+ # model with a record, padding fields are ignored.
156
+ def is_padding?
157
+ @padding
158
+ end
159
+
160
+ # Add a filter. Filters are used for processing field data when a flat file is being
161
+ # processed. For formatting the data when writing a flat file, see add_formatter
162
+ def add_filter(filter=nil,&block) #:nodoc:
163
+ @filters.push(filter) unless filter.nil?
164
+ @filters.push(block) if block_given?
165
+ end
166
+
167
+ # Add a formatter. Formatters are used for formatting a field
168
+ # for rendering a record, or writing it to a file in the desired format.
169
+ def add_formatter(formatter=nil,&block) #:nodoc:
170
+ @formatters.push(formatter) if formatter
171
+ @formatters.push(block) if block_given?
172
+ end
173
+
174
+ # Filters a value based on the filters associated with a
175
+ # FieldDef.
176
+ def pass_through_filters(v) #:nodoc:
177
+ pass_through(@filters,v)
178
+ end
179
+
180
+ # Formats a value based on the formatters associated with a
181
+ # FieldDef.
182
+ def pass_through_formatters(v) #:nodoc:
183
+ pass_through(@formatters,v)
184
+ end
185
+
186
+ #protected
187
+
188
+ def pass_through(what,value) #:nodoc:
189
+ #puts "PASS THROUGH #{what.inspect} => #{value}"
190
+ what.each do |filter|
191
+ value = case
192
+ when filter.is_a?(Symbol)
193
+ #puts "filter is a symbol"
194
+ @file_klass.send(filter,value)
195
+ when filter_block?(filter)
196
+ #puts "filter is a block"
197
+ filter.call(value)
198
+ when filter_class?(filter)
199
+ #puts "filter is a class"
200
+ filter.filter(value)
201
+ else
202
+ #puts "filter not valid, preserving"
203
+ value
204
+ end
205
+ end
206
+ value
207
+ end
208
+
209
+ # Test to see if filter is a filter block. A filter block
210
+ # can be called (using call) and takes one parameter
211
+ def filter_block?(filter) #:nodoc:
212
+ filter.respond_to?('call') && ( filter.arity >= 1 || filter.arity <= -1 )
213
+ end
214
+
215
+ # Test to see if a class is a filter class. A filter class responds
216
+ # to the filter signal (you can call filter on it).
217
+ def filter_class?(filter) #:nodoc:
218
+ filter.respond_to?('filter')
219
+ end
220
+ end
221
+
222
+ # A record abstracts one line or 'record' of a fixed width file.
223
+ # The methods available are the keys of the hash passed to the constructor.
224
+ # For example the call:
225
+ #
226
+ # h = Hash['first_name','Andy','status','Supercool!']
227
+ # r = Record.new(h)
228
+ #
229
+ # would respond to r.first_name, and r.status yielding
230
+ # 'Andy' and 'Supercool!' respectively.
231
+ #
232
+ class Record
233
+ attr_reader :fields
234
+ attr_reader :klass
235
+ attr_reader :line_number
236
+
237
+ # Create a new Record from a hash of fields
238
+ def initialize(klass,fields=Hash.new,line_number = -1,&block)
239
+ @fields = Hash.new()
240
+ @klass = klass
241
+ @line_number = line_number
242
+
243
+ klass_fields = klass.get_subclass_variable('fields')
244
+
245
+ klass_fields.each do |f|
246
+ @fields.store(f.name, "")
247
+ end
248
+
249
+ @fields.merge!(fields)
250
+
251
+ @fields.each_key do |k|
252
+ @fields.delete(k) unless klass.has_field?(k)
253
+ end
254
+
255
+ yield(block, self)if block_given?
256
+
257
+ self
258
+ end
259
+
260
+ def map_in(model)
261
+ @klass.non_pad_fields.each do |f|
262
+ next unless(model.respond_to? "#{f.name}=")
263
+ if f.map_in_proc
264
+ f.map_in_proc.call(model,self)
265
+ else
266
+ model.send("#{f.name}=", send(f.name)) if f.aggressive or model.send(f.name).blank?
267
+ end
268
+ end
269
+ end
270
+
271
+ # Catches method calls and returns field values or raises an Error.
272
+ def method_missing(method,params=nil)
273
+ if(method.to_s.match(/^(.*)=$/))
274
+ if(fields.has_key?($1.to_sym))
275
+ @fields.store($1.to_sym,params)
276
+ else
277
+ raise Exception.new("Unknown method: #{ method }")
278
+ end
279
+ else
280
+ if(fields.has_key? method)
281
+ @fields.fetch(method)
282
+ else
283
+ raise Exception.new("Unknown method: #{ method }")
284
+ end
285
+ end
286
+ end
287
+
288
+ # Returns a string representation of the record suitable for writing to a flat
289
+ # file on disk or other media. The fields are prepared according to the file
290
+ # definition, and any formatters attached to the field definitions.
291
+ def to_s
292
+ klass.fields.map { |field_def|
293
+ field_name = field_def.name.to_s
294
+ v = @fields[ field_name.to_sym ].to_s
295
+
296
+ field_def.pass_through_formatters(
297
+ field_def.is_padding? ? "" : v
298
+ )
299
+ }.pack(klass.pack_format)
300
+ end
301
+
302
+ # Produces a multiline string, one field per line suitable for debugging purposes.
303
+ def debug_string
304
+ str = ""
305
+ klass.fields.each do |f|
306
+ if f.is_padding?
307
+ str << "#{f.name}: \n"
308
+ else
309
+ str << "#{f.name}: #{send(f.name.to_sym)}\n"
310
+ end
311
+ end
312
+
313
+ str
314
+ end
315
+ end
316
+
317
+ # A hash of data stored on behalf of subclasses. One hash
318
+ # key for each subclass.
319
+ @@subclass_data = Hash.new(nil)
320
+
321
+ # Used to generate unique names for pad fields which use :auto_name.
322
+ @@unique_id = 0
323
+
324
+ def next_record(io,&block)
325
+ return nil if io.eof?
326
+ required_line_length = self.class.get_subclass_variable 'width'
327
+ line = io.readline
328
+ line.chop!
329
+ return nil if line.length == 0
330
+ difference = required_line_length - line.length
331
+ raise RecordLengthError.new(
332
+ "length is #{line.length} but should be #{required_line_length}"
333
+ ) unless(difference == 0)
334
+
335
+ if block_given?
336
+ yield(create_record(line, io.lineno), line)
337
+ else
338
+ create_record(line,io.lineno)
339
+ end
340
+ end
341
+
342
+ # Iterate through each record (each line of the data file). The passed
343
+ # block is passed a new Record representing the line.
344
+ #
345
+ # s = SomeFile.new
346
+ # s.each_record(open('/path/to/file')) do |r|
347
+ # puts r.first_name
348
+ # end
349
+ #
350
+ def each_record(io,&block)
351
+ io.each_line do |line|
352
+ required_line_length = self.class.get_subclass_variable 'width'
353
+ #line = io.readline
354
+ line.chop!
355
+ next if line.length == 0
356
+ difference = required_line_length - line.length
357
+ raise RecordLengthError.new(
358
+ "length is #{line.length} but should be #{required_line_length}"
359
+ ) unless(difference == 0)
360
+ yield(create_record(line, io.lineno), line)
361
+ end
362
+ end
363
+
364
+ # create a record from line. The line is one line (or record) read from the
365
+ # text file. The result is a Record object. The object takes signals
366
+ # for each field according to the various fields defined with add_field or
367
+ # variants of it.
368
+ #
369
+ # line_number is an optional line number of the line in a file of records.
370
+ # If line is not in a series of records (lines), omit and it'll be -1 in the
371
+ # resulting record objects. Just make sure you realize this when reporting
372
+ # errors.
373
+ #
374
+ # Both a getter (field_name), and setter (field_name=) are available to the
375
+ # user.
376
+ def create_record(line, line_number = -1) #:nodoc:
377
+ h = Hash.new
378
+
379
+ pack_format = self.class.get_subclass_variable 'pack_format'
380
+ fields = self.class.get_subclass_variable 'fields'
381
+
382
+ f = line.unpack(pack_format)
383
+ (0..(fields.size-1)).map do |index|
384
+ unless fields[index].is_padding?
385
+ h.store fields[index].name, fields[index].pass_through_filters(f[index])
386
+ end
387
+ end
388
+ Record.new(self.class, h, line_number)
389
+ end
390
+
391
+ # Add a field to the FlatFile subclass. Options can include
392
+ #
393
+ # :width - number of characters in field (default 10)
394
+ # :filter - callback, lambda or code block for processing during reading
395
+ # :formatter - callback, lambda, or code block for processing during writing
396
+ #
397
+ # class SomeFile < FlatFile
398
+ # add_field :some_field_name, :width => 35
399
+ # end
400
+ #
401
+ def self.add_field(name=nil, options={},&block)
402
+ options[:width] ||= 10;
403
+
404
+ fields = get_subclass_variable 'fields'
405
+ width = get_subclass_variable 'width'
406
+ pack_format = get_subclass_variable 'pack_format'
407
+
408
+
409
+ fd = FieldDef.new(name,options,self)
410
+ yield(fd) if block_given?
411
+
412
+ fields << fd
413
+ width += fd.width
414
+ pack_format << "A#{fd.width}"
415
+ set_subclass_variable 'width', width
416
+ fd
417
+ end
418
+
419
+ # Add a pad field. To have the name auto generated, use :auto_name for
420
+ # the name parameter. For options see add_field.
421
+ def self.pad(name, options = {})
422
+ fd = self.add_field(
423
+ name.eql?(:auto_name) ? self.new_pad_name : name,
424
+ options
425
+ )
426
+ fd.padding = true
427
+ end
428
+
429
+ def self.new_pad_name #:nodoc:
430
+ "pad_#{ @@unique_id+=1 }".to_sym
431
+ end
432
+
433
+
434
+ # Create a new empty record object conforming to this file.
435
+ #
436
+ #
437
+ def self.new_record(model = nil, &block)
438
+ fields = get_subclass_variable 'fields'
439
+
440
+ record = Record.new(self)
441
+
442
+ fields.map do |f|
443
+ assign_method = "#{f.name}="
444
+ value = model.respond_to?(f.name.to_sym) ? model.send(f.name.to_sym) : ""
445
+ record.send(assign_method, value)
446
+ end
447
+
448
+ if block_given?
449
+ yield block, record
450
+ end
451
+
452
+ record
453
+ end
454
+
455
+ # Return a list of fields for the FlatFile subclass
456
+ def fields
457
+ self.class.fields
458
+ end
459
+
460
+ def self.non_pad_fields
461
+ self.fields.select { |f| not f.is_padding? }
462
+ end
463
+
464
+ def non_pad_fields
465
+ self.non_pad_fields
466
+ end
467
+
468
+ def self.fields
469
+ self.get_subclass_variable 'fields'
470
+ end
471
+
472
+ def self.has_field?(field_name)
473
+
474
+ if self.fields.select { |f| f.name == field_name.to_sym }.length > 0
475
+ true
476
+ else
477
+ false
478
+ end
479
+ end
480
+
481
+ def self.width
482
+ get_subclass_variable 'width'
483
+ end
484
+
485
+ # Return the record length for the FlatFile subclass
486
+ def width
487
+ self.class.width
488
+ end
489
+
490
+ # Returns the pack format which is generated from add_field
491
+ # calls. This format is used to unpack each line and create Records.
492
+ def pack_format
493
+ self.class.get_pack_format
494
+ end
495
+
496
+ def self.pack_format
497
+ get_subclass_variable 'pack_format'
498
+ end
499
+
500
+ protected
501
+
502
+ # Retrieve the subclass data hash for the current class
503
+ def self.subclass_data #:nodoc:
504
+ unless(@@subclass_data.has_key?(self))
505
+ @@subclass_data.store(self, Hash.new)
506
+ end
507
+
508
+ @@subclass_data.fetch(self)
509
+ end
510
+
511
+ # Retrieve a particular subclass variable for this class by its name.
512
+ def self.get_subclass_variable(name) #:nodoc:
513
+ if subclass_data.has_key? name
514
+ subclass_data.fetch name
515
+ end
516
+ end
517
+
518
+ # Set a subclass variable of 'name' to 'value'
519
+ def self.set_subclass_variable(name,value) #:nodoc:
520
+ subclass_data.store name, value
521
+ end
522
+
523
+ # Setup subclass class variables. This initializes the
524
+ # record width, pack format, and fields array
525
+ def self.inherited(s) #:nodoc:
526
+ s.set_subclass_variable('width',0)
527
+ s.set_subclass_variable('pack_format',"")
528
+ s.set_subclass_variable('fields',Array.new)
529
+ end
530
+ end
metadata ADDED
@@ -0,0 +1,54 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: flat_filer
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.17
5
+ platform: ruby
6
+ authors:
7
+ - Andrew Libby
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2008-05-21 00:00:00 -04:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description:
17
+ email: alibby@tangeis.com
18
+ executables: []
19
+
20
+ extensions: []
21
+
22
+ extra_rdoc_files: []
23
+
24
+ files:
25
+ - lib/core_extensions.rb
26
+ - lib/flat_file.rb
27
+ has_rdoc: true
28
+ homepage: http://www.tangeis.com/
29
+ post_install_message:
30
+ rdoc_options: []
31
+
32
+ require_paths:
33
+ - lib
34
+ required_ruby_version: !ruby/object:Gem::Requirement
35
+ requirements:
36
+ - - ">="
37
+ - !ruby/object:Gem::Version
38
+ version: "0"
39
+ version:
40
+ required_rubygems_version: !ruby/object:Gem::Requirement
41
+ requirements:
42
+ - - ">="
43
+ - !ruby/object:Gem::Version
44
+ version: "0"
45
+ version:
46
+ requirements: []
47
+
48
+ rubyforge_project:
49
+ rubygems_version: 1.1.1
50
+ signing_key:
51
+ specification_version: 2
52
+ summary: Library for processing flat files
53
+ test_files: []
54
+