flat_filer 0.0.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. data/lib/core_extensions.rb +20 -0
  2. data/lib/flat_file.rb +530 -0
  3. metadata +54 -0
@@ -0,0 +1,20 @@
1
+ class Object
2
+ # An object is blank if it's nil, empty, or a whitespace string.
3
+ # For example, "", " ", '0000', nil, [], 0, 0.0 and {} are blank.
4
+ #
5
+ # This simplifies
6
+ # if !address.nil? && !address.empty?
7
+ # to
8
+ # if !address.blank?
9
+ def blank?
10
+ if respond_to?(:empty?) && respond_to?(:strip)
11
+ empty? or strip.empty? or gsub('0', '').empty?
12
+ elsif respond_to?(:empty?)
13
+ empty?
14
+ elsif respond_to?(:zero?)
15
+ zero?
16
+ else
17
+ !self
18
+ end
19
+ end
20
+ end
data/lib/flat_file.rb ADDED
@@ -0,0 +1,530 @@
1
+ # A class to help parse and dump flat files
2
+ #
3
+ # This class provides an easy method of dealing with fixed
4
+ # field width flat files.
5
+ #
6
+ # For example a flat file containing information about people that
7
+ # looks like this:
8
+ # 10 20 30
9
+ # 012345678901234567890123456789
10
+ # Walt Whitman 18190531
11
+ # Linus Torvalds 19691228
12
+ #
13
+ # class People < FlatFile
14
+ # add_field :first_name, :width => 10, :filter => :trim
15
+ # add_field :last_name, :width => 10, :filter => :trim
16
+ # add_field :birthday, :width => 8, :filter => lambda { |v| Date.parse(v) }
17
+ # pad :auto_name, :width => 2
18
+ #
19
+ # def self.trim(v)
20
+ # v.strip
21
+ # end
22
+ #
23
+ # p = People.new
24
+ # p.each_record(open('somefile.dat')) do |person|
25
+ # puts "First Name: #{ person.first_name }"
26
+ # puts "Last Name : #{ person.last_name}"
27
+ # puts "Birthday : #{ person.birthday}"
28
+ #
29
+ # puts person.to_s
30
+ # end
31
+ #
32
+ #
33
+ #
34
+ # An alternative method for adding fields is to pass a block to the
35
+ # add_field method. The name is optional, but needs to be set either
36
+ # by passing the name parameter, or in the block that's passed. When
37
+ # you pass a block the first parameter is the FieldDef for the field
38
+ # being constructed for this field.
39
+ #
40
+ # class People < FlatFile
41
+ # add_field { |fd|
42
+ # fd.name = :first_name
43
+ # fd.width = 10
44
+ # fd.add_filter { |v| v.strip }
45
+ # fd.add_formatter { |v| v.strip }
46
+ # .
47
+ # .
48
+ # }
49
+ # end
50
+ #
51
+ # Filters and Formatters
52
+ #
53
+ # Filters touch data on the way in to the flat filer
54
+ # via each_record or create_record.
55
+ #
56
+ # Formatters are used when a record is converted into a
57
+ # string using to_s.
58
+ #
59
+ # Structurally, filters and formatters can be lambdas, code
60
+ # blocks, or symbols referencing methods.
61
+ #
62
+ # There's an expectation on the part of formatters of the
63
+ # type of a field value. This means that the programmer
64
+ # needs to set the value of a field as a type that the formatter
65
+ # won't bork on.
66
+ #
67
+ # A good argument can be made to change filtering to happen any
68
+ # time a field value is assigned. I've decided to not take this
69
+ # route because it'll make writing filters more complex.
70
+ #
71
+ # An example of this might be a date field. If you've built up
72
+ # a date field where a string read from a file is marshalled into
73
+ # a Date object. If you assign a string to that field and then
74
+ # attempt to export to a file you may run into problems. This is
75
+ # because your formatters may not be resilient enough to handle
76
+ # unexpected types.
77
+ #
78
+ # Until we build this into the system, write resilient formatters
79
+ # OR take risks. Practically speaking, if your system is stable
80
+ # with respect to input/output you're probably going to be fine.
81
+ #
82
+ # If the filter were run every time a field value is assigned
83
+ # to a record, then the filter will need to check the value being
84
+ # passed to it and then make a filtering decision based on that.
85
+ # This seemed pretty unattractive to me. So it's expected that
86
+ # when creating records with new_record, that you assign field
87
+ # values in the format that the formatter expects them to be in.
88
+ #
89
+ # Essentially, robustness needed to be in either the filter or the formatter;
90
+ # due to laziness, I chose formatter.
91
+ #
92
+ # Generally this is just anything that can have to_s called
93
+ # on it, but if the filter does anything special, be cognizant
94
+ # of that when assigning values into fields.
95
+ #
96
+ # Class Organization
97
+ #
98
+ # add_field and pad add FieldDef instances to an array. This
99
+ # array represents fields in a record. Each FieldDef instance contains
100
+ # information about the field such as its name, and how to filter
101
+ # and format the field.
102
+ #
103
+ # add_field also adds to a variable that holds a pack format. This is
104
+ # how the records are parsed and assembled.
105
+ require File.dirname(__FILE__) + "/../lib/core_extensions"
106
+ class FlatFile
107
+
108
+ class FlatFileException < Exception; end
109
+ class ShortRecordError < FlatFileException; end
110
+ class LongRecordError < FlatFileException; end
111
+ class RecordLengthError < FlatFileException; end
112
+
113
+ # A field definition tracks information that's necessary for
114
+ # FlatFile to process a particular field. This is typically
115
+ # added to a subclass of FlatFile like so:
116
+ #
117
+ # class SomeFile < FlatFile
118
+ # add_field :some_field_name, :width => 35
119
+ # end
120
+ #
121
+ class FieldDef
122
+ attr :name, true
123
+ attr :width, true
124
+ attr :filters, true
125
+ attr :formatters, true
126
+ attr :file_klass, true
127
+ attr :padding, true
128
+ attr :map_in_proc, true
129
+ attr :aggressive, true
130
+
131
+ # Create a new FieldDef, having name and width. klass is a reference to the FlatFile
132
+ # subclass that contains this field definition. This reference is needed when calling
133
+ # filters if they are specified using a symbol.
134
+ #
135
+ # Options can be :padding (if present and a true value, field is marked as a pad field),
136
+ # :width, specify the field width, :formatter, specify a formatter, :filter, specify a
137
+ # filter.
138
+ def initialize(name=null,options={},klass={})
139
+ @name = name
140
+ @width = 10
141
+ @filters = Array.new
142
+ @formatters = Array.new
143
+ @file_klass = klass
144
+ @padding = options.delete(:padding)
145
+
146
+ add_filter(options[:filter]) if options.has_key?(:filter)
147
+ add_formatter(options[:formatter]) if options.has_key?(:formatter)
148
+ @map_in_proc = options[:map_in_proc]
149
+ @width = options[:width] if options.has_key?(:width)
150
+ @aggressive = options[:aggressive] || false
151
+ end
152
+
153
+ # Will return true if the field is a padding field. Padding fields are ignored
154
+ # when doing various things. For example, when you're populating an ActiveRecord
155
+ # model with a record, padding fields are ignored.
156
+ def is_padding?
157
+ @padding
158
+ end
159
+
160
+ # Add a filter. Filters are used for processing field data when a flat file is being
161
+ # processed. For formatting the data when writing a flat file, see add_formatter.
162
+ def add_filter(filter=nil,&block) #:nodoc:
163
+ @filters.push(filter) unless filter.nil?
164
+ @filters.push(block) if block_given?
165
+ end
166
+
167
+ # Add a formatter. Formatters are used for formatting a field
168
+ # for rendering a record, or writing it to a file in the desired format.
169
+ def add_formatter(formatter=nil,&block) #:nodoc:
170
+ @formatters.push(formatter) if formatter
171
+ @formatters.push(block) if block_given?
172
+ end
173
+
174
+ # Filters a value based on the filters associated with a
175
+ # FieldDef.
176
+ def pass_through_filters(v) #:nodoc:
177
+ pass_through(@filters,v)
178
+ end
179
+
180
+ # Formats a value based on the formatters associated with a
181
+ # FieldDef.
182
+ def pass_through_formatters(v) #:nodoc:
183
+ pass_through(@formatters,v)
184
+ end
185
+
186
+ #protected
187
+
188
+ def pass_through(what,value) #:nodoc:
189
+ #puts "PASS THROUGH #{what.inspect} => #{value}"
190
+ what.each do |filter|
191
+ value = case
192
+ when filter.is_a?(Symbol)
193
+ #puts "filter is a symbol"
194
+ @file_klass.send(filter,value)
195
+ when filter_block?(filter)
196
+ #puts "filter is a block"
197
+ filter.call(value)
198
+ when filter_class?(filter)
199
+ #puts "filter is a class"
200
+ filter.filter(value)
201
+ else
202
+ #puts "filter not valid, preserving"
203
+ value
204
+ end
205
+ end
206
+ value
207
+ end
208
+
209
+ # Test to see if filter is a filter block. A filter block
210
+ # can be called (using call) and takes one parameter
211
+ def filter_block?(filter) #:nodoc:
212
+ filter.respond_to?('call') && ( filter.arity >= 1 || filter.arity <= -1 )
213
+ end
214
+
215
+ # Test to see if a class is a filter class. A filter class responds
216
+ # to the filter signal (you can call filter on it).
217
+ def filter_class?(filter) #:nodoc:
218
+ filter.respond_to?('filter')
219
+ end
220
+ end
221
+
222
+ # A record abstracts one line or 'record' of a fixed width file.
223
+ # The methods available are the keys of the hash passed to the constructor.
224
+ # For example the call:
225
+ #
226
+ # h = Hash['first_name','Andy','status','Supercool!']
227
+ # r = Record.new(h)
228
+ #
229
+ # would respond to r.first_name, and r.status yielding
230
+ # 'Andy' and 'Supercool!' respectively.
231
+ #
232
+ class Record
233
+ attr_reader :fields
234
+ attr_reader :klass
235
+ attr_reader :line_number
236
+
237
+ # Create a new Record from a hash of fields
238
+ def initialize(klass,fields=Hash.new,line_number = -1,&block)
239
+ @fields = Hash.new()
240
+ @klass = klass
241
+ @line_number = line_number
242
+
243
+ klass_fields = klass.get_subclass_variable('fields')
244
+
245
+ klass_fields.each do |f|
246
+ @fields.store(f.name, "")
247
+ end
248
+
249
+ @fields.merge!(fields)
250
+
251
+ @fields.each_key do |k|
252
+ @fields.delete(k) unless klass.has_field?(k)
253
+ end
254
+
255
+ yield(block, self)if block_given?
256
+
257
+ self
258
+ end
259
+
260
+ def map_in(model)
261
+ @klass.non_pad_fields.each do |f|
262
+ next unless(model.respond_to? "#{f.name}=")
263
+ if f.map_in_proc
264
+ f.map_in_proc.call(model,self)
265
+ else
266
+ model.send("#{f.name}=", send(f.name)) if f.aggressive or model.send(f.name).blank?
267
+ end
268
+ end
269
+ end
270
+
271
+ # Catches method calls and returns field values or raises an Error.
272
+ def method_missing(method,params=nil)
273
+ if(method.to_s.match(/^(.*)=$/))
274
+ if(fields.has_key?($1.to_sym))
275
+ @fields.store($1.to_sym,params)
276
+ else
277
+ raise Exception.new("Unknown method: #{ method }")
278
+ end
279
+ else
280
+ if(fields.has_key? method)
281
+ @fields.fetch(method)
282
+ else
283
+ raise Exception.new("Unknown method: #{ method }")
284
+ end
285
+ end
286
+ end
287
+
288
+ # Returns a string representation of the record suitable for writing to a flat
289
+ # file on disk or other media. The fields are prepared according to the file
290
+ # definition, and any formatters attached to the field definitions.
291
+ def to_s
292
+ klass.fields.map { |field_def|
293
+ field_name = field_def.name.to_s
294
+ v = @fields[ field_name.to_sym ].to_s
295
+
296
+ field_def.pass_through_formatters(
297
+ field_def.is_padding? ? "" : v
298
+ )
299
+ }.pack(klass.pack_format)
300
+ end
301
+
302
+ # Produces a multiline string, one field per line suitable for debugging purposes.
303
+ def debug_string
304
+ str = ""
305
+ klass.fields.each do |f|
306
+ if f.is_padding?
307
+ str << "#{f.name}: \n"
308
+ else
309
+ str << "#{f.name}: #{send(f.name.to_sym)}\n"
310
+ end
311
+ end
312
+
313
+ str
314
+ end
315
+ end
316
+
317
+ # A hash of data stored on behalf of subclasses. One hash
318
+ # key for each subclass.
319
+ @@subclass_data = Hash.new(nil)
320
+
321
+ # Used to generate unique names for pad fields which use :auto_name.
322
+ @@unique_id = 0
323
+
324
+ def next_record(io,&block)
325
+ return nil if io.eof?
326
+ required_line_length = self.class.get_subclass_variable 'width'
327
+ line = io.readline
328
+ line.chop!
329
+ return nil if line.length == 0
330
+ difference = required_line_length - line.length
331
+ raise RecordLengthError.new(
332
+ "length is #{line.length} but should be #{required_line_length}"
333
+ ) unless(difference == 0)
334
+
335
+ if block_given?
336
+ yield(create_record(line, io.lineno), line)
337
+ else
338
+ create_record(line,io.lineno)
339
+ end
340
+ end
341
+
342
+ # Iterate through each record (each line of the data file). The passed
343
+ # block is passed a new Record representing the line.
344
+ #
345
+ # s = SomeFile.new
346
+ # s.each_record(open('/path/to/file')) do |r|
347
+ # puts r.first_name
348
+ # end
349
+ #
350
+ def each_record(io,&block)
351
+ io.each_line do |line|
352
+ required_line_length = self.class.get_subclass_variable 'width'
353
+ #line = io.readline
354
+ line.chop!
355
+ next if line.length == 0
356
+ difference = required_line_length - line.length
357
+ raise RecordLengthError.new(
358
+ "length is #{line.length} but should be #{required_line_length}"
359
+ ) unless(difference == 0)
360
+ yield(create_record(line, io.lineno), line)
361
+ end
362
+ end
363
+
364
+ # create a record from line. The line is one line (or record) read from the
365
+ # text file. The resulting record is an object. The object takes signals
366
+ # for each field according to the various fields defined with add_field or
367
+ # variants of it.
368
+ #
369
+ # line_number is an optional line number of the line in a file of records.
370
+ # If line is not in a series of records (lines), omit and it'll be -1 in the
371
+ # resulting record objects. Just make sure you realize this when reporting
372
+ # errors.
373
+ #
374
+ # Both a getter (field_name), and setter (field_name=) are available to the
375
+ # user.
376
+ def create_record(line, line_number = -1) #:nodoc:
377
+ h = Hash.new
378
+
379
+ pack_format = self.class.get_subclass_variable 'pack_format'
380
+ fields = self.class.get_subclass_variable 'fields'
381
+
382
+ f = line.unpack(pack_format)
383
+ (0..(fields.size-1)).map do |index|
384
+ unless fields[index].is_padding?
385
+ h.store fields[index].name, fields[index].pass_through_filters(f[index])
386
+ end
387
+ end
388
+ Record.new(self.class, h, line_number)
389
+ end
390
+
391
+ # Add a field to the FlatFile subclass. Options can include
392
+ #
393
+ # :width - number of characters in field (default 10)
394
+ # :filter - callback, lambda or code block for processing during reading
395
+ # :formatter - callback, lambda, or code block for processing during writing
396
+ #
397
+ # class SomeFile < FlatFile
398
+ # add_field :some_field_name, :width => 35
399
+ # end
400
+ #
401
+ def self.add_field(name=nil, options={},&block)
402
+ options[:width] ||= 10;
403
+
404
+ fields = get_subclass_variable 'fields'
405
+ width = get_subclass_variable 'width'
406
+ pack_format = get_subclass_variable 'pack_format'
407
+
408
+
409
+ fd = FieldDef.new(name,options,self)
410
+ yield(fd) if block_given?
411
+
412
+ fields << fd
413
+ width += fd.width
414
+ pack_format << "A#{fd.width}"
415
+ set_subclass_variable 'width', width
416
+ fd
417
+ end
418
+
419
+ # Add a pad field. To have the name auto generated, use :auto_name for
420
+ # the name parameter. For options see add_field.
421
+ def self.pad(name, options = {})
422
+ fd = self.add_field(
423
+ name.eql?(:auto_name) ? self.new_pad_name : name,
424
+ options
425
+ )
426
+ fd.padding = true
427
+ end
428
+
429
+ def self.new_pad_name #:nodoc:
430
+ "pad_#{ @@unique_id+=1 }".to_sym
431
+ end
432
+
433
+
434
+ # Create a new empty record object conforming to this file.
435
+ #
436
+ #
437
+ def self.new_record(model = nil, &block)
438
+ fields = get_subclass_variable 'fields'
439
+
440
+ record = Record.new(self)
441
+
442
+ fields.map do |f|
443
+ assign_method = "#{f.name}="
444
+ value = model.respond_to?(f.name.to_sym) ? model.send(f.name.to_sym) : ""
445
+ record.send(assign_method, value)
446
+ end
447
+
448
+ if block_given?
449
+ yield block, record
450
+ end
451
+
452
+ record
453
+ end
454
+
455
+ # Return a list of fields for the FlatFile subclass
456
+ def fields
457
+ self.class.fields
458
+ end
459
+
460
+ def self.non_pad_fields
461
+ self.fields.select { |f| not f.is_padding? }
462
+ end
463
+
464
+ def non_pad_fields
465
+ self.non_pad_fields
466
+ end
467
+
468
+ def self.fields
469
+ self.get_subclass_variable 'fields'
470
+ end
471
+
472
+ def self.has_field?(field_name)
473
+
474
+ if self.fields.select { |f| f.name == field_name.to_sym }.length > 0
475
+ true
476
+ else
477
+ false
478
+ end
479
+ end
480
+
481
+ def self.width
482
+ get_subclass_variable 'width'
483
+ end
484
+
485
+ # Return the record length for the FlatFile subclass
486
+ def width
487
+ self.class.width
488
+ end
489
+
490
+ # Returns the pack format which is generated from add_field
491
+ # calls. This format is used to unpack each line and create Records.
492
+ def pack_format
493
+ self.class.get_pack_format
494
+ end
495
+
496
+ def self.pack_format
497
+ get_subclass_variable 'pack_format'
498
+ end
499
+
500
+ protected
501
+
502
+ # Retrieve the subclass data hash for the current class
503
+ def self.subclass_data #:nodoc:
504
+ unless(@@subclass_data.has_key?(self))
505
+ @@subclass_data.store(self, Hash.new)
506
+ end
507
+
508
+ @@subclass_data.fetch(self)
509
+ end
510
+
511
+ # Retrieve a particular subclass variable for this class by its name.
512
+ def self.get_subclass_variable(name) #:nodoc:
513
+ if subclass_data.has_key? name
514
+ subclass_data.fetch name
515
+ end
516
+ end
517
+
518
+ # Set a subclass variable of 'name' to 'value'
519
+ def self.set_subclass_variable(name,value) #:nodoc:
520
+ subclass_data.store name, value
521
+ end
522
+
523
+ # Setup subclass class variables. This initializes the
524
+ # record width, pack format, and fields array
525
+ def self.inherited(s) #:nodoc:
526
+ s.set_subclass_variable('width',0)
527
+ s.set_subclass_variable('pack_format',"")
528
+ s.set_subclass_variable('fields',Array.new)
529
+ end
530
+ end
metadata ADDED
@@ -0,0 +1,54 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: flat_filer
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.17
5
+ platform: ruby
6
+ authors:
7
+ - Andrew Libby
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2008-05-21 00:00:00 -04:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description:
17
+ email: alibby@tangeis.com
18
+ executables: []
19
+
20
+ extensions: []
21
+
22
+ extra_rdoc_files: []
23
+
24
+ files:
25
+ - lib/core_extensions.rb
26
+ - lib/flat_file.rb
27
+ has_rdoc: true
28
+ homepage: http://www.tangeis.com/
29
+ post_install_message:
30
+ rdoc_options: []
31
+
32
+ require_paths:
33
+ - lib
34
+ required_ruby_version: !ruby/object:Gem::Requirement
35
+ requirements:
36
+ - - ">="
37
+ - !ruby/object:Gem::Version
38
+ version: "0"
39
+ version:
40
+ required_rubygems_version: !ruby/object:Gem::Requirement
41
+ requirements:
42
+ - - ">="
43
+ - !ruby/object:Gem::Version
44
+ version: "0"
45
+ version:
46
+ requirements: []
47
+
48
+ rubyforge_project:
49
+ rubygems_version: 1.1.1
50
+ signing_key:
51
+ specification_version: 2
52
+ summary: Library for processing flat files
53
+ test_files: []
54
+