hflr2 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/History.txt ADDED
@@ -0,0 +1,14 @@
1
+ == 1.0.1 / 2010-01-21
2
+
3
+ * Fixed warnings
4
+ * Added better exception handling for badly formatted output data
5
+
6
+
7
+ == 0.11.0 / 2009-08-04
8
+
9
+ * Removed useless files
10
+ * Corrected version number
11
+
12
+ == 1.0.1 / 2011-02-09
13
+ * Added ability to specify file format in Ruby with Range class (see example.rb in /test.)
14
+
data/README.txt ADDED
@@ -0,0 +1,45 @@
1
+ = HFLR
2
+
3
+ * http://ruff.rubyforge.org
4
+
5
+ == Description:
6
+
7
+ HFLR -- Hierarchical Fixed Length Records
8
+
9
+ Allows you to read and write files of fixed-width records when the file contains
10
+ one or more types of record.
11
+
12
+ Install with 'gem install hflr'
13
+
14
+
15
+
16
+
17
+ See the tests and examples bundled with this gem.
18
+
19
+
20
+
21
+
22
+ == LICENSE:
23
+
24
+ (The MIT License)
25
+
26
+ Copyright (c) 2009 Colin C. Davis
27
+
28
+ Permission is hereby granted, free of charge, to any person obtaining
29
+ a copy of this software and associated documentation files (the
30
+ 'Software'), to deal in the Software without restriction, including
31
+ without limitation the rights to use, copy, modify, merge, publish,
32
+ distribute, sublicense, and/or sell copies of the Software, and to
33
+ permit persons to whom the Software is furnished to do so, subject to
34
+ the following conditions:
35
+
36
+ The above copyright notice and this permission notice shall be
37
+ included in all copies or substantial portions of the Software.
38
+
39
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
40
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
41
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
42
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
43
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
44
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
45
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/hflr2.gemspec ADDED
@@ -0,0 +1,28 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ Gem::Specification.new do |s|
4
+ s.name = %q{hflr2}
5
+ s.version = "1.0.0"
6
+
7
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
8
+ s.authors = ["Colin Davis", "Bozydar Sobczak"]
9
+ s.date = %q{2011-10-24}
10
+ s.description = %q{HFLR2 -- Hierarchical Fixed Length Records
11
+
12
+ NOTE: This gem is a modification of the hflr gem. It can be not compatible with it.
13
+
14
+ Allows you to read and write files of fixed width records when the file contains one or more
15
+ than one type of record.
16
+
17
+ Install with 'gem install hflr2'
18
+
19
+ See the tests and examples bundled with this gem.}
20
+ s.email = %q{colin.c.davis@gmail.com}
21
+ s.extra_rdoc_files = %w(History.txt README.txt)
22
+ s.files = %w(History.txt README.txt hflr2.gemspec lib/hflr.rb lib/hflr/fl_record_file.rb lib/hflr/hflr.rb lib/hflr/record_template.rb test/customer_orders.dat test/customers.dat test/examples.rb test/flrfile_test.rb test/record_template_test.rb test/sample.dat test/sample2_out.dat test/sample_activities.dat test/sample_out.dat test/test_helper.rb test/test_hflr.rb)
23
+ s.require_paths = %w(lib)
24
+
25
+ s.rubygems_version = %q{1.3.4}
26
+ s.summary = %q{HFLR2 -- Hierarchical Fixed Length Records 2 Allows you to read and write files of fixed width records when the file contains one or more than one type of record}
27
+ s.test_files = %w(test/test_hflr.rb test/test_helper.rb)
28
+ end
@@ -0,0 +1,262 @@
1
+ class FLRFile
2
+
3
+ include Enumerable
4
+
5
+ attr_reader :line_number, :record_template
6
+
7
+ def initialize(source, record_types, record_layouts, logical_first_column=0, extra_columns = nil)
8
+ # Allow record layouts given as a Hash of Hashes mapping variable names to column ranges, like
9
+ # {:type1=>{:var1=>1..5,:var2=>7..8},:type2=>{:var1=>1..1,:var2=>3..4}}
10
+ if record_layouts.values.first.is_a? Hash
11
+ record_layouts = create_layouts(record_layouts)
12
+ end
13
+ @line_number = 0
14
+ @file = source
15
+ @record_type_labels = record_types
16
+ @record_type_symbols = record_types.is_a?(Hash) ? record_types : :none # :none marks a file with a single record type
17
+ if extra_columns then
18
+ @record_template = HFLR::RecordTemplate.create(record_layouts, @record_type_symbols, logical_first_column, extra_columns)
19
+ else
20
+ @record_template = HFLR::RecordTemplate.create(record_layouts, @record_type_symbols, logical_first_column)
21
+ end
22
+ end
23
+
24
+ def set_fast
25
+ @fast = !@record_type_labels.is_a?(Hash)
26
+ unless @fast
27
+ raise "Cannot set fast mode with more than one record type."
28
+ end
29
+ if @fast
30
+ @width = get_record_width_from_file
31
+
32
+
33
+ records_to_take = 100000000 / @width
34
+
35
+ @buffer_size = @width * records_to_take
36
+
37
+
38
+ @position=0
39
+ @current_buffer=nil
40
+ end
41
+ end
42
+
43
+ def ranges=(ranges)
44
+ @fast or raise "Cannot read selected ranges because input file has multiple record types #{@record_type_labels.to_s}"
45
+ unless ranges.first.is_a?(Range)
46
+ raise "You specified a #{ranges.first.class.to_s} instead of a range in the list of ranges. Use (a..b) to specify a range."
47
+ end
48
+
49
+ @offsets =offsets_to_read(ranges, @width)
50
+
51
+ @ranges = ranges
52
+ end
53
+
54
+
55
+ def in_range?(line_number)
56
+ @ranges ? !!(@ranges.detect { |r| r.member?(line_number) }) : true
57
+ end
58
+
59
+ def finished?
60
+ if @fast
61
+ @offsets.empty? && @current_buffer.nil?
62
+ else
63
+ @file.eof?
64
+ end
65
+ end
66
+
67
+ def close
68
+ @file.close
69
+ end
70
+
71
+ # If multiple record types, extract it from the string, otherwise just return the type of this file
72
+ def get_record_type(line)
73
+ return nil if line.nil?
74
+ return nil if line.strip.empty?
75
+ if @record_type_labels.is_a?(Hash)
76
+ matching_pair = @record_type_labels.find do |_, value|
77
+ discriminator = value[:discriminator]
78
+ position = value[:position] - 1
79
+ line[position..-1].start_with?(discriminator)
80
+ end
81
+ if matching_pair
82
+ matching_pair[0]
83
+ else
84
+ nil
85
+ end
86
+ else
87
+ @record_type_labels
88
+ end
89
+ end
90
+
91
+ def build_record(line)
92
+ return nil if line.nil?
93
+ record_type = line_type(line)
94
+ raise "Unknown record type at line #{@line_number.to_s}" if record_type == :unknown
95
+ @record_template[record_type].build_record(line.chomp)
96
+ end
97
+
98
+ def next_record
99
+ build_record(get_next_known_line_type)
100
+ end
101
+
102
+ def line_type(line)
103
+ record_type = get_record_type(line)
104
+ record_type ? record_type : :unknown
105
+ end
106
+
107
+ def get_next_known_line_type
108
+ @fast ? fast_get_next_known_line_type : sequential_get_next_known_line_type
109
+ end
110
+
111
+ def get_next_line
112
+ line = @file.gets
113
+ @line_number+=1
114
+ line
115
+ end
116
+
117
+ def next
118
+ build_record(get_next_line)
119
+ end
120
+
121
+ def fast_get_next_known_line_type
122
+ if @current_buffer.nil? && (@offsets.nil? || @offsets.empty?)
123
+ nil
124
+ else
125
+ if @current_buffer.nil?
126
+ chunk = @offsets.shift
127
+
128
+
129
+ @file.pos = chunk.pos
130
+ @current_buffer=@file.read(chunk.width)
131
+
132
+ record= @current_buffer.slice(@position, @width)
133
+
134
+
135
+ @position += @width
136
+
137
+ if @position >= @current_buffer.size
138
+
139
+ @current_buffer = nil
140
+ @position=0
141
+ end
142
+ return record
143
+ else
144
+ record= @current_buffer.slice(@position, @width)
145
+
146
+ @position += @width
147
+ if @position>=@current_buffer.size
148
+ @position=0
149
+ @current_buffer=nil
150
+ end
151
+ return record
152
+ end
153
+
154
+ end
155
+ end
156
+
157
+ def sequential_get_next_known_line_type
158
+ line = @file.gets
159
+ @line_number+=1
160
+ record_type = line_type(line)
161
+ while !finished? && (!in_range?(@line_number) || record_type == :unknown)
162
+ line = @file.gets
163
+ @line_number+=1
164
+ record_type = line_type(line)
165
+ end
166
+ record_type == :unknown ? nil : line
167
+ end
168
+
169
+
170
+ def each
171
+ @line_number = 1
172
+ if @fast
173
+ yield(next_record) until finished?
174
+ else
175
+ @file.each_line do |line|
176
+ unless line_type(line) == :unknown || !in_range?(@line_number)
177
+ data = build_record(line)
178
+ yield data
179
+ end
180
+ end
181
+ end
182
+ end
183
+
184
+ # This will take a Struct or Array; if an Array the record type must be the last element when
185
+ # the record layout has more than one record type.
186
+ def <<(record)
187
+ record_type =
188
+ if record.is_a? Array
189
+ @record_type_symbols == :none ? @record_template.keys.first : record.last
190
+ else
191
+ if @record_template[record[:record_type]] == nil then
192
+ raise "Record type problem in output: #{record[:record_type].to_s} type on record, #{@record_template.keys.join(",")} types of templates"
193
+ end
194
+ @record_type_symbols == :none ? @record_template.keys.first : record[:record_type]
195
+ end
196
+ line = @record_template[record_type].build_line(record)
197
+ if @cr_before_line
198
+ line = "\n" + line
199
+ else
200
+ @cr_before_line = true
201
+ end
202
+ @file.write(line)
203
+ end
204
+
205
+ # Use when creating a new HFLR file
206
+ def self.open(path, mode, record_types, record_layouts, logical_first_column=0)
207
+ file = File.open(path, mode)
208
+ begin
209
+ hflr_file = new(file, record_types, record_layouts, logical_first_column)
210
+ yield hflr_file
211
+ ensure
212
+ file.close
213
+ end
214
+ end
215
+
216
+ private
217
+
218
+ def offsets_to_read(ranges, width)
219
+ #ranges.map{|r| r.map{|o| o * width}}.flatten.uniq
220
+ chunk = Struct.new(:pos, :width)
221
+ chunks = []
222
+ ranges.each do |range|
223
+ offsets = range.map { |offset| offset }
224
+ taken = []
225
+
226
+ until offsets.empty?
227
+ taken << offsets.shift
228
+ if taken.size * width == @buffer_size || offsets.empty?
229
+
230
+ chunks << chunk.new(taken.first * @width, taken.size * width)
231
+ taken = []
232
+ end # if
233
+ end # while
234
+ end # each
235
+
236
+ chunks
237
+ end
238
+
239
+ def get_record_width_from_file
240
+ width = @file.gets.size
241
+ @file.rewind
242
+ width
243
+ end
244
+
245
+
246
+ # If the layout is given in the convenient Ruby form
247
+ def create_layouts(layout)
248
+ var_class = Struct.new(:name, :start, :len)
249
+ new_layout = {}
250
+ layout.each_pair do |record_type, vars|
251
+
252
+ new_layout[record_type] = []
253
+ vars.each_pair do |var_name, range|
254
+ new_layout[record_type] << var_class.new(var_name.to_s, range.first, range.last - range.first + 1)
255
+ end
256
+ new_layout[record_type].sort! { |a, b| a.start<=>b.start }
257
+ end
258
+ new_layout
259
+ end
260
+
261
+ end
262
+
data/lib/hflr/hflr.rb ADDED
@@ -0,0 +1 @@
1
+ require File.expand_path(File.dirname(__FILE__) + "/fl_record_file")
@@ -0,0 +1,155 @@
1
+ module HFLR
2
+
3
+ class RecordTemplate
4
+ UNFILLED_CHAR = ' '
5
+ MISSING_OUTPUT = "ZZZZZZZZZZZZZZZZZZZZZ"
6
+
7
+ attr_reader :record_structure, :field_pattern, :record_type, :record_type_label
8
+ attr_accessor :strip_whitespace
9
+
10
+ def initialize(record_type, record_type_label, record_structure, field_pattern, field_widths)
11
+ @record_type = record_type
12
+ @record_type_label = record_type_label
13
+ @record_structure = record_structure
14
+ @field_pattern = field_pattern
15
+ @field_widths = field_widths
16
+ end
17
+
18
+ # Layouts is a hash of variables by record type
19
+ # record_type_symbols maps record type names to their labels in the data {:household=>"H",:person=>"P"}
20
+ # Returns a set of record templates, one for each record type
21
+ def self.create(record_layouts, record_type_symbols, first_column_location, extra_columns=[])
22
+ extra_columns = empty_extra_columns(record_layouts.keys) if extra_columns.is_a? Array
23
+ templates = {}
24
+ self.check_record_layouts(record_layouts)
25
+
26
+ record_layouts.each_pair do |record_type, vars|
27
+ record_label = record_type_symbols == :none ? :none : record_type_symbols[record_type]
28
+ templates[record_type] =
29
+ self.create_template_class(record_type,
30
+ record_label,
31
+ vars,
32
+ first_column_location,
33
+ extra_columns[record_type])
34
+ end
35
+ return templates
36
+ end
37
+
38
+ private
39
+ # If the name exists already do not replace it, but add extra columns not to be mapped by the unpack field patterns
40
+ # and ensure the record_type variable is added.
41
+ # Since 'record_type' may not be in the metadata we don't want to map it to a
42
+ # specific column location but do want it included always.
43
+ def self.add_extra_columns(names, extra)
44
+ new_names = names.dup
45
+ # names are not case sensitive
46
+ extra.each { |n| new_names << n unless names.map { |m| m.to_s.upcase }.include? n.to_s.upcase }
47
+
48
+ # No matter what, include 'record_type'
49
+ unless new_names.map { |n| n.to_s.upcase }.include?("RECORD_TYPE")
50
+ new_names << :record_type
51
+ end
52
+ return new_names
53
+ end
54
+
55
+ def self.get_pattern(layout, first_column_location=0)
56
+ layout.map { |l| '@' + (l.start - first_column_location).to_s + 'a' + l.len.to_s }.to_s
57
+ end
58
+
59
+ public
60
+
61
+ def build_record(line)
62
+ rec = line.unpack(@field_pattern)
63
+ rec.map { |f| f.strip! } if @strip_whitespace
64
+ begin
65
+ data = self.record_structure.new(*rec)
66
+ data[:record_type] = @record_type
67
+ rescue Exception => msg
68
+ raise "On record type #{self.record_type} problem with structure " + msg.to_s
69
+ end
70
+ data
71
+ end
72
+
73
+ def build_line(record)
74
+ line = format_fields(record).pack(@field_pattern)
75
+ line.tr!("\0", UNFILLED_CHAR)
76
+ line
77
+ end
78
+
79
+ private
80
+
81
+ def self.empty_extra_columns(record_types)
82
+ extra = {}
83
+ record_types.map { |rt| extra[rt] = [] }
84
+ extra
85
+ end
86
+
87
+ # All starting columns must be in order
88
+ def self.check_record_layouts(layouts)
89
+ layouts.values.each do |layout|
90
+ last_v = layout.first
91
+ layout.each do |v|
92
+ if v.respond_to?(:rectype) then
93
+ if last_v.rectype != v.rectype
94
+ raise "record type mismatch between #{v.name} and #{last_v.name}"
95
+ end
96
+ end
97
+ if last_v.start<= v.start then
98
+ last_v = v
99
+ else
100
+ raise "Problem with start columns #{last_v.name} start #{last_v.start.to_s} out of sequence with #{v.name} starting at #{v.start.to_s}"
101
+ end
102
+ end
103
+ end
104
+ end
105
+
106
+ def self.create_template_class(record_type, record_type_label, layout, first_column_location, extra_columns = nil)
107
+ names = layout.map { |l| l.name.to_sym }
108
+ names = add_extra_columns(names, extra_columns)
109
+ structure = Struct.new(*names)
110
+ return new(record_type,
111
+ record_type_label,
112
+ structure,
113
+ self.get_pattern(layout, first_column_location),
114
+ layout.map { |v| v.len })
115
+ end
116
+
117
+
118
+ def format_fields(record)
119
+ if record.is_a?(Array) or record.is_a?(Struct)
120
+ fields = []
121
+ @field_widths.each_with_index do |width, i|
122
+ begin
123
+ fields << right_format(record[i], width)
124
+ rescue Exception => msg
125
+ if record.is_a?(Struct)
126
+ raise "Output format problem for #{record.members[i].to_s} #{msg.to_s}"
127
+ else
128
+ raise "Output format problem for column #{i.to_s} with value #{record[i].to_s} #{msg.to_s}"
129
+ end
130
+ end
131
+ end
132
+ fields
133
+ else
134
+ raise "Record to format must be a Struct or Array"
135
+ end
136
+ end
137
+
138
+ def right_format(data, len)
139
+ if data.is_a? String
140
+ data_str = data.ljust(len)
141
+ elsif data.is_a? Symbol
142
+ data_str = data.to_s.ljust(len)
143
+ else
144
+ data_str = sprintf("%0#{len.to_s}d", data)
145
+ data_str = MISSING_OUTPUT[0..len-1] if data == -999998
146
+ end
147
+ raise "Data too large for allocated columns #{data_str}" if data_str.size > len
148
+ data_str
149
+ end
150
+
151
+
152
+ end # RecordTemplate class
153
+
154
+ end # HFLR module
155
+
data/lib/hflr.rb ADDED
@@ -0,0 +1,49 @@
1
+
2
+ module Hflr
3
+
4
+ # :stopdoc:
5
+ VERSION = '1.0.0' # must match s.version in hflr2.gemspec
6
+ LIBPATH = ::File.expand_path(::File.dirname(__FILE__)) + ::File::SEPARATOR
7
+ PATH = ::File.dirname(LIBPATH) + ::File::SEPARATOR
8
+ # :startdoc:
9
+
10
+ # Returns the version string for the library.
11
+ #
12
+ def self.version
13
+ VERSION
14
+ end
15
+
16
+ # Returns the library path for the module. If any arguments are given,
17
+ # they will be joined to the end of the library path using
18
+ # <tt>File.join</tt>.
19
+ #
20
+ def self.libpath( *args )
21
+ args.empty? ? LIBPATH : ::File.join(LIBPATH, args.flatten)
22
+ end
23
+
24
+ # Returns the path for the module. If any arguments are given,
25
+ # they will be joined to the end of the path using
26
+ # <tt>File.join</tt>.
27
+ #
28
+ def self.path( *args )
29
+ args.empty? ? PATH : ::File.join(PATH, args.flatten)
30
+ end
31
+
32
+ # Utility method used to require all files ending in .rb that lie in the
33
+ # directory below this file that has the same name as the filename passed
34
+ # in. Optionally, a specific _directory_ name can be passed in such that
35
+ # the _filename_ does not have to be equivalent to the directory.
36
+ #
37
+ def self.require_all_libs_relative_to( fname, dir = nil )
38
+ dir ||= ::File.basename(fname, '.*')
39
+ search_me = ::File.expand_path(
40
+ ::File.join(::File.dirname(fname), dir, '**', '*.rb'))
41
+
42
+ Dir.glob(search_me).sort.each {|rb| require rb}
43
+ end
44
+
45
+ end # module Hflr
46
+
47
+ Hflr.require_all_libs_relative_to(__FILE__)
48
+
49
+ # EOF
@@ -0,0 +1,5 @@
1
+ CJoe Smith 55455025.53
2
+ O0005233110-10-2008
3
+ CJane Smith 55404015.25
4
+ O0054933310-11-2008
5
+ O0075789110-12-2008
@@ -0,0 +1,3 @@
1
+ Joe Smith 55455025.53
2
+ Jane Smith 55404015.25
3
+ John Smith 55404015.25
data/test/examples.rb ADDED
@@ -0,0 +1,67 @@
1
+ require File.dirname(__FILE__) + "/../lib/hflr"
2
+
3
+
4
+ # Read a file with only one record type (no record type markers)
5
+
6
+ # metadata for customer file
7
+ Column = Struct.new(:name, :start, :len)
8
+ columns = {:customer => [
9
+ Column.new("name", 1, 25),
10
+ Column.new("zip", 26, 5),
11
+ Column.new("balance", 31, 5)]}
12
+
13
+ customer_file = FLRFile.new(File.new("customers.dat"), :customer, columns, 1, [:line_number])
14
+
15
+
16
+ # You can read through the file and access the fields with methods named after the columns:
17
+ customer_file.each do |record|
18
+ puts "Customer #{customer_file.line_number.to_s} #{record.name} #{record.zip} "
19
+ end
20
+
21
+
22
+ # You can get the values by attribute name like a hash
23
+ def show(record)
24
+ print record.members.map { |m| m.to_s + ": " + record[m].to_s }.join(", ") + "\n"
25
+ end
26
+
27
+ # metadata for customer_orders file
28
+ layouts = {:customer => [
29
+ Column.new("name", 1, 25),
30
+ Column.new("zip", 26, 5),
31
+ Column.new("balance", 31, 5)],
32
+ :order => [
33
+ Column.new("order_num", 1, 8),
34
+ Column.new("date", 9, 10),]}
35
+
36
+
37
+ customer_orders_file = FLRFile.new(
38
+ File.new("customer_orders.dat"),
39
+ {"C" => :customer, "O" => :order}, # Use these characters as record type markers
40
+ layouts,
41
+ 0, # shift parsed string 0 columns to the left of the indicated start column
42
+ {:customer => [:line_number, :record_type], :order => [:line_number, :record_type]}) # Add these columns to the indicated record types post read
43
+
44
+
45
+ customer_orders_file.each do |record|
46
+ show record
47
+ end
48
+
49
+
50
+ puts " ----- You can also use metadata in Ruby -----"
51
+ # Use Ruby metadata
52
+
53
+ layout = {:customer => {
54
+ :name => 1..25,
55
+ :zip => 26..30,
56
+ :balance => 31..35
57
+
58
+ }
59
+ }
60
+ customer_file = FLRFile.new(File.new("customers.dat"), :customer, layout, 1, [:line_number])
61
+
62
+
63
+ # You can read through the file and access the fields with methods named after the columns:
64
+ customer_file.each do |record|
65
+ puts "Customer #{customer_file.line_number.to_s} #{record.name} #{record.zip} "
66
+ end
67
+
@@ -0,0 +1,301 @@
1
+ require 'test_hflr'
2
+
3
+ class FLRFileTest < Test::Unit::TestCase
4
+
5
+ def setup
6
+ var_type = Struct.new(:name, :start, :len)
7
+
8
+ # Split up your metadata by record type.
9
+
10
+ @layouts =
11
+ {:household => [var_type.new(:rectypeh, 1, 1), var_type.new(:phone, 2, 1), var_type.new("mortgage", 3, 1)],
12
+ :person => [var_type.new(:rectypep, 1, 1), var_type.new("age", 2, 3), var_type.new("sex", 5, 1), var_type.new("marst", 6, 1)]}
13
+
14
+ # Give the values used in the data for each record type
15
+ @record_types = {:household => {:discriminator => 'H', :position => 1}, :person => {:discriminator => 'P', :position => 1}}
16
+ end
17
+
18
+ def teardown
19
+ end
20
+
21
+ def test_initialize
22
+ sample_data_path = File.dirname(__FILE__)
23
+ fwf = FLRFile.new(
24
+ File.new("#{sample_data_path}/sample.dat"),
25
+ @record_types, # Record types to read from the file, all others will be ignored
26
+ @layouts, # metadata for all record types
27
+ 1, # column 0 starts at logical location 1
28
+ {:household => [:people], :person => [:household_id, :pserial]} # extra columns by record type
29
+ )
30
+
31
+ # Extra columns + record_type accessors should have been created
32
+ hh_struct = fwf.record_template[:household].record_structure.new
33
+ assert hh_struct.respond_to?(:record_type), "household record should have record_type method"
34
+ p_struct = fwf.record_template[:person].record_structure.new
35
+ assert p_struct.respond_to?(:household_id), "Should have household_id as an extra column"
36
+ assert p_struct.respond_to?(:record_type), "Should have record_type method"
37
+
38
+
39
+ fwf = FLRFile.new(
40
+ File.new("#{sample_data_path}/sample.dat"),
41
+ @record_types, # Record types to read from the file, all others will be ignored
42
+ @layouts, # metadata for all record types
43
+ 1)
44
+
45
+ # Should still have added the record_type method but none of the others
46
+ hh_struct = fwf.record_template[:household].record_structure.new
47
+ assert hh_struct.respond_to?(:record_type), "Should have record_type method"
48
+ assert !hh_struct.respond_to?(:people)
49
+ end
50
+
51
+ def test_get_record_type
52
+ sample_data_path = File.dirname(__FILE__)
53
+ fwf = FLRFile.new(
54
+ File.new("#{sample_data_path}/sample.dat"),
55
+ @record_types, # Record types to read from the file, all others will be ignored
56
+ @layouts, # metadata for all record types
57
+ 1, # column 0 starts at logical location 1
58
+ {:household => [:people], :person => [:household_id, :pserial]} # extra columns by record type
59
+ )
60
+ assert_nil fwf.get_record_type(nil)
61
+ assert_equal :household, fwf.get_record_type("H123")
62
+ assert_equal :person, fwf.get_record_type("P1234")
63
+ assert_equal nil, fwf.get_record_type("C123")
64
+
65
+ end
66
+
67
+
68
+ def test_build_record
69
+ sample_data_path = File.dirname(__FILE__)
70
+ fwf = FLRFile.new(
71
+ File.new("#{sample_data_path}/sample.dat"),
72
+ @record_types, # Record types to read from the file, all others will be ignored
73
+ @layouts, # metadata for all record types
74
+ 1, # column 0 starts at logical location 1
75
+ {:household => [:people], :person => [:household_id, :pserial]} # extra columns by record type
76
+ )
77
+
78
+ assert_equal nil, fwf.build_record(nil)
79
+ rec = fwf.build_record("H012345666665555444333")
80
+ assert_equal :household, rec[:record_type]
81
+
82
+ assert_raise RuntimeError do
83
+ fwf.build_record("c23abbbc")
84
+ end
85
+
86
+ end
87
+
88
+ def test_fast_next_record
89
+ sample_data_path = File.dirname(__FILE__)
90
+
91
+ layout = {:customer => {
92
+ :name => 1..25,
93
+ :zip => 26..30,
94
+ :balance => 31..35
95
+ }
96
+ }
97
+
98
+ customer_file = FLRFile.new(File.new(sample_data_path + "/customers.dat"), :customer, layout, 1)
99
+ customer_file.set_fast
100
+ customer_file.ranges=[(0..2)]
101
+
102
+ records = []
103
+ until customer_file.finished?
104
+ record = customer_file.next_record
105
+
106
+ puts record.inspect
107
+ records << record
108
+ end
109
+
110
+ assert_equal 3, records.size
111
+
112
+ # Check that the records aren't off by one
113
+ assert_equal "Jane Smith", records[1].name.strip
114
+ assert_equal "John Smith", records.last.name.strip
115
+
116
+ end
117
+
118
+ def test_partial_fast_next_line
119
+ sample_data_path = File.dirname(__FILE__)
120
+
121
+ layout = {:customer => {
122
+ :name => 1..25,
123
+ :zip => 26..30,
124
+ :balance => 31..35
125
+ }
126
+ }
127
+
128
+ customer_file = FLRFile.new(File.new(sample_data_path + "/customers.dat"), :customer, layout, 1)
129
+ customer_file.set_fast
130
+ customer_file.ranges=[(0..1)]
131
+
132
+ records = []
133
+ until customer_file.finished?
134
+ record = customer_file.next_record
135
+ puts record.inspect
136
+ records << record
137
+ end
138
+
139
+ assert_equal 2, records.size
140
+
141
+ # Check that the records aren't off by one
142
+ assert_equal "Jane Smith", records[1].name.strip
143
+
144
+ end
145
+
146
+ def test_fast_each
147
+ sample_data_path = File.dirname(__FILE__)
148
+
149
+ layout = {:customer => {
150
+ :name => 1..25,
151
+ :zip => 26..30,
152
+ :balance => 31..35
153
+ }
154
+ }
155
+
156
+ customer_file = FLRFile.new(File.new(sample_data_path + "/customers.dat"), :customer, layout, 1)
157
+ customer_file.set_fast
158
+ customer_file.ranges=[(0..1)]
159
+
160
+ records = []
161
+ customer_file.each do |record|
162
+
163
+ puts record.inspect
164
+ records << record
165
+ end
166
+
167
+ assert_equal 2, records.size
168
+
169
+ # Check that the records aren't off by one
170
+ assert_equal "Jane Smith ", records[1].name
171
+
172
+ end
173
+
174
+
175
+ def test_each
176
+ sample_data_path = File.dirname(__FILE__)
177
+ fwf = FLRFile.new(
178
+ File.new("#{sample_data_path}/sample.dat"),
179
+ @record_types,
180
+ @layouts,
181
+ 1,
182
+ {:household => [:record_type, :people], :person => [:household_id, :pserial, :record_type]})
183
+
184
+ records = []
185
+
186
+ fwf.each do |record|
187
+ records << record
188
+ end
189
+ assert records.first.respond_to?(:record_type)
190
+ assert_equal :person, records.last.record_type
191
+ assert_equal :household, records[0].record_type
192
+ assert_equal :person, records[1].record_type
193
+
194
+ end
195
+
196
+ def test_next_record
197
+ sample_data_path = File.dirname(__FILE__)
198
+ fwf = FLRFile.new(
199
+ File.new("#{sample_data_path}/sample.dat"), # data is in this file
200
+ @record_types, # Records of different types have these labels
201
+ @layouts, # metadata for creating record structs
202
+ 1, # All metadata starting column locations are to be shifted 1 left
203
+ {:household => [:people], :person => [:household_id, :pserial]}) # Extra columns not to come from metadata
204
+
205
+ records = []
206
+ while (rec = fwf.next_record) do
207
+ records << rec
208
+
209
+ end
210
+
211
+ assert_equal :household, records.first.record_type
212
+
213
+ assert records.first.respond_to?(:record_type)
214
+
215
+ # The last record is a person type and should not have a 'people' accessor
216
+ assert !records.last.respond_to?(:people)
217
+
218
+ # Should have added these accessors from the extra_columns argument above
219
+ assert records.first.respond_to?(:people)
220
+ assert records.last.respond_to?(:household_id)
221
+
222
+ assert_equal :household, records[0].record_type
223
+ assert_equal :person, records[1].record_type
224
+
225
+ end
226
+
227
+ def test_open
228
+ record = Struct.new(:rectypeh, :phone, :mortgage, :record_type)
229
+
230
+ sample_data_path = File.dirname(__FILE__)
231
+ FLRFile.open(
232
+ "#{sample_data_path}/sample_out.dat", # data is in this file
233
+ "w", # open file for writing
234
+ @record_types, # Records of different types have these labels
235
+ @layouts, # metadata for creating record structs
236
+ 1) do |fwf| # All metadata starting column locations are to be shifted 1 left
237
+ assert_equal FLRFile, fwf.class
238
+ fwf << record.new("H", 1, 2, :household)
239
+ fwf << ["H", 1, 3, :household]
240
+ end
241
+
242
+ assert File.exists?("#{sample_data_path}/sample.dat") # data is in this file
243
+
244
+ var = Struct.new(:name, :start, :len)
245
+ l = {:customer => [var.new("name", 1, 25), var.new("age", 26, 3)]}
246
+ fwf = FLRFile.new(File.new("sample2_out.dat", "w"),
247
+ :customer, l, 1)
248
+
249
+ fwf << ["joe", 25, :customer]
250
+ fwf.close
251
+
252
+ end
253
+
254
+
255
+ def test_line_type
256
+ sample_data_path = File.dirname(__FILE__)
257
+ fwf = FLRFile.new(
258
+ File.new("#{sample_data_path}/sample.dat"),
259
+ @record_types, # Record types to read from the file, all others will be ignored
260
+ @layouts, # metadata for all record types
261
+ 1, # column 0 starts at logical location 1
262
+ {:household => [:people], :person => [:household_id, :pserial]} # extra columns by record type
263
+ )
264
+
265
+ assert_equal :unknown, fwf.line_type(nil)
266
+ assert_equal :household, fwf.line_type("H123")
267
+ assert_equal :person, fwf.line_type("P123")
268
+ assert_equal :unknown, fwf.line_type("C123")
269
+ end
270
+
271
+ def test_ranges
272
+ end
273
+
274
+ def test_in_range
275
+ end
276
+
277
+
278
+ def test_get_next_known_line_type
279
+ sample_data_path = File.dirname(__FILE__)
280
+ fwf = FLRFile.new(
281
+ File.new("#{sample_data_path}/sample_activities.dat"),
282
+ @record_types, # Record types to read from the file, all others will be ignored
283
+ @layouts, # metadata for all record types
284
+ 1, # column 0 starts at logical location 1
285
+ {:household => [:people], :person => [:household_id, :pserial]} # extra columns by record type
286
+ )
287
+ # By reading the sample_activities file with only the household and person record types know
288
+ # we should get the activity and who records to be skipped.
289
+ while (rec=fwf.get_next_known_line_type)
290
+
291
+ unless rec.strip.empty?
292
+
293
+ assert %w(P H).include?(rec[0..0])
294
+
295
+ end
296
+ end
297
+
298
+ end
299
+ end
300
+
301
+
@@ -0,0 +1,169 @@
1
+ require 'test_hflr'
2
+
3
+ class RecordTemplateTest < Test::Unit::TestCase
4
+
5
+ Var = Struct.new(:name, :start, :len)
6
+
7
+ # Some real-world metadata. This is only the layout for the starting portion of the ATUS / CPS household record,
8
+ # which actually extends beyond column 117.
9
+ AtusHH =[[:RECTYPEH, 1, 1,],
10
+ [:CASEID, 2, 14],
11
+ [:AGEYCHILD, 16, 3],
12
+ [:SERIAL, 19, 7],
13
+ [:HH_NUMADULTS, 26, 2],
14
+ [:FAMBUS_RESP, 28, 2],
15
+ [:FAMBUS_OTHER, 30, 2],
16
+ [:FAMBUS_SPOUSE, 32, 2],
17
+ [:FAMBUS, 34, 2],
18
+ [:HH_CHILD, 36, 2],
19
+ [:HH_NUMKIDS, 38, 2],
20
+ [:HH_SIZE, 40, 3],
21
+ [:HH_NUMEMPLD, 43, 3],
22
+ [:FAMINCOME, 46, 3]]
23
+
24
+
25
+ def setup
26
+
27
+ hh_vars = AtusHH.map { |v| Var.new(v[0], v[1], v[2]) }
28
+
29
+ @vars = {:household => hh_vars,
30
+ :person => [Var.new("age", 2, 3), Var.new("sex", 5, 1)],
31
+ :activity => [Var.new("where", 1, 5), Var.new("activity", 6, 5)],
32
+ :who => [Var.new("relatew", 1, 2)]}
33
+ @record_types = {"H" => :household, "P" => :person, "A" => :activity, "W" => :who}
34
+ @record_type_symbols = @record_types.invert
35
+ end
36
+
37
+ def test_create
38
+ record_layouts = @vars # variables by record type
39
+ templates = HFLR::RecordTemplate.create(record_layouts, @record_type_symbols, 1)
40
+ assert_equal @vars.keys, templates.keys
41
+
42
+ household_field_pattern = templates[:household].field_pattern
43
+ person_field_pattern =templates[:person].field_pattern
44
+ activity_field_pattern = templates[:activity].field_pattern
45
+ who_field_pattern = templates[:who].field_pattern
46
+
47
+
48
+ assert household_field_pattern.is_a?(String)
49
+ assert person_field_pattern.size>2, "field pattern should have at least one variable"
50
+ assert activity_field_pattern.size>2, "field pattern for activity should have at least one variable"
51
+ assert who_field_pattern.size>2, "field pattern for who should have at least one variable"
52
+
53
+
54
+ household_record_struct = templates[:household].record_structure.new
55
+ assert household_record_struct.respond_to?(:HH_SIZE)
56
+
57
+ end
58
+
59
+ def test_create_template_class
60
+ template = HFLR::RecordTemplate.create_template_class(:person, @record_type_symbols[:person], @vars[:person], 1, {})
61
+ assert template.respond_to?(:field_pattern)
62
+ assert template.respond_to?(:record_structure)
63
+ record_structure = template.record_structure
64
+ assert record_structure.new.is_a?(Struct)
65
+ assert record_structure.new.respond_to?(:record_type)
66
+
67
+ end
68
+
69
+
70
+ def test_get_pattern
71
+ household_layout = @vars[:household]
72
+
73
+ pattern = HFLR::RecordTemplate.get_pattern(household_layout)
74
+ assert_equal "@1a1", pattern[0..3]
75
+
76
+ # Adjust the location ('@') leftward (the metadata refers to the 0th column as column 1.)
77
+ pattern = HFLR::RecordTemplate.get_pattern(household_layout, 1)
78
+ assert_equal "@0a1", pattern[0..3]
79
+
80
+ vars_in_pattern = pattern.scan("a").size
81
+ assert_equal household_layout.size, vars_in_pattern
82
+
83
+ end
84
+
85
+ def test_build_record
86
+ templates = HFLR::RecordTemplate.create(@vars, @record_type_symbols, 1)
87
+ hh_str = "1200501010500069980000001020000000000000020009960200009999999999999999991330299902305030201034300000000037110550997797000000000007700100000000200411000000000"
88
+
89
+ # from a string to a record struct
90
+ household_rec = templates[:household].build_record(hh_str)
91
+
92
+ assert household_rec.is_a?(Struct)
93
+ assert household_rec.values.size> @vars[:household].size, "Should be values for the extra columns"
94
+
95
+ # Check a few things...
96
+ assert_equal 1, household_rec[:SERIAL].to_i
97
+
98
+
99
+ assert_equal 1, household_rec.SERIAL.to_i
100
+
101
+ assert_equal "1", household_rec[0]
102
+ assert_equal "1", household_rec.RECTYPEH
103
+ assert_equal "1", household_rec[:RECTYPEH]
104
+
105
+
106
+ end
107
+
108
+ def test_build_line
109
+ templates = HFLR::RecordTemplate.create(@vars, @record_type_symbols, 1)
110
+
111
+ hh_str = "H200501010500069980000001020000000000000020009960200009999999999999999991330299902305030201034300000000037110550997797000000000007700100000000200411000000000"
112
+
113
+ # from a string to a record struct
114
+ household_rec = templates[:household].build_record(hh_str)
115
+ assert_equal "002", hh_str[39..41]
116
+ assert_equal "002", household_rec.HH_SIZE
117
+
118
+
119
+ # back to a string
120
+ new_hh_str = templates[:household].build_line(household_rec)
121
+
122
+
123
+ assert_equal "002", new_hh_str[39..41]
124
+
125
+ # Some of the values in hh_str won't be in new_hh_str because not all data
126
+ # in hh_str is mapped by household_layout, but the mapped variables should have
127
+ # the same values.
128
+
129
+ @vars[:household].each do |v|
130
+ format_str = "@#{(v.start-1).to_s}a#{v.len.to_s}"
131
+ orig_data = hh_str.unpack(format_str)
132
+ new_data = new_hh_str.unpack(format_str)
133
+
134
+
135
+ assert_equal new_data, orig_data, "Comparing #{v.name} #{format_str}"
136
+ end
137
+ end
138
+
139
+
140
+ def test_format_fields
141
+ templates = HFLR::RecordTemplate.create(@vars, @record_type_symbols, 1)
142
+
143
+ formatted_fields = templates[:who].send(:format_fields, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
144
+
145
+ widths = @vars[:who].map { |v| v.len }
146
+ formatted_fields.size.times do |i|
147
+ assert_equal formatted_fields[i].size, widths[i], "Width of #{@vars[:who][i].name} should have been #{widths[i].to_s}"
148
+ end
149
+
150
+ end
151
+
152
+ def test_write_format
153
+ templates = HFLR::RecordTemplate.create(@vars, @record_type_symbols, 1)
154
+
155
+ assert_equal "abc", templates[:activity].send(:right_format, "abc", 3)
156
+ assert_equal "abc ", templates[:activity].send(:right_format, "abc", 6)
157
+ assert_equal "3", templates[:activity].send(:right_format, 3, 1)
158
+ assert_equal "005", templates[:activity].send(:right_format, 5, 3)
159
+ assert_equal "ZZZ", templates[:activity].send(:right_format, -999998, 3)
160
+ end
161
+
162
+
163
+ end
164
+
165
+
166
+
167
+
168
+
169
+
data/test/sample.dat ADDED
@@ -0,0 +1,7 @@
1
+ H010599999999
2
+ P05525
3
+ P02326
4
+ P01916
5
+ H010599999999
6
+ P02516
7
+
@@ -0,0 +1 @@
1
+ joe 025
@@ -0,0 +1,12 @@
1
+ H010599999999
2
+ P05525
3
+ P02326
4
+ A12344001201
5
+ W230
6
+ P01916
7
+ H010599999999
8
+ P02516
9
+ A2212320601
10
+ A2232320602
11
+ W100
12
+
@@ -0,0 +1,2 @@
1
+ H12
2
+ H13
@@ -0,0 +1,3 @@
1
+ require 'stringio'
2
+ require 'test/unit'
3
+ require File.dirname(__FILE__) + '/../lib/hflr'
data/test/test_hflr.rb ADDED
@@ -0,0 +1,4 @@
1
+ require File.dirname(__FILE__) + '/test_helper.rb'
2
+ require File.dirname(__FILE__) + "/record_template_test"
3
+ require File.dirname(__FILE__) + "/flrfile_test"
4
+
metadata ADDED
@@ -0,0 +1,95 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: hflr2
3
+ version: !ruby/object:Gem::Version
4
+ hash: 23
5
+ prerelease:
6
+ segments:
7
+ - 1
8
+ - 0
9
+ - 0
10
+ version: 1.0.0
11
+ platform: ruby
12
+ authors:
13
+ - Colin Davis
14
+ - Bozydar Sobczak
15
+ autorequire:
16
+ bindir: bin
17
+ cert_chain: []
18
+
19
+ date: 2011-10-24 00:00:00 Z
20
+ dependencies: []
21
+
22
+ description: |-
23
+ HFLR2 -- Hierarchical Fixed Length Records
24
+
25
+ NOTE: This gem is a modification of the hflr gem. It may not be compatible with it.
26
+
27
+ Allows you to read and write files of fixed width records when the file contains one or more
28
+ than one type of record.
29
+
30
+ Install with 'gem install hflr2'
31
+
32
+ See the tests and examples bundled with this gem.
33
+ email: colin.c.davis@gmail.com
34
+ executables: []
35
+
36
+ extensions: []
37
+
38
+ extra_rdoc_files:
39
+ - History.txt
40
+ - README.txt
41
+ files:
42
+ - History.txt
43
+ - README.txt
44
+ - hflr2.gemspec
45
+ - lib/hflr.rb
46
+ - lib/hflr/fl_record_file.rb
47
+ - lib/hflr/hflr.rb
48
+ - lib/hflr/record_template.rb
49
+ - test/customer_orders.dat
50
+ - test/customers.dat
51
+ - test/examples.rb
52
+ - test/flrfile_test.rb
53
+ - test/record_template_test.rb
54
+ - test/sample.dat
55
+ - test/sample2_out.dat
56
+ - test/sample_activities.dat
57
+ - test/sample_out.dat
58
+ - test/test_helper.rb
59
+ - test/test_hflr.rb
60
+ homepage:
61
+ licenses: []
62
+
63
+ post_install_message:
64
+ rdoc_options: []
65
+
66
+ require_paths:
67
+ - lib
68
+ required_ruby_version: !ruby/object:Gem::Requirement
69
+ none: false
70
+ requirements:
71
+ - - ">="
72
+ - !ruby/object:Gem::Version
73
+ hash: 3
74
+ segments:
75
+ - 0
76
+ version: "0"
77
+ required_rubygems_version: !ruby/object:Gem::Requirement
78
+ none: false
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ hash: 3
83
+ segments:
84
+ - 0
85
+ version: "0"
86
+ requirements: []
87
+
88
+ rubyforge_project:
89
+ rubygems_version: 1.8.11
90
+ signing_key:
91
+ specification_version: 3
92
+ summary: HFLR2 -- Hierarchical Fixed Length Records 2 Allows you to read and write files of fixed width records when the file contains one or more than one type of record
93
+ test_files:
94
+ - test/test_hflr.rb
95
+ - test/test_helper.rb