hflr2 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +14 -0
- data/README.txt +45 -0
- data/hflr2.gemspec +28 -0
- data/lib/hflr/fl_record_file.rb +262 -0
- data/lib/hflr/hflr.rb +1 -0
- data/lib/hflr/record_template.rb +155 -0
- data/lib/hflr.rb +49 -0
- data/test/customer_orders.dat +5 -0
- data/test/customers.dat +3 -0
- data/test/examples.rb +67 -0
- data/test/flrfile_test.rb +301 -0
- data/test/record_template_test.rb +169 -0
- data/test/sample.dat +7 -0
- data/test/sample2_out.dat +1 -0
- data/test/sample_activities.dat +12 -0
- data/test/sample_out.dat +2 -0
- data/test/test_helper.rb +3 -0
- data/test/test_hflr.rb +4 -0
- metadata +95 -0
data/History.txt
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
== 1.0.1 / 2010-01-21
|
|
2
|
+
|
|
3
|
+
* Fixed warnings
|
|
4
|
+
* Added better exception handling for badly formatted output data
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
== 0.11.0 / 2009-08-04
|
|
8
|
+
|
|
9
|
+
* Removed useless files
|
|
10
|
+
* Corrected version number
|
|
11
|
+
|
|
12
|
+
== 1.0.1 / 2011-02-09
|
|
13
|
+
* Added ability to specify file format in Ruby with Range class (see example.rb in /test.)
|
|
14
|
+
|
data/README.txt
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
= HFLR
|
|
2
|
+
|
|
3
|
+
* http://ruff.rubyforge.org
|
|
4
|
+
|
|
5
|
+
== Description:
|
|
6
|
+
|
|
7
|
+
HFLR -- Hierarchical Fixed Length Records
|
|
8
|
+
|
|
9
|
+
Allows you to read and write files of fixed width records when the file contains one or more
|
|
10
|
+
than one type of record.
|
|
11
|
+
|
|
12
|
+
Install with 'gem install hflr'
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
See the tests and examples bundled with this gem.
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
== LICENSE:
|
|
23
|
+
|
|
24
|
+
(The MIT License)
|
|
25
|
+
|
|
26
|
+
Copyright (c) 2009 Colin C. Davis
|
|
27
|
+
|
|
28
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
|
29
|
+
a copy of this software and associated documentation files (the
|
|
30
|
+
'Software'), to deal in the Software without restriction, including
|
|
31
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
|
32
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
|
33
|
+
permit persons to whom the Software is furnished to do so, subject to
|
|
34
|
+
the following conditions:
|
|
35
|
+
|
|
36
|
+
The above copyright notice and this permission notice shall be
|
|
37
|
+
included in all copies or substantial portions of the Software.
|
|
38
|
+
|
|
39
|
+
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
|
40
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
41
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
42
|
+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
|
43
|
+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
|
44
|
+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
|
45
|
+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/hflr2.gemspec
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
|
2
|
+
|
|
3
|
+
Gem::Specification.new do |s|
|
|
4
|
+
s.name = %q{hflr2}
|
|
5
|
+
s.version = "1.0.0"
|
|
6
|
+
|
|
7
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
|
8
|
+
s.authors = ["Colin Davis", "Bozydar Sobczak"]
|
|
9
|
+
s.date = %q{2011-10-24}
|
|
10
|
+
s.description = %q{HFLR2 -- Hierarchical Fixed Length Records
|
|
11
|
+
|
|
12
|
+
NOTE: This gem is a modification of the hflr gem. It can be not compatible with it.
|
|
13
|
+
|
|
14
|
+
Allows you to read and write files of fixed width records when the file contains one or more
|
|
15
|
+
than one type of record.
|
|
16
|
+
|
|
17
|
+
Install with 'gem install hflr2'
|
|
18
|
+
|
|
19
|
+
See the tests and examples bundled with this gem.}
|
|
20
|
+
s.email = %q{colin.c.davis@gmail.com}
|
|
21
|
+
s.extra_rdoc_files = %w(History.txt README.txt)
|
|
22
|
+
s.files = %w(History.txt README.txt hflr2.gemspec lib/hflr.rb lib/hflr/fl_record_file.rb lib/hflr/hflr.rb lib/hflr/record_template.rb test/customer_orders.dat test/customers.dat test/examples.rb test/flrfile_test.rb test/record_template_test.rb test/sample.dat test/sample2_out.dat test/sample_activities.dat test/sample_out.dat test/test_helper.rb test/test_hflr.rb)
|
|
23
|
+
s.require_paths = %w(lib)
|
|
24
|
+
|
|
25
|
+
s.rubygems_version = %q{1.3.4}
|
|
26
|
+
s.summary = %q{HFLR2 -- Hierarchical Fixed Length Records 2 Allows you to read and write files of fixed width records when the file contains one or more than one type of record}
|
|
27
|
+
s.test_files = %w(test/test_hflr.rb test/test_helper.rb)
|
|
28
|
+
end
|
|
@@ -0,0 +1,262 @@
|
|
|
1
|
+
class FLRFile
|
|
2
|
+
|
|
3
|
+
include Enumerable
|
|
4
|
+
|
|
5
|
+
attr_reader :line_number, :record_template
|
|
6
|
+
|
|
7
|
+
def initialize(source, record_types, record_layouts, logical_first_column=0, extra_columns = nil)
|
|
8
|
+
# Allow record layouts like
|
|
9
|
+
# {:type1=>{:var1=>1..5,:var2=>7..8},:type2=>{:var1=>1..1,:var2=>3..4}}
|
|
10
|
+
if record_layouts.values.first.is_a? Hash
|
|
11
|
+
record_layouts = create_layouts(record_layouts)
|
|
12
|
+
end
|
|
13
|
+
@line_number = 0
|
|
14
|
+
@file = source
|
|
15
|
+
@record_type_labels = record_types
|
|
16
|
+
@record_type_symbols = record_types.is_a?(Hash) ? record_types : :none
|
|
17
|
+
if extra_columns then
|
|
18
|
+
@record_template = HFLR::RecordTemplate.create(record_layouts, @record_type_symbols, logical_first_column, extra_columns)
|
|
19
|
+
else
|
|
20
|
+
@record_template = HFLR::RecordTemplate.create(record_layouts, @record_type_symbols, logical_first_column)
|
|
21
|
+
end
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
def set_fast
|
|
25
|
+
@fast = !@record_type_labels.is_a?(Hash)
|
|
26
|
+
unless @fast
|
|
27
|
+
raise "Cannot set fast mode with more than one record type."
|
|
28
|
+
end
|
|
29
|
+
if @fast
|
|
30
|
+
@width = get_record_width_from_file
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
records_to_take = 100000000 / @width
|
|
34
|
+
|
|
35
|
+
@buffer_size = @width * records_to_take
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@position=0
|
|
39
|
+
@current_buffer=nil
|
|
40
|
+
end
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
# Restricts fast-mode reading to the given record ranges.
#
# ranges - a non-empty list of Range objects holding zero-based record
#          indexes; each index is multiplied by the record width to obtain a
#          file offset.
#
# Stores the ranges in @ranges and the chunks to read in @offsets.
#
# Raises RuntimeError when fast mode has not been enabled, or when the list is
# empty or contains anything that is not a Range.
def ranges=(ranges)
  raise "Cannot read selected ranges because input file has multiple record types #{@record_type_labels.to_s}" unless @fast

  # Validate every entry, not only the first, so that a stray value is
  # reported here rather than failing later during offset computation.
  unless !ranges.empty? && ranges.all? { |range| range.is_a?(Range) }
    offender = ranges.find { |range| !range.is_a?(Range) }
    raise "You specified a #{offender.class.to_s} instead of a range in the list of ranges. Use (a..b) to specify a range."
  end

  @offsets = offsets_to_read(ranges, @width)
  @ranges = ranges
end
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
# Reports whether +line_number+ lies inside one of the ranges stored in
# @ranges. When no ranges have been stored every line counts as in range.
#
# Returns true or false.
def in_range?(line_number)
  return true unless @ranges

  @ranges.any? { |range| range.member?(line_number) }
end
|
|
58
|
+
|
|
59
|
+
# Reports whether there is nothing left to read.
#
# In fast mode this is true once no chunks remain in @offsets and no buffer is
# currently being consumed. A nil @offsets (no ranges assigned yet) is treated
# like an empty list, matching fast_get_next_known_line_type.
# Otherwise it is the end-of-file state of the underlying file.
def finished?
  return @file.eof? unless @fast

  (@offsets.nil? || @offsets.empty?) && @current_buffer.nil?
end
|
|
66
|
+
|
|
67
|
+
def close
|
|
68
|
+
@file.close
|
|
69
|
+
end
|
|
70
|
+
|
|
71
|
+
# Determines which record type a raw line belongs to.
#
# When @record_type_labels is a Hash (record type => {:discriminator => String,
# :position => one-based column}), returns the key of the first entry whose
# discriminator occurs in the line at its position, or nil when none matches.
# Otherwise the file has a single record type and @record_type_labels itself
# is returned.
#
# Returns nil for a nil or blank line.
def get_record_type(line)
  return nil if line.nil? || line.strip.empty?
  return @record_type_labels unless @record_type_labels.is_a?(Hash)

  matching_pair = @record_type_labels.find do |_, value|
    discriminator = value[:discriminator]
    offset = value[:position] - 1
    # Compare a fixed-size slice: slicing past the end of a short line yields
    # nil, which simply fails to match instead of raising.
    line[offset, discriminator.length] == discriminator
  end
  matching_pair && matching_pair.first
end
|
|
90
|
+
|
|
91
|
+
def build_record(line)
|
|
92
|
+
return nil if line.nil?
|
|
93
|
+
record_type = line_type(line)
|
|
94
|
+
raise "Unknown record type at line #{@line_number.to_s}" if record_type == :unknown
|
|
95
|
+
@record_template[record_type].build_record(line.chomp)
|
|
96
|
+
end
|
|
97
|
+
|
|
98
|
+
def next_record
|
|
99
|
+
build_record(get_next_known_line_type)
|
|
100
|
+
end
|
|
101
|
+
|
|
102
|
+
def line_type(line)
|
|
103
|
+
record_type = get_record_type(line)
|
|
104
|
+
record_type ? record_type : :unknown
|
|
105
|
+
end
|
|
106
|
+
|
|
107
|
+
def get_next_known_line_type
|
|
108
|
+
@fast ? fast_get_next_known_line_type : sequential_get_next_known_line_type
|
|
109
|
+
end
|
|
110
|
+
|
|
111
|
+
def get_next_line
|
|
112
|
+
line = @file.gets
|
|
113
|
+
@line_number+=1
|
|
114
|
+
line
|
|
115
|
+
end
|
|
116
|
+
|
|
117
|
+
def next
|
|
118
|
+
build_record(get_next_line)
|
|
119
|
+
end
|
|
120
|
+
|
|
121
|
+
# Returns the next fixed-width record in fast mode, or nil when every pending
# chunk has been consumed.
#
# Records are sliced out of @current_buffer, @width at a time. When no buffer
# is active, the next chunk (an object responding to pos and width) is taken
# from @offsets and read from @file in a single call.
def fast_get_next_known_line_type
  while @current_buffer.nil?
    return nil if @offsets.nil? || @offsets.empty?

    chunk = @offsets.shift
    @file.pos = chunk.pos
    # read returns nil when the chunk starts at or beyond end of file; in that
    # case move on to the next chunk instead of slicing nil.
    @current_buffer = @file.read(chunk.width)
  end

  record = @current_buffer.slice(@position, @width)
  @position += @width

  # Buffer exhausted: drop it so the next call loads the following chunk.
  if @position >= @current_buffer.size
    @current_buffer = nil
    @position = 0
  end
  record
end
|
|
156
|
+
|
|
157
|
+
# Reads forward in the file until it finds a line that has a known record type
# and whose line number is in range, and returns that line.
#
# @line_number is incremented for every line read, including skipped ones.
#
# Returns nil when the input is finished before such a line is found. A final
# line that is of a known type but out of range is not returned.
def sequential_get_next_known_line_type
  loop do
    line = @file.gets
    @line_number += 1

    wanted = line_type(line) != :unknown && in_range?(@line_number)
    return line if wanted
    return nil if finished?
  end
end
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
# Yields each record of the file in turn.
#
# In fast mode records come from next_record until finished? is true.
# Otherwise every line of the file is visited; lines of unknown type or out of
# range are skipped, and @line_number holds the one-based number of the line
# being yielded.
#
# Returns an Enumerator when called without a block.
def each
  return enum_for(:each) unless block_given?

  if @fast
    @line_number = 1
    yield(next_record) until finished?
  else
    @line_number = 0
    @file.each_line do |line|
      # Count every physical line so line_number tracks the file position,
      # not just the first line.
      @line_number += 1
      next if line_type(line) == :unknown || !in_range?(@line_number)

      yield build_record(line)
    end
  end
end
|
|
183
|
+
|
|
184
|
+
# Formats +record+ with the template for its record type and writes the line
# to the underlying file. Lines are separated by "\n"; nothing is written
# after the last line.
#
# record - a Struct or an Array of field values. When the file has several
#          record types the type is read from record[:record_type], or from
#          the last element when the record is an Array. With a single record
#          type that type's template is always used.
#
# Raises RuntimeError when there is no template for the record's type.
def <<(record)
  record_type =
    if @record_type_symbols == :none
      @record_template.keys.first
    elsif record.is_a?(Array)
      record.last
    else
      record[:record_type]
    end

  # Validate after resolving the type so single-type files do not need a
  # record_type on the record, and Array records get the same clear error.
  template = @record_template[record_type]
  if template.nil?
    raise "Record type problem in output: #{record_type.to_s} type on record, #{@record_template.keys.join(",")} types of templates"
  end

  line = template.build_line(record)
  if @cr_before_line
    line = "\n" + line
  else
    @cr_before_line = true
  end
  @file.write(line)
end
|
|
204
|
+
|
|
205
|
+
# Use when creating a new HFLR file
|
|
206
|
+
def self.open(path, mode, record_types, record_layouts, logical_first_column=0)
|
|
207
|
+
file = File.open(path, mode)
|
|
208
|
+
begin
|
|
209
|
+
hflr_file = new(file, record_types, record_layouts, logical_first_column)
|
|
210
|
+
yield hflr_file
|
|
211
|
+
ensure
|
|
212
|
+
file.close
|
|
213
|
+
end
|
|
214
|
+
end
|
|
215
|
+
|
|
216
|
+
private
|
|
217
|
+
|
|
218
|
+
# Converts ranges of record indexes into the chunks of the file to read.
#
# ranges - list of Integer ranges of zero-based record indexes.
# width  - size of one record.
#
# Each range is cut into pieces of at most @buffer_size / width records.
#
# Returns an Array of structs with pos (start offset of the chunk) and width
# (size of the chunk), in the order of the given ranges.
def offsets_to_read(ranges, width)
  chunk = Struct.new(:pos, :width)
  # Always read at least one record per chunk.
  records_per_chunk = [@buffer_size / width, 1].max

  chunks = []
  ranges.each do |range|
    first = range.first
    last = range.exclude_end? ? range.last - 1 : range.last

    # Step through the range arithmetically rather than expanding it into an
    # Array of every record index.
    first.step(last, records_per_chunk) do |start|
      count = [records_per_chunk, last - start + 1].min
      chunks << chunk.new(start * width, count * width)
    end
  end
  chunks
end
|
|
238
|
+
|
|
239
|
+
# Measures the width of one record as the size of the first line of the file,
# line terminator included, then rewinds the file.
#
# The width is counted in bytes because it is later used to compute positions
# for @file.pos= and lengths for @file.read.
#
# Raises RuntimeError when the file has no first line.
def get_record_width_from_file
  first_line = @file.gets
  @file.rewind
  raise "Cannot determine record width: input file is empty" if first_line.nil?

  first_line.bytesize
end
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
# Converts a layout given in the convenient Ruby form
#   {:record_type => {:var_name => first_column..last_column, ...}, ...}
# into the form used elsewhere: a Hash of record type => Array of structs with
# name (String), start and len, sorted by start column.
#
# Both inclusive (1..5) and exclusive (1...6) ranges are supported.
def create_layouts(layout)
  var_class = Struct.new(:name, :start, :len)
  new_layout = {}
  layout.each_pair do |record_type, vars|
    columns = vars.map do |var_name, range|
      # An exclusive range stops one column before its stated end.
      last_column = range.exclude_end? ? range.last - 1 : range.last
      var_class.new(var_name.to_s, range.first, last_column - range.first + 1)
    end
    new_layout[record_type] = columns.sort_by(&:start)
  end
  new_layout
end
|
|
260
|
+
|
|
261
|
+
end
|
|
262
|
+
|
data/lib/hflr/hflr.rb
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + "/fl_record_file")
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
module HFLR
|
|
2
|
+
|
|
3
|
+
class RecordTemplate
|
|
4
|
+
UNFILLED_CHAR = ' '
|
|
5
|
+
MISSING_OUTPUT = "ZZZZZZZZZZZZZZZZZZZZZ"
|
|
6
|
+
|
|
7
|
+
attr_reader :record_structure, :field_pattern, :record_type, :record_type_label
|
|
8
|
+
attr_accessor :strip_whitespace
|
|
9
|
+
|
|
10
|
+
def initialize(record_type, record_type_label, record_structure, field_pattern, field_widths)
|
|
11
|
+
@record_type = record_type
|
|
12
|
+
@record_type_label = record_type_label
|
|
13
|
+
@record_structure = record_structure
|
|
14
|
+
@field_pattern = field_pattern
|
|
15
|
+
@field_widths = field_widths
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
# Builds one record template per record type.
#
# record_layouts        - Hash of record type => list of variables.
# record_type_symbols   - Hash mapping each record type to its label in the
#                         data, or :none when there is a single record type.
# first_column_location - logical location of the first column, passed on to
#                         every template.
# extra_columns         - extra struct members not mapped to any column:
#                         either a Hash of record type => list of names
#                         (types missing from the Hash get none), or a list of
#                         names applied to every record type. nil means none.
#
# Returns a Hash of record type => template.
def self.create(record_layouts, record_type_symbols, first_column_location, extra_columns=[])
  self.check_record_layouts(record_layouts)

  templates = {}
  record_layouts.each_pair do |record_type, vars|
    extras =
      if extra_columns.is_a?(Hash)
        extra_columns[record_type] || []
      else
        Array(extra_columns)
      end
    record_label = record_type_symbols == :none ? :none : record_type_symbols[record_type]
    templates[record_type] =
      self.create_template_class(record_type,
                                 record_label,
                                 vars,
                                 first_column_location,
                                 extras)
  end
  templates
end
|
|
37
|
+
|
|
38
|
+
private
|
|
39
|
+
# Returns a copy of +names+ extended with the +extra+ column names and with
# :record_type.
#
# A name is only added when no name already present matches it, ignoring case;
# this also drops duplicates inside +extra+. :record_type is appended unless a
# matching name is already there, so it is always available even when it is
# not part of the metadata. +names+ itself is not modified. A nil +extra+ is
# treated as an empty list.
def self.add_extra_columns(names, extra)
  new_names = names.dup
  # names are not case sensitive
  taken = names.map { |name| name.to_s.upcase }

  Array(extra).each do |name|
    key = name.to_s.upcase
    next if taken.include?(key)

    taken << key
    new_names << name
  end

  # No matter what, include 'record_type'
  new_names << :record_type unless taken.include?("RECORD_TYPE")
  new_names
end
|
|
54
|
+
|
|
55
|
+
# Builds the String#unpack / Array#pack template for a layout: one
# "@<offset>a<length>" directive per variable, where offset is the variable's
# start column minus +first_column_location+.
#
# The directives are concatenated with join; Array#to_s would produce the
# inspect form of the Array on Ruby 1.9 and later, which is not a valid
# template.
def self.get_pattern(layout, first_column_location=0)
  layout.map { |l| "@#{l.start - first_column_location}a#{l.len}" }.join
end
|
|
58
|
+
|
|
59
|
+
public
|
|
60
|
+
|
|
61
|
+
# Parses one line of data into a record struct.
#
# The line is split with the template's field pattern, each field is stripped
# in place when strip_whitespace is set, and the fields are loaded into a new
# instance of record_structure whose record_type member is set to this
# template's record type.
#
# Raises RuntimeError when the fields cannot be loaded into the struct.
def build_record(line)
  rec = line.unpack(@field_pattern)
  rec.each(&:strip!) if @strip_whitespace
  begin
    data = record_structure.new(*rec)
    data[:record_type] = @record_type
  rescue StandardError => msg
    raise "On record type #{record_type} problem with structure " + msg.to_s
  end
  data
end
|
|
72
|
+
|
|
73
|
+
def build_line(record)
|
|
74
|
+
line = format_fields(record).pack(@field_pattern)
|
|
75
|
+
line.tr!("\0", UNFILLED_CHAR)
|
|
76
|
+
line
|
|
77
|
+
end
|
|
78
|
+
|
|
79
|
+
private
|
|
80
|
+
|
|
81
|
+
# Returns a Hash giving every record type in +record_types+ its own empty list
# of extra columns.
def self.empty_extra_columns(record_types)
  record_types.each_with_object({}) { |record_type, extra| extra[record_type] = [] }
end
|
|
86
|
+
|
|
87
|
+
# All starting columns must be in order
|
|
88
|
+
def self.check_record_layouts(layouts)
|
|
89
|
+
layouts.values.each do |layout|
|
|
90
|
+
last_v = layout.first
|
|
91
|
+
layout.each do |v|
|
|
92
|
+
if v.respond_to?(:rectype) then
|
|
93
|
+
if last_v.rectype != v.rectype
|
|
94
|
+
raise "record type mismatch between #{v.name} and #{last_v.name}"
|
|
95
|
+
end
|
|
96
|
+
end
|
|
97
|
+
if last_v.start<= v.start then
|
|
98
|
+
last_v = v
|
|
99
|
+
else
|
|
100
|
+
raise "Problem with start columns #{last_v.name} start #{last_v.start.to_s} out of sequence with #{v.name} starting at #{v.start.to_s}"
|
|
101
|
+
end
|
|
102
|
+
end
|
|
103
|
+
end
|
|
104
|
+
end
|
|
105
|
+
|
|
106
|
+
def self.create_template_class(record_type, record_type_label, layout, first_column_location, extra_columns = nil)
|
|
107
|
+
names = layout.map { |l| l.name.to_sym }
|
|
108
|
+
names = add_extra_columns(names, extra_columns)
|
|
109
|
+
structure = Struct.new(*names)
|
|
110
|
+
return new(record_type,
|
|
111
|
+
record_type_label,
|
|
112
|
+
structure,
|
|
113
|
+
self.get_pattern(layout, first_column_location),
|
|
114
|
+
layout.map { |v| v.len })
|
|
115
|
+
end
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
# Formats the leading fields of +record+ for output, one per entry of
# @field_widths, using right_format.
#
# record - a Struct or an Array; field i is read with record[i].
#
# Returns an Array of formatted Strings.
#
# Raises RuntimeError when the record is neither a Struct nor an Array, or
# when a field cannot be formatted; the message names the struct member or the
# column index concerned.
def format_fields(record)
  unless record.is_a?(Array) || record.is_a?(Struct)
    raise "Record to format must be a Struct or Array"
  end

  @field_widths.each_with_index.map do |width, i|
    begin
      right_format(record[i], width)
    rescue StandardError => msg
      if record.is_a?(Struct)
        raise "Output format problem for #{record.members[i].to_s} #{msg.to_s}"
      else
        raise "Output format problem for column #{i.to_s} with value #{record[i].to_s} #{msg.to_s}"
      end
    end
  end
end
|
|
137
|
+
|
|
138
|
+
def right_format(data, len)
|
|
139
|
+
if data.is_a? String
|
|
140
|
+
data_str = data.ljust(len)
|
|
141
|
+
elsif data.is_a? Symbol
|
|
142
|
+
data_str = data.to_s.ljust(len)
|
|
143
|
+
else
|
|
144
|
+
data_str = sprintf("%0#{len.to_s}d", data)
|
|
145
|
+
data_str = MISSING_OUTPUT[0..len-1] if data == -999998
|
|
146
|
+
end
|
|
147
|
+
raise "Data too large for allocated columns #{data_str}" if data_str.size > len
|
|
148
|
+
data_str
|
|
149
|
+
end
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
end # RecordTemplate class
|
|
153
|
+
|
|
154
|
+
end # HFLR module
|
|
155
|
+
|
data/lib/hflr.rb
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
|
|
2
|
+
# Library metadata and path helpers for the gem.
module Hflr

  # :stopdoc:
  # Kept in step with the version declared in hflr2.gemspec.
  VERSION = '1.0.0'
  LIBPATH = ::File.expand_path(::File.dirname(__FILE__)) + ::File::SEPARATOR
  PATH = ::File.dirname(LIBPATH) + ::File::SEPARATOR
  # :startdoc:

  # Returns the version string for the library.
  #
  def self.version
    VERSION
  end

  # Returns the library path for the module. If any arguments are given,
  # they will be joined to the end of the library path using
  # <tt>File.join</tt>.
  #
  def self.libpath( *args )
    args.empty? ? LIBPATH : ::File.join(LIBPATH, args.flatten)
  end

  # Returns the path for the module (the parent directory of the library
  # path). If any arguments are given, they will be joined to the end of the
  # path using <tt>File.join</tt>.
  #
  def self.path( *args )
    args.empty? ? PATH : ::File.join(PATH, args.flatten)
  end

  # Utility method used to require all files ending in .rb that lie in the
  # directory below this file that has the same name as the filename passed
  # in. Optionally, a specific _directory_ name can be passed in such that
  # the _filename_ does not have to be equivalent to the directory.
  # Files are required in sorted path order.
  #
  def self.require_all_libs_relative_to( fname, dir = nil )
    dir ||= ::File.basename(fname, '.*')
    search_me = ::File.expand_path(
        ::File.join(::File.dirname(fname), dir, '**', '*.rb'))

    Dir.glob(search_me).sort.each {|rb| require rb}
  end

end # module Hflr
|
|
46
|
+
|
|
47
|
+
Hflr.require_all_libs_relative_to(__FILE__)
|
|
48
|
+
|
|
49
|
+
# EOF
|
data/test/customers.dat
ADDED
data/test/examples.rb
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
require File.dirname(__FILE__) + "/../lib/hflr"

# The sample data files live next to this script; resolve them from here so
# the examples run from any working directory.
data_dir = File.dirname(__FILE__)

# Read a file with only one record type (no record type markers)

# metadata for customer file
Column = Struct.new(:name, :start, :len)
columns = {:customer => [
    Column.new("name", 1, 25),
    Column.new("zip", 26, 5),
    Column.new("balance", 31, 5)]}

customer_file = FLRFile.new(File.new(File.join(data_dir, "customers.dat")), :customer, columns, 1, [:line_number])


# You can read through the file and access the fields with methods named after the columns:
customer_file.each do |record|
  puts "Customer #{customer_file.line_number.to_s} #{record.name} #{record.zip} "
end
customer_file.close


# You can get the values by attribute name like a hash
def show(record)
  print record.members.map { |m| m.to_s + ": " + record[m].to_s }.join(", ") + "\n"
end

# metadata for customer_orders file
layouts = {:customer => [
    Column.new("name", 1, 25),
    Column.new("zip", 26, 5),
    Column.new("balance", 31, 5)],
           :order => [
               Column.new("order_num", 1, 8),
               Column.new("date", 9, 10),]}

# Record type markers: each record type names the discriminator text and the
# one-based column where it appears in a line.
# NOTE(review): position 1 assumes the marker is the first character of each
# line in customer_orders.dat -- confirm against the data file.
record_types = {
    :customer => {:discriminator => "C", :position => 1},
    :order => {:discriminator => "O", :position => 1}}

customer_orders_file = FLRFile.new(
    File.new(File.join(data_dir, "customer_orders.dat")),
    record_types, # Use these characters as record type markers
    layouts,
    0, # shift parsed string 0 columns to the left of the indicated start column
    {:customer => [:line_number, :record_type], :order => [:line_number, :record_type]}) # Add these columns to the indicated record types post read


customer_orders_file.each do |record|
  show record
end
customer_orders_file.close


puts " ----- You can also use metadata in Ruby -----"
# Use Ruby metadata

layout = {:customer => {
    :name => 1..25,
    :zip => 26..30,
    :balance => 31..35

}
}
customer_file = FLRFile.new(File.new(File.join(data_dir, "customers.dat")), :customer, layout, 1, [:line_number])


# You can read through the file and access the fields with methods named after the columns:
customer_file.each do |record|
  puts "Customer #{customer_file.line_number.to_s} #{record.name} #{record.zip} "
end
customer_file.close
|
|
67
|
+
|
|
@@ -0,0 +1,301 @@
|
|
|
1
|
+
require 'test_hflr'
|
|
2
|
+
|
|
3
|
+
class FLRFileTest < Test::Unit::TestCase
|
|
4
|
+
|
|
5
|
+
def setup
|
|
6
|
+
var_type = Struct.new(:name, :start, :len)
|
|
7
|
+
|
|
8
|
+
# Split up your metadata by record type.
|
|
9
|
+
|
|
10
|
+
@layouts =
|
|
11
|
+
{:household => [var_type.new(:rectypeh, 1, 1), var_type.new(:phone, 2, 1), var_type.new("mortgage", 3, 1)],
|
|
12
|
+
:person => [var_type.new(:rectypep, 1, 1), var_type.new("age", 2, 3), var_type.new("sex", 5, 1), var_type.new("marst", 6, 1)]}
|
|
13
|
+
|
|
14
|
+
# Give the values used in the data for each record type
|
|
15
|
+
@record_types = {:household => {:discriminator => 'H', :position => 1}, :person => {:discriminator => 'P', :position => 1}}
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
def teardown
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
def test_initialize
|
|
22
|
+
sample_data_path = File.dirname(__FILE__)
|
|
23
|
+
fwf = FLRFile.new(
|
|
24
|
+
File.new("#{sample_data_path}/sample.dat"),
|
|
25
|
+
@record_types, # Record types to read from the file, all others will be ignored
|
|
26
|
+
@layouts, # metadata for all record types
|
|
27
|
+
1, # column 0 starts at logical location 1
|
|
28
|
+
{:household => [:people], :person => [:household_id, :pserial]} # extra columns by record type
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
# Extra columns + record_type accessors should have been created
|
|
32
|
+
hh_struct = fwf.record_template[:household].record_structure.new
|
|
33
|
+
assert hh_struct.respond_to?(:record_type), "household record should have record_type method"
|
|
34
|
+
p_struct = fwf.record_template[:person].record_structure.new
|
|
35
|
+
assert p_struct.respond_to?(:household_id), "Should have household_id as an extra column"
|
|
36
|
+
assert p_struct.respond_to?(:record_type), "Should have record_type method"
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
fwf = FLRFile.new(
|
|
40
|
+
File.new("#{sample_data_path}/sample.dat"),
|
|
41
|
+
@record_types, # Record types to read from the file, all others will be ignored
|
|
42
|
+
@layouts, # metadata for all record types
|
|
43
|
+
1)
|
|
44
|
+
|
|
45
|
+
# Should still have added the record_type method but none of the others
|
|
46
|
+
hh_struct = fwf.record_template[:household].record_structure.new
|
|
47
|
+
assert hh_struct.respond_to?(:record_type), "Should have record_type method"
|
|
48
|
+
assert !hh_struct.respond_to?(:people)
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
def test_get_record_type
|
|
52
|
+
sample_data_path = File.dirname(__FILE__)
|
|
53
|
+
fwf = FLRFile.new(
|
|
54
|
+
File.new("#{sample_data_path}/sample.dat"),
|
|
55
|
+
@record_types, # Record types to read from the file, all others will be ignored
|
|
56
|
+
@layouts, # metadata for all record types
|
|
57
|
+
1, # column 0 starts at logical location 1
|
|
58
|
+
{:household => [:people], :person => [:household_id, :pserial]} # extra columns by record type
|
|
59
|
+
)
|
|
60
|
+
assert_nil fwf.get_record_type(nil)
|
|
61
|
+
assert_equal :household, fwf.get_record_type("H123")
|
|
62
|
+
assert_equal :person, fwf.get_record_type("P1234")
|
|
63
|
+
assert_equal nil, fwf.get_record_type("C123")
|
|
64
|
+
|
|
65
|
+
end
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def test_build_record
|
|
69
|
+
sample_data_path = File.dirname(__FILE__)
|
|
70
|
+
fwf = FLRFile.new(
|
|
71
|
+
File.new("#{sample_data_path}/sample.dat"),
|
|
72
|
+
@record_types, # Record types to read from the file, all others will be ignored
|
|
73
|
+
@layouts, # metadata for all record types
|
|
74
|
+
1, # column 0 starts at logical location 1
|
|
75
|
+
{:household => [:people], :person => [:household_id, :pserial]} # extra columns by record type
|
|
76
|
+
)
|
|
77
|
+
|
|
78
|
+
assert_equal nil, fwf.build_record(nil)
|
|
79
|
+
rec = fwf.build_record("H012345666665555444333")
|
|
80
|
+
assert_equal :household, rec[:record_type]
|
|
81
|
+
|
|
82
|
+
assert_raise RuntimeError do
|
|
83
|
+
fwf.build_record("c23abbbc")
|
|
84
|
+
end
|
|
85
|
+
|
|
86
|
+
end
|
|
87
|
+
|
|
88
|
+
def test_fast_next_record
|
|
89
|
+
sample_data_path = File.dirname(__FILE__)
|
|
90
|
+
|
|
91
|
+
layout = {:customer => {
|
|
92
|
+
:name => 1..25,
|
|
93
|
+
:zip => 26..30,
|
|
94
|
+
:balance => 31..35
|
|
95
|
+
}
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
customer_file = FLRFile.new(File.new(sample_data_path + "/customers.dat"), :customer, layout, 1)
|
|
99
|
+
customer_file.set_fast
|
|
100
|
+
customer_file.ranges=[(0..2)]
|
|
101
|
+
|
|
102
|
+
records = []
|
|
103
|
+
until customer_file.finished?
|
|
104
|
+
record = customer_file.next_record
|
|
105
|
+
|
|
106
|
+
puts record.inspect
|
|
107
|
+
records << record
|
|
108
|
+
end
|
|
109
|
+
|
|
110
|
+
assert_equal 3, records.size
|
|
111
|
+
|
|
112
|
+
# Check that the records aren't off by one
|
|
113
|
+
assert_equal "Jane Smith", records[1].name.strip
|
|
114
|
+
assert_equal "John Smith", records.last.name.strip
|
|
115
|
+
|
|
116
|
+
end
|
|
117
|
+
|
|
118
|
+
def test_partial_fast_next_line
|
|
119
|
+
sample_data_path = File.dirname(__FILE__)
|
|
120
|
+
|
|
121
|
+
layout = {:customer => {
|
|
122
|
+
:name => 1..25,
|
|
123
|
+
:zip => 26..30,
|
|
124
|
+
:balance => 31..35
|
|
125
|
+
}
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
customer_file = FLRFile.new(File.new(sample_data_path + "/customers.dat"), :customer, layout, 1)
|
|
129
|
+
customer_file.set_fast
|
|
130
|
+
customer_file.ranges=[(0..1)]
|
|
131
|
+
|
|
132
|
+
records = []
|
|
133
|
+
until customer_file.finished?
|
|
134
|
+
record = customer_file.next_record
|
|
135
|
+
puts record.inspect
|
|
136
|
+
records << record
|
|
137
|
+
end
|
|
138
|
+
|
|
139
|
+
assert_equal 2, records.size
|
|
140
|
+
|
|
141
|
+
# Check that the records aren't off by one
|
|
142
|
+
assert_equal "Jane Smith", records[1].name.strip
|
|
143
|
+
|
|
144
|
+
end
|
|
145
|
+
|
|
146
|
+
def test_fast_each
|
|
147
|
+
sample_data_path = File.dirname(__FILE__)
|
|
148
|
+
|
|
149
|
+
layout = {:customer => {
|
|
150
|
+
:name => 1..25,
|
|
151
|
+
:zip => 26..30,
|
|
152
|
+
:balance => 31..35
|
|
153
|
+
}
|
|
154
|
+
}
|
|
155
|
+
|
|
156
|
+
customer_file = FLRFile.new(File.new(sample_data_path + "/customers.dat"), :customer, layout, 1)
|
|
157
|
+
customer_file.set_fast
|
|
158
|
+
customer_file.ranges=[(0..1)]
|
|
159
|
+
|
|
160
|
+
records = []
|
|
161
|
+
customer_file.each do |record|
|
|
162
|
+
|
|
163
|
+
puts record.inspect
|
|
164
|
+
records << record
|
|
165
|
+
end
|
|
166
|
+
|
|
167
|
+
assert_equal 2, records.size
|
|
168
|
+
|
|
169
|
+
# Check that the records aren't off by one
|
|
170
|
+
assert_equal "Jane Smith ", records[1].name
|
|
171
|
+
|
|
172
|
+
end
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def test_each
|
|
176
|
+
sample_data_path = File.dirname(__FILE__)
|
|
177
|
+
fwf = FLRFile.new(
|
|
178
|
+
File.new("#{sample_data_path}/sample.dat"),
|
|
179
|
+
@record_types,
|
|
180
|
+
@layouts,
|
|
181
|
+
1,
|
|
182
|
+
{:household => [:record_type, :people], :person => [:household_id, :pserial, :record_type]})
|
|
183
|
+
|
|
184
|
+
records = []
|
|
185
|
+
|
|
186
|
+
fwf.each do |record|
|
|
187
|
+
records << record
|
|
188
|
+
end
|
|
189
|
+
assert records.first.respond_to?(:record_type)
|
|
190
|
+
assert_equal :person, records.last.record_type
|
|
191
|
+
assert_equal :household, records[0].record_type
|
|
192
|
+
assert_equal :person, records[1].record_type
|
|
193
|
+
|
|
194
|
+
end
|
|
195
|
+
|
|
196
|
+
# Pulls every record out of sample.dat with FLRFile#next_record and checks
# the record types plus the accessors contributed by the extra columns.
def test_next_record
  data_dir = File.dirname(__FILE__)
  # Extra columns not to come from metadata
  extra_columns = {:household => [:people], :person => [:household_id, :pserial]}
  fwf = FLRFile.new(
    File.new("#{data_dir}/sample.dat"), # data is in this file
    @record_types,                      # Records of different types have these labels
    @layouts,                           # metadata for creating record structs
    1,                                  # All metadata starting column locations are to be shifted 1 left
    extra_columns
  )

  # Keep reading until next_record returns a falsy value.
  records = []
  while (record = fwf.next_record)
    records << record
  end

  assert_equal :household, records.first.record_type
  assert records.first.respond_to?(:record_type)

  # The last record is a person type and should not have a 'people' accessor
  assert !records.last.respond_to?(:people)

  # Should have added these accessors from the extra_columns argument above
  assert records.first.respond_to?(:people)
  assert records.last.respond_to?(:household_id)

  assert_equal :household, records[0].record_type
  assert_equal :person, records[1].record_type
end
|
|
226
|
+
|
|
227
|
+
# Exercises the writing side of FLRFile: the block form of FLRFile.open
# (accepting both Struct and Array records via <<) and a writer built
# directly with FLRFile.new that is closed explicitly.
def test_open
  record = Struct.new(:rectypeh, :phone, :mortgage, :record_type)

  sample_data_path = File.dirname(__FILE__)
  out_path = "#{sample_data_path}/sample_out.dat" # records are written to this file

  FLRFile.open(
    out_path,
    "w",           # open file for writing
    @record_types, # Records of different types have these labels
    @layouts,      # metadata for creating record structs
    1              # All metadata starting column locations are to be shifted 1 left
  ) do |fwf|
    assert_equal FLRFile, fwf.class
    fwf << record.new("H", 1, 2, :household)
    fwf << ["H", 1, 3, :household]
  end

  # Verify the file that was just written (the original checked the input
  # sample.dat by mistake). File.exist? replaces File.exists?, which was
  # removed in Ruby 3.2.
  assert File.exist?(out_path)

  var = Struct.new(:name, :start, :len)
  l = {:customer => [var.new("name", 1, 25), var.new("age", 26, 3)]}
  # NOTE(review): this path is relative to the current working directory, not
  # to the test directory -- confirm that is intended.
  fwf = FLRFile.new(File.new("sample2_out.dat", "w"), :customer, l, 1)

  begin
    fwf << ["joe", 25, :customer]
  ensure
    # Always release the output handle, even if the write raises.
    fwf.close
  end
end
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
# Checks FLRFile#line_type against a nil line, lines with the "H" and "P"
# prefixes, and a line with an unrecognised prefix.
def test_line_type
  data_dir = File.dirname(__FILE__)
  fwf = FLRFile.new(
    File.new("#{data_dir}/sample.dat"),
    @record_types, # Record types to read from the file, all others will be ignored
    @layouts,      # metadata for all record types
    1,             # column 0 starts at logical location 1
    {:household => [:people], :person => [:household_id, :pserial]} # extra columns by record type
  )

  # [input line, expected record type], checked in this order.
  expectations = [
    [nil, :unknown],
    ["H123", :household],
    ["P123", :person],
    ["C123", :unknown]
  ]

  expectations.each do |line, expected_type|
    assert_equal expected_type, fwf.line_type(line)
  end
end
|
|
270
|
+
|
|
271
|
+
# Placeholder test with an empty body; it makes no assertions.
def test_ranges; end
|
|
273
|
+
|
|
274
|
+
# Placeholder test with an empty body; it makes no assertions.
def test_in_range; end
|
|
276
|
+
|
|
277
|
+
|
|
278
|
+
# Reads sample_activities.dat line by line through
# FLRFile#get_next_known_line_type and checks that every non-blank line
# returned starts with "P" or "H".
def test_get_next_known_line_type
  data_dir = File.dirname(__FILE__)
  fwf = FLRFile.new(
    File.new("#{data_dir}/sample_activities.dat"),
    @record_types, # Record types to read from the file, all others will be ignored
    @layouts,      # metadata for all record types
    1,             # column 0 starts at logical location 1
    {:household => [:people], :person => [:household_id, :pserial]} # extra columns by record type
  )

  # By reading the sample_activities file with only the household and person
  # record types known, the activity and who records should be skipped.
  while (line = fwf.get_next_known_line_type)
    # Blank lines carry no record-type prefix to verify.
    next if line.strip.empty?

    assert %w(P H).include?(line[0..0])
  end
end
|
|
299
|
+
end
|
|
300
|
+
|
|
301
|
+
|
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
require 'test_hflr'
|
|
2
|
+
|
|
3
|
+
# Unit tests for HFLR::RecordTemplate: template creation, pattern
# generation, parsing a fixed-width line into a record struct, and
# formatting a record back into a line.
class RecordTemplateTest < Test::Unit::TestCase

  # Minimal variable description: field name, starting column and width.
  Var = Struct.new(:name, :start, :len)

  # Some real world metadata. This is only layout for the starting portion of the ATUS / CPS household record
  # which actually extends to beyond column 117
  # Each entry is [name, start, len].
  AtusHH = [
    [:RECTYPEH, 1, 1],
    [:CASEID, 2, 14],
    [:AGEYCHILD, 16, 3],
    [:SERIAL, 19, 7],
    [:HH_NUMADULTS, 26, 2],
    [:FAMBUS_RESP, 28, 2],
    [:FAMBUS_OTHER, 30, 2],
    [:FAMBUS_SPOUSE, 32, 2],
    [:FAMBUS, 34, 2],
    [:HH_CHILD, 36, 2],
    [:HH_NUMKIDS, 38, 2],
    [:HH_SIZE, 40, 3],
    [:HH_NUMEMPLD, 43, 3],
    [:FAMINCOME, 46, 3]
  ].freeze

  # Builds the per-record-type layouts and the record-type lookup tables
  # shared by the tests below.
  def setup
    hh_vars = AtusHH.map { |v| Var.new(v[0], v[1], v[2]) }

    @vars = {:household => hh_vars,
             :person => [Var.new("age", 2, 3), Var.new("sex", 5, 1)],
             :activity => [Var.new("where", 1, 5), Var.new("activity", 6, 5)],
             :who => [Var.new("relatew", 1, 2)]}
    @record_types = {"H" => :household, "P" => :person, "A" => :activity, "W" => :who}
    # Reverse lookup: record type symbol => label string.
    @record_type_symbols = @record_types.invert
  end

  # RecordTemplate.create should return one template per record type, each
  # with a field pattern and a record structure.
  def test_create
    record_layouts = @vars # variables by record type
    templates = HFLR::RecordTemplate.create(record_layouts, @record_type_symbols, 1)
    assert_equal @vars.keys, templates.keys

    household_field_pattern = templates[:household].field_pattern
    person_field_pattern = templates[:person].field_pattern
    activity_field_pattern = templates[:activity].field_pattern
    who_field_pattern = templates[:who].field_pattern

    assert household_field_pattern.is_a?(String)
    assert person_field_pattern.size > 2, "field pattern should have at least one variable"
    assert activity_field_pattern.size > 2, "field pattern for activity should have at least one variable"
    assert who_field_pattern.size > 2, "field pattern for who should have at least one variable"

    household_record_struct = templates[:household].record_structure.new
    assert household_record_struct.respond_to?(:HH_SIZE)
  end

  # A single template should expose field_pattern and a Struct-based
  # record_structure that carries a record_type member.
  def test_create_template_class
    template = HFLR::RecordTemplate.create_template_class(:person, @record_type_symbols[:person], @vars[:person], 1, {})
    assert template.respond_to?(:field_pattern)
    assert template.respond_to?(:record_structure)
    record_structure = template.record_structure
    assert record_structure.new.is_a?(Struct)
    assert record_structure.new.respond_to?(:record_type)
  end

  # get_pattern should emit one "@<offset>a<len>" directive per variable,
  # with the offset shifted left by the optional second argument.
  def test_get_pattern
    household_layout = @vars[:household]

    pattern = HFLR::RecordTemplate.get_pattern(household_layout)
    assert_equal "@1a1", pattern[0..3]

    # Adjust the location ('@') leftward (the metadata refers to the 0th column as column 1.)
    pattern = HFLR::RecordTemplate.get_pattern(household_layout, 1)
    assert_equal "@0a1", pattern[0..3]

    vars_in_pattern = pattern.scan("a").size
    assert_equal household_layout.size, vars_in_pattern
  end

  # Parsing a household line should yield a Struct addressable by index,
  # by member name and by accessor.
  def test_build_record
    templates = HFLR::RecordTemplate.create(@vars, @record_type_symbols, 1)
    hh_str = "1200501010500069980000001020000000000000020009960200009999999999999999991330299902305030201034300000000037110550997797000000000007700100000000200411000000000"

    # from a string to a record struct
    household_rec = templates[:household].build_record(hh_str)

    assert household_rec.is_a?(Struct)
    assert household_rec.values.size > @vars[:household].size, "Should be values for the extra columns"

    # Check a few things...
    assert_equal 1, household_rec[:SERIAL].to_i
    assert_equal 1, household_rec.SERIAL.to_i

    assert_equal "1", household_rec[0]
    assert_equal "1", household_rec.RECTYPEH
    assert_equal "1", household_rec[:RECTYPEH]
  end

  # Round trip: line -> record -> line must preserve every mapped field.
  def test_build_line
    templates = HFLR::RecordTemplate.create(@vars, @record_type_symbols, 1)

    hh_str = "H200501010500069980000001020000000000000020009960200009999999999999999991330299902305030201034300000000037110550997797000000000007700100000000200411000000000"

    # from a string to a record struct
    household_rec = templates[:household].build_record(hh_str)
    assert_equal "002", hh_str[39..41]
    assert_equal "002", household_rec.HH_SIZE

    # back to a string
    new_hh_str = templates[:household].build_line(household_rec)

    assert_equal "002", new_hh_str[39..41]

    # Some of the values in hh_str won't be in new_hh_str because not all data
    # in hh_str is mapped by household_layout, but the mapped variables should have
    # the same values.
    @vars[:household].each do |v|
      format_str = "@#{v.start - 1}a#{v.len}"
      orig_data = hh_str.unpack(format_str)
      new_data = new_hh_str.unpack(format_str)

      # Expected value first, so a failure message labels the two sides correctly.
      assert_equal orig_data, new_data, "Comparing #{v.name} #{format_str}"
    end
  end

  # Every formatted field must be exactly as wide as its layout says.
  def test_format_fields
    templates = HFLR::RecordTemplate.create(@vars, @record_type_symbols, 1)

    # format_fields is invoked through send (presumably non-public -- confirm).
    formatted_fields = templates[:who].send(:format_fields, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])

    widths = @vars[:who].map { |v| v.len }
    formatted_fields.each_with_index do |field, i|
      # Expected width first, actual size second.
      assert_equal widths[i], field.size, "Width of #{@vars[:who][i].name} should have been #{widths[i].to_s}"
    end
  end

  # right_format should produce a string of the requested width for both
  # string and integer values.
  def test_write_format
    templates = HFLR::RecordTemplate.create(@vars, @record_type_symbols, 1)
    activity = templates[:activity]

    assert_equal "abc", activity.send(:right_format, "abc", 3)
    # NOTE(review): the expected literal for width 6 may have lost padding
    # spaces in this rendering -- confirm against the real file.
    assert_equal "abc ", activity.send(:right_format, "abc", 6)
    assert_equal "3", activity.send(:right_format, 3, 1)
    assert_equal "005", activity.send(:right_format, 5, 3)
    assert_equal "ZZZ", activity.send(:right_format, -999998, 3)
  end

end
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
|
data/test/sample.dat
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
joe 025
|
data/test/sample_out.dat
ADDED
data/test/test_helper.rb
ADDED
data/test/test_hflr.rb
ADDED
metadata
ADDED
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: hflr2
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
hash: 23
|
|
5
|
+
prerelease:
|
|
6
|
+
segments:
|
|
7
|
+
- 1
|
|
8
|
+
- 0
|
|
9
|
+
- 0
|
|
10
|
+
version: 1.0.0
|
|
11
|
+
platform: ruby
|
|
12
|
+
authors:
|
|
13
|
+
- Colin Davis
|
|
14
|
+
- Bozydar Sobczak
|
|
15
|
+
autorequire:
|
|
16
|
+
bindir: bin
|
|
17
|
+
cert_chain: []
|
|
18
|
+
|
|
19
|
+
date: 2011-10-24 00:00:00 Z
|
|
20
|
+
dependencies: []
|
|
21
|
+
|
|
22
|
+
description: |-
|
|
23
|
+
HFLR2 -- Hierarchical Fixed Length Records
|
|
24
|
+
|
|
25
|
+
NOTE: This gem is a modification of the hflr gem. It may not be compatible with it.
|
|
26
|
+
|
|
27
|
+
Allows you to read and write files of fixed width records when the file contains one or more
|
|
28
|
+
than one type of record.
|
|
29
|
+
|
|
30
|
+
Install with 'gem install hflr2'
|
|
31
|
+
|
|
32
|
+
See the tests and examples bundled with this gem.
|
|
33
|
+
email: colin.c.davis@gmail.com
|
|
34
|
+
executables: []
|
|
35
|
+
|
|
36
|
+
extensions: []
|
|
37
|
+
|
|
38
|
+
extra_rdoc_files:
|
|
39
|
+
- History.txt
|
|
40
|
+
- README.txt
|
|
41
|
+
files:
|
|
42
|
+
- History.txt
|
|
43
|
+
- README.txt
|
|
44
|
+
- hflr2.gemspec
|
|
45
|
+
- lib/hflr.rb
|
|
46
|
+
- lib/hflr/fl_record_file.rb
|
|
47
|
+
- lib/hflr/hflr.rb
|
|
48
|
+
- lib/hflr/record_template.rb
|
|
49
|
+
- test/customer_orders.dat
|
|
50
|
+
- test/customers.dat
|
|
51
|
+
- test/examples.rb
|
|
52
|
+
- test/flrfile_test.rb
|
|
53
|
+
- test/record_template_test.rb
|
|
54
|
+
- test/sample.dat
|
|
55
|
+
- test/sample2_out.dat
|
|
56
|
+
- test/sample_activities.dat
|
|
57
|
+
- test/sample_out.dat
|
|
58
|
+
- test/test_helper.rb
|
|
59
|
+
- test/test_hflr.rb
|
|
60
|
+
homepage:
|
|
61
|
+
licenses: []
|
|
62
|
+
|
|
63
|
+
post_install_message:
|
|
64
|
+
rdoc_options: []
|
|
65
|
+
|
|
66
|
+
require_paths:
|
|
67
|
+
- lib
|
|
68
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
69
|
+
none: false
|
|
70
|
+
requirements:
|
|
71
|
+
- - ">="
|
|
72
|
+
- !ruby/object:Gem::Version
|
|
73
|
+
hash: 3
|
|
74
|
+
segments:
|
|
75
|
+
- 0
|
|
76
|
+
version: "0"
|
|
77
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
78
|
+
none: false
|
|
79
|
+
requirements:
|
|
80
|
+
- - ">="
|
|
81
|
+
- !ruby/object:Gem::Version
|
|
82
|
+
hash: 3
|
|
83
|
+
segments:
|
|
84
|
+
- 0
|
|
85
|
+
version: "0"
|
|
86
|
+
requirements: []
|
|
87
|
+
|
|
88
|
+
rubyforge_project:
|
|
89
|
+
rubygems_version: 1.8.11
|
|
90
|
+
signing_key:
|
|
91
|
+
specification_version: 3
|
|
92
|
+
summary: HFLR2 -- Hierarchical Fixed Length Records 2 Allows you to read and write files of fixed width records when the file contains one or more than one type of record
|
|
93
|
+
test_files:
|
|
94
|
+
- test/test_hflr.rb
|
|
95
|
+
- test/test_helper.rb
|