hflr 1.0.2 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/hflr.gemspec +1 -1
- data/lib/hflr/fl_record_file.rb +64 -7
- data/test/customers.dat +2 -1
- data/test/flrfile_test.rb +105 -9
- metadata +1 -1
data/hflr.gemspec
CHANGED
data/lib/hflr/fl_record_file.rb
CHANGED
@@ -23,9 +23,34 @@ class FLRFile
|
|
23
23
|
@record_template = HFLR::RecordTemplate.create(record_layouts, @record_type_symbols, logical_first_column)
|
24
24
|
end
|
25
25
|
end
|
26
|
+
|
27
|
+
# Switches the reader into "fast" mode, where records are fetched by seeking
# to computed offsets instead of scanning the file line by line.
#
# Fast mode is refused when @record_type_labels is a Hash, which this class
# reports as "more than one record type". On success the record width is
# measured from the file and remembered in @width.
#
# Raises RuntimeError when @record_type_labels is a Hash (@fast is left false).
# Returns the measured record width.
def set_fast
  @fast = !@record_type_labels.is_a?(Hash)
  raise "Cannot set fast mode with more than one record type." unless @fast

  @width = get_record_width_from_file
end
|
36
|
+
|
37
|
+
# Restricts reading to the records whose indexes fall inside +ranges+.
#
# ranges - a non-empty list of Range objects, e.g. [(0..2), (10..12)].
#
# Every entry is validated, not just the first one: a non-Range entry later in
# the list would otherwise blow up inside offsets_to_read with a
# NoMethodError. Nothing is stored unless the whole list is valid.
#
# Raises RuntimeError when fast mode is not enabled, when the list is empty,
# or when any entry is not a Range.
def ranges=(ranges)
  raise "Cannot read selected ranges because input file has multiple record types #{@record_type_labels}" unless @fast

  all_ranges = !ranges.empty? && ranges.all? { |r| r.is_a?(Range) }
  unless all_ranges
    # For an empty list this is nil, which reproduces the historical
    # "You specified a NilClass ..." message.
    offender = ranges.find { |r| !r.is_a?(Range) }
    raise "You specified a #{offender.class} instead of a range in the list of ranges. Use (a..b) to specify a range."
  end

  @offsets = offsets_to_read(ranges, @width)
  @ranges = ranges
end
|
46
|
+
|
47
|
+
|
48
|
+
# Tells whether +line_number+ should be read.
#
# With no ranges configured every line is in range; otherwise the line must
# be a member of at least one configured range.
#
# Returns true or false.
def in_range?(line_number)
  return true unless @ranges

  @ranges.any? { |r| r.member?(line_number) }
end
|
26
51
|
|
27
52
|
# Tells whether there is nothing left to read.
#
# Reading is finished at end of file. In fast mode it is also finished once
# the list of pending offsets is exhausted. Offsets that were never assigned
# (fast mode enabled but no ranges given) count as exhausted, matching
# fast_get_next_known_line_type, which yields nothing in that state;
# previously this case raised NoMethodError on nil.
#
# Returns true or false.
def finished?
  return true if @file.eof?
  return false unless @fast

  @offsets.nil? || @offsets.empty?
end
|
30
55
|
|
31
56
|
def close
|
@@ -46,8 +71,7 @@ def build_record(line)
|
|
46
71
|
return @record_template[record_type].build_record(line.chomp)
|
47
72
|
end
|
48
73
|
|
49
|
-
# Fetches the next line via get_next_known_line_type and hands it to
# build_record, returning whatever build_record produces.
def next_record
  line = get_next_known_line_type
  build_record(line)
end
|
53
77
|
|
@@ -57,23 +81,44 @@ def line_type(line)
|
|
57
81
|
end
|
58
82
|
|
59
83
|
# Dispatches to the offset-based reader in fast mode and to the
# line-by-line reader otherwise.
def get_next_known_line_type
  if @fast
    fast_get_next_known_line_type
  else
    sequential_get_next_known_line_type
  end
end
|
86
|
+
|
87
|
+
# Reads the record at the next pending offset.
#
# Consumes the first entry of @offsets, seeks the file to it and reads @width
# bytes from there. Note that, despite the name, no record-type check is made
# on this path.
#
# Returns the raw record text, or nil when no offsets are pending (never
# assigned, or all consumed).
def fast_get_next_known_line_type
  return nil if @offsets.nil? || @offsets.empty?

  @file.pos = @offsets.shift
  @file.read(@width)
end
|
95
|
+
|
96
|
+
# Scans forward line by line and returns the next usable line.
#
# A line is usable when its number is in range (see in_range?) and line_type
# does not classify it as :unknown. @line_number is advanced once for every
# line actually read.
#
# Compared with the previous loop this never passes nil to line_type at end
# of file, and it no longer returns an out-of-range line just because the
# file ended while skipping.
#
# Returns the line (including its line terminator), or nil when the file is
# exhausted without finding a usable line.
def sequential_get_next_known_line_type
  while (line = @file.gets)
    @line_number += 1
    next unless in_range?(@line_number)
    return line unless line_type(line) == :unknown
  end

  nil
end
|
68
107
|
|
108
|
+
|
109
|
+
|
69
110
|
# Yields every selected record in the file to the given block.
#
# In fast mode records are pulled with next_record until finished? reports
# completion. Otherwise the file is walked line by line (numbered from 1) and
# a record is built and yielded for each line that is in range and whose type
# is not :unknown.
#
# The line counter is advanced after every line in the sequential branch;
# before, it stayed at 1 for the whole file, so in_range? was always asked
# about line 1.
def each
  @line_number = 1

  if @fast
    yield(next_record) until finished?
  else
    @file.each_line do |line|
      if in_range?(@line_number) && line_type(line) != :unknown
        yield build_record(line)
      end
      @line_number += 1
    end
  end
end
|
78
123
|
|
79
124
|
# This will take a Hash, a Struct, or an Array; if an Array, the record type must be the last element when
|
@@ -105,6 +150,17 @@ end
|
|
105
150
|
|
106
151
|
private
|
107
152
|
|
153
|
+
# Converts record-index ranges into file offsets.
#
# ranges - list of Ranges of record indexes.
# width  - size of one record, in the same unit as the resulting offsets.
#
# Returns a flat Array of index * width for every index in every range, in
# the order given, with duplicates (from overlapping ranges) removed.
def offsets_to_read(ranges, width)
  ranges.flat_map { |range| range.map { |index| index * width } }.uniq
end
|
156
|
+
|
157
|
+
# Measures the width of one record from the next line of the file, then
# rewinds the file.
#
# The width is taken in bytes, line terminator included, because it is later
# multiplied into file positions, which are byte offsets. An empty file
# yields 0 instead of raising NoMethodError on nil.
#
# Returns the width as an Integer.
def get_record_width_from_file
  first_line = @file.gets
  @file.rewind
  first_line.nil? ? 0 : first_line.bytesize
end
|
162
|
+
|
163
|
+
|
108
164
|
# If the layout is given in the convenient Ruby form
|
109
165
|
def create_layouts(layout)
|
110
166
|
var_class = Struct.new(:name,:start,:len)
|
@@ -121,3 +177,4 @@ new_layout = {}
|
|
121
177
|
end
|
122
178
|
|
123
179
|
end
|
180
|
+
|
data/test/customers.dat
CHANGED
data/test/flrfile_test.rb
CHANGED
@@ -23,14 +23,14 @@ class FLRFileTest < Test::Unit::TestCase
|
|
23
23
|
|
24
24
|
|
25
25
|
def test_initialize
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
26
|
+
sample_data_path = File.dirname(__FILE__)
|
27
|
+
fwf = FLRFile.new(
|
28
|
+
File.new("#{sample_data_path}/sample.dat"),
|
29
|
+
@record_types, # Record types to read from the file, all others will be ignored
|
30
|
+
@layouts,# metadata for all record types
|
31
|
+
1, # column 0 starts at logical location 1
|
32
|
+
{:household=>[:people],:person=>[:household_id,:pserial]} # extra columns by record type
|
33
|
+
)
|
34
34
|
|
35
35
|
# Extra columns + record_type accessors should have been created
|
36
36
|
hh_struct = fwf.record_template[:household].record_structure.new
|
@@ -72,7 +72,7 @@ class FLRFileTest < Test::Unit::TestCase
|
|
72
72
|
|
73
73
|
|
74
74
|
def test_build_record
|
75
|
-
|
75
|
+
sample_data_path = File.dirname(__FILE__)
|
76
76
|
fwf = FLRFile.new(
|
77
77
|
File.new("#{sample_data_path}/sample.dat"),
|
78
78
|
@record_types, # Record types to read from the file, all others will be ignored
|
@@ -90,7 +90,93 @@ class FLRFileTest < Test::Unit::TestCase
|
|
90
90
|
end
|
91
91
|
|
92
92
|
end
|
93
|
+
|
94
|
+
# Layout of the single-record-type customers.dat fixture.
def customer_layout
  {:customer=>{
    :name=>1..25,
    :zip=>26..30,
    :balance=>31..35
    }
  }
end

# Opens customers.dat, switches it to fast mode and restricts it to +ranges+.
def fast_customer_file(ranges)
  sample_data_path = File.dirname(__FILE__)
  customer_file = FLRFile.new(File.new(sample_data_path + "/customers.dat"), :customer, customer_layout, 1)
  customer_file.set_fast
  customer_file.ranges = ranges
  customer_file
end

# Pulls records with next_record until the file reports it is finished.
def read_all_with_next_record(customer_file)
  records = []
  records << customer_file.next_record until customer_file.finished?
  records
end

# Fast mode, next_record, first three records.
def test_faast_next_record
  records = read_all_with_next_record(fast_customer_file([(0..2)]))

  assert_equal 3, records.size

  # Check that the records aren't off by one
  assert_equal "Jane Smith", records[1].name
  assert_equal "John Smith", records.last.name
end

# Fast mode, next_record, only part of the file selected.
def test_partial_fast_next_line
  records = read_all_with_next_record(fast_customer_file([(0..1)]))

  assert_equal 2, records.size

  # Check that the records aren't off by one
  assert_equal "Jane Smith", records[1].name
end

# Fast mode through the each iterator.
def test_fast_each
  records = []
  fast_customer_file([(0..1)]).each do |record|
    records << record
  end

  assert_equal 2, records.size

  # Check that the records aren't off by one
  assert_equal "Jane Smith", records[1].name
end
|
178
|
+
|
179
|
+
|
94
180
|
|
95
181
|
def test_each
|
96
182
|
sample_data_path = File.dirname(__FILE__)
|
@@ -187,6 +273,14 @@ def test_line_type
|
|
187
273
|
assert_equal :person,fwf.line_type("P123")
|
188
274
|
assert_equal :unknown, fwf.line_type("C123")
|
189
275
|
end
|
276
|
+
|
277
|
+
# Assigning ranges is only valid in fast mode and only with Range entries.
def test_ranges
  layout = {:customer=>{:name=>1..25, :zip=>26..30, :balance=>31..35}}
  customer_file = FLRFile.new(File.new(File.dirname(__FILE__) + "/customers.dat"), :customer, layout, 1)

  # Fast mode has not been requested yet, so ranges must be rejected.
  assert_raise(RuntimeError) { customer_file.ranges = [(0..1)] }

  customer_file.set_fast

  # Entries must be Range objects.
  assert_raise(RuntimeError) { customer_file.ranges = [1, 2] }

  # A single-record range selects exactly that record.
  customer_file.ranges = [(1..1)]
  assert_equal "Jane Smith", customer_file.next_record.name
  assert customer_file.finished?
end
|
279
|
+
|
280
|
+
# in_range? accepts everything until ranges are set, then only their members.
def test_in_range
  layout = {:customer=>{:name=>1..25, :zip=>26..30, :balance=>31..35}}
  customer_file = FLRFile.new(File.new(File.dirname(__FILE__) + "/customers.dat"), :customer, layout, 1)

  # No ranges configured: every line number is in range.
  assert customer_file.in_range?(0)
  assert customer_file.in_range?(1000)

  customer_file.set_fast
  customer_file.ranges = [(0..1), (5..6)]

  assert customer_file.in_range?(0)
  assert customer_file.in_range?(1)
  assert customer_file.in_range?(5)
  assert !customer_file.in_range?(3)
  assert !customer_file.in_range?(7)
end
|
282
|
+
|
283
|
+
|
190
284
|
|
191
285
|
def test_get_next_known_line_type
|
192
286
|
sample_data_path = File.dirname(__FILE__)
|
@@ -211,3 +305,5 @@ end
|
|
211
305
|
end
|
212
306
|
|
213
307
|
end
|
308
|
+
|
309
|
+
|