hflr 1.0.2 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/hflr.gemspec +1 -1
- data/lib/hflr/fl_record_file.rb +64 -7
- data/test/customers.dat +2 -1
- data/test/flrfile_test.rb +105 -9
- metadata +1 -1
data/hflr.gemspec
CHANGED
data/lib/hflr/fl_record_file.rb
CHANGED
@@ -23,9 +23,34 @@ class FLRFile
|
|
23
23
|
@record_template = HFLR::RecordTemplate.create(record_layouts, @record_type_symbols, logical_first_column)
|
24
24
|
end
|
25
25
|
end
|
26
|
+
|
27
|
+
# Switches the reader into fast mode, in which records are fetched by
# seeking to byte offsets instead of scanning the file line by line.
#
# Fast mode is refused when @record_type_labels is a Hash, which this class
# treats as "more than one record type". On success the record width is
# measured from the file and cached in @width.
#
# Raises RuntimeError if the file has multiple record types.
def set_fast
  single_record_type = !@record_type_labels.is_a?(Hash)
  @fast = single_record_type
  raise "Cannot set fast mode with more than one record type." unless single_record_type

  @width = get_record_width_from_file
end
|
36
|
+
|
37
|
+
# Restricts reading to the given list of record-number ranges.
#
# Only available in fast mode (see set_fast). Every element of the list must
# be a Range; the byte offsets of the selected records are precomputed from
# the ranges and the record width and stored in @offsets.
#
# ranges - an Array of Range objects, e.g. [(0..2), (10..12)].
#
# Raises RuntimeError if fast mode is not active, if the list is empty, or
# if any element of the list is not a Range.
def ranges=(ranges)
  unless @fast
    raise "Cannot read selected ranges because input file has multiple record types #{@record_type_labels}"
  end

  # Validate the whole list, not just its head: a stray non-Range element
  # further down would otherwise fail later with an obscure NoMethodError.
  invalid = ranges.reject { |r| r.is_a?(Range) }
  if ranges.empty? || !invalid.empty?
    raise "You specified a #{invalid.first.class} instead of a range in the list of ranges. Use (a..b) to specify a range."
  end

  @offsets = offsets_to_read(ranges, @width)
  @ranges = ranges
end
|
46
|
+
|
47
|
+
|
48
|
+
# Tells whether the given line number is selected for reading.
#
# When no ranges have been assigned every line counts as in range;
# otherwise the line must fall inside at least one of the ranges.
#
# Returns true or false.
def in_range?(line_number)
  return true unless @ranges

  @ranges.any? { |range| range.member?(line_number) }
end
|
26
51
|
|
27
52
|
# Reports whether there is nothing left to read.
#
# The reader is finished at end of file. In fast mode it is also finished
# once the queue of byte offsets is exhausted. A fast-mode reader that has
# not been given any ranges has no offsets to visit, so it is reported as
# finished instead of raising NoMethodError on a nil @offsets.
#
# Returns true or false.
def finished?
  return true if @file.eof?

  @fast ? (@offsets.nil? || @offsets.empty?) : false
end
|
30
55
|
|
31
56
|
def close
|
@@ -46,8 +71,7 @@ def build_record(line)
|
|
46
71
|
return @record_template[record_type].build_record(line.chomp)
|
47
72
|
end
|
48
73
|
|
49
|
-
# Fetches the next line of a known record type and turns it into a record.
#
# Returns the built record, or nil when no further line is available. The
# line fetchers return nil in that case, and build_record cannot handle nil
# because it calls chomp on the line.
def next_record
  line = get_next_known_line_type
  line.nil? ? nil : build_record(line)
end
|
53
77
|
|
@@ -57,23 +81,44 @@ def line_type(line)
|
|
57
81
|
end
|
58
82
|
|
59
83
|
# Returns the next raw line to parse, delegating to the offset-based reader
# in fast mode and to the line-by-line reader otherwise.
def get_next_known_line_type
  if @fast
    fast_get_next_known_line_type
  else
    sequential_get_next_known_line_type
  end
end
|
86
|
+
|
87
|
+
# Fast-mode reader: takes the next byte offset off the queue, seeks there
# and reads one record of @width bytes.
#
# Returns the raw record String, or nil when no offsets are queued (or when
# the read itself yields nil).
def fast_get_next_known_line_type
  return nil if @offsets.nil? || @offsets.empty?

  @file.pos = @offsets.shift
  @file.read(@width)
end
|
95
|
+
|
96
|
+
# Sequential reader: scans forward line by line and returns the first line
# that is both inside the selected ranges and of a known record type.
# @line_number is advanced once for every line actually read.
#
# Returns the raw line String, or nil once the file is exhausted. A nil line
# at end of file is never handed to line_type, and a trailing line that is
# out of range is never returned just because the file ended.
def sequential_get_next_known_line_type
  while (line = @file.gets)
    @line_number += 1
    next unless in_range?(@line_number)
    return line unless line_type(line) == :unknown
  end
  nil
end
|
68
107
|
|
108
|
+
|
109
|
+
|
69
110
|
# Yields every selected record in the file to the given block.
#
# In fast mode records are pulled through next_record until finished?
# reports true. Otherwise the file is scanned line by line; lines of an
# unknown record type or outside the selected ranges are skipped.
# @line_number starts at 1 and is advanced after each line scanned, so the
# range check sees the number of the current line.
def each
  @line_number = 1
  if @fast
    yield(next_record) until finished?
  else
    @file.each_line do |line|
      known = line_type(line) != :unknown
      yield build_record(line) if known && in_range?(@line_number)
      @line_number += 1
    end
  end
end
|
78
123
|
|
79
124
|
# This will take a Hash or Struct or Array; if an Array the record type must be the last element when
|
@@ -105,6 +150,17 @@ end
|
|
105
150
|
|
106
151
|
private
|
107
152
|
|
153
|
+
# Converts record-number ranges into the byte offsets at which those records
# start, assuming every record is exactly `width` bytes long.
#
# ranges - an Array of Ranges of record numbers.
# width  - the record width in bytes.
#
# Returns a flat Array of offsets in range order, with duplicates (from
# overlapping ranges) removed.
def offsets_to_read(ranges, width)
  ranges.flat_map { |range| range.map { |record_number| record_number * width } }.uniq
end
|
156
|
+
|
157
|
+
# Measures the width of one record by reading the first line of the file,
# line terminator included, and then rewinds the file.
#
# The width is measured in bytes rather than characters, because it is used
# for byte-based seeking (pos=) and reading (read).
#
# Returns the width as an Integer.
# Raises RuntimeError if the file has no first line to measure.
def get_record_width_from_file
  first_line = @file.gets
  @file.rewind
  raise "Cannot determine record width: input file is empty." if first_line.nil?

  first_line.bytesize
end
|
162
|
+
|
163
|
+
|
108
164
|
# If the layout is given in the convenient Ruby form
|
109
165
|
def create_layouts(layout)
|
110
166
|
var_class = Struct.new(:name,:start,:len)
|
@@ -121,3 +177,4 @@ new_layout = {}
|
|
121
177
|
end
|
122
178
|
|
123
179
|
end
|
180
|
+
|
data/test/customers.dat
CHANGED
data/test/flrfile_test.rb
CHANGED
@@ -23,14 +23,14 @@ class FLRFileTest < Test::Unit::TestCase
|
|
23
23
|
|
24
24
|
|
25
25
|
def test_initialize
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
26
|
+
sample_data_path = File.dirname(__FILE__)
|
27
|
+
fwf = FLRFile.new(
|
28
|
+
File.new("#{sample_data_path}/sample.dat"),
|
29
|
+
@record_types, # Record types to read from the file, all others will be ignored
|
30
|
+
@layouts,# metadata for all record types
|
31
|
+
1, # column 0 starts at logical location 1
|
32
|
+
{:household=>[:people],:person=>[:household_id,:pserial]} # extra columns by record type
|
33
|
+
)
|
34
34
|
|
35
35
|
# Extra columns + record_type accessors should have been created
|
36
36
|
hh_struct = fwf.record_template[:household].record_structure.new
|
@@ -72,7 +72,7 @@ class FLRFileTest < Test::Unit::TestCase
|
|
72
72
|
|
73
73
|
|
74
74
|
def test_build_record
|
75
|
-
|
75
|
+
sample_data_path = File.dirname(__FILE__)
|
76
76
|
fwf = FLRFile.new(
|
77
77
|
File.new("#{sample_data_path}/sample.dat"),
|
78
78
|
@record_types, # Record types to read from the file, all others will be ignored
|
@@ -90,7 +90,93 @@ class FLRFileTest < Test::Unit::TestCase
|
|
90
90
|
end
|
91
91
|
|
92
92
|
end
|
93
|
+
|
94
|
+
# Fast mode with next_record: selecting records 0..2 must return exactly
# three records, in file order.
def test_faast_next_record
  sample_data_path = File.dirname(__FILE__)

  layout = {:customer=>{
    :name=>1..25,
    :zip=>26..30,
    :balance=>31..35
    }
  }

  # The block form closes the data file even when an assertion fails.
  File.open(sample_data_path + "/customers.dat") do |data_file|
    customer_file = FLRFile.new(data_file, :customer, layout, 1)
    customer_file.set_fast
    customer_file.ranges = [(0..2)]

    records = []
    records << customer_file.next_record until customer_file.finished?

    assert_equal 3, records.size

    # Check that the records aren't off by one
    assert_equal "Jane Smith", records[1].name
    assert_equal "John Smith", records.last.name
  end
end
|
122
|
+
|
123
|
+
# Fast mode with next_record over a partial selection: records 0..1 must
# return exactly two records and stop before the end of the file.
def test_partial_fast_next_line
  sample_data_path = File.dirname(__FILE__)

  layout = {:customer=>{
    :name=>1..25,
    :zip=>26..30,
    :balance=>31..35
    }
  }

  # The block form closes the data file even when an assertion fails.
  File.open(sample_data_path + "/customers.dat") do |data_file|
    customer_file = FLRFile.new(data_file, :customer, layout, 1)
    customer_file.set_fast
    customer_file.ranges = [(0..1)]

    records = []
    records << customer_file.next_record until customer_file.finished?

    assert_equal 2, records.size

    # Check that the records aren't off by one
    assert_equal "Jane Smith", records[1].name
  end
end
|
150
|
+
|
151
|
+
# Fast mode with each: selecting records 0..1 must yield exactly two
# records, in file order.
def test_fast_each
  sample_data_path = File.dirname(__FILE__)

  layout = {:customer=>{
    :name=>1..25,
    :zip=>26..30,
    :balance=>31..35
    }
  }

  # The block form closes the data file even when an assertion fails.
  File.open(sample_data_path + "/customers.dat") do |data_file|
    customer_file = FLRFile.new(data_file, :customer, layout, 1)
    customer_file.set_fast
    customer_file.ranges = [(0..1)]

    records = []
    customer_file.each { |record| records << record }

    assert_equal 2, records.size

    # Check that the records aren't off by one
    assert_equal "Jane Smith", records[1].name
  end
end
|
178
|
+
|
179
|
+
|
94
180
|
|
95
181
|
def test_each
|
96
182
|
sample_data_path = File.dirname(__FILE__)
|
@@ -187,6 +273,14 @@ def test_line_type
|
|
187
273
|
assert_equal :person,fwf.line_type("P123")
|
188
274
|
assert_equal :unknown, fwf.line_type("C123")
|
189
275
|
end
|
276
|
+
|
277
|
+
# ranges= must reject list elements that are not Range objects and accept a
# proper list of ranges once fast mode is on.
def test_ranges
  sample_data_path = File.dirname(__FILE__)

  layout = {:customer=>{
    :name=>1..25,
    :zip=>26..30,
    :balance=>31..35
    }
  }

  File.open(sample_data_path + "/customers.dat") do |data_file|
    customer_file = FLRFile.new(data_file, :customer, layout, 1)
    customer_file.set_fast

    # Plain integers are not ranges and must be refused.
    assert_raise(RuntimeError) { customer_file.ranges = [1, 2] }

    customer_file.ranges = [(0..1)]
    records = []
    customer_file.each { |record| records << record }
    assert_equal 2, records.size
  end
end
|
279
|
+
|
280
|
+
# in_range? must accept every line while no ranges are set and afterwards
# only the lines covered by one of the assigned ranges.
def test_in_range
  sample_data_path = File.dirname(__FILE__)

  layout = {:customer=>{
    :name=>1..25,
    :zip=>26..30,
    :balance=>31..35
    }
  }

  File.open(sample_data_path + "/customers.dat") do |data_file|
    customer_file = FLRFile.new(data_file, :customer, layout, 1)

    # No ranges assigned yet: everything is in range.
    assert customer_file.in_range?(0)
    assert customer_file.in_range?(1000)

    customer_file.set_fast
    customer_file.ranges = [(0..1), (4..5)]

    assert customer_file.in_range?(0)
    assert customer_file.in_range?(1)
    assert customer_file.in_range?(5)
    assert !customer_file.in_range?(2)
    assert !customer_file.in_range?(6)
  end
end
|
282
|
+
|
283
|
+
|
190
284
|
|
191
285
|
def test_get_next_known_line_type
|
192
286
|
sample_data_path = File.dirname(__FILE__)
|
@@ -211,3 +305,5 @@ end
|
|
211
305
|
end
|
212
306
|
|
213
307
|
end
|
308
|
+
|
309
|
+
|