hflr 1.0.2 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = %q{hflr}
5
- s.version = "1.0.2"
5
+ s.version = "1.1.0"
6
6
 
7
7
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
8
8
  s.authors = ["Colin Davis"]
@@ -23,9 +23,34 @@ class FLRFile
23
23
  @record_template = HFLR::RecordTemplate.create(record_layouts, @record_type_symbols, logical_first_column)
24
24
  end
25
25
  end
26
+
27
# Switches the reader into "fast" mode, in which records are fetched by
# seeking to computed offsets instead of scanning line by line.
#
# Fast mode is only allowed when @record_type_labels is not a Hash
# (i.e. there is a single record type). On success the record width is
# measured from the file and stored in @width.
#
# Raises RuntimeError when @record_type_labels is a Hash; @fast is left
# false in that case.
def set_fast
  @fast = !@record_type_labels.is_a?(Hash)
  raise "Cannot set fast mode with more than one record type." unless @fast

  @width = get_record_width_from_file
end
36
+
37
# Selects the record ranges to read. Only available in fast mode.
#
# ranges - a list of Range objects; each member is multiplied by the
#          record width (via offsets_to_read) to obtain a file offset.
#
# Every element must be a Range. Previously only the first element was
# checked, so a stray non-Range later in the list failed inside
# offsets_to_read with an unrelated NoMethodError.
#
# Raises RuntimeError if fast mode is not enabled or if any element of
# the list is not a Range (an empty list is rejected as well).
def ranges=(ranges)
  unless @fast
    raise "Cannot read selected ranges because input file has multiple record types #{@record_type_labels}"
  end

  unless ranges.first.is_a?(Range) && ranges.all? { |candidate| candidate.is_a?(Range) }
    offender = ranges.find { |candidate| !candidate.is_a?(Range) }
    raise "You specified a #{offender.class} instead of a range in the list of ranges. Use (a..b) to specify a range."
  end

  @offsets = offsets_to_read(ranges, @width)
  @ranges = ranges
end
46
+
47
+
48
# Tells whether line_number falls inside any of the selected ranges.
# When no ranges have been selected (@ranges is unset) every line number
# counts as in range.
#
# Returns true or false.
def in_range?(line_number)
  return true unless @ranges

  @ranges.any? { |range| range.member?(line_number) }
end
26
51
 
27
52
  def finished?
28
- @file.eof?
53
+ @file.eof? || (@fast && @offsets.empty?)
29
54
  end
30
55
 
31
56
  def close
@@ -46,8 +71,7 @@ def build_record(line)
46
71
  return @record_template[record_type].build_record(line.chomp)
47
72
  end
48
73
 
49
# Fetches the next line of a known record type and builds a record from it.
#
# get_next_known_line_type returns nil when no further known line is
# available; build_record calls `line.chomp`, so nil is returned here
# instead of passing it along.
#
# Returns the built record, or nil when there is no further line.
def next_record
  line = get_next_known_line_type
  line.nil? ? nil : build_record(line)
end
53
77
 
@@ -57,23 +81,44 @@ def line_type(line)
57
81
  end
58
82
 
59
83
  def get_next_known_line_type
84
+ @fast ? fast_get_next_known_line_type : sequential_get_next_known_line_type
85
+ end
86
+
87
# Reads the next selected record in fast mode: takes the next offset off
# the front of @offsets, seeks there and reads @width bytes.
#
# Returns the raw record text, or nil when no offsets were ever set or
# all of them have been consumed.
def fast_get_next_known_line_type
  return nil if @offsets.nil? || @offsets.empty?

  @file.pos = @offsets.shift
  @file.read(@width)
end
95
+
96
# Scans forward line by line until it finds a line that is both inside
# the selected ranges and of a known record type.
#
# @line_number is incremented for every line actually read.
#
# Two problems of the loop-until-finished form are avoided here:
# * when EOF ended the skip loop, a last line of a known type was
#   returned even though it was outside the selected ranges;
# * a nil from `gets` at EOF was handed to line_type.
#
# Returns the matching line (newline included), or nil when the file is
# exhausted without a match.
def sequential_get_next_known_line_type
  loop do
    line = @file.gets
    return nil if line.nil?

    @line_number += 1
    return line if in_range?(@line_number) && line_type(line) != :unknown
  end
end
68
107
 
108
+
109
+
69
110
  def each
70
- @file.each_line do |line|
71
- @line_number += 1
72
- unless line_type(line) == :unknown
111
+ @line_number = 1
112
+ if @fast
113
+ yield(next_record) until finished?
114
+ else
115
+ @file.each_line do |line|
116
+ unless line_type(line) == :unknown || !in_range?(@line_number)
73
117
  data = build_record(line)
74
118
  yield data
75
119
  end
76
120
  end
121
+ end
77
122
  end
78
123
 
79
124
  # This will take a Hash or Struct or Array; if an Array the record type must be the last element when
@@ -105,6 +150,17 @@ end
105
150
 
106
151
  private
107
152
 
153
# Turns a list of record-index ranges into a flat list of file offsets
# (index * width), keeping first-seen order and dropping duplicates.
def offsets_to_read(ranges, width)
  offsets_per_range = ranges.map do |range|
    range.map { |record_index| record_index * width }
  end
  offsets_per_range.flatten.uniq
end
156
+
157
# Measures the record width from the next line of @file (line terminator
# included) and rewinds the file afterwards.
#
# The width is measured in bytes because it is later used with
# `@file.pos=` and `@file.read`, which both work in bytes; a character
# count would be too small for lines with multi-byte characters.
#
# Raises RuntimeError when no line can be read (e.g. an empty file).
def get_record_width_from_file
  first_line = @file.gets
  raise "Cannot determine record width: no line could be read from the input file." if first_line.nil?

  first_line.bytesize
ensure
  @file.rewind
end
162
+
163
+
108
164
  # If the layout is given in the convenient Ruby form
109
165
  def create_layouts(layout)
110
166
  var_class = Struct.new(:name,:start,:len)
@@ -121,3 +177,4 @@ new_layout = {}
121
177
  end
122
178
 
123
179
  end
180
+
@@ -1,2 +1,3 @@
1
1
  Joe Smith 55455025.53
2
- Jane Smith 55404015.25
2
+ Jane Smith 55404015.25
3
+ John Smith 55404015.25
@@ -23,14 +23,14 @@ class FLRFileTest < Test::Unit::TestCase
23
23
 
24
24
 
25
25
  def test_initialize
26
- sample_data_path = File.dirname(__FILE__)
27
- fwf = FLRFile.new(
28
- File.new("#{sample_data_path}/sample.dat"),
29
- @record_types, # Record types to read from the file, all others will be ignored
30
- @layouts,# metadata for all record types
31
- 1, # column 0 starts at logical location 1
32
- {:household=>[:people],:person=>[:household_id,:pserial]} # extra columns by record type
33
- )
26
+ sample_data_path = File.dirname(__FILE__)
27
+ fwf = FLRFile.new(
28
+ File.new("#{sample_data_path}/sample.dat"),
29
+ @record_types, # Record types to read from the file, all others will be ignored
30
+ @layouts,# metadata for all record types
31
+ 1, # column 0 starts at logical location 1
32
+ {:household=>[:people],:person=>[:household_id,:pserial]} # extra columns by record type
33
+ )
34
34
 
35
35
  # Extra columns + record_type accessors should have been created
36
36
  hh_struct = fwf.record_template[:household].record_structure.new
@@ -72,7 +72,7 @@ class FLRFileTest < Test::Unit::TestCase
72
72
 
73
73
 
74
74
  def test_build_record
75
- sample_data_path = File.dirname(__FILE__)
75
+ sample_data_path = File.dirname(__FILE__)
76
76
  fwf = FLRFile.new(
77
77
  File.new("#{sample_data_path}/sample.dat"),
78
78
  @record_types, # Record types to read from the file, all others will be ignored
@@ -90,7 +90,93 @@ class FLRFileTest < Test::Unit::TestCase
90
90
  end
91
91
 
92
92
  end
93
+
94
# Fast mode with a range covering all three sample records: next_record
# must return them in file order until finished? reports true.
def test_faast_next_record
  sample_data_path = File.dirname(__FILE__)

  layout = {:customer=>{
    :name=>1..25,
    :zip=>26..30,
    :balance=>31..35
    }
  }

  customer_file = FLRFile.new(File.new(sample_data_path + "/customers.dat"), :customer, layout, 1)
  customer_file.set_fast
  customer_file.ranges=[(0..2)]

  # No debug printing here: the assertions below are the check.
  records = []
  records << customer_file.next_record until customer_file.finished?

  assert_equal 3, records.size

  # Check that the records aren't off by one
  assert_equal "Jane Smith",records[1].name
  assert_equal "John Smith",records.last.name
end
122
+
123
# Fast mode with a range covering only the first two sample records:
# reading must stop once the selected offsets are used up.
def test_partial_fast_next_line
  sample_data_path = File.dirname(__FILE__)

  layout = {:customer=>{
    :name=>1..25,
    :zip=>26..30,
    :balance=>31..35
    }
  }

  customer_file = FLRFile.new(File.new(sample_data_path + "/customers.dat"), :customer, layout, 1)
  customer_file.set_fast
  customer_file.ranges=[(0..1)]

  # No debug printing here: the assertions below are the check.
  records = []
  records << customer_file.next_record until customer_file.finished?

  assert_equal 2, records.size

  # Check that the records aren't off by one
  assert_equal "Jane Smith",records[1].name
end
150
+
151
# Fast mode through #each: only the records in the selected range are
# yielded.
def test_fast_each
  sample_data_path = File.dirname(__FILE__)

  layout = {:customer=>{
    :name=>1..25,
    :zip=>26..30,
    :balance=>31..35
    }
  }

  customer_file = FLRFile.new(File.new(sample_data_path + "/customers.dat"), :customer, layout, 1)
  customer_file.set_fast
  customer_file.ranges=[(0..1)]

  # No debug printing here: the assertions below are the check.
  records = []
  customer_file.each do |record|
    records << record
  end

  assert_equal 2, records.size

  # Check that the records aren't off by one
  assert_equal "Jane Smith",records[1].name
end
178
+
179
+
94
180
 
95
181
  def test_each
96
182
  sample_data_path = File.dirname(__FILE__)
@@ -187,6 +273,14 @@ def test_line_type
187
273
  assert_equal :person,fwf.line_type("P123")
188
274
  assert_equal :unknown, fwf.line_type("C123")
189
275
  end
276
+
277
# ranges= must refuse to work outside fast mode, must reject a list
# whose first entry is not a Range, and must otherwise limit reading to
# the selected records.
def test_ranges
  sample_data_path = File.dirname(__FILE__)

  layout = {:customer=>{
    :name=>1..25,
    :zip=>26..30,
    :balance=>31..35
    }
  }

  customer_file = FLRFile.new(File.new(sample_data_path + "/customers.dat"), :customer, layout, 1)

  # Fast mode has not been switched on yet
  assert_raise(RuntimeError) { customer_file.ranges = [(0..1)] }

  customer_file.set_fast

  # Plain integers are not ranges
  assert_raise(RuntimeError) { customer_file.ranges = [1, 2] }

  customer_file.ranges = [(0..0), (2..2)]

  records = []
  records << customer_file.next_record until customer_file.finished?

  assert_equal 2, records.size
  assert_equal "John Smith", records.last.name
end
279
+
280
# in_range? accepts every line number until ranges are selected, and
# afterwards only numbers covered by one of the ranges.
def test_in_range
  sample_data_path = File.dirname(__FILE__)

  layout = {:customer=>{
    :name=>1..25,
    :zip=>26..30,
    :balance=>31..35
    }
  }

  customer_file = FLRFile.new(File.new(sample_data_path + "/customers.dat"), :customer, layout, 1)

  # No ranges selected: everything is in range
  assert customer_file.in_range?(0)
  assert customer_file.in_range?(1000)

  customer_file.set_fast
  customer_file.ranges = [(0..1), (5..6)]

  assert customer_file.in_range?(0)
  assert customer_file.in_range?(1)
  assert customer_file.in_range?(6)
  assert !customer_file.in_range?(2)
  assert !customer_file.in_range?(7)
end
282
+
283
+
190
284
 
191
285
  def test_get_next_known_line_type
192
286
  sample_data_path = File.dirname(__FILE__)
@@ -211,3 +305,5 @@ end
211
305
  end
212
306
 
213
307
  end
308
+
309
+
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: hflr
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 1.0.2
5
+ version: 1.1.0
6
6
  platform: ruby
7
7
  authors:
8
8
  - Colin Davis