hflr 1.0.2 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = %q{hflr}
5
- s.version = "1.0.2"
5
+ s.version = "1.1.0"
6
6
 
7
7
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
8
8
  s.authors = ["Colin Davis"]
@@ -23,9 +23,34 @@ class FLRFile
23
23
  @record_template = HFLR::RecordTemplate.create(record_layouts, @record_type_symbols, logical_first_column)
24
24
  end
25
25
  end
26
+
27
# Switch this reader into "fast" mode, in which records are fetched by seeking
# to computed offsets instead of scanning the file line by line.
#
# Fast mode is refused when @record_type_labels is a Hash, i.e. when more than
# one record type is configured. On success the record width is obtained from
# get_record_width_from_file and cached in @width.
#
# Raises RuntimeError when more than one record type is configured.
def set_fast
  @fast = !@record_type_labels.is_a?(Hash)
  # Guard clause: once we get past this line @fast is necessarily true, so no
  # second conditional is needed around the width lookup.
  raise "Cannot set fast mode with more than one record type." unless @fast

  @width = get_record_width_from_file
end
36
+
37
# Restrict reading to the given record ranges (fast mode only).
#
# ranges - a list of Range objects holding record indexes. Each index is
#          multiplied by @width to obtain a file offset, so index 0 addresses
#          the record at the very start of the file.
#
# Raises RuntimeError when fast mode is not enabled, when the list is empty,
# or when any element of the list is not a Range.
def ranges=(ranges)
  unless @fast
    raise "Cannot read selected ranges because input file has multiple record types #{@record_type_labels.to_s}"
  end

  # Validate every element, not just the first: a stray non-Range further down
  # the list would otherwise only surface as an obscure NoMethodError while
  # the offsets are being computed. An empty list is reported through the same
  # message (its "first element" is nil), exactly as before.
  candidates = ranges.to_a
  bad_index = candidates.empty? ? 0 : candidates.index { |r| !r.is_a?(Range) }
  if bad_index
    raise "You specified a #{candidates[bad_index].class.to_s} instead of a range in the list of ranges. Use (a..b) to specify a range."
  end

  @offsets = offsets_to_read(ranges, @width)
  @ranges = ranges
end
46
+
47
+
48
# True when line_number falls inside at least one of the selected ranges.
# When no ranges have been selected (@ranges is unset) every line number
# counts as in range.
def in_range?(line_number)
  return true unless @ranges

  @ranges.any? { |range| range.member?(line_number) }
end
26
51
 
27
52
  def finished?
28
- @file.eof?
53
+ @file.eof? || (@fast && @offsets.empty?)
29
54
  end
30
55
 
31
56
  def close
@@ -46,8 +71,7 @@ def build_record(line)
46
71
  return @record_template[record_type].build_record(line.chomp)
47
72
  end
48
73
 
49
# Fetch the next line of a known record type and build its record.
#
# Returns the built record, or nil when get_next_known_line_type has no
# further line to offer (it returns nil in that case, and nil must not be
# handed on to build_record).
def next_record
  line = get_next_known_line_type
  line && build_record(line)
end
53
77
 
@@ -57,23 +81,44 @@ def line_type(line)
57
81
  end
58
82
 
59
83
  def get_next_known_line_type
84
+ @fast ? fast_get_next_known_line_type : sequential_get_next_known_line_type
85
+ end
86
+
87
# Fast-mode reader: seek to the next queued offset and read one record.
#
# Removes the first entry of @offsets, positions @file there and returns the
# @width bytes read from that position. Returns nil when no offsets are queued
# (@offsets is unset or already empty).
def fast_get_next_known_line_type
  return nil if @offsets.nil? || @offsets.empty?

  @file.pos = @offsets.shift
  @file.read(@width)
end
95
+
96
# Sequential reader: scan forward to the next line that is both inside the
# selected ranges and of a known record type.
#
# @line_number is incremented once for every line actually read from @file.
# Returns the matching line, or nil when the end of the file is reached
# without finding one. A line is never returned merely because it was the
# last one in the file, and nil is never passed to line_type.
def sequential_get_next_known_line_type
  while (line = @file.gets)
    @line_number += 1
    next unless in_range?(@line_number)
    return line unless line_type(line) == :unknown
  end
  nil
end
68
107
 
108
+
109
+
69
110
  def each
70
- @file.each_line do |line|
71
- @line_number += 1
72
- unless line_type(line) == :unknown
111
+ @line_number = 1
112
+ if @fast
113
+ yield(next_record) until finished?
114
+ else
115
+ @file.each_line do |line|
116
+ unless line_type(line) == :unknown || !in_range?(@line_number)
73
117
  data = build_record(line)
74
118
  yield data
75
119
  end
76
120
  end
121
+ end
77
122
  end
78
123
 
79
124
  # This will take a Hash, Struct, or Array; if an Array, the record type must be the last element when
@@ -105,6 +150,17 @@ end
105
150
 
106
151
  private
107
152
 
153
# Convert ranges of record indexes into the list of file offsets to visit.
#
# ranges - list of Ranges of record indexes
# width  - factor each index is multiplied by (the record width)
#
# Returns the offsets in order of first appearance, with duplicates (from
# overlapping ranges) removed.
def offsets_to_read(ranges, width)
  ranges.flat_map { |range| range.map { |index| index * width } }.uniq
end
156
+
157
# Measure the record width from the first line of @file, then rewind.
#
# The width is counted in bytes (line terminator included) rather than in
# characters, because it is later used with the byte-based IO#pos= and
# IO#read; the two differ as soon as a record holds a multi-byte character.
#
# Returns the width in bytes.
# Raises RuntimeError when the file has no first line to measure.
def get_record_width_from_file
  first_line = @file.gets
  @file.rewind
  raise "Cannot determine record width: input file is empty." if first_line.nil?

  first_line.bytesize
end
162
+
163
+
108
164
  # If the layout is given in the convenient Ruby form
109
165
  def create_layouts(layout)
110
166
  var_class = Struct.new(:name,:start,:len)
@@ -121,3 +177,4 @@ new_layout = {}
121
177
  end
122
178
 
123
179
  end
180
+
@@ -1,2 +1,3 @@
1
1
  Joe Smith 55455025.53
2
- Jane Smith 55404015.25
2
+ Jane Smith 55404015.25
3
+ John Smith 55404015.25
@@ -23,14 +23,14 @@ class FLRFileTest < Test::Unit::TestCase
23
23
 
24
24
 
25
25
  def test_initialize
26
- sample_data_path = File.dirname(__FILE__)
27
- fwf = FLRFile.new(
28
- File.new("#{sample_data_path}/sample.dat"),
29
- @record_types, # Record types to read from the file, all others will be ignored
30
- @layouts,# metadata for all record types
31
- 1, # column 0 starts at logical location 1
32
- {:household=>[:people],:person=>[:household_id,:pserial]} # extra columns by record type
33
- )
26
+ sample_data_path = File.dirname(__FILE__)
27
+ fwf = FLRFile.new(
28
+ File.new("#{sample_data_path}/sample.dat"),
29
+ @record_types, # Record types to read from the file, all others will be ignored
30
+ @layouts,# metadata for all record types
31
+ 1, # column 0 starts at logical location 1
32
+ {:household=>[:people],:person=>[:household_id,:pserial]} # extra columns by record type
33
+ )
34
34
 
35
35
  # Extra columns + record_type accessors should have been created
36
36
  hh_struct = fwf.record_template[:household].record_structure.new
@@ -72,7 +72,7 @@ class FLRFileTest < Test::Unit::TestCase
72
72
 
73
73
 
74
74
  def test_build_record
75
- sample_data_path = File.dirname(__FILE__)
75
+ sample_data_path = File.dirname(__FILE__)
76
76
  fwf = FLRFile.new(
77
77
  File.new("#{sample_data_path}/sample.dat"),
78
78
  @record_types, # Record types to read from the file, all others will be ignored
@@ -90,7 +90,93 @@ class FLRFileTest < Test::Unit::TestCase
90
90
  end
91
91
 
92
92
  end
93
+
94
# Fast mode with a range covering records 0..2: next_record must hand back
# exactly three records, in file order.
def test_faast_next_record
  sample_data_path = File.dirname(__FILE__)

  layout = {:customer=>{
    :name=>1..25,
    :zip=>26..30,
    :balance=>31..35
  }}

  customer_file = FLRFile.new(File.new(sample_data_path + "/customers.dat"), :customer, layout, 1)
  customer_file.set_fast
  customer_file.ranges = [(0..2)]

  # No debug printing here: the assertions below are the test's output.
  records = []
  records << customer_file.next_record until customer_file.finished?

  assert_equal 3, records.size

  # Check that the records aren't off by one
  assert_equal "Jane Smith", records[1].name
  assert_equal "John Smith", records.last.name
end
122
+
123
# Fast mode with a range covering only records 0..1: reading must stop after
# two records even though the file holds more.
def test_partial_fast_next_line
  sample_data_path = File.dirname(__FILE__)

  layout = {:customer=>{
    :name=>1..25,
    :zip=>26..30,
    :balance=>31..35
  }}

  customer_file = FLRFile.new(File.new(sample_data_path + "/customers.dat"), :customer, layout, 1)
  customer_file.set_fast
  customer_file.ranges = [(0..1)]

  # No debug printing here: the assertions below are the test's output.
  records = []
  records << customer_file.next_record until customer_file.finished?

  assert_equal 2, records.size

  # Check that the records aren't off by one
  assert_equal "Jane Smith", records[1].name
end
150
+
151
# Fast mode driven through #each: with the range 0..1 selected the block must
# be called for exactly two records.
def test_fast_each
  sample_data_path = File.dirname(__FILE__)

  layout = {:customer=>{
    :name=>1..25,
    :zip=>26..30,
    :balance=>31..35
  }}

  customer_file = FLRFile.new(File.new(sample_data_path + "/customers.dat"), :customer, layout, 1)
  customer_file.set_fast
  customer_file.ranges = [(0..1)]

  # No debug printing here: the assertions below are the test's output.
  records = []
  customer_file.each { |record| records << record }

  assert_equal 2, records.size

  # Check that the records aren't off by one
  assert_equal "Jane Smith", records[1].name
end
178
+
179
+
94
180
 
95
181
  def test_each
96
182
  sample_data_path = File.dirname(__FILE__)
@@ -187,6 +273,14 @@ def test_line_type
187
273
  assert_equal :person,fwf.line_type("P123")
188
274
  assert_equal :unknown, fwf.line_type("C123")
189
275
  end
276
+
277
# ranges= must refuse to work outside fast mode, must reject entries that are
# not Ranges, and must make the reader serve only the selected records.
# NOTE(review): assumes a freshly built FLRFile is not in fast mode — confirm
# against the constructor.
def test_ranges
  sample_data_path = File.dirname(__FILE__)

  layout = {:customer=>{
    :name=>1..25,
    :zip=>26..30,
    :balance=>31..35
  }}

  customer_file = FLRFile.new(File.new(sample_data_path + "/customers.dat"), :customer, layout, 1)

  # Ranges are only accepted once fast mode has been switched on
  assert_raise(RuntimeError) { customer_file.ranges = [(0..1)] }

  customer_file.set_fast

  # Anything other than a Range in the list is rejected
  assert_raise(RuntimeError) { customer_file.ranges = [5] }

  # Selecting only the record at index 1 yields that record and nothing else
  customer_file.ranges = [(1..1)]
  assert_equal "Jane Smith", customer_file.next_record.name
  assert customer_file.finished?
end
279
+
280
# in_range? must accept every line number while no ranges are selected, and
# only the numbers covered by a range once ranges have been set.
def test_in_range
  sample_data_path = File.dirname(__FILE__)

  layout = {:customer=>{
    :name=>1..25,
    :zip=>26..30,
    :balance=>31..35
  }}

  customer_file = FLRFile.new(File.new(sample_data_path + "/customers.dat"), :customer, layout, 1)

  # With no ranges selected every line number counts as in range
  assert customer_file.in_range?(0)
  assert customer_file.in_range?(1000)

  customer_file.set_fast
  customer_file.ranges = [(0..1)]

  assert customer_file.in_range?(0)
  assert customer_file.in_range?(1)
  assert !customer_file.in_range?(2)
end
282
+
283
+
190
284
 
191
285
  def test_get_next_known_line_type
192
286
  sample_data_path = File.dirname(__FILE__)
@@ -211,3 +305,5 @@ end
211
305
  end
212
306
 
213
307
  end
308
+
309
+
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: hflr
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 1.0.2
5
+ version: 1.1.0
6
6
  platform: ruby
7
7
  authors:
8
8
  - Colin Davis