line_iterator 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/.gitignore +17 -0
- data/.travis.yml +5 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +247 -0
- data/Rakefile +8 -0
- data/lib/line_iterator.rb +195 -0
- data/lib/line_iterator/version.rb +4 -0
- data/line_iterator.gemspec +24 -0
- data/test/minitest_helper.rb +11 -0
- data/test/test_data/addresses.txt +7 -0
- data/test/test_data/numbers.txt +12 -0
- data/test/test_data/numbers.txt.gz +0 -0
- data/test/test_data/poetry.txt +37 -0
- data/test/test_data/prefix_based_record.txt +10 -0
- data/test/test_data/zero_length_file.txt +0 -0
- data/test/test_line_iterator.rb +269 -0
- metadata +113 -0
checksums.yaml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
---
|
2
|
+
!binary "U0hBMQ==":
|
3
|
+
metadata.gz: !binary |-
|
4
|
+
NTkwMWEzNDM5ZmM1YmM5ZmU4MWYwNTYxYzc4N2MxNzA1Yjk2NThkNg==
|
5
|
+
data.tar.gz: !binary |-
|
6
|
+
YjM2MDdjNGRhYTk0MDE3OTMxMWFmMjhhN2E4MjdhYTczODcyMWJmYw==
|
7
|
+
SHA512:
|
8
|
+
metadata.gz: !binary |-
|
9
|
+
MjIxNmE3NzMyMDg3NTliN2IwOGQ1MmVlMjU4NmM0ZDgzNGRmOTgwZTZhYWM1
|
10
|
+
MzViNGQwZTdkYjdlZTQ2ZGJmNjFkN2IxOGIzMDEyNDI5YTllNWEwMmYwZGZk
|
11
|
+
MmMyMjA2Zjk3ZmNlYTQ5ZGJlZDE2YjkxZmI3ODlkOGI1NDA0MTA=
|
12
|
+
data.tar.gz: !binary |-
|
13
|
+
ZmU0OGNmZWMxY2QxNzY1ZmM5MDczZTVjODRiNWE0MjQ0NzJlMzBlMWRhYjdj
|
14
|
+
Y2NmMTcyZDY2MzZlOTdjMjMxNGM0YzlmOTk4NzNjYWYwNmVmOTczODc2ZTUz
|
15
|
+
NTQ5YzJhNGVkMmE1ODIxNDUxNWM4NjE3ZmJiYmM3YmUxYWE0Zjk=
|
data/.gitignore
ADDED
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2014 Bill Dueber
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,247 @@
|
|
1
|
+
# LineIterator
|
2
|
+
|
3
|
+
A simple iterator designed to deal easily with line-oriented text files.
|
4
|
+
|
5
|
+
## Features
|
6
|
+
|
7
|
+
* Automatically deal with files that end in .gz (via zlib)
|
8
|
+
* Methods like #next and #each (aliased to #next_line and #each_line) return the line data with the line endings already `chomp`ed off
|
9
|
+
* Skip forward or backwards by lines (limit on skipping backwards)
|
10
|
+
* Track line numbers (starting with 1) no matter how you use each_line, next_line, skip (forward and backwards), etc.
|
11
|
+
* Allow line-oriented records (where a record is a set of lines). The default implementation detects end-of-record as a blank line, but subclassing is easy for other types of line-oriented records.
|
12
|
+
|
13
|
+
|
14
|
+
## Installation
|
15
|
+
|
16
|
+
$ gem install line_iterator
|
17
|
+
|
18
|
+
## Basic Usage
|
19
|
+
|
20
|
+
First, two quick warnings:
|
21
|
+
|
22
|
+
* The record iteration stuff works fine, but I may change the interface to return a smarter object than just an array (so I can pass along, e.g. the starting and ending line numbers in the file for that record).
|
23
|
+
* This works fine under MRI, fails hard under Jruby 1.7.9 (Null Pointer exception), and runs fine on jruby 1.7.10 in both 1.9 and 2.0 mode.
|
24
|
+
|
25
|
+
|
26
|
+
### Getting a new iterator
|
27
|
+
|
28
|
+
~~~ruby
|
29
|
+
|
30
|
+
require 'line_iterator'
|
31
|
+
|
32
|
+
# opening a file
|
33
|
+
iter = LineIterator.open('myfile.txt')
|
34
|
+
iter = LineIterator.open(already_open_file_or_io_object)
|
35
|
+
iter = LineIterator.open('myfile.txt.gz') # automatically use Zlib::GzipReader
|
36
|
+
iter = LineIterator.open($stdin, :gzip=>true) # manually set gzip status
|
37
|
+
|
38
|
+
~~~
|
39
|
+
|
40
|
+
### Getting line data and line numbers
|
41
|
+
|
42
|
+
It's an enumerator, so you can use #next, #each, etc.
|
43
|
+
|
44
|
+
For clarity you can use `#each_line` and `#next_line` as aliases of
|
45
|
+
`#each` and `#next`, respectively.
|
46
|
+
|
47
|
+
Access the line number of the last returned (or just consumed via `#skip`) line via `#last_line_number` (where the first line is line 1, just like in an editor or when using head/tail/whatever)
|
48
|
+
|
49
|
+
~~~ruby
|
50
|
+
iter.each do |line|
|
51
|
+
puts "Line number #{iter.last_line_number} : #{line}"
|
52
|
+
end
|
53
|
+
|
54
|
+
# Line numbers work regardless of how you move through the iterator
|
55
|
+
|
56
|
+
iter = LineIterator.open('myfile.txt')
|
57
|
+
iter.next
|
58
|
+
iter.next
|
59
|
+
iter.last_line_number #=> 2
|
60
|
+
iter.each do |line|
|
61
|
+
puts iter.last_line_number
|
62
|
+
break
|
63
|
+
end #=> 3, since the two calls to #next advanced the line number
|
64
|
+
|
65
|
+
|
66
|
+
# There's a special iterator, `#each_with_line_number`, to mirror
|
67
|
+
# `#each_with_index` but keep proper track of line numbers
|
68
|
+
|
69
|
+
iter.each_with_line_number do |line, line_num|
|
70
|
+
#...
|
71
|
+
end
|
72
|
+
|
73
|
+
# You can use things like #map, but of course that'll read in the whole
|
74
|
+
# input.
|
75
|
+
|
76
|
+
iter.map{|line| [iter.last_line_number, line.size]}
|
77
|
+
|
78
|
+
~~~
|
79
|
+
|
80
|
+
### Skipping forward and backward
|
81
|
+
|
82
|
+
A `LineIterator` can skip forward or (in some cases) backwards.
|
83
|
+
|
84
|
+
Calling `#skip` just skips the next line. Calling `#skip(num)` will skip forward `num` lines, or move to the end of the file if you run out of data.
|
85
|
+
|
86
|
+
**Unlike `#next`, `#skip` will never throw `StopIteration`**.
|
87
|
+
|
88
|
+
If you call `#skip` with a negative number, the LineIterator will attempt to back up via an internal buffer (set at 100 lines). If you try to back up further than the available data allows, you'll get an `IndexError`.
|
89
|
+
|
90
|
+
~~~ruby
|
91
|
+
|
92
|
+
iter = LineIterator.open('myfile.txt')
|
93
|
+
iter.skip 5
|
94
|
+
iter.last_line_number #=> 5
|
95
|
+
iter.skip(-3)
|
96
|
+
iter.last_line_number #=> 2
|
97
|
+
iter.next #=> <the third line of the file>
|
98
|
+
iter.last_line_number #=> 3
|
99
|
+
|
100
|
+
iter = LineIterator.open('myfile.txt')
|
101
|
+
iter.skip(1_000_000) #=> doesn't raise an error no matter how far you skip
|
102
|
+
iter.next #=> StopIteration error
|
103
|
+
|
104
|
+
iter = LineIterator.open('myfile.txt')
|
105
|
+
iter.skip(10)
|
106
|
+
iter.skip(-100) #=> IndexError
|
107
|
+
|
108
|
+
~~~
|
109
|
+
|
110
|
+
|
111
|
+
## Dealing with records
|
112
|
+
|
113
|
+
`LineIterator` has a simple line-oriented record interface. By default, it separates files into records on blank lines (lines with nothing but optional whitespace in them) and returns a "record" that simply consists of an array containing the appropriate lines from the file.
|
114
|
+
|
115
|
+
Like the line-based commands, the contents of the returned array are already `#chomp`ed.
|
116
|
+
|
117
|
+
Note two things:
|
118
|
+
|
119
|
+
* There's no `#skip` backwards implemented for records; you can use `#skip_record` or `#skip_records(n)` to skip records forward.
|
120
|
+
* If you mix `#next_record / #each_record` with `#next_line` / `#each_line` / `#skip`, things are usually going to get *really* screwy. Mixed use is not really supported.
|
121
|
+
|
122
|
+
### Using blank-line delimited records
|
123
|
+
|
124
|
+
|
125
|
+
Given the file:
|
126
|
+
|
127
|
+
~~~
|
128
|
+
One Hat
|
129
|
+
Two Hat
|
130
|
+
|
131
|
+
Red Hat
|
132
|
+
Blue Hat
|
133
|
+
|
134
|
+
by Dr. Seuss
|
135
|
+
~~~
|
136
|
+
|
137
|
+
We can use the record interface as follows:
|
138
|
+
|
139
|
+
~~~ruby
|
140
|
+
|
141
|
+
iter = LineIterator.new('onehat.txt')
|
142
|
+
x = iter.next_record #=> ['One Hat', 'Two Hat']
|
143
|
+
y = iter.next_record #=> ['Red Hat', 'Blue Hat']
|
144
|
+
|
145
|
+
iter.last_record_number #=> 2
|
146
|
+
|
147
|
+
iter.each_record do |rec|
|
148
|
+
puts rec
|
149
|
+
end #=> Show the one remaining record, ['by Dr. Seuss']
|
150
|
+
|
151
|
+
~~~
|
152
|
+
|
153
|
+
### Changing the end-of-record pattern
|
154
|
+
|
155
|
+
Maybe you have records that are separated by a line with nothing on it but dashes? You can set the pattern used to detect the end of a record by setting `#end_of_record_pattern`
|
156
|
+
|
157
|
+
Given the file:
|
158
|
+
|
159
|
+
~~~
|
160
|
+
Bill Dueber
|
161
|
+
1234 Sample st.
|
162
|
+
Ann Arbor, MI 4813
|
163
|
+
-----
|
164
|
+
Mike Dueber
|
165
|
+
1350 N. Nowhere
|
166
|
+
St. Paul, MN 55117
|
167
|
+
~~~
|
168
|
+
|
169
|
+
...you could get the two records out of it as follows:
|
170
|
+
|
171
|
+
~~~ruby
|
172
|
+
iter = LineIterator.new('addresses.txt')
|
173
|
+
iter.end_of_record_pattern = /\A--+\s*\Z/
|
174
|
+
iter.each_record do |rec|
|
175
|
+
# do something with the arrays of lines returned
|
176
|
+
end
|
177
|
+
~~~
|
178
|
+
|
179
|
+
|
180
|
+
### Subclassing `LineIterator` for different kinds of records
|
181
|
+
|
182
|
+
You can subclass `LineIterator` and override the method `#end_of_record(buff)` to return true when there's an end of record. Usually this involves calling `line, line_number = peek` to see what's coming up next.
|
183
|
+
|
184
|
+
The buffer passed in is the contents of the record so far.
|
185
|
+
|
186
|
+
Here's a simple implementation of a subclass that deals with records that are identified by contiguous lines that all have the same prefix string.
|
187
|
+
|
188
|
+
Given the file:
|
189
|
+
|
190
|
+
~~~
|
191
|
+
001 Red
|
192
|
+
001 White
|
193
|
+
001 Blue
|
194
|
+
002 One
|
195
|
+
002 Two
|
196
|
+
003 Alpha
|
197
|
+
003 Beta
|
198
|
+
003 Gamma
|
199
|
+
003 Delta
|
200
|
+
~~~
|
201
|
+
|
202
|
+
...we have three records, where the end of record is identified by the numeric prefix changing from one line to the next (or, for the last record, by the end of the input).
|
203
|
+
|
204
|
+
We can easily subclass `LineIterator` to take care of this case.
|
205
|
+
|
206
|
+
~~~ruby
|
207
|
+
|
208
|
+
# Subclass to override end_of_record(buff)
|
209
|
+
# We introduce a new instance variable to track the most recent prefix
|
210
|
+
class PrefixBasedRecordIterator < LineIterator
|
211
|
+
PREFIXP = /^(\d+)\s+/
|
212
|
+
def prefix(line)
|
213
|
+
(PREFIXP.match(line))[1]
|
214
|
+
end
|
215
|
+
|
216
|
+
def end_of_record(buff)
|
217
|
+
return true if self.done
|
218
|
+
line, line_no = peek
|
219
|
+
p = prefix(line)
|
220
|
+
if p != @previous_prefix
|
221
|
+
@previous_prefix = p
|
222
|
+
return !(buff.empty?)
|
223
|
+
else
|
224
|
+
return false
|
225
|
+
end
|
226
|
+
end
|
227
|
+
end
|
228
|
+
|
229
|
+
iter = PrefixBasedRecordIterator.new('prefix_file.txt')
|
230
|
+
rec = iter.next_record
|
231
|
+
#=> ['001 Red', '001 White', '001 Blue']
|
232
|
+
rec = iter.next_record
|
233
|
+
#=> ['002 One', '002 Two']
|
234
|
+
|
235
|
+
~~~
|
236
|
+
|
237
|
+
|
238
|
+
|
239
|
+
|
240
|
+
|
241
|
+
## Contributing
|
242
|
+
|
243
|
+
1. Fork it
|
244
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
245
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
246
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
247
|
+
5. Create new Pull Request
|
data/Rakefile
ADDED
@@ -0,0 +1,195 @@
|
|
1
|
+
require "line_iterator/version"
|
2
|
+
require 'zlib'
|
3
|
+
|
4
|
+
# Iterates over a line-oriented input (a file name, an IO, or any IO-like
# object), returning chomp'ed lines while keeping track of 1-based line
# numbers. Up to BUFFER_SIZE already-consumed lines are remembered so the
# iterator can skip backwards, and consecutive lines can be grouped into
# "records" (arrays of lines) separated by lines matching
# #end_of_record_pattern.
class LineIterator
  include Enumerable

  # Maximum number of consumed lines remembered for backwards skips.
  BUFFER_SIZE = 100

  # last_line_number::   1-based number of the most recently consumed line
  #                      (0 before anything has been read)
  # done::               true once an iteration/skip has run off the end
  # last_record_number:: count of records returned so far
  attr_reader :last_line_number, :done, :last_record_number

  # Regexp matched against the raw (un-chomp'ed) upcoming line to detect a
  # record separator.
  attr_accessor :end_of_record_pattern

  # Convenience constructor; takes the same arguments as ::new.
  def self.open(input, opts = {})
    new(input, opts)
  end

  def to_s
    "#{self.class} <#{@f}, last_line_number: #{last_line_number}>"
  end

  alias_method :inspect, :to_s

  # Open up the input.
  #
  # input:: an IO (or any object that can enumerate its lines with
  #         #each_with_index, e.g. StringIO), or otherwise a file name.
  # opts::  :gzip => true forces the input through Zlib::GzipReader. File
  #         names ending in ".gz" are gunzipped automatically.
  #
  # The opts hash passed in is never modified.
  def initialize(input, opts = {})
    gzip = opts[:gzip]

    if input.is_a?(IO) || input.respond_to?(:each_with_index)
      @f = input
    else # assume a file name (String, Pathname, ...)
      @f = File.open(input)
      gzip ||= (input.to_s =~ /\.gz\Z/)
    end

    @f = Zlib::GzipReader.new(@f) if gzip

    # Each element handed out by the base iterator is [raw_line, zero_based_index]
    @base_iterator      = @f.each_with_index
    @last_line_number   = 0
    @last_record_number = 0
    @done               = false
    @buffer             = [] # lines already handed out (for skipping backwards)
    @backup_buffer      = [] # lines "un-read" by a backwards skip
    @end_of_record_pattern = /\A\s*\n/
  end

  # Return the next line (chomp'ed) and update #last_line_number.
  # Raises StopIteration when there is no more input.
  def next
    # Lines un-read by a backwards skip are replayed before touching the stream
    entry = @backup_buffer.empty? ? @base_iterator.next : @backup_buffer.shift

    # Remember the line, dropping the oldest one once the buffer is full
    @buffer.shift if @buffer.size == BUFFER_SIZE
    @buffer.push(entry)

    @last_line_number = entry[1] + 1
    entry[0].chomp
  end

  alias_method :next_line, :next

  # Return the upcoming [raw_line, zero_based_index] pair without consuming
  # it, taking the backup buffer into account. Raises StopIteration at the
  # end of the input.
  def peek
    @backup_buffer.empty? ? @base_iterator.peek : @backup_buffer[0]
  end

  # Skip n lines (default: 1). A positive n moves forward and will *never*
  # raise StopIteration; a negative n moves backwards through the buffer of
  # recently consumed lines and raises IndexError when asked to go back
  # further than what is buffered. Skipping 0 lines does nothing.
  def skip(n = 1)
    if n > 0
      skip_forward(n)
    elsif n < 0
      skip_backwards(-n)
    end
  end

  # Consume n lines, stopping quietly (and setting #done) at end of input.
  def skip_forward(n)
    begin
      n.times { self.next }
    rescue StopIteration
      @done = true
    end
  end

  # Un-read the n most recently consumed lines so they are returned again.
  # Raises IndexError if fewer than n lines are buffered.
  def skip_backwards(n)
    return @last_line_number unless n > 0
    raise IndexError, "Tried to skip backwards too far" if n > @buffer.size

    n.times { @backup_buffer.unshift(@buffer.pop) }
    # There is input available again, even if we had run off the end before
    @done = false
    # The zero-based index of the upcoming line equals the 1-based number
    # of the line before it
    @last_line_number = @backup_buffer[0][1]
  end

  # Yield each remaining line (chomp'ed), tracking last_line_number.
  # Returns an enumerator when no block is given.
  def each
    return enum_for(:each) unless block_given?

    # Not Kernel#loop: it would swallow the StopIteration we need to see
    begin
      while true
        yield self.next
      end
    rescue StopIteration
      @done = true
    end
  end

  alias_method :each_line, :each

  # Like #each_with_index, but yields [line, line_number] using the real
  # (1-based) line number, so it stays correct when mixed with next/skip.
  def each_with_line_number
    return enum_for(:each_with_line_number) unless block_given?

    begin
      while true
        yield [self.next, last_line_number]
      end
    rescue StopIteration
      @done = true
    end
  end

  # Decide whether the record collected so far (buff) is complete.
  #
  # This default implementation peeks at the upcoming line and, if it
  # matches #end_of_record_pattern, consumes that separator line and
  # returns true. Override in a subclass for other record layouts (perhaps
  # using the contents of buff to determine end-of-record status).
  def end_of_record(buff)
    upcoming = peek
    if end_of_record_pattern.match(upcoming[0])
      self.next # eat the separator line
      true
    else
      false
    end
  end

  # Get the next record as an array of chomp'ed lines.
  #
  # Separator lines are never part of a record, however many of them appear
  # in a row. Raises StopIteration when the input holds no further record.
  def next_record
    raise StopIteration if done

    buff = []
    begin
      while true
        line_number_before = @last_line_number
        at_end = end_of_record(buff)

        if at_end && !buff.empty?
          @last_record_number += 1
          return buff
        end

        # An end-of-record with nothing collected yet means a leading or
        # repeated separator. If #end_of_record already consumed it, look
        # again; otherwise consume a line ourselves so we always progress.
        separator_eaten = at_end && @last_line_number != line_number_before
        buff << self.next unless separator_eaten
      end
    rescue StopIteration
      @done = true
      raise if buff.empty? # nothing left: no empty trailing record
      @last_record_number += 1
      buff
    end
  end

  # Yield each remaining record. Returns an enumerator without a block.
  def each_record
    return enum_for(:each_record) unless block_given?

    until done
      begin
        record = next_record
      rescue StopIteration
        break
      end
      yield record
    end
  end

  # Skip over n records (forward only!), stopping quietly at end of input.
  def skip_record(n = 1)
    begin
      n.times { next_record }
    rescue StopIteration
      @done = true
    end
  end

  alias_method :skip_records, :skip_record
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'line_iterator/version'
|
5
|
+
|
6
|
+
Gem::Specification.new do |spec|
|
7
|
+
spec.name = "line_iterator"
|
8
|
+
spec.version = LineIterator::VERSION
|
9
|
+
spec.authors = ["Bill Dueber"]
|
10
|
+
spec.email = ["bill@dueber.com"]
|
11
|
+
spec.description = %q{Provides methods to more easily work with line-oriented text file and records within those files}
|
12
|
+
spec.summary = %q{Easily work with line-oriented text files}
|
13
|
+
spec.homepage = "http://github.com/billdueber/line_iterator"
|
14
|
+
spec.license = "MIT"
|
15
|
+
|
16
|
+
spec.files = `git ls-files`.split($/)
|
17
|
+
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
18
|
+
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
19
|
+
spec.require_paths = ["lib"]
|
20
|
+
|
21
|
+
spec.add_development_dependency "bundler", "~> 1.3"
|
22
|
+
spec.add_development_dependency "rake"
|
23
|
+
spec.add_development_dependency "minitest"
|
24
|
+
end
|
@@ -0,0 +1,11 @@
|
|
1
|
+
$LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
|
2
|
+
require 'line_iterator'
|
3
|
+
|
4
|
+
require 'minitest'
|
5
|
+
require 'minitest/spec'
|
6
|
+
require 'minitest/autorun'
|
7
|
+
|
8
|
+
|
9
|
+
def test_data(relative_path)
|
10
|
+
return File.expand_path(File.join("test_data", relative_path), File.dirname(__FILE__))
|
11
|
+
end
|
Binary file
|
@@ -0,0 +1,37 @@
|
|
1
|
+
Out of burlap sacks, out of bearing butter,
|
2
|
+
Out of black bean and wet slate bread,
|
3
|
+
Out of the acids of rage, the candor of tar,
|
4
|
+
Out of creosote, gasoline, drive shafts, wooden dollies,
|
5
|
+
They Lion grow.
|
6
|
+
|
7
|
+
Out of the gray hills
|
8
|
+
Of industrial barns, out of rain, out of bus ride,
|
9
|
+
West Virginia to Kiss My Ass, out of buried aunties,
|
10
|
+
Mothers hardening like pounded stumps, out of stumps,
|
11
|
+
Out of the bones' need to sharpen and the muscles' to stretch,
|
12
|
+
They Lion grow.
|
13
|
+
|
14
|
+
Earth is eating trees, fence posts,
|
15
|
+
Gutted cars, earth is calling in her little ones,
|
16
|
+
"Come home, Come home!" From pig balls,
|
17
|
+
From the ferocity of pig driven to holiness,
|
18
|
+
From the furred ear and the full jowl come
|
19
|
+
The repose of the hung belly, from the purpose
|
20
|
+
They Lion grow.
|
21
|
+
|
22
|
+
From the sweet glues of the trotters
|
23
|
+
Come the sweet kinks of the fist, from the full flower
|
24
|
+
Of the hams the thorax of caves,
|
25
|
+
From "Bow Down" come "Rise Up,"
|
26
|
+
Come they Lion from the reeds of shovels,
|
27
|
+
The grained arm that pulls the hands,
|
28
|
+
They Lion grow.
|
29
|
+
|
30
|
+
From my five arms and all my hands,
|
31
|
+
From all my white sins forgiven, they feed,
|
32
|
+
From my car passing under the stars,
|
33
|
+
They Lion, from my children inherit,
|
34
|
+
From the oak turned to a wall, they Lion,
|
35
|
+
From they sack and they belly opened
|
36
|
+
And all that was hidden burning on the oil-stained earth
|
37
|
+
They feed they Lion and he comes.
|
File without changes
|
@@ -0,0 +1,269 @@
|
|
1
|
+
require 'minitest_helper'
|
2
|
+
|
3
|
+
# Construction from file names and already-open file objects, plain and
# gzipped, plus the degenerate zero-length input.
describe "creating a new iterator" do
  it "opens a real file" do
    i = LineIterator.new(test_data('numbers.txt'))
    assert_equal "One", i.next
  end

  it "opens a gzipped file" do
    i = LineIterator.new(test_data('numbers.txt.gz'))
    assert_equal "One", i.next
  end

  it "deals with a file object" do
    # Block form so the handle is closed even when the assertion fails
    File.open(test_data('numbers.txt')) do |f|
      i = LineIterator.new(f)
      assert_equal 'One', i.next
    end
  end

  it "deals with a gzipped file object" do
    File.open(test_data('numbers.txt.gz')) do |f|
      i = LineIterator.new(f, :gzip=>true)
      assert_equal 'One', i.next
    end
  end

  it "doesn't error on a zero-length file" do
    # Iterate through the LineIterator itself, not the raw File
    i = LineIterator.new(test_data('zero_length_file.txt'))
    i.each do |_line|
      flunk "a zero-length file should not yield any lines"
    end
    assert_equal 0, i.last_line_number
  end
end
|
34
|
+
|
35
|
+
describe "skip forward" do
|
36
|
+
before do
|
37
|
+
@i = LineIterator.new(test_data('numbers.txt'))
|
38
|
+
end
|
39
|
+
|
40
|
+
it "skips forward one" do
|
41
|
+
@i.skip
|
42
|
+
assert_equal 'Two', @i.next
|
43
|
+
end
|
44
|
+
|
45
|
+
it "skips forward N" do
|
46
|
+
@i.skip(5)
|
47
|
+
assert_equal 5, @i.last_line_number
|
48
|
+
assert_equal 'Six', @i.next
|
49
|
+
end
|
50
|
+
|
51
|
+
it "skips to the end and will then raise stopiteration on next call" do
|
52
|
+
@i.skip(100)
|
53
|
+
assert_equal 12, @i.last_line_number
|
54
|
+
assert_raises(StopIteration) { @i.next }
|
55
|
+
end
|
56
|
+
|
57
|
+
end
|
58
|
+
|
59
|
+
describe "skip backwards" do
|
60
|
+
before do
|
61
|
+
@i = LineIterator.new(test_data('numbers.txt'))
|
62
|
+
end
|
63
|
+
|
64
|
+
it "does basic backwards skip" do
|
65
|
+
@i.skip(5)
|
66
|
+
@i.skip(-1)
|
67
|
+
assert_equal "Five", @i.next
|
68
|
+
end
|
69
|
+
|
70
|
+
it "errors out if you back up past the beginning of the file" do
|
71
|
+
@i.skip 2
|
72
|
+
assert_raises(IndexError) {@i.skip(-3)}
|
73
|
+
end
|
74
|
+
|
75
|
+
it "can exactly rewind" do
|
76
|
+
@i.skip 10
|
77
|
+
@i.skip -10
|
78
|
+
assert_equal "One", @i.next
|
79
|
+
assert_equal "Two", @i.next
|
80
|
+
end
|
81
|
+
|
82
|
+
it "errors out if you back up past the buffer" do
|
83
|
+
@i.skip 10
|
84
|
+
@i.skip(-5)
|
85
|
+
assert_raises(IndexError) {@i.skip(-6)}
|
86
|
+
end
|
87
|
+
end
|
88
|
+
|
89
|
+
describe "throwing Errors" do
|
90
|
+
it "throws StopIteration when #next is called too many times" do
|
91
|
+
@i = LineIterator.new(test_data('numbers.txt'))
|
92
|
+
assert_raises(StopIteration) { 20.times{@i.next}}
|
93
|
+
end
|
94
|
+
end
|
95
|
+
|
96
|
+
|
97
|
+
|
98
|
+
describe "maintain line number" do
|
99
|
+
before do
|
100
|
+
@i = LineIterator.new(test_data('numbers.txt'))
|
101
|
+
end
|
102
|
+
|
103
|
+
it "starts with a zero last_line_number" do
|
104
|
+
assert_equal 0, @i.last_line_number
|
105
|
+
end
|
106
|
+
|
107
|
+
it "advances when using next" do
|
108
|
+
@i.next
|
109
|
+
@i.next
|
110
|
+
assert_equal 2, @i.last_line_number
|
111
|
+
end
|
112
|
+
|
113
|
+
it "advances when using each" do
|
114
|
+
cnt = 0
|
115
|
+
@i.each do |y|
|
116
|
+
cnt += 1
|
117
|
+
break if cnt == 3
|
118
|
+
end
|
119
|
+
assert_equal 3, @i.last_line_number
|
120
|
+
end
|
121
|
+
|
122
|
+
it "advances when using skip" do
|
123
|
+
@i.skip(3)
|
124
|
+
assert_equal 3, @i.last_line_number
|
125
|
+
end
|
126
|
+
|
127
|
+
it "stays correct on mixed usage" do
|
128
|
+
@i.next
|
129
|
+
@i.next
|
130
|
+
assert_equal 2, @i.last_line_number
|
131
|
+
@i.each do |line|
|
132
|
+
assert_equal 3, @i.last_line_number
|
133
|
+
break
|
134
|
+
end
|
135
|
+
end
|
136
|
+
|
137
|
+
it "correctly resets when skipping backwards" do
|
138
|
+
@i.skip(3)
|
139
|
+
assert_equal 3, @i.last_line_number
|
140
|
+
@i.skip(-2)
|
141
|
+
assert_equal 1, @i.last_line_number
|
142
|
+
line = @i.next
|
143
|
+
assert_equal 'Two', line
|
144
|
+
assert_equal 2, @i.last_line_number
|
145
|
+
end
|
146
|
+
|
147
|
+
|
148
|
+
end
|
149
|
+
|
150
|
+
|
151
|
+
describe "working with pattern-delimited records" do
|
152
|
+
before do
|
153
|
+
@i = LineIterator.new(test_data('poetry.txt'))
|
154
|
+
end
|
155
|
+
|
156
|
+
it "works with each_record()" do
|
157
|
+
cnt = 0
|
158
|
+
@i.each_record do |r|
|
159
|
+
cnt += 1
|
160
|
+
end
|
161
|
+
|
162
|
+
assert_equal 5, cnt
|
163
|
+
assert_equal 5, @i.last_record_number
|
164
|
+
end
|
165
|
+
|
166
|
+
it "gets the first record" do
|
167
|
+
r = @i.next_record
|
168
|
+
assert_equal 5, r.size
|
169
|
+
end
|
170
|
+
|
171
|
+
|
172
|
+
it "correctly deals with next_record" do
|
173
|
+
r = @i.next_record
|
174
|
+
r = @i.next_record
|
175
|
+
assert_equal "Out of the bones' need to sharpen and the muscles' to stretch,", r[4]
|
176
|
+
end
|
177
|
+
|
178
|
+
it "throws StopIterator when it should stop" do
|
179
|
+
begin
|
180
|
+
while true
|
181
|
+
@i.next_record
|
182
|
+
end
|
183
|
+
rescue StopIteration
|
184
|
+
assert_equal 5, @i.last_record_number
|
185
|
+
end
|
186
|
+
end
|
187
|
+
|
188
|
+
it "breaks out of each_record correctly" do
|
189
|
+
rec = nil
|
190
|
+
@i.each_record do |r|
|
191
|
+
rec = r
|
192
|
+
break if @i.last_record_number == 2
|
193
|
+
end
|
194
|
+
assert_equal "Out of the bones' need to sharpen and the muscles' to stretch,", rec[4]
|
195
|
+
end
|
196
|
+
|
197
|
+
it "works with a backup buffer" do
|
198
|
+
r1 = @i.next_record
|
199
|
+
@i.skip(-(@i.last_line_number))
|
200
|
+
r2 = @i.next_record
|
201
|
+
assert_equal r1, r2
|
202
|
+
end
|
203
|
+
|
204
|
+
it "can use a custom pattern" do
|
205
|
+
iter = LineIterator.new(test_data('addresses.txt'))
|
206
|
+
iter.end_of_record_pattern = /\A--+\s*\Z/
|
207
|
+
recs = []
|
208
|
+
iter.each_record do |rec|
|
209
|
+
recs << rec
|
210
|
+
end
|
211
|
+
assert_equal 2, recs.size
|
212
|
+
assert_equal 'Bill Dueber', recs[0][0]
|
213
|
+
assert_equal 'Mike Dueber', recs[1][0]
|
214
|
+
end
|
215
|
+
|
216
|
+
|
217
|
+
end
|
218
|
+
|
219
|
+
|
220
|
+
# Subclass to override end_of_record(buff)
|
221
|
+
# We introduce a new instance variable to track the most recent prefix
|
222
|
+
# Groups contiguous lines that share the same leading numeric prefix into
# one record. @previous_prefix remembers the prefix of the record being
# collected.
class PrefixBasedRecordIterator < LineIterator
  PREFIXP = /^(\d+)\s+/

  # The numeric prefix of the line, or nil when the line has none.
  def prefix(line)
    line[PREFIXP, 1]
  end

  # A record ends when the upcoming line's prefix differs from the prefix
  # of the lines collected so far. Nothing is consumed here; the line that
  # starts the next record is left in place.
  def end_of_record(buff)
    return true if done

    line, _line_number = peek
    current = prefix(line)
    return false if current == @previous_prefix

    @previous_prefix = current
    # A prefix change before anything was collected just starts a record
    !buff.empty?
  end
end
|
240
|
+
|
241
|
+
describe "subclass to do prefix-based records" do
|
242
|
+
before do
|
243
|
+
@i = PrefixBasedRecordIterator.new(test_data('prefix_based_record.txt'))
|
244
|
+
end
|
245
|
+
|
246
|
+
it "finds all the records with each_record" do
|
247
|
+
cnt = 0
|
248
|
+
@i.each_record do |rec|
|
249
|
+
cnt += 1
|
250
|
+
end
|
251
|
+
assert_equal 3, cnt
|
252
|
+
end
|
253
|
+
|
254
|
+
it "gets the prefixed records with next_record" do
|
255
|
+
rec = @i.next_record
|
256
|
+
assert_equal 3, rec.size
|
257
|
+
|
258
|
+
rec = @i.next_record
|
259
|
+
assert_equal 5, rec.size
|
260
|
+
|
261
|
+
rec = @i.next_record
|
262
|
+
assert_equal 2, rec.size
|
263
|
+
|
264
|
+
end
|
265
|
+
end
|
266
|
+
|
267
|
+
|
268
|
+
|
269
|
+
|
metadata
ADDED
@@ -0,0 +1,113 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: line_iterator
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Bill Dueber
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-01-18 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ~>
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.3'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ~>
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.3'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ! '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ! '>='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: minitest
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ! '>='
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ! '>='
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
description: Provides methods to more easily work with line-oriented text file and
|
56
|
+
records within those files
|
57
|
+
email:
|
58
|
+
- bill@dueber.com
|
59
|
+
executables: []
|
60
|
+
extensions: []
|
61
|
+
extra_rdoc_files: []
|
62
|
+
files:
|
63
|
+
- .gitignore
|
64
|
+
- .travis.yml
|
65
|
+
- Gemfile
|
66
|
+
- LICENSE.txt
|
67
|
+
- README.md
|
68
|
+
- Rakefile
|
69
|
+
- lib/line_iterator.rb
|
70
|
+
- lib/line_iterator/version.rb
|
71
|
+
- line_iterator.gemspec
|
72
|
+
- test/minitest_helper.rb
|
73
|
+
- test/test_data/addresses.txt
|
74
|
+
- test/test_data/numbers.txt
|
75
|
+
- test/test_data/numbers.txt.gz
|
76
|
+
- test/test_data/poetry.txt
|
77
|
+
- test/test_data/prefix_based_record.txt
|
78
|
+
- test/test_data/zero_length_file.txt
|
79
|
+
- test/test_line_iterator.rb
|
80
|
+
homepage: http://github.com/billdueber/line_iterator
|
81
|
+
licenses:
|
82
|
+
- MIT
|
83
|
+
metadata: {}
|
84
|
+
post_install_message:
|
85
|
+
rdoc_options: []
|
86
|
+
require_paths:
|
87
|
+
- lib
|
88
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
89
|
+
requirements:
|
90
|
+
- - ! '>='
|
91
|
+
- !ruby/object:Gem::Version
|
92
|
+
version: '0'
|
93
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
94
|
+
requirements:
|
95
|
+
- - ! '>='
|
96
|
+
- !ruby/object:Gem::Version
|
97
|
+
version: '0'
|
98
|
+
requirements: []
|
99
|
+
rubyforge_project:
|
100
|
+
rubygems_version: 2.2.1
|
101
|
+
signing_key:
|
102
|
+
specification_version: 4
|
103
|
+
summary: Easily work with line-oriented text files
|
104
|
+
test_files:
|
105
|
+
- test/minitest_helper.rb
|
106
|
+
- test/test_data/addresses.txt
|
107
|
+
- test/test_data/numbers.txt
|
108
|
+
- test/test_data/numbers.txt.gz
|
109
|
+
- test/test_data/poetry.txt
|
110
|
+
- test/test_data/prefix_based_record.txt
|
111
|
+
- test/test_data/zero_length_file.txt
|
112
|
+
- test/test_line_iterator.rb
|
113
|
+
has_rdoc:
|