rfile 0.1.1 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/rfile.rb +127 -52
- data/lib/rfile/version.rb +1 -0
- data/test/data/sep_string_test +8 -0
- data/test/data/testenummixin +3 -0
- data/test/tc_rfile.rb +94 -8
- metadata +7 -3
data/lib/rfile.rb
CHANGED
@@ -1,62 +1,137 @@
|
|
1
|
-
# $Id: rfile.rb
|
1
|
+
# $Id: rfile.rb 54 2006-08-16 19:29:37Z cmaujean $
|
2
2
|
#
|
3
|
-
|
4
|
-
#
|
5
|
-
# This class is a line oriented "file" object that operates
|
3
|
+
|
4
|
+
# This class is a line oriented file object that operates
|
6
5
|
# without keeping the file in memory.
|
7
6
|
#
|
8
|
-
# Enumerable is mixed in,
|
7
|
+
# Enumerable is mixed in, see Enumerable for more
|
9
8
|
# information.
|
10
9
|
#
|
11
10
|
class RFile
  include Enumerable
  attr_accessor :recycle, :filename

  # parses and indexes <i>filename</i>.
  #
  # if recycle == true, the randomline
  # method will reload the index (fast)
  # when it runs out of unique lines to produce
  #
  # if sep_string is passed, "lines" will be determined
  # by sep_string instead of $/
  #--
  # Each record is stored as an IndexElement holding its size in bytes,
  # its byte offset from the start of the file, and its 1-based line number.
  # Sizes are measured with bytesize (not length) because the offsets are
  # later handed to File.read, which counts bytes, not characters.
  # File.foreach closes the file once the index is built.
  #++
  def initialize(filename, recycle=false, sep_string=$/)
    @filename = filename
    @recycle = recycle
    @sep_string = sep_string
    @index = []
    offset = 0

    File.foreach(@filename, @sep_string) do |record|
      size = record.bytesize
      @index << IndexElement.new([size, offset, @index.length + 1])
      offset += size
    end
    @rndindex = RandomStack.new(@index.clone)
  end

  # returns a random line from the file. will not repeat lines.
  # returns nil when the file is exhausted (and recycle is not set).
  # note: does not modify file.
  def randomline
    # refill the random stack from the full index once it has run dry
    @rndindex = RandomStack.new(@index) if @recycle && @rndindex.length == 0

    # skip over any nil entries the stack may hand back
    entry = nil
    entry = @rndindex.pop while entry.nil? && @rndindex.length > 0

    entry.nil? ? nil : line(entry.linum)
  end

  # return true if there are no lines left for randomline(s) ( only useful if
  # recycle is not set, since a recycling file refills itself on the next
  # call to randomline )
  def r_eof?
    @rndindex.length == 0
  end

  # yields num random lines or returns them as an array. see randomline for details
  #
  # with a block, each line is yielded and nil is returned; without one the
  # lines are returned as an array. once the file is exhausted the remaining
  # slots are nil, exactly as randomline returns them.
  def randomlines(num) #:yields:line
    drawn = []
    yielding = block_given?
    num.times do
      rline = randomline
      yield rline if yielding
      drawn.push rline
    end
    yielding ? nil : drawn
  end

  # returns the number of lines available to randomline based methods
  # in the current cycle. useful if you want to know how close you
  # are to recycling the file, or how close to r_eof? == true
  def length
    @rndindex.length
  end

  # returns the line at num (1-based), with the trailing separator removed.
  #
  # raises a RuntimeError if num is less than 1 or larger than the number
  # of indexed lines.
  #--
  # File.read is used rather than IO.read so a filename beginning with "|"
  # is never treated as a command. A length-limited read returns a binary
  # string, so it is re-tagged with the default external encoding to match
  # the strings File.foreach produced while indexing.
  #++
  def line(num)
    if num < 1 || num > @index.length
      raise "line number: #{num} is out of bounds"
    end
    entry = @index[num - 1]
    raw = File.read(@filename, entry.length, entry.offset)
    raw.force_encoding(Encoding.default_external).chomp(@sep_string)
  end

  # yields each line in the file, in turn.
  # returns an Enumerator when called without a block.
  #
  # <i>note: currently IO intensive as it will open and close the file
  # for each line.</i>
  #
  def each # :yields:line
    return enum_for(:each) unless block_given?

    @index.each do |entry|
      yield line(entry.linum) unless entry.nil?
    end
  end
end
|
103
|
+
|
104
|
+
# A randomizing stack
|
105
|
+
class RandomStack
  # incoming is an array that becomes the stack data.
  #
  # nil entries are dropped and the remaining items are stored in random
  # order. incoming itself is left untouched; the stack works on its own copy.
  #--
  # compact returns a new array, so its result has to be used: calling it
  # on its own line would leave the nils in place.
  #++
  def initialize(incoming)
    @stack = incoming.compact.shuffle
  end

  # removes the top entry from the stack and returns it.
  # returns nil once the stack is empty.
  def pop
    @stack.pop
  end

  # returns the number of items left in the stack
  def length
    @stack.length
  end
end
|
61
122
|
|
123
|
+
# length, offset and line number for an indexed chunk of file
|
124
|
+
class IndexElement
  attr_accessor :length, :offset, :linum

  # index_data is a [length, offset, linum] array. any value that is
  # missing (or nil) defaults to 0.
  #--
  # The defaults are applied here because instance variables assigned in
  # the class body belong to the class object, not to its instances, and
  # so never reach a new IndexElement.
  #++
  def initialize(index_data=[])
    length, offset, linum = index_data
    @length = length || 0
    @offset = offset || 0
    @linum = linum || 0
  end
end
|
134
|
+
|
135
|
+
require 'rfile/version'
|
136
|
+
|
62
137
|
# vi:sw=2 ts=2
|
@@ -0,0 +1 @@
|
|
1
|
+
# Reopening the class (rather than assigning RFile::VERSION directly) lets
# this file be loaded on its own, before RFile itself has been defined.
class RFile
  # Release version of the rfile library.
  VERSION = "0.2.0".freeze
end
|
data/test/tc_rfile.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
|
-
require 'rfile'
|
2
1
|
require 'test/unit'
|
2
|
+
require 'rfile'
|
3
3
|
|
4
4
|
class TestRFile < Test::Unit::TestCase
|
5
5
|
def setup
|
@@ -9,22 +9,26 @@ class TestRFile < Test::Unit::TestCase
|
|
9
9
|
# Smoke test: random draws are strings matching a known fixture line, and
# line(1) is the first line. @f is presumably built in setup, which is
# outside this view.
def test_rfile
  first_draw = @f.randomline
  assert(first_draw.is_a?(String), "line is a string")

  second_draw = @f.randomline
  assert(valid_line(second_draw), "Line is valid: #{second_draw}")

  assert(@f.randomline)

  first_line = @f.line(1)
  assert(first_line == "Line 1", "line(1) returns Line 1: #{first_line}")
end
|
16
16
|
|
17
17
|
def test_enum_mixin
|
18
|
+
g = RFile.new("rfile/test/data/testenummixin")
|
18
19
|
count = 0
|
19
|
-
|
20
|
-
case i
|
21
|
-
when 0
|
22
|
-
assert_equal(l, "Line 1")
|
20
|
+
g.each_with_index do |l,i|
|
21
|
+
case (i+1)
|
23
22
|
when 1
|
24
|
-
assert_equal
|
23
|
+
assert_equal "Line 1", l
|
25
24
|
when 2
|
26
|
-
assert_equal
|
25
|
+
assert_equal "Line 2", l
|
26
|
+
when 3
|
27
|
+
assert_equal "Line 3", l
|
28
|
+
else
|
29
|
+
flunk "bad index: #{i}"
|
27
30
|
end
|
31
|
+
|
28
32
|
count+=1
|
29
33
|
end
|
30
34
|
assert count == 3, "EACH TEST: count should be 3 count is #{count}"
|
@@ -33,6 +37,88 @@ class TestRFile < Test::Unit::TestCase
|
|
33
37
|
assert( @f.reject {|t| t == "Line 3"} == ["Line 1", "Line 2"], "Enum: testing reject { }" )
|
34
38
|
assert( @f.collect {|t| "This is " + t } == ["This is Line 1", "This is Line 2", "This is Line 3"], "Enum: testing collect")
|
35
39
|
end
|
40
|
+
|
41
|
+
# With recycle enabled, a seventh draw (after six earlier ones) must
# still produce a valid line.
def test_recycle
  recycler = RFile.new("rfile/test/data/testfile", true)
  6.times { recycler.randomline }
  assert(valid_line(recycler.randomline), "Recycle failed")
end
|
48
|
+
|
49
|
+
# After a single random draw, length is expected to report 2.
def test_length
  rfile = RFile.new("rfile/test/data/testfile")
  rfile.randomline
  assert_equal(2, rfile.length, "Length should be 2")
end
|
54
|
+
|
55
|
+
# After three random draws, r_eof? is expected to be true.
def test_r_eof
  rfile = RFile.new("rfile/test/data/testfile")
  3.times { rfile.randomline }
  assert(rfile.r_eof?, "length is #{rfile.length}")
end
|
62
|
+
|
63
|
+
# randomlines must hand out valid lines both when given a block and when
# returning an array. Each form gets its own RFile instance.
def test_randomlines
  # test block form
  block_source = RFile.new("rfile/test/data/testfile")
  block_source.randomlines(2) { |line| assert(valid_line(line), "Randomlines block form: #{line}") }

  # test array form
  array_source = RFile.new("rfile/test/data/testfile")
  array_source.randomlines(2).each do |line|
    assert(valid_line(line), "Randomlines array form: #{line}")
  end
end
|
79
|
+
|
80
|
+
# With "--" as the separator, line(4) of the sep_string_test fixture must
# be the multi-line chunk below, newlines included.
def test_sep_string
  rfile = RFile.new("rfile/test/data/sep_string_test", false, "--")
  expected = "\nso here we see if this is line 4 or not.\n\n"
  assert_equal(expected, rfile.line(4), "is: #{rfile.line(4)}")
end
|
84
|
+
|
85
|
+
# Returns true when line is exactly "Line 1", "Line 2" or "Line 3",
# false for anything else.
def valid_line(line)
  ["Line 1", "Line 2", "Line 3"].include?(line)
end
|
97
|
+
end
|
98
|
+
|
99
|
+
# Exercises RandomStack#length and RandomStack#pop together.
class TestRandomStack < Test::Unit::TestCase
  # Builds a fresh three-item stack for each test.
  def setup
    @rstack = RandomStack.new([ "foo", "bar", "baz" ])
  end

  # length must count down 3, 2, 1, 0 and then stay at 0, while each pop
  # returns either one of the original items or nil.
  def test_length_and_pop
    [ 3, 2, 1, 0, 0 ].each do |i|
      assert_equal i, @rstack.length, "length and pop, length test: i = #{i} and length = #{@rstack.length}"

      case @rstack.pop
      when "foo", "bar", "baz"
        assert true, "Random value popped is valid"
      when nil
        assert true, "Random value popped is nil"
      else
        flunk "randomstack failed"
      end
    end
  end
end
|
37
123
|
|
38
124
|
# vi:sw=2 ts=2
|
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.0
|
|
3
3
|
specification_version: 1
|
4
4
|
name: rfile
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.
|
7
|
-
date: 2006-08-
|
6
|
+
version: 0.2.0
|
7
|
+
date: 2006-08-19 00:00:00 -07:00
|
8
8
|
summary: a read only, line oriented, sparse file class
|
9
9
|
require_paths:
|
10
10
|
- lib
|
@@ -30,10 +30,14 @@ authors:
|
|
30
30
|
- Christopher Maujean
|
31
31
|
files:
|
32
32
|
- lib/rfile.rb
|
33
|
+
- lib/rfile
|
33
34
|
- lib/LICENSE
|
34
|
-
-
|
35
|
+
- lib/rfile/version.rb
|
35
36
|
- test/tc_rfile.rb
|
37
|
+
- test/data
|
36
38
|
- test/data/testfile
|
39
|
+
- test/data/sep_string_test
|
40
|
+
- test/data/testenummixin
|
37
41
|
test_files:
|
38
42
|
- test/tc_rfile.rb
|
39
43
|
rdoc_options: []
|