rfile 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/rfile.rb +127 -52
- data/lib/rfile/version.rb +1 -0
- data/test/data/sep_string_test +8 -0
- data/test/data/testenummixin +3 -0
- data/test/tc_rfile.rb +94 -8
- metadata +7 -3
data/lib/rfile.rb
CHANGED
@@ -1,62 +1,137 @@
|
|
1
|
-
# $Id: rfile.rb
|
1
|
+
# $Id: rfile.rb 54 2006-08-16 19:29:37Z cmaujean $
|
2
2
|
#
|
3
|
-
|
4
|
-
#
|
5
|
-
# This class is a line oriented "file" object that operates
|
3
|
+
|
4
|
+
# This class is a line oriented file object that operates
|
6
5
|
# without keeping the file in memory.
|
7
6
|
#
|
8
|
-
# Enumerable is mixed in,
|
7
|
+
# Enumerable is mixed in, see Enumerable for more
|
9
8
|
# information.
|
10
9
|
#
|
11
10
|
class RFile
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
11
|
+
include Enumerable
|
12
|
+
attr_accessor :recycle, :filename
|
13
|
+
|
14
|
+
# parses and indexes <i>filename</i>.
|
15
|
+
#
|
16
|
+
# if recycle == true, the randomline
|
17
|
+
# method will reload the index (fast)
|
18
|
+
# when it runs out of unique lines to produce
|
19
|
+
#
|
20
|
+
# if sep_string is passed, "lines" will be determined
|
21
|
+
# by sep_string instead of $/
|
22
|
+
#--
|
23
|
+
# storing line information (length, offset, line number)
|
24
|
+
#++
|
25
|
+
def initialize(filename, recycle=false, sep_string=$/)
|
26
|
+
@filename = filename
|
27
|
+
@recycle = recycle
|
28
|
+
@sep_string = sep_string
|
29
|
+
@index = Array.new
|
30
|
+
@rndindex = []
|
31
|
+
count = 0
|
32
|
+
offset = 1
|
33
|
+
|
34
|
+
File.open(@filename).each_line(@sep_string) do |line|
|
35
|
+
@index[count] = IndexElement.new([line.length, offset-1, count+1])
|
36
|
+
offset += line.length
|
37
|
+
count+=1
|
38
|
+
end
|
39
|
+
@rndindex = RandomStack.new(@index.clone)
|
40
|
+
end
|
41
|
+
|
42
|
+
# returns a random line from the file. will not repeat lines.
|
43
|
+
# returns nil when the file is exhausted. note: does not modify file.
|
44
|
+
def randomline
|
45
|
+
entry = nil
|
46
|
+
if @recycle and @rndindex.length == 0
|
47
|
+
@rndindex = RandomStack.new(@index)
|
48
|
+
end
|
49
|
+
while(entry.nil? and @rndindex.length > 0)
|
50
|
+
entry = @rndindex.pop
|
51
|
+
end
|
40
52
|
entry.nil? and return nil
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
53
|
+
return line(entry.linum)
|
54
|
+
end
|
55
|
+
|
56
|
+
# return true if there are no lines left for randomline(s) ( only useful if
|
57
|
+
# recycle=true )
|
58
|
+
def r_eof?
|
59
|
+
return true if @rndindex.length == 0
|
60
|
+
false
|
61
|
+
end
|
62
|
+
|
63
|
+
# yields num random lines or returns them as an array. see randomline for details
|
64
|
+
def randomlines(num) #:yields:line
|
65
|
+
arr = Array.new
|
66
|
+
doyield = block_given?
|
67
|
+
num.times do |i|
|
68
|
+
rline = randomline()
|
69
|
+
yield rline if doyield
|
70
|
+
arr.push rline
|
71
|
+
end
|
72
|
+
arr if not doyield
|
73
|
+
end
|
74
|
+
|
75
|
+
# returns the number of lines available to randomline based methods
|
76
|
+
# in the current cycle. useful if you want to know how close you
|
77
|
+
# are to recycling the file, or how close to r_eof? == true
|
78
|
+
def length
|
79
|
+
@rndindex.length
|
80
|
+
end
|
81
|
+
|
82
|
+
# returns the line at num (provided num is greater than or equal to 1)
|
83
|
+
# raises a RuntimeError if num is less than 1 or larger than the number of lines available
|
84
|
+
def line(num)
|
85
|
+
if (num < 1) or (num > @index.length)
|
86
|
+
raise "line number: #{num} is out of bounds"
|
87
|
+
end
|
88
|
+
entry = @index[num-1]
|
89
|
+
IO.read(@filename, entry.length, entry.offset).chomp(@sep_string)
|
90
|
+
end
|
91
|
+
|
92
|
+
# yields each line in the file, in turn.
|
93
|
+
#
|
94
|
+
# <i>note: currently IO intensive as it will open and close the file
|
95
|
+
# for each line.</i>
|
96
|
+
#
|
97
|
+
def each # :yields:line
|
98
|
+
@index.each do |entry|
|
99
|
+
yield line(entry.linum) unless entry.nil?
|
100
|
+
end
|
101
|
+
end
|
102
|
+
end
|
103
|
+
|
104
|
+
# A randomizing stack
|
105
|
+
class RandomStack
|
106
|
+
# incoming is an array that becomes the stack data
|
107
|
+
def initialize(incoming)
|
108
|
+
incoming.compact
|
109
|
+
@stack = incoming.sort_by { rand }.clone
|
110
|
+
end
|
111
|
+
|
112
|
+
# removes the top entry from the stack and returns it.
|
113
|
+
def pop
|
114
|
+
@stack.pop
|
115
|
+
end
|
116
|
+
|
117
|
+
# returns the number of items left in the stack
|
118
|
+
def length
|
119
|
+
@stack.length
|
120
|
+
end
|
60
121
|
end
|
61
122
|
|
123
|
+
# length, offset and line number for an indexed chunk of file
|
124
|
+
class IndexElement
|
125
|
+
attr_accessor :length, :offset, :linum
|
126
|
+
@length = 0
|
127
|
+
@offset = 0
|
128
|
+
@linum = 0
|
129
|
+
|
130
|
+
def initialize(index_data=[])
|
131
|
+
(@length,@offset,@linum) = index_data
|
132
|
+
end
|
133
|
+
end
|
134
|
+
|
135
|
+
require 'rfile/version'
|
136
|
+
|
62
137
|
# vi:sw=2 ts=2
|
@@ -0,0 +1 @@
|
|
1
|
+
RFile::VERSION = "0.2.0"
|
data/test/tc_rfile.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
|
-
require 'rfile'
|
2
1
|
require 'test/unit'
|
2
|
+
require 'rfile'
|
3
3
|
|
4
4
|
class TestRFile < Test::Unit::TestCase
|
5
5
|
def setup
|
@@ -9,22 +9,26 @@ class TestRFile < Test::Unit::TestCase
|
|
9
9
|
def test_rfile
|
10
10
|
assert(@f.randomline.is_a?(String), "line is a string")
|
11
11
|
testline = @f.randomline
|
12
|
-
assert((testline
|
12
|
+
assert(valid_line(testline), "Line is valid: #{testline}")
|
13
13
|
assert(@f.randomline)
|
14
14
|
assert(@f.line(1) == "Line 1", "line(1) returns Line 1: #{@f.line(1)}")
|
15
15
|
end
|
16
16
|
|
17
17
|
def test_enum_mixin
|
18
|
+
g = RFile.new("rfile/test/data/testenummixin")
|
18
19
|
count = 0
|
19
|
-
|
20
|
-
case i
|
21
|
-
when 0
|
22
|
-
assert_equal(l, "Line 1")
|
20
|
+
g.each_with_index do |l,i|
|
21
|
+
case (i+1)
|
23
22
|
when 1
|
24
|
-
assert_equal
|
23
|
+
assert_equal "Line 1", l
|
25
24
|
when 2
|
26
|
-
assert_equal
|
25
|
+
assert_equal "Line 2", l
|
26
|
+
when 3
|
27
|
+
assert_equal "Line 3", l
|
28
|
+
else
|
29
|
+
flunk "bad index: #{i}"
|
27
30
|
end
|
31
|
+
|
28
32
|
count+=1
|
29
33
|
end
|
30
34
|
assert count == 3, "EACH TEST: count should be 3 count is #{count}"
|
@@ -33,6 +37,88 @@ class TestRFile < Test::Unit::TestCase
|
|
33
37
|
assert( @f.reject {|t| t == "Line 3"} == ["Line 1", "Line 2"], "Enum: testing reject { }" )
|
34
38
|
assert( @f.collect {|t| "This is " + t } == ["This is Line 1", "This is Line 2", "This is Line 3"], "Enum: testing collect")
|
35
39
|
end
|
40
|
+
|
41
|
+
def test_recycle
|
42
|
+
g = RFile.new("rfile/test/data/testfile", true)
|
43
|
+
5.downto 0 do
|
44
|
+
g.randomline
|
45
|
+
end
|
46
|
+
assert valid_line(g.randomline), "Recycle failed"
|
47
|
+
end
|
48
|
+
|
49
|
+
def test_length
|
50
|
+
f = RFile.new("rfile/test/data/testfile")
|
51
|
+
f.randomline
|
52
|
+
assert_equal 2, f.length, "Length should be 2"
|
53
|
+
end
|
54
|
+
|
55
|
+
def test_r_eof
|
56
|
+
f = RFile.new("rfile/test/data/testfile")
|
57
|
+
3.downto 1 do
|
58
|
+
f.randomline
|
59
|
+
end
|
60
|
+
assert f.r_eof?, "length is #{f.length}"
|
61
|
+
end
|
62
|
+
|
63
|
+
def test_randomlines
|
64
|
+
f = RFile.new("rfile/test/data/testfile")
|
65
|
+
|
66
|
+
# test block form
|
67
|
+
f.randomlines(2) do |line|
|
68
|
+
assert(valid_line(line), "Randomlines block form: #{line}")
|
69
|
+
end
|
70
|
+
|
71
|
+
g = RFile.new("rfile/test/data/testfile")
|
72
|
+
|
73
|
+
# test array form
|
74
|
+
arr = g.randomlines(2)
|
75
|
+
arr.each do |line|
|
76
|
+
assert(valid_line(line), "Randomlines array form: #{line}")
|
77
|
+
end
|
78
|
+
end
|
79
|
+
|
80
|
+
def test_sep_string
|
81
|
+
f = RFile.new("rfile/test/data/sep_string_test", false, "--")
|
82
|
+
assert_equal "\nso here we see if this is line 4 or not.\n\n", f.line(4), "is: #{f.line(4)}"
|
83
|
+
end
|
84
|
+
|
85
|
+
def valid_line(line)
|
86
|
+
case line
|
87
|
+
when "Line 1"
|
88
|
+
true
|
89
|
+
when "Line 2"
|
90
|
+
true
|
91
|
+
when "Line 3"
|
92
|
+
true
|
93
|
+
else
|
94
|
+
false
|
95
|
+
end
|
96
|
+
end
|
97
|
+
end
|
98
|
+
|
99
|
+
class TestRandomStack < Test::Unit::TestCase
|
100
|
+
def setup
|
101
|
+
@rstack = RandomStack.new([ "foo", "bar", "baz" ])
|
102
|
+
end
|
103
|
+
def test_length_and_pop
|
104
|
+
[ 3, 2, 1, 0, 0 ].each do |i|
|
105
|
+
assert_equal i, @rstack.length, "length and pop, length test: i = #{i} and length = #{@rstack.length}"
|
106
|
+
popval = @rstack.pop
|
107
|
+
|
108
|
+
case popval
|
109
|
+
when "foo"
|
110
|
+
assert true, "Random value popped is valid"
|
111
|
+
when "bar"
|
112
|
+
assert true, "Random value popped is valid"
|
113
|
+
when "baz"
|
114
|
+
assert true, "Random value popped is valid"
|
115
|
+
when nil
|
116
|
+
assert true, "Random value popped is nil"
|
117
|
+
else
|
118
|
+
flunk "randomstack failed"
|
119
|
+
end
|
120
|
+
end
|
121
|
+
end
|
36
122
|
end
|
37
123
|
|
38
124
|
# vi:sw=2 ts=2
|
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.0
|
|
3
3
|
specification_version: 1
|
4
4
|
name: rfile
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.1.1
|
7
|
-
date: 2006-08-
|
6
|
+
version: 0.2.0
|
7
|
+
date: 2006-08-19 00:00:00 -07:00
|
8
8
|
summary: a read only, line oriented, sparse file class
|
9
9
|
require_paths:
|
10
10
|
- lib
|
@@ -30,10 +30,14 @@ authors:
|
|
30
30
|
- Christopher Maujean
|
31
31
|
files:
|
32
32
|
- lib/rfile.rb
|
33
|
+
- lib/rfile
|
33
34
|
- lib/LICENSE
|
34
|
-
-
|
35
|
+
- lib/rfile/version.rb
|
35
36
|
- test/tc_rfile.rb
|
37
|
+
- test/data
|
36
38
|
- test/data/testfile
|
39
|
+
- test/data/sep_string_test
|
40
|
+
- test/data/testenummixin
|
37
41
|
test_files:
|
38
42
|
- test/tc_rfile.rb
|
39
43
|
rdoc_options: []
|