rfile 0.1.1 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,62 +1,137 @@
1
- # $Id: rfile.rb 46 2006-08-14 06:13:47Z cmaujean $
1
+ # $Id: rfile.rb 54 2006-08-16 19:29:37Z cmaujean $
2
2
  #
3
- # See LICENSE for copyright information
4
- #
5
- # This class is a line oriented "file" object that operates
3
+
4
+ # This class is a line oriented file object that operates
6
5
  # without keeping the file in memory.
7
6
  #
8
- # Enumerable is mixed in, so see Enumerable for more
7
+ # Enumerable is mixed in, see Enumerable for more
9
8
  # information.
10
9
  #
11
10
  class RFile
12
- include Enumerable
13
-
14
- # parses and indexes <i>filename</i>.
15
- #--
16
- # storing line information (offset and length)
17
- #++
18
- def initialize(filename)
19
- @filename = filename
20
- @index = []
21
- @rndindex = []
22
- count = 1
23
- offset = 1
24
-
25
- File.open(@filename).each_line do |line|
26
- @index[count] = [line.length, offset-1]
27
- offset += line.length
28
- count+=1
29
- end
30
- @rndindex = @index.clone
31
- end
32
-
33
- # returns a random line from the file. will not repeat lines.
34
- # returns nil when the file is exhausted. note: does not modify file.
35
- def randomline
36
- entry = nil
37
- while entry.nil? and @rndindex.length > 0
38
- entry = @rndindex.delete_at(rand(@rndindex.length))
39
- end
11
+ include Enumerable
12
+ attr_accessor :recycle, :filename
13
+
14
+ # parses and indexes <i>filename</i>.
15
+ #
16
+ # if recycle == true, the randomline
17
+ # method will reload the index (fast)
18
+ # when it runs out of unique lines to produce
19
+ #
20
+ # if sep_string is passed, "lines" will be determined
21
+ # by sep_string instead of $/
22
+ #--
23
+ # storing line information (length, offset, line number)
24
+ #++
25
+ def initialize(filename, recycle=false, sep_string=$/)
26
+ @filename = filename
27
+ @recycle = recycle
28
+ @sep_string = sep_string
29
+ @index = Array.new
30
+ @rndindex = []
31
+ count = 0
32
+ offset = 1
33
+
34
+ File.open(@filename).each_line(@sep_string) do |line|
35
+ @index[count] = IndexElement.new([line.length, offset-1, count+1])
36
+ offset += line.length
37
+ count+=1
38
+ end
39
+ @rndindex = RandomStack.new(@index.clone)
40
+ end
41
+
42
+ # returns a random line from the file. will not repeat lines.
43
+ # returns nil when the file is exhausted. note: does not modify file.
44
+ def randomline
45
+ entry = nil
46
+ if @recycle and @rndindex.length == 0
47
+ @rndindex = RandomStack.new(@index)
48
+ end
49
+ while(entry.nil? and @rndindex.length > 0)
50
+ entry = @rndindex.pop
51
+ end
40
52
  entry.nil? and return nil
41
- entry.length == 2 and return IO.read(@filename, entry[0], entry[1]).chomp
42
- end
43
-
44
- # returns the line at num
45
- def line(num)
46
- entry = @index[num]
47
- return IO.read(@filename, entry[0], entry[1]).chomp
48
- end
49
-
50
- # yields each line in the file, in turn.
51
- #
52
- # note: currently IO intensive as it will open and close the file
53
- # for each line.
54
- #
55
- def each # :yields:line
56
- @index.each_with_index do |entry,i|
57
- yield line(i) unless entry.nil?
58
- end
59
- end
53
+ return line(entry.linum)
54
+ end
55
+
56
+ # return true if there are no lines left for randomline(s) ( only useful if
57
+ # recycle=true )
58
+ def r_eof?
59
+ return true if @rndindex.length == 0
60
+ false
61
+ end
62
+
63
+ # yields num random lines if a block is given, otherwise returns them as an array. see randomline for details
64
+ def randomlines(num) #:yields:line
65
+ arr = Array.new
66
+ doyield = block_given?
67
+ num.times do |i|
68
+ rline = randomline()
69
+ yield rline if doyield
70
+ arr.push rline
71
+ end
72
+ arr if not doyield
73
+ end
74
+
75
+ # returns the number of lines available to randomline based methods
76
+ # in the current cycle. useful if you want to know how close you
77
+ # are to recycling the file, or how close to r_eof? == true
78
+ def length
79
+ @rndindex.length
80
+ end
81
+
82
+ # returns the line at num (provided num is greater than or equal to 1)
83
+ # raises a RuntimeError if num is less than 1 or larger than the number of lines available
84
+ def line(num)
85
+ if (num < 1) or (num > @index.length)
86
+ raise "line number: #{num} is out of bounds"
87
+ end
88
+ entry = @index[num-1]
89
+ IO.read(@filename, entry.length, entry.offset).chomp(@sep_string)
90
+ end
91
+
92
+ # yields each line in the file, in turn.
93
+ #
94
+ # <i>note: currently IO intensive as it will open and close the file
95
+ # for each line.</i>
96
+ #
97
+ def each # :yields:line
98
+ @index.each do |entry|
99
+ yield line(entry.linum) unless entry.nil?
100
+ end
101
+ end
102
+ end
103
+
104
+ # A randomizing stack
105
+ class RandomStack
106
+ # incoming is an array that becomes the stack data
107
+ def initialize(incoming)
108
+ incoming.compact
109
+ @stack = incoming.sort_by { rand }.clone
110
+ end
111
+
112
+ # removes the top entry from the stack and returns it.
113
+ def pop
114
+ @stack.pop
115
+ end
116
+
117
+ # returns the number of items left in the stack
118
+ def length
119
+ @stack.length
120
+ end
60
121
  end
61
122
 
123
+ # length, offset and line number for an indexed chunk of file
124
+ class IndexElement
125
+ attr_accessor :length, :offset, :linum
126
+ @length = 0
127
+ @offset = 0
128
+ @linum = 0
129
+
130
+ def initialize(index_data=[])
131
+ (@length,@offset,@linum) = index_data
132
+ end
133
+ end
134
+
135
+ require 'rfile/version'
136
+
62
137
  # vi:sw=2 ts=2
@@ -0,0 +1 @@
1
+ RFile::VERSION = "0.2.0"
@@ -0,0 +1,8 @@
1
+ This is a test file for the rfile
2
+ sep string test
3
+ to see if we can -- load multiple "lines" - and use a different sep string than the $/ default one.
4
+ --
5
+ --
6
+ so here we see if this is line 4 or not.
7
+
8
+ --
@@ -0,0 +1,3 @@
1
+ Line 1
2
+ Line 2
3
+ Line 3
@@ -1,5 +1,5 @@
1
- require 'rfile'
2
1
  require 'test/unit'
2
+ require 'rfile'
3
3
 
4
4
  class TestRFile < Test::Unit::TestCase
5
5
  def setup
@@ -9,22 +9,26 @@ class TestRFile < Test::Unit::TestCase
9
9
  def test_rfile
10
10
  assert(@f.randomline.is_a?(String), "line is a string")
11
11
  testline = @f.randomline
12
- assert((testline == "Line 1" or testline == "Line 2" or testline == "Line 3"), "Line is valid: #{testline}")
12
+ assert(valid_line(testline), "Line is valid: #{testline}")
13
13
  assert(@f.randomline)
14
14
  assert(@f.line(1) == "Line 1", "line(1) returns Line 1: #{@f.line(1)}")
15
15
  end
16
16
 
17
17
  def test_enum_mixin
18
+ g = RFile.new("rfile/test/data/testenummixin")
18
19
  count = 0
19
- @f.each_with_index do |l,i|
20
- case i
21
- when 0
22
- assert_equal(l, "Line 1")
20
+ g.each_with_index do |l,i|
21
+ case (i+1)
23
22
  when 1
24
- assert_equal(l, "Line 2")
23
+ assert_equal "Line 1", l
25
24
  when 2
26
- assert_equal(l, "Line 3")
25
+ assert_equal "Line 2", l
26
+ when 3
27
+ assert_equal "Line 3", l
28
+ else
29
+ flunk "bad index: #{i}"
27
30
  end
31
+
28
32
  count+=1
29
33
  end
30
34
  assert count == 3, "EACH TEST: count should be 3 count is #{count}"
@@ -33,6 +37,88 @@ class TestRFile < Test::Unit::TestCase
33
37
  assert( @f.reject {|t| t == "Line 3"} == ["Line 1", "Line 2"], "Enum: testing reject { }" )
34
38
  assert( @f.collect {|t| "This is " + t } == ["This is Line 1", "This is Line 2", "This is Line 3"], "Enum: testing collect")
35
39
  end
40
+
41
+ def test_recycle
42
+ g = RFile.new("rfile/test/data/testfile", true)
43
+ 5.downto 0 do
44
+ g.randomline
45
+ end
46
+ assert valid_line(g.randomline), "Recycle failed"
47
+ end
48
+
49
+ def test_length
50
+ f = RFile.new("rfile/test/data/testfile")
51
+ f.randomline
52
+ assert_equal 2, f.length, "Length should be 2"
53
+ end
54
+
55
+ def test_r_eof
56
+ f = RFile.new("rfile/test/data/testfile")
57
+ 3.downto 1 do
58
+ f.randomline
59
+ end
60
+ assert f.r_eof?, "length is #{f.length}"
61
+ end
62
+
63
+ def test_randomlines
64
+ f = RFile.new("rfile/test/data/testfile")
65
+
66
+ # test block form
67
+ f.randomlines(2) do |line|
68
+ assert(valid_line(line), "Randomlines block form: #{line}")
69
+ end
70
+
71
+ g = RFile.new("rfile/test/data/testfile")
72
+
73
+ # test array form
74
+ arr = g.randomlines(2)
75
+ arr.each do |line|
76
+ assert(valid_line(line), "Randomlines array form: #{line}")
77
+ end
78
+ end
79
+
80
+ def test_sep_string
81
+ f = RFile.new("rfile/test/data/sep_string_test", false, "--")
82
+ assert_equal "\nso here we see if this is line 4 or not.\n\n", f.line(4), "is: #{f.line(4)}"
83
+ end
84
+
85
+ def valid_line(line)
86
+ case line
87
+ when "Line 1"
88
+ true
89
+ when "Line 2"
90
+ true
91
+ when "Line 3"
92
+ true
93
+ else
94
+ false
95
+ end
96
+ end
97
+ end
98
+
99
+ class TestRandomStack < Test::Unit::TestCase
100
+ def setup
101
+ @rstack = RandomStack.new([ "foo", "bar", "baz" ])
102
+ end
103
+ def test_length_and_pop
104
+ [ 3, 2, 1, 0, 0 ].each do |i|
105
+ assert_equal i, @rstack.length, "length and pop, length test: i = #{i} and length = #{@rstack.length}"
106
+ popval = @rstack.pop
107
+
108
+ case popval
109
+ when "foo"
110
+ assert true, "Random value popped is valid"
111
+ when "bar"
112
+ assert true, "Random value popped is valid"
113
+ when "baz"
114
+ assert true, "Random value popped is valid"
115
+ when nil
116
+ assert true, "Random value popped is nil"
117
+ else
118
+ flunk "randomstack failed"
119
+ end
120
+ end
121
+ end
36
122
  end
37
123
 
38
124
  # vi:sw=2 ts=2
metadata CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.0
3
3
  specification_version: 1
4
4
  name: rfile
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.1.1
7
- date: 2006-08-13 00:00:00 -07:00
6
+ version: 0.2.0
7
+ date: 2006-08-19 00:00:00 -07:00
8
8
  summary: a read only, line oriented, sparse file class
9
9
  require_paths:
10
10
  - lib
@@ -30,10 +30,14 @@ authors:
30
30
  - Christopher Maujean
31
31
  files:
32
32
  - lib/rfile.rb
33
+ - lib/rfile
33
34
  - lib/LICENSE
34
- - test/data
35
+ - lib/rfile/version.rb
35
36
  - test/tc_rfile.rb
37
+ - test/data
36
38
  - test/data/testfile
39
+ - test/data/sep_string_test
40
+ - test/data/testenummixin
37
41
  test_files:
38
42
  - test/tc_rfile.rb
39
43
  rdoc_options: []