rfile 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,62 +1,137 @@
1
- # $Id: rfile.rb 46 2006-08-14 06:13:47Z cmaujean $
1
+ # $Id: rfile.rb 54 2006-08-16 19:29:37Z cmaujean $
2
2
  #
3
- # See LICENSE for copyright information
4
- #
5
- # This class is a line oriented "file" object that operates
3
+
4
+ # This class is a line oriented file object that operates
6
5
  # without keeping the file in memory.
7
6
  #
8
- # Enumerable is mixed in, so see Enumerable for more
7
+ # Enumerable is mixed in, see Enumerable for more
9
8
  # information.
10
9
  #
11
10
  class RFile
12
- include Enumerable
13
-
14
- # parses and indexes <i>filename</i>.
15
- #--
16
- # storing line information (offset and length)
17
- #++
18
- def initialize(filename)
19
- @filename = filename
20
- @index = []
21
- @rndindex = []
22
- count = 1
23
- offset = 1
24
-
25
- File.open(@filename).each_line do |line|
26
- @index[count] = [line.length, offset-1]
27
- offset += line.length
28
- count+=1
29
- end
30
- @rndindex = @index.clone
31
- end
32
-
33
- # returns a random line from the file. will not repeat lines.
34
- # returns nil when the file is exausted. note: does not modify file.
35
- def randomline
36
- entry = nil
37
- while entry.nil? and @rndindex.length > 0
38
- entry = @rndindex.delete_at(rand(@rndindex.length))
39
- end
11
+ include Enumerable
12
+ attr_accessor :recycle, :filename
13
+
14
# Parses and indexes <i>filename</i>.
#
# filename::   path of the file to index
# recycle::    when true, the randomline method reloads the index (fast)
#              when it runs out of unique lines to produce
# sep_string:: when passed, "lines" are determined by sep_string
#              instead of $/
#--
# Each index entry stores the byte length, the byte offset and the
# 1-based line number of one "line"; the line text itself is not kept.
#++
def initialize(filename, recycle=false, sep_string=$/)
  @filename = filename
  @recycle = recycle
  @sep_string = sep_string
  @index = []
  offset = 0

  # The block form closes the file handle as soon as indexing is done.
  File.open(@filename) do |file|
    file.each_line(@sep_string) do |line|
      # Lines are later fetched with a byte-based read, so lengths and
      # offsets must be counted in bytes rather than characters.
      @index << IndexElement.new([line.bytesize, offset, @index.length + 1])
      offset += line.bytesize
    end
  end
  @rndindex = RandomStack.new(@index.clone)
end
41
+
42
+ # returns a random line from the file. will not repeat lines.
43
+ # returns nil when the file is exhausted. note: does not modify file.
44
+ def randomline
45
+ entry = nil
46
+ if @recycle and @rndindex.length == 0
47
+ @rndindex = RandomStack.new(@index)
48
+ end
49
+ while(entry.nil? and @rndindex.length > 0)
50
+ entry = @rndindex.pop
51
+ end
40
52
  entry.nil? and return nil
41
- entry.length == 2 and return IO.read(@filename, entry[0], entry[1]).chomp
42
- end
43
-
44
- # returns the line at num
45
- def line(num)
46
- entry = @index[num]
47
- return IO.read(@filename, entry[0], entry[1]).chomp
48
- end
49
-
50
- # yields each line in the file, in turn.
51
- #
52
- # note: currently IO intensive as it will open and close the file
53
- # for each line.
54
- #
55
- def each # :yields:line
56
- @index.each_with_index do |entry,i|
57
- yield line(i) unless entry.nil?
58
- end
59
- end
53
+ return line(entry.linum)
54
+ end
55
+
56
# Returns true if there are no lines left for randomline(s) to hand out
# in the current cycle, false otherwise. When recycle is true the next
# randomline call reloads the pool, so the result flips back to false.
def r_eof?
  @rndindex.length == 0
end
62
+
63
# Fetches num random lines by calling randomline num times.
#
# With a block, each line is yielded in turn and nil is returned.
# Without a block, the lines are returned as an array.
# Entries are whatever randomline returns, see randomline for details.
def randomlines(num) #:yields:line
  if block_given?
    # Nothing is collected in block form; the caller consumes each line.
    num.times { yield randomline }
    nil
  else
    num.times.map { randomline }
  end
end
74
+
75
+ # returns the number of lines still available to the randomline based
76
+ # methods in the current cycle (this is the size of the random pool, not
77
+ # the line count of the file). shows how close you are to r_eof? == true
78
+ def length
79
+ @rndindex.length
80
+ end
81
+
82
# Returns the line at num with the trailing separator removed.
#
# num:: 1-based line number
#
# Raises RuntimeError if num is less than 1 or larger than the number
# of indexed lines.
def line(num)
  if num < 1 || num > @index.length
    raise "line number: #{num} is out of bounds"
  end
  entry = @index[num - 1]
  # File.read always treats @filename as a path and reads only this
  # entry's span (length, then offset) from the file.
  File.read(@filename, entry.length, entry.offset).chomp(@sep_string)
end
91
+
92
# Yields each line in the file, in turn. Returns an Enumerator when
# called without a block.
#
# <i>note: currently IO intensive as it will open and close the file
# for each line.</i>
#
def each # :yields:line
  return to_enum(:each) unless block_given?

  @index.each do |entry|
    # nil index entries are skipped rather than looked up
    yield line(entry.linum) unless entry.nil?
  end
end
102
+ end
103
+
104
# A randomizing stack: the data is shuffled once when the stack is
# built and is then handed out one entry at a time.
class RandomStack
  # incoming is an array that becomes the stack data. nil entries are
  # dropped; the array passed in is not modified.
  def initialize(incoming)
    # compact returns a new array, so its result has to be the one that
    # gets shuffled for the nil entries to actually disappear.
    @stack = incoming.compact.sort_by { rand }
  end

  # removes the top entry from the stack and returns it
  # (nil once the stack is empty).
  def pop
    @stack.pop
  end

  # returns the number of items left in the stack
  def length
    @stack.length
  end
end
61
122
 
123
# length, offset and line number for an indexed chunk of file
class IndexElement
  attr_accessor :length, :offset, :linum

  # index_data is an array of [length, offset, linum]. Any value that is
  # missing (or nil) defaults to 0.
  def initialize(index_data=[])
    length, offset, linum = index_data
    # Defaults are applied per instance here; assignments in the class
    # body would only set variables on the class object itself.
    @length = length || 0
    @offset = offset || 0
    @linum = linum || 0
  end
end
134
+
135
+ require 'rfile/version'
136
+
62
137
  # vi:sw=2 ts=2
@@ -0,0 +1 @@
1
+ RFile::VERSION = "0.2.0"
@@ -0,0 +1,8 @@
1
+ This is a test file for the rfile
2
+ sep string test
3
+ to see if we can -- load multiple "lines" - and use a different sep string than the $/ default one.
4
+ --
5
+ --
6
+ so here we see if this is line 4 or not.
7
+
8
+ --
@@ -0,0 +1,3 @@
1
+ Line 1
2
+ Line 2
3
+ Line 3
@@ -1,5 +1,5 @@
1
- require 'rfile'
2
1
  require 'test/unit'
2
+ require 'rfile'
3
3
 
4
4
  class TestRFile < Test::Unit::TestCase
5
5
  def setup
@@ -9,22 +9,26 @@ class TestRFile < Test::Unit::TestCase
9
9
  def test_rfile
10
10
  assert(@f.randomline.is_a?(String), "line is a string")
11
11
  testline = @f.randomline
12
- assert((testline == "Line 1" or testline == "Line 2" or testline == "Line 3"), "Line is valid: #{testline}")
12
+ assert(valid_line(testline), "Line is valid: #{testline}")
13
13
  assert(@f.randomline)
14
14
  assert(@f.line(1) == "Line 1", "line(1) returns Line 1: #{@f.line(1)}")
15
15
  end
16
16
 
17
17
  def test_enum_mixin
18
+ g = RFile.new("rfile/test/data/testenummixin")
18
19
  count = 0
19
- @f.each_with_index do |l,i|
20
- case i
21
- when 0
22
- assert_equal(l, "Line 1")
20
+ g.each_with_index do |l,i|
21
+ case (i+1)
23
22
  when 1
24
- assert_equal(l, "Line 2")
23
+ assert_equal "Line 1", l
25
24
  when 2
26
- assert_equal(l, "Line 3")
25
+ assert_equal "Line 2", l
26
+ when 3
27
+ assert_equal "Line 3", l
28
+ else
29
+ flunk "bad index: #{i}"
27
30
  end
31
+
28
32
  count+=1
29
33
  end
30
34
  assert count == 3, "EACH TEST: count should be 3 count is #{count}"
@@ -33,6 +37,88 @@ class TestRFile < Test::Unit::TestCase
33
37
  assert( @f.reject {|t| t == "Line 3"} == ["Line 1", "Line 2"], "Enum: testing reject { }" )
34
38
  assert( @f.collect {|t| "This is " + t } == ["This is Line 1", "This is Line 2", "This is Line 3"], "Enum: testing collect")
35
39
  end
40
+
41
+ def test_recycle
42
+ g = RFile.new("rfile/test/data/testfile", true)
43
+ 5.downto 0 do
44
+ g.randomline
45
+ end
46
+ assert valid_line(g.randomline), "Recycle failed"
47
+ end
48
+
49
+ def test_length
50
+ f = RFile.new("rfile/test/data/testfile")
51
+ f.randomline
52
+ assert_equal 2, f.length, "Length should be 2"
53
+ end
54
+
55
+ def test_r_eof
56
+ f = RFile.new("rfile/test/data/testfile")
57
+ 3.downto 1 do
58
+ f.randomline
59
+ end
60
+ assert f.r_eof?, "length is #{f.length}"
61
+ end
62
+
63
+ def test_randomlines
64
+ f = RFile.new("rfile/test/data/testfile")
65
+
66
+ # test block form
67
+ f.randomlines(2) do |line|
68
+ assert(valid_line(line), "Randomlines block form: #{line}")
69
+ end
70
+
71
+ g = RFile.new("rfile/test/data/testfile")
72
+
73
+ # test array form
74
+ arr = g.randomlines(2)
75
+ arr.each do |line|
76
+ assert(valid_line(line), "Randomlines array form: #{line}")
77
+ end
78
+ end
79
+
80
+ def test_sep_string
81
+ f = RFile.new("rfile/test/data/sep_string_test", false, "--")
82
+ assert_equal "\nso here we see if this is line 4 or not.\n\n", f.line(4), "is: #{f.line(4)}"
83
+ end
84
+
85
+ def valid_line(line)
86
+ case line
87
+ when "Line 1"
88
+ true
89
+ when "Line 2"
90
+ true
91
+ when "Line 3"
92
+ true
93
+ else
94
+ false
95
+ end
96
+ end
97
+ end
98
+
99
# Exercises RandomStack: length bookkeeping, that every element comes
# back exactly once, and behaviour once the stack is empty.
class TestRandomStack < Test::Unit::TestCase
  def setup
    @items = [ "foo", "bar", "baz" ]
    @rstack = RandomStack.new(@items.dup)
  end

  def test_length_and_pop
    popped = []
    [ 3, 2, 1 ].each do |i|
      assert_equal i, @rstack.length, "length and pop, length test: i = #{i} and length = #{@rstack.length}"
      popped << @rstack.pop
    end

    # Order is random, so compare as sorted lists: each value must be
    # popped exactly once, with no extras and no nils.
    assert_equal @items.sort, popped.sort, "Random value popped is valid"

    # Popping an empty stack yields nil and leaves the length at 0.
    2.times do
      assert_equal 0, @rstack.length, "length and pop, length test: i = 0 and length = #{@rstack.length}"
      assert_nil @rstack.pop, "Random value popped is nil"
    end
  end
end
37
123
 
38
124
  # vi:sw=2 ts=2
metadata CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.0
3
3
  specification_version: 1
4
4
  name: rfile
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.1.1
7
- date: 2006-08-13 00:00:00 -07:00
6
+ version: 0.2.0
7
+ date: 2006-08-19 00:00:00 -07:00
8
8
  summary: a read only, line oriented, sparse file class
9
9
  require_paths:
10
10
  - lib
@@ -30,10 +30,14 @@ authors:
30
30
  - Christopher Maujean
31
31
  files:
32
32
  - lib/rfile.rb
33
+ - lib/rfile
33
34
  - lib/LICENSE
34
- - test/data
35
+ - lib/rfile/version.rb
35
36
  - test/tc_rfile.rb
37
+ - test/data
36
38
  - test/data/testfile
39
+ - test/data/sep_string_test
40
+ - test/data/testenummixin
37
41
  test_files:
38
42
  - test/tc_rfile.rb
39
43
  rdoc_options: []