linesource 0.0.1 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +1 -0
- data/VERSION +1 -1
- data/lib/linesource.rb +28 -5
- data/test/test_linesource.rb +55 -2
- metadata +1 -1
data/Rakefile
CHANGED
|
@@ -10,6 +10,7 @@ begin
|
|
|
10
10
|
gem.email = "daniel@thepolfers.com"
|
|
11
11
|
gem.homepage = "http://github.com/polfer/linesource"
|
|
12
12
|
gem.authors = ["Daniel Polfer"]
|
|
13
|
+
gem.has_rdoc = true
|
|
13
14
|
# gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
|
|
14
15
|
end
|
|
15
16
|
Jeweler::GemcutterTasks.new
|
data/VERSION
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
0.0
|
|
1
|
+
0.1.0
|
data/lib/linesource.rb
CHANGED
|
@@ -1,17 +1,27 @@
|
|
|
1
1
|
require 'stringio'
|
|
2
2
|
require 'zlib'
|
|
3
3
|
|
|
4
|
-
#
|
|
5
|
-
#
|
|
6
|
-
#
|
|
7
|
-
#
|
|
8
|
-
|
|
4
|
+
# The LineSource class is initialized with a pattern for files that
|
|
5
|
+
# need to be read and processed line by line. For example, you could
|
|
6
|
+
# pass a pattern to all the log files in a date hierarchy ('**/*.log').
|
|
7
|
+
# Every line in every file will be read and the transition from file
|
|
8
|
+
# to file is handled automatically (with callbacks if desired).
|
|
9
9
|
class LineSource
|
|
10
10
|
|
|
11
11
|
include Enumerable
|
|
12
12
|
|
|
13
|
+
# Linenum represents the line number of the current file (1 based).
|
|
13
14
|
attr_reader :linenum
|
|
14
15
|
|
|
16
|
+
# Constructor can take a number of different options including
|
|
17
|
+
# the pattern of files to be read (Dir.glob style). The files can
|
|
18
|
+
# be regular text files or gzipped files (detected through a .gz
|
|
19
|
+
# extension). The skiplines parameter can be used to specify a certain
|
|
20
|
+
# number of lines at the head of each file that should be skipped
|
|
21
|
+
# (usually header lines, for example in a CSV file). The filenew
|
|
22
|
+
# and filedone params represent a callback that can be fired at the
|
|
23
|
+
# start or end of processing for each of the files that match pattern.
|
|
24
|
+
# TODO: We should be able to pass multiple patterns to initialize.
|
|
15
25
|
def initialize( pattern, skiplines = 0, filenew = nil, filedone = nil )
|
|
16
26
|
@skiplines = skiplines
|
|
17
27
|
@filenames = Dir.glob(pattern).sort!.freeze
|
|
@@ -24,18 +34,24 @@ class LineSource
|
|
|
24
34
|
nextfile
|
|
25
35
|
end
|
|
26
36
|
|
|
37
|
+
# Allows the prefile callback to be set after LineSource creation.
|
|
27
38
|
def setfilenew( filenew )
|
|
28
39
|
@filenew = filenew
|
|
29
40
|
end
|
|
30
41
|
|
|
42
|
+
# Allows the postfile callback to be set after LineSource creation.
|
|
31
43
|
def setfiledone( filedone )
|
|
32
44
|
@filedone = filedone
|
|
33
45
|
end
|
|
34
46
|
|
|
47
|
+
# Tests whether _all_ files matching the original pattern have been processed.
|
|
35
48
|
def eof?
|
|
36
49
|
@content ? @content.eof? : true
|
|
37
50
|
end
|
|
38
51
|
|
|
52
|
+
# Closes all files held open on the current LineSource.
|
|
53
|
+
# Effectively ends the use of this LineSource.
|
|
54
|
+
# TODO: We could make it so that a close is like a "reset".
|
|
39
55
|
def close
|
|
40
56
|
if @content
|
|
41
57
|
@filedone.call(self) if @filedone
|
|
@@ -47,6 +63,7 @@ class LineSource
|
|
|
47
63
|
@linenum = -1
|
|
48
64
|
end
|
|
49
65
|
|
|
66
|
+
# Skips the current file being processed and sets to the next.
|
|
50
67
|
def nextfile
|
|
51
68
|
begin
|
|
52
69
|
if @content
|
|
@@ -78,6 +95,7 @@ class LineSource
|
|
|
78
95
|
end
|
|
79
96
|
end
|
|
80
97
|
|
|
98
|
+
# Returns an individual line from the LineSource.
|
|
81
99
|
def gets
|
|
82
100
|
@lastline = nil
|
|
83
101
|
if @content
|
|
@@ -91,18 +109,23 @@ class LineSource
|
|
|
91
109
|
@lastline
|
|
92
110
|
end
|
|
93
111
|
|
|
112
|
+
# Re-returns the last line fetched from a LineSource.
|
|
94
113
|
def lastline
|
|
95
114
|
@lastline
|
|
96
115
|
end
|
|
97
116
|
|
|
117
|
+
# Returns the name of the file currently being processed.
|
|
98
118
|
def filename
|
|
99
119
|
@index >= 0 ? @filenames[@index] : nil
|
|
100
120
|
end
|
|
101
121
|
|
|
122
|
+
# Returns the size of the file currently being processed. Note
|
|
123
|
+
# that in the case of a GZipped file this will be the compressed size.
|
|
102
124
|
def filesize
|
|
103
125
|
(@index >= 0 && @filenames[@index]) ? File.stat(@filenames[@index]).size : -1
|
|
104
126
|
end
|
|
105
127
|
|
|
128
|
+
# Calls a block of code for each line from the LineSource.
|
|
106
129
|
def each
|
|
107
130
|
while gets
|
|
108
131
|
yield @lastline
|
data/test/test_linesource.rb
CHANGED
|
@@ -1,7 +1,60 @@
|
|
|
1
1
|
require 'helper'
|
|
2
2
|
|
|
3
3
|
class TestLinesource < Test::Unit::TestCase
|
|
4
|
-
def
|
|
5
|
-
|
|
4
|
+
def setup
|
|
5
|
+
@testlines = IO.readlines(__FILE__)
|
|
6
6
|
end
|
|
7
|
+
|
|
8
|
+
def test_single_file
|
|
9
|
+
linenum = 0
|
|
10
|
+
ls = LineSource.new(__FILE__)
|
|
11
|
+
ls.each do |line|
|
|
12
|
+
linenum += 1
|
|
13
|
+
assert_equal(linenum, ls.linenum)
|
|
14
|
+
assert_equal(@testlines.shift,line)
|
|
15
|
+
end
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
def test_line_skip
|
|
19
|
+
linenum = 4
|
|
20
|
+
4.times { @testlines.shift }
|
|
21
|
+
ls = LineSource.new( __FILE__, 4 )
|
|
22
|
+
ls.each do |line|
|
|
23
|
+
linenum += 1
|
|
24
|
+
assert_equal(linenum, ls.linenum)
|
|
25
|
+
assert_equal(line, @testlines.shift)
|
|
26
|
+
end
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
def test_single_prefile_callback
|
|
30
|
+
prefilehit = false
|
|
31
|
+
progresscallback = lambda do |ls|
|
|
32
|
+
assert_equal(__FILE__, ls.filename)
|
|
33
|
+
prefilehit = true
|
|
34
|
+
end
|
|
35
|
+
ls = LineSource.new( __FILE__, 0, progresscallback )
|
|
36
|
+
assert_equal( prefilehit, true )
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
def test_single_postfile_no_callback
|
|
40
|
+
postfilehit = false
|
|
41
|
+
progresscallback = lambda do |ls|
|
|
42
|
+
assert_equal(__FILE__, ls.filename)
|
|
43
|
+
postfilehit = true
|
|
44
|
+
end
|
|
45
|
+
ls = LineSource.new( __FILE__, 0, nil, progresscallback )
|
|
46
|
+
assert_equal( postfilehit, false )
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
def test_single_postfile_callback
|
|
50
|
+
postfilehit = false
|
|
51
|
+
progresscallback = lambda do |ls|
|
|
52
|
+
assert_equal(__FILE__, ls.filename)
|
|
53
|
+
postfilehit = true
|
|
54
|
+
end
|
|
55
|
+
ls = LineSource.new( __FILE__, 0, nil, progresscallback )
|
|
56
|
+
ls.each { |line| assert_equal(line, @testlines.shift) }
|
|
57
|
+
assert_equal( postfilehit, true )
|
|
58
|
+
end
|
|
59
|
+
|
|
7
60
|
end
|