log_slice 0.1 → 0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +40 -27
- data/lib/log_slice/search_boundary.rb +34 -0
- data/lib/log_slice.rb +83 -70
- data/log_slice.gemspec +1 -1
- data/spec/helper.rb +1 -1
- data/spec/log_slice_spec.rb +59 -14
- metadata +4 -3
data/README.md
CHANGED
@@ -5,46 +5,59 @@ Uses binary search to find a line quickly in a large log file. O(log2(n))
|
|
5
5
|
|
6
6
|
Can only search sorted data, which means it's probably only useful for searching by timestamp.
|
7
7
|
|
8
|
+
## Installation
|
9
|
+
|
10
|
+
gem install log_slice
|
11
|
+
|
8
12
|
## Example
|
9
13
|
|
10
14
|
something-interesting.log:
|
11
15
|
|
12
|
-
[2012-08-29 18:41:12] (9640) something something something else 1
|
13
|
-
[2012-08-29 18:41:14] (9640) something something something else 2
|
14
|
-
[2012-08-29 18:41:14] (9640) something something something else 3
|
15
|
-
[2012-08-29 18:41:14] (9640) something something something else 4
|
16
|
-
[2012-08-29 18:42:18] (9640) something something something else 5
|
17
|
-
[2012-08-29 18:42:18] (9640) something something something else 6
|
18
|
-
[2012-08-29 18:42:18] (9640) something something something else 7
|
19
|
-
[2012-08-29 18:42:20] (9640) something something something else 8
|
20
|
-
[2012-08-29 18:42:20] (9640) something something something else 9
|
21
|
-
[2012-08-29 18:42:20] (9640) something something something else 10
|
16
|
+
[2012-08-29 18:41:12] (9640) 1 something something something else 1
|
17
|
+
[2012-08-29 18:41:14] (9640) 2 something something something else 2
|
18
|
+
[2012-08-29 18:41:14] (9640) 3 something something something else 3
|
19
|
+
[2012-08-29 18:41:14] (9640) 4 something something something else 4
|
20
|
+
[2012-08-29 18:42:18] (9640) 5 something something something else 5
|
21
|
+
[2012-08-29 18:42:18] (9640) 6 something something something else 6
|
22
|
+
[2012-08-29 18:42:18] (9640) 7 something something something else 7
|
23
|
+
[2012-08-29 18:42:20] (9640) 8 something something something else 8
|
24
|
+
[2012-08-29 18:42:20] (9640) 9 something something something else 9
|
25
|
+
[2012-08-29 18:42:20] (9640) 10 something something something else 10
|
26
|
+
|
27
|
+
extract everything that happened at or after 18:42:00:
|
28
|
+
```ruby
|
29
|
+
find_date = DateTime.parse("2012-08-29 18:42:00")
|
30
|
+
file = LogSlice.new("something-interesting.log").find do |line|
|
31
|
+
date_string = line.match(/^\[([^\]]+)\]/)[1]
|
32
|
+
find_date <=> DateTime.parse(date_string)
|
33
|
+
end
|
22
34
|
|
23
|
-
|
35
|
+
# this will yield an instance of File
|
36
|
+
# the position in the file is the first byte of the found line
|
24
37
|
|
25
|
-
|
26
|
-
|
27
|
-
date_string = line.match(/^\[([^\]]+)\]/)[1]
|
28
|
-
find_date <=> DateTime.parse(date_string)
|
29
|
-
end
|
38
|
+
file.readline
|
39
|
+
#=> "[2012-08-29 18:42:18] (9640) 5 something something something else 5"
|
30
40
|
|
31
|
-
|
32
|
-
|
41
|
+
# Once you found the line you were after,
|
42
|
+
# you can continue to read subsequent lines:
|
33
43
|
|
34
|
-
|
35
|
-
|
44
|
+
file.readline
|
45
|
+
#=> "[2012-08-29 18:42:18] (9640) 6 something something something else 6"
|
46
|
+
```
|
36
47
|
|
37
|
-
|
38
|
-
|
48
|
+
LogSlice.new takes a File or file path, and a comparison function, passed as a block.
|
49
|
+
When passed a line, the block must return -1 if the value represented by the line
|
50
|
+
is too high, 1 if it's too low, or 0 if it's just right.
|
39
51
|
|
40
|
-
|
41
|
-
|
42
|
-
|
52
|
+
```ruby
|
53
|
+
LogSlice.new(file_or_file_path).find(&comparison_function) #=> File or nil
|
54
|
+
```
|
43
55
|
|
44
56
|
## Limitations
|
45
57
|
|
46
|
-
* Can only search sorted data.
|
47
|
-
|
58
|
+
* Can only search sorted data. If the data isn't sorted, it will most likely not find anything (ie return nil).
|
59
|
+
In very rare cases it may find the value anyway by chance, so it's not guaranteed that unsorted
|
60
|
+
input will always yield nil.
|
48
61
|
|
49
62
|
## Disclaimer
|
50
63
|
|
@@ -0,0 +1,34 @@
|
|
1
|
+
class LogSlice
|
2
|
+
class SearchBoundary
|
3
|
+
|
4
|
+
attr_reader :cursor
|
5
|
+
|
6
|
+
def initialize file_size
|
7
|
+
@file_size = file_size
|
8
|
+
end
|
9
|
+
|
10
|
+
# reset the search boundary to cover the entire file
|
11
|
+
def reset
|
12
|
+
@lower = 0
|
13
|
+
@upper = @file_size
|
14
|
+
@cursor = 0
|
15
|
+
cursor_forward
|
16
|
+
self
|
17
|
+
end
|
18
|
+
|
19
|
+
# Move cursor forward.
|
20
|
+
# The cursor is moved halfway between its start location and the upper boundary.
|
21
|
+
def cursor_forward
|
22
|
+
@lower = @cursor
|
23
|
+
@cursor = @cursor + (@upper - @cursor) / 2
|
24
|
+
end
|
25
|
+
|
26
|
+
# Move cursor backward.
|
27
|
+
# The cursor is moved halfway between its start location and the lower boundary.
|
28
|
+
def cursor_back
|
29
|
+
@upper = @cursor
|
30
|
+
@cursor = @cursor - (@cursor - @lower) / 2
|
31
|
+
end
|
32
|
+
|
33
|
+
end
|
34
|
+
end
|
data/lib/log_slice.rb
CHANGED
@@ -1,129 +1,142 @@
|
|
1
|
+
require File.expand_path("log_slice/search_boundary", File.dirname(__FILE__))
|
2
|
+
|
1
3
|
class LogSlice
|
2
4
|
|
5
|
+
NEWLINE = "\n"
|
6
|
+
NEWLINE_CHAR = "\n"[0]
|
7
|
+
|
3
8
|
# @param log_file [File, String]
|
4
|
-
|
9
|
+
# @param options [Hash] :exact_match default false
|
10
|
+
def initialize log_file, options={}
|
5
11
|
@file = log_file.respond_to?(:seek) ? log_file : File.open(log_file, 'r')
|
6
|
-
@
|
7
|
-
@
|
8
|
-
@upper = @size
|
9
|
-
@char_cursor = nil
|
12
|
+
@exact_match = options[:exact_match] || false
|
13
|
+
@search_boundary = SearchBoundary.new(@file.stat.size)
|
10
14
|
@line_cursor = nil
|
11
15
|
end
|
12
16
|
|
13
|
-
#
|
14
|
-
#
|
17
|
+
# Find line in the file using the comparison function.
|
18
|
+
# Depends on lines being sorted.
|
19
|
+
# The comparison function will be passed lines from the file. It must
|
20
|
+
# return -1 if the line is later than the one it's looking for, 1 if
|
21
|
+
# the line is earlier than the one it's looking for, and 0 if it is
|
22
|
+
# the line it's looking for.
|
23
|
+
# @param compare [Proc] comparison function
|
24
|
+
# @return [File, nil] file after seeking to start of line or nil if line not found
|
15
25
|
def find &compare
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
return @file
|
28
|
-
when -1
|
29
|
-
direction = :back
|
30
|
-
when 1
|
31
|
-
direction = :forward
|
32
|
-
else
|
33
|
-
raise ArgumentError
|
26
|
+
reset_progress_check
|
27
|
+
@search_boundary.reset
|
28
|
+
line = find_next_newline
|
29
|
+
while making_progress?
|
30
|
+
comp_value = compare.call(line)
|
31
|
+
if comp_value == 0 # found matching line
|
32
|
+
backtrack_to_first_line_match compare
|
33
|
+
return @file
|
34
|
+
else
|
35
|
+
@search_boundary.send(comp_value < 0 ? :cursor_back : :cursor_forward)
|
36
|
+
line = find_next_newline
|
34
37
|
end
|
35
38
|
end
|
39
|
+
if @exact_match
|
40
|
+
nil
|
41
|
+
else
|
42
|
+
backtrack_to_gap compare
|
43
|
+
return @file.eof? ? nil : @file
|
44
|
+
end
|
36
45
|
end
|
37
46
|
|
38
47
|
private
|
39
48
|
|
40
|
-
#
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
49
|
+
# whether the cursor has moved since previous call
|
50
|
+
def making_progress?
|
51
|
+
return false if @previous_cursor_position == @line_cursor
|
52
|
+
@previous_cursor_position = @line_cursor
|
53
|
+
true
|
45
54
|
end
|
46
55
|
|
56
|
+
def reset_progress_check
|
57
|
+
@previous_cursor_position = nil
|
58
|
+
end
|
59
|
+
|
60
|
+
|
47
61
|
# once the line has been found, we must check the lines above it -
|
48
62
|
# if a line above also matches, we should seek to it.
|
49
63
|
# (this makes search on some files O(n/2) instead of O(log2(n)))
|
50
|
-
|
51
|
-
|
64
|
+
# @param compare [Proc] comparison function
|
65
|
+
def backtrack_to_first_line_match compare
|
66
|
+
previous_cursor_position = @line_cursor
|
67
|
+
each_line_reverse do |line|
|
68
|
+
if compare.call(line) != 0
|
69
|
+
# we've found a non-matching line,
|
70
|
+
# so we set @line_cursor back to the previous matching line
|
71
|
+
@line_cursor = previous_cursor_position
|
72
|
+
break
|
73
|
+
end
|
74
|
+
previous_cursor_position = @line_cursor
|
75
|
+
end
|
52
76
|
@file.seek(@line_cursor)
|
53
77
|
end
|
54
78
|
|
55
|
-
|
56
|
-
|
79
|
+
# if no match was found, we're sitting at too-high.
|
80
|
+
# backtrack up to the first too-high
|
81
|
+
def backtrack_to_gap compare
|
82
|
+
@line_cursor = @file.pos
|
83
|
+
previous_cursor_position = @line_cursor
|
57
84
|
each_line_reverse do |line|
|
58
|
-
if compare.call(line)
|
59
|
-
@line_cursor =
|
85
|
+
if compare.call(line) == 1
|
86
|
+
@line_cursor = previous_cursor_position
|
60
87
|
break
|
61
88
|
end
|
62
|
-
|
89
|
+
previous_cursor_position = @line_cursor
|
63
90
|
end
|
91
|
+
@file.seek(@line_cursor)
|
64
92
|
end
|
65
93
|
|
94
|
+
# iterate over each line from the current cursor position, in reverse.
|
66
95
|
def each_line_reverse
|
67
96
|
chunk_size = 512
|
68
|
-
left_over = ""
|
69
97
|
cursor = @line_cursor
|
98
|
+
left_over = ""
|
70
99
|
loop do
|
71
|
-
|
72
|
-
|
73
|
-
chunk_size = chunk_size + cursor
|
100
|
+
if chunk_size > cursor
|
101
|
+
chunk_size = cursor
|
74
102
|
cursor = 0
|
103
|
+
else
|
104
|
+
cursor -= chunk_size
|
75
105
|
end
|
76
106
|
break if chunk_size == 0
|
77
|
-
#puts "seeking to #{cursor}, chunk size #{chunk_size}, left over #{left_over.length}"
|
78
107
|
@file.seek(cursor)
|
79
108
|
chunk = @file.read(chunk_size) + left_over
|
80
|
-
lines = chunk.split(
|
109
|
+
lines = chunk.split(NEWLINE)
|
81
110
|
while lines.length > 1
|
82
111
|
line = lines.pop || ""
|
83
|
-
@line_cursor
|
112
|
+
@line_cursor -= (line.length + NEWLINE.length)
|
84
113
|
yield(line)
|
85
114
|
end
|
86
115
|
left_over = lines[0] || ""
|
87
116
|
lines = []
|
88
117
|
end
|
118
|
+
@line_cursor -= (left_over.length + NEWLINE.length)
|
89
119
|
yield left_over unless left_over == ''
|
90
120
|
end
|
91
121
|
|
122
|
+
# After the cursor is moved by search_boundary.cursor_*, its position
|
123
|
+
# is probably not at the start of a line, but somewhere within a line.
|
124
|
+
# find_next_newline advances the cursor until we're at the start of the
|
125
|
+
# next line.
|
92
126
|
def find_next_newline
|
93
|
-
|
94
|
-
@line_cursor = @char_cursor
|
127
|
+
@line_cursor = @search_boundary.cursor
|
95
128
|
@file.seek(@line_cursor)
|
96
|
-
current_char = nil
|
97
|
-
|
98
|
-
@line_cursor = @line_cursor + 1
|
129
|
+
while (current_char = @file.getc) != NEWLINE_CHAR && !current_char.nil?
|
130
|
+
@line_cursor += 1
|
99
131
|
end
|
100
|
-
if
|
101
|
-
# eof
|
132
|
+
if @file.eof?
|
102
133
|
""
|
103
134
|
else
|
104
|
-
@line_cursor
|
135
|
+
@line_cursor += 1
|
105
136
|
@file.seek(@line_cursor)
|
106
137
|
@file.readline
|
107
138
|
end
|
108
139
|
end
|
109
140
|
|
110
|
-
# @param direction [Symbol] direction in file to move the cursor, :forward or :back
|
111
|
-
def move_char_cursor direction
|
112
|
-
if @char_cursor
|
113
|
-
if direction == :forward
|
114
|
-
distance = (@upper - @char_cursor) / 2
|
115
|
-
old_cursor = @char_cursor
|
116
|
-
@char_cursor = @char_cursor + distance
|
117
|
-
@lower = old_cursor
|
118
|
-
else
|
119
|
-
distance = (@char_cursor - @lower) / 2
|
120
|
-
old_cursor = @char_cursor
|
121
|
-
@char_cursor = @char_cursor - distance
|
122
|
-
@upper = old_cursor
|
123
|
-
end
|
124
|
-
else
|
125
|
-
@char_cursor = @size / 2
|
126
|
-
end
|
127
|
-
end
|
128
141
|
|
129
142
|
end
|
data/log_slice.gemspec
CHANGED
data/spec/helper.rb
CHANGED
data/spec/log_slice_spec.rb
CHANGED
@@ -4,7 +4,7 @@ require 'helper'
|
|
4
4
|
describe LogSlice do
|
5
5
|
|
6
6
|
it "finds the line" do
|
7
|
-
log_slice = LogSlice.new(
|
7
|
+
log_slice = LogSlice.new enumerable_to_file(1..100), :exact_match => true
|
8
8
|
file = log_slice.find do |line|
|
9
9
|
42 <=> line.to_i
|
10
10
|
end
|
@@ -12,7 +12,7 @@ describe LogSlice do
|
|
12
12
|
end
|
13
13
|
|
14
14
|
it "finds the first line when there are many matching lines" do
|
15
|
-
log_slice = LogSlice.new
|
15
|
+
log_slice = LogSlice.new string_to_file((["1", "2"] + ["3"]*20).join("\n")), :exact_match => true
|
16
16
|
file = log_slice.find do |line|
|
17
17
|
3 <=> line.to_i
|
18
18
|
end
|
@@ -22,7 +22,7 @@ describe LogSlice do
|
|
22
22
|
|
23
23
|
it "finds a matching line with log2(lines)+1 calls to comparison function" do
|
24
24
|
[100, 10_000, 100_000].each do |total_lines|
|
25
|
-
log_slice = LogSlice.new(
|
25
|
+
log_slice = LogSlice.new enumerable_to_file(1..total_lines), :exact_match => true
|
26
26
|
comparisons_count = 0
|
27
27
|
log_slice.find do |line|
|
28
28
|
comparisons_count = comparisons_count + 1
|
@@ -32,24 +32,69 @@ describe LogSlice do
|
|
32
32
|
end
|
33
33
|
end
|
34
34
|
|
35
|
-
it "nil when no matching line is found (
|
36
|
-
log_slice = LogSlice.new(
|
35
|
+
it "nil when no matching line is found (all values lower)" do
|
36
|
+
log_slice = LogSlice.new enumerable_to_file(1..100), :exact_match => true
|
37
37
|
file = log_slice.find do |line|
|
38
38
|
1
|
39
39
|
end
|
40
40
|
file.should be_nil
|
41
41
|
end
|
42
42
|
|
43
|
-
it "nil when no matching line is found (
|
44
|
-
log_slice = LogSlice.new(
|
43
|
+
it "nil when no matching line is found (all values higher)" do
|
44
|
+
log_slice = LogSlice.new enumerable_to_file(1..100), :exact_match => true
|
45
45
|
file = log_slice.find do |line|
|
46
46
|
-1
|
47
47
|
end
|
48
48
|
file.should be_nil
|
49
49
|
end
|
50
50
|
|
51
|
+
it "exact_match nil when no matching line is found (higher and lower values)" do
|
52
|
+
log_slice = LogSlice.new enumerable_to_file((1..100).to_a-[42]), :exact_match => true
|
53
|
+
file = log_slice.find do |line|
|
54
|
+
42 <=> line.to_i
|
55
|
+
end
|
56
|
+
file.should be_nil
|
57
|
+
end
|
58
|
+
|
59
|
+
it "non-exact_match find when no matching line is found (higher and lower values)" do
|
60
|
+
log_slice = LogSlice.new enumerable_to_file((1..100).to_a-[42])
|
61
|
+
file = log_slice.find do |line|
|
62
|
+
42 <=> line.to_i
|
63
|
+
end
|
64
|
+
file.readline.should == "43\n"
|
65
|
+
end
|
66
|
+
|
67
|
+
it "non-exact_match find when no matching line is found, many dups" do
|
68
|
+
[0,2,4,6].each do |n|
|
69
|
+
log_slice = LogSlice.new enumerable_to_file([1,1,1,3,3,3,5,5,5,7,7,7])
|
70
|
+
file = log_slice.find do |line|
|
71
|
+
n <=> line.to_i
|
72
|
+
end
|
73
|
+
3.times do
|
74
|
+
file.readline.strip.should == "#{n+1}"
|
75
|
+
end
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
79
|
+
it "non-exact_match find when no matching line is found, beyond" do
|
80
|
+
log_slice = LogSlice.new enumerable_to_file([1,1,1,3,3,3,5,5,5,7,7,7])
|
81
|
+
file = log_slice.find do |line|
|
82
|
+
8 <=> line.to_i
|
83
|
+
end
|
84
|
+
file.should be_nil
|
85
|
+
end
|
86
|
+
|
87
|
+
it "nil when lines are not sorted" do
|
88
|
+
unsorted = [1,99,4,96,7,70,15,67,24,45,30,40]
|
89
|
+
log_slice = LogSlice.new enumerable_to_file(unsorted), :exact_match => true
|
90
|
+
file = log_slice.find do |line|
|
91
|
+
42 <=> line.to_i
|
92
|
+
end
|
93
|
+
file.should be_nil
|
94
|
+
end
|
95
|
+
|
51
96
|
it "nil when acting on an empty file" do
|
52
|
-
log_slice = LogSlice.new(
|
97
|
+
log_slice = LogSlice.new string_to_file(""), :exact_match => true
|
53
98
|
file = log_slice.find do |line|
|
54
99
|
42 <=> line.to_i
|
55
100
|
end
|
@@ -57,8 +102,8 @@ describe LogSlice do
|
|
57
102
|
end
|
58
103
|
|
59
104
|
it "#each_line_reverse" do
|
60
|
-
log_slice = LogSlice.new(
|
61
|
-
log_slice.instance_eval { @line_cursor = @size }
|
105
|
+
log_slice = LogSlice.new enumerable_to_file(1..10000), :exact_match => true
|
106
|
+
log_slice.instance_eval { @line_cursor = @file.stat.size }
|
62
107
|
lines = []
|
63
108
|
file = log_slice.send(:each_line_reverse) do |line|
|
64
109
|
lines << line.strip.to_i
|
@@ -67,8 +112,8 @@ describe LogSlice do
|
|
67
112
|
end
|
68
113
|
|
69
114
|
it "#each_line_reverse when file is empty" do
|
70
|
-
log_slice = LogSlice.new(
|
71
|
-
log_slice.instance_eval { @line_cursor = @size }
|
115
|
+
log_slice = LogSlice.new string_to_file(""), :exact_match => true
|
116
|
+
log_slice.instance_eval { @line_cursor = @file.stat.size }
|
72
117
|
lines = []
|
73
118
|
file = log_slice.send(:each_line_reverse) do |line|
|
74
119
|
lines << line.strip.to_i
|
@@ -77,8 +122,8 @@ describe LogSlice do
|
|
77
122
|
end
|
78
123
|
|
79
124
|
it "#each_line_reverse when file has single newline char" do
|
80
|
-
log_slice = LogSlice.new(
|
81
|
-
log_slice.instance_eval { @line_cursor = @size }
|
125
|
+
log_slice = LogSlice.new string_to_file("\n"), :exact_match => true
|
126
|
+
log_slice.instance_eval { @line_cursor = @file.stat.size }
|
82
127
|
lines = []
|
83
128
|
file = log_slice.send(:each_line_reverse) do |line|
|
84
129
|
lines << line.strip.to_i
|
metadata
CHANGED
@@ -1,12 +1,12 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: log_slice
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 15
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
|
-
-
|
9
|
-
version: "0.
|
8
|
+
- 2
|
9
|
+
version: "0.2"
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Joel Plane
|
@@ -44,6 +44,7 @@ files:
|
|
44
44
|
- Gemfile
|
45
45
|
- README.md
|
46
46
|
- lib/log_slice.rb
|
47
|
+
- lib/log_slice/search_boundary.rb
|
47
48
|
- log_slice.gemspec
|
48
49
|
- spec/helper.rb
|
49
50
|
- spec/log_slice_spec.rb
|