log_slice 0.1 → 0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +40 -27
- data/lib/log_slice/search_boundary.rb +34 -0
- data/lib/log_slice.rb +83 -70
- data/log_slice.gemspec +1 -1
- data/spec/helper.rb +1 -1
- data/spec/log_slice_spec.rb +59 -14
- metadata +4 -3
data/README.md
CHANGED
@@ -5,46 +5,59 @@ Uses binary search to find a line quickly in a large log file. O(log2(n))
|
|
5
5
|
|
6
6
|
Can only search sorted data, which means it's probably only useful for searching by timestamp.
|
7
7
|
|
8
|
+
## Installation
|
9
|
+
|
10
|
+
gem install log_slice
|
11
|
+
|
8
12
|
## Example
|
9
13
|
|
10
14
|
something-interesting.log:
|
11
15
|
|
12
|
-
[2012-08-29 18:41:12] (9640) something something something else 1
|
13
|
-
[2012-08-29 18:41:14] (9640) something something something else 2
|
14
|
-
[2012-08-29 18:41:14] (9640) something something something else 3
|
15
|
-
[2012-08-29 18:41:14] (9640) something something something else 4
|
16
|
-
[2012-08-29 18:42:18] (9640) something something something else 5
|
17
|
-
[2012-08-29 18:42:18] (9640) something something something else 6
|
18
|
-
[2012-08-29 18:42:18] (9640) something something something else 7
|
19
|
-
[2012-08-29 18:42:20] (9640) something something something else 8
|
20
|
-
[2012-08-29 18:42:20] (9640) something something something else 9
|
21
|
-
[2012-08-29 18:42:20] (9640) something something something else 10
|
16
|
+
[2012-08-29 18:41:12] (9640) 1 something something something else 1
|
17
|
+
[2012-08-29 18:41:14] (9640) 2 something something something else 2
|
18
|
+
[2012-08-29 18:41:14] (9640) 3 something something something else 3
|
19
|
+
[2012-08-29 18:41:14] (9640) 4 something something something else 4
|
20
|
+
[2012-08-29 18:42:18] (9640) 5 something something something else 5
|
21
|
+
[2012-08-29 18:42:18] (9640) 6 something something something else 6
|
22
|
+
[2012-08-29 18:42:18] (9640) 7 something something something else 7
|
23
|
+
[2012-08-29 18:42:20] (9640) 8 something something something else 8
|
24
|
+
[2012-08-29 18:42:20] (9640) 9 something something something else 9
|
25
|
+
[2012-08-29 18:42:20] (9640) 10 something something something else 10
|
26
|
+
|
27
|
+
extract everything that happened at or after 18:42:00:
|
28
|
+
```ruby
|
29
|
+
find_date = DateTime.parse("2012-08-29 18:42:00")
|
30
|
+
file = LogSlice.new("something-interesting.log").find do |line|
|
31
|
+
date_string = line.match(/^\[([^\]]+)\]/)[1]
|
32
|
+
find_date <=> DateTime.parse(date_string)
|
33
|
+
end
|
22
34
|
|
23
|
-
|
35
|
+
# this will yield an instance of File
|
36
|
+
# the position in the file is the first byte of the found line
|
24
37
|
|
25
|
-
|
26
|
-
|
27
|
-
date_string = line.match(/^\[([^\]]+)\]/)[1]
|
28
|
-
find_date <=> DateTime.parse(date_string)
|
29
|
-
end
|
38
|
+
file.readline
|
39
|
+
#=> "[2012-08-29 18:42:18] (9640) 5 something something something else 5"
|
30
40
|
|
31
|
-
|
32
|
-
|
41
|
+
# Once you found the line you were after,
|
42
|
+
# you can continue to read subsequent lines:
|
33
43
|
|
34
|
-
|
35
|
-
|
44
|
+
file.readline
|
45
|
+
#=> "[2012-08-29 18:42:18] (9640) 6 something something something else 6"
|
46
|
+
```
|
36
47
|
|
37
|
-
|
38
|
-
|
48
|
+
LogSlice.new takes a File or file path. LogSlice#find takes a comparison function, passed as a block.
|
49
|
+
When passed a line, the block must return -1 if the value represented by the line
|
50
|
+
is too high, 1 if it's too low, or 0 if it's just right.
|
39
51
|
|
40
|
-
|
41
|
-
|
42
|
-
|
52
|
+
```ruby
|
53
|
+
LogSlice.new(file_or_file_path).find(&comparison_function) #=> File or nil
|
54
|
+
```
|
43
55
|
|
44
56
|
## Limitations
|
45
57
|
|
46
|
-
* Can only search sorted data.
|
47
|
-
|
58
|
+
* Can only search sorted data. If the data isn't sorted, it will most likely not find anything (i.e., it returns nil).
|
59
|
+
In very rare cases it may find the value anyway by chance, so it's not guaranteed that unsorted
|
60
|
+
input will always yield nil.
|
48
61
|
|
49
62
|
## Disclaimer
|
50
63
|
|
@@ -0,0 +1,34 @@
|
|
1
|
+
class LogSlice
|
2
|
+
class SearchBoundary
|
3
|
+
|
4
|
+
attr_reader :cursor
|
5
|
+
|
6
|
+
def initialize file_size
|
7
|
+
@file_size = file_size
|
8
|
+
end
|
9
|
+
|
10
|
+
# reset the search boundary to cover the entire file
|
11
|
+
def reset
|
12
|
+
@lower = 0
|
13
|
+
@upper = @file_size
|
14
|
+
@cursor = 0
|
15
|
+
cursor_forward
|
16
|
+
self
|
17
|
+
end
|
18
|
+
|
19
|
+
# Move cursor forward.
|
20
|
+
# The cursor is moved halfway between its start location and the upper boundary.
|
21
|
+
def cursor_forward
|
22
|
+
@lower = @cursor
|
23
|
+
@cursor = @cursor + (@upper - @cursor) / 2
|
24
|
+
end
|
25
|
+
|
26
|
+
# Move cursor backward.
|
27
|
+
# The cursor is moved halfway between its start location and the lower boundary.
|
28
|
+
def cursor_back
|
29
|
+
@upper = @cursor
|
30
|
+
@cursor = @cursor - (@cursor - @lower) / 2
|
31
|
+
end
|
32
|
+
|
33
|
+
end
|
34
|
+
end
|
data/lib/log_slice.rb
CHANGED
@@ -1,129 +1,142 @@
|
|
1
|
+
require File.expand_path("log_slice/search_boundary", File.dirname(__FILE__))
|
2
|
+
|
1
3
|
class LogSlice
|
2
4
|
|
5
|
+
NEWLINE = "\n"
|
6
|
+
NEWLINE_CHAR = "\n"[0]
|
7
|
+
|
3
8
|
# @param log_file [File, String]
|
4
|
-
|
9
|
+
# @param options [Hash] :exact_match default false
|
10
|
+
def initialize log_file, options={}
|
5
11
|
@file = log_file.respond_to?(:seek) ? log_file : File.open(log_file, 'r')
|
6
|
-
@
|
7
|
-
@
|
8
|
-
@upper = @size
|
9
|
-
@char_cursor = nil
|
12
|
+
@exact_match = options[:exact_match] || false
|
13
|
+
@search_boundary = SearchBoundary.new(@file.stat.size)
|
10
14
|
@line_cursor = nil
|
11
15
|
end
|
12
16
|
|
13
|
-
#
|
14
|
-
#
|
17
|
+
# Find line in the file using the comparison function.
|
18
|
+
# Depends on lines being sorted.
|
19
|
+
# The comparison function will be passed lines from the file. It must
|
20
|
+
# return -1 if the line is later than the one it's looking for, 1 if
|
21
|
+
# the line is earlier than the one it's looking for, and 0 if it is
|
22
|
+
# the line it's looking for.
|
23
|
+
# @param compare [Proc] comparison function
|
24
|
+
# @return [File, nil] file after seeking to start of line or nil if line not found
|
15
25
|
def find &compare
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
return @file
|
28
|
-
when -1
|
29
|
-
direction = :back
|
30
|
-
when 1
|
31
|
-
direction = :forward
|
32
|
-
else
|
33
|
-
raise ArgumentError
|
26
|
+
reset_progress_check
|
27
|
+
@search_boundary.reset
|
28
|
+
line = find_next_newline
|
29
|
+
while making_progress?
|
30
|
+
comp_value = compare.call(line)
|
31
|
+
if comp_value == 0 # found matching line
|
32
|
+
backtrack_to_first_line_match compare
|
33
|
+
return @file
|
34
|
+
else
|
35
|
+
@search_boundary.send(comp_value < 0 ? :cursor_back : :cursor_forward)
|
36
|
+
line = find_next_newline
|
34
37
|
end
|
35
38
|
end
|
39
|
+
if @exact_match
|
40
|
+
nil
|
41
|
+
else
|
42
|
+
backtrack_to_gap compare
|
43
|
+
return @file.eof? ? nil : @file
|
44
|
+
end
|
36
45
|
end
|
37
46
|
|
38
47
|
private
|
39
48
|
|
40
|
-
#
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
49
|
+
# whether the cursor has moved since previous call
|
50
|
+
def making_progress?
|
51
|
+
return false if @previous_cursor_position == @line_cursor
|
52
|
+
@previous_cursor_position = @line_cursor
|
53
|
+
true
|
45
54
|
end
|
46
55
|
|
56
|
+
def reset_progress_check
|
57
|
+
@previous_cursor_position = nil
|
58
|
+
end
|
59
|
+
|
60
|
+
|
47
61
|
# once the line has been found, we must check the lines above it -
|
48
62
|
# if a line above also matches, we should seek to it.
|
49
63
|
# (this makes the search on some files O(n/2) instead of O(log2(n)))
|
50
|
-
|
51
|
-
|
64
|
+
# @param compare [Proc] comparison function
|
65
|
+
def backtrack_to_first_line_match compare
|
66
|
+
previous_cursor_position = @line_cursor
|
67
|
+
each_line_reverse do |line|
|
68
|
+
if compare.call(line) != 0
|
69
|
+
# we've found a non-matching line,
|
70
|
+
# so we set @line_cursor back to the previous matching line
|
71
|
+
@line_cursor = previous_cursor_position
|
72
|
+
break
|
73
|
+
end
|
74
|
+
previous_cursor_position = @line_cursor
|
75
|
+
end
|
52
76
|
@file.seek(@line_cursor)
|
53
77
|
end
|
54
78
|
|
55
|
-
|
56
|
-
|
79
|
+
# if no match was found, we're sitting at too-high.
|
80
|
+
# backtrack up to the first too-high
|
81
|
+
def backtrack_to_gap compare
|
82
|
+
@line_cursor = @file.pos
|
83
|
+
previous_cursor_position = @line_cursor
|
57
84
|
each_line_reverse do |line|
|
58
|
-
if compare.call(line)
|
59
|
-
@line_cursor =
|
85
|
+
if compare.call(line) == 1
|
86
|
+
@line_cursor = previous_cursor_position
|
60
87
|
break
|
61
88
|
end
|
62
|
-
|
89
|
+
previous_cursor_position = @line_cursor
|
63
90
|
end
|
91
|
+
@file.seek(@line_cursor)
|
64
92
|
end
|
65
93
|
|
94
|
+
# iterate over each line from the current cursor position, in reverse.
|
66
95
|
def each_line_reverse
|
67
96
|
chunk_size = 512
|
68
|
-
left_over = ""
|
69
97
|
cursor = @line_cursor
|
98
|
+
left_over = ""
|
70
99
|
loop do
|
71
|
-
|
72
|
-
|
73
|
-
chunk_size = chunk_size + cursor
|
100
|
+
if chunk_size > cursor
|
101
|
+
chunk_size = cursor
|
74
102
|
cursor = 0
|
103
|
+
else
|
104
|
+
cursor -= chunk_size
|
75
105
|
end
|
76
106
|
break if chunk_size == 0
|
77
|
-
#puts "seeking to #{cursor}, chunk size #{chunk_size}, left over #{left_over.length}"
|
78
107
|
@file.seek(cursor)
|
79
108
|
chunk = @file.read(chunk_size) + left_over
|
80
|
-
lines = chunk.split(
|
109
|
+
lines = chunk.split(NEWLINE)
|
81
110
|
while lines.length > 1
|
82
111
|
line = lines.pop || ""
|
83
|
-
@line_cursor
|
112
|
+
@line_cursor -= (line.length + NEWLINE.length)
|
84
113
|
yield(line)
|
85
114
|
end
|
86
115
|
left_over = lines[0] || ""
|
87
116
|
lines = []
|
88
117
|
end
|
118
|
+
@line_cursor -= (left_over.length + NEWLINE.length)
|
89
119
|
yield left_over unless left_over == ''
|
90
120
|
end
|
91
121
|
|
122
|
+
# After the cursor is moved by search_boundary.cursor_*, its position
|
123
|
+
# is probably not at the start of a line, but somewhere within a line.
|
124
|
+
# find_next_newline advances the cursor until we're at the start of the
|
125
|
+
# next line.
|
92
126
|
def find_next_newline
|
93
|
-
|
94
|
-
@line_cursor = @char_cursor
|
127
|
+
@line_cursor = @search_boundary.cursor
|
95
128
|
@file.seek(@line_cursor)
|
96
|
-
current_char = nil
|
97
|
-
|
98
|
-
@line_cursor = @line_cursor + 1
|
129
|
+
while (current_char = @file.getc) != NEWLINE_CHAR && !current_char.nil?
|
130
|
+
@line_cursor += 1
|
99
131
|
end
|
100
|
-
if
|
101
|
-
# eof
|
132
|
+
if @file.eof?
|
102
133
|
""
|
103
134
|
else
|
104
|
-
@line_cursor
|
135
|
+
@line_cursor += 1
|
105
136
|
@file.seek(@line_cursor)
|
106
137
|
@file.readline
|
107
138
|
end
|
108
139
|
end
|
109
140
|
|
110
|
-
# @param direction [Symbol] direction in file to move the cursor, :forward or :back
|
111
|
-
def move_char_cursor direction
|
112
|
-
if @char_cursor
|
113
|
-
if direction == :forward
|
114
|
-
distance = (@upper - @char_cursor) / 2
|
115
|
-
old_cursor = @char_cursor
|
116
|
-
@char_cursor = @char_cursor + distance
|
117
|
-
@lower = old_cursor
|
118
|
-
else
|
119
|
-
distance = (@char_cursor - @lower) / 2
|
120
|
-
old_cursor = @char_cursor
|
121
|
-
@char_cursor = @char_cursor - distance
|
122
|
-
@upper = old_cursor
|
123
|
-
end
|
124
|
-
else
|
125
|
-
@char_cursor = @size / 2
|
126
|
-
end
|
127
|
-
end
|
128
141
|
|
129
142
|
end
|
data/log_slice.gemspec
CHANGED
data/spec/helper.rb
CHANGED
data/spec/log_slice_spec.rb
CHANGED
@@ -4,7 +4,7 @@ require 'helper'
|
|
4
4
|
describe LogSlice do
|
5
5
|
|
6
6
|
it "finds the line" do
|
7
|
-
log_slice = LogSlice.new(
|
7
|
+
log_slice = LogSlice.new enumerable_to_file(1..100), :exact_match => true
|
8
8
|
file = log_slice.find do |line|
|
9
9
|
42 <=> line.to_i
|
10
10
|
end
|
@@ -12,7 +12,7 @@ describe LogSlice do
|
|
12
12
|
end
|
13
13
|
|
14
14
|
it "finds the first line when there are many matching lines" do
|
15
|
-
log_slice = LogSlice.new
|
15
|
+
log_slice = LogSlice.new string_to_file((["1", "2"] + ["3"]*20).join("\n")), :exact_match => true
|
16
16
|
file = log_slice.find do |line|
|
17
17
|
3 <=> line.to_i
|
18
18
|
end
|
@@ -22,7 +22,7 @@ describe LogSlice do
|
|
22
22
|
|
23
23
|
it "finds a matching line with log2(lines)+1 calls to comparison function" do
|
24
24
|
[100, 10_000, 100_000].each do |total_lines|
|
25
|
-
log_slice = LogSlice.new(
|
25
|
+
log_slice = LogSlice.new enumerable_to_file(1..total_lines), :exact_match => true
|
26
26
|
comparisons_count = 0
|
27
27
|
log_slice.find do |line|
|
28
28
|
comparisons_count = comparisons_count + 1
|
@@ -32,24 +32,69 @@ describe LogSlice do
|
|
32
32
|
end
|
33
33
|
end
|
34
34
|
|
35
|
-
it "nil when no matching line is found (
|
36
|
-
log_slice = LogSlice.new(
|
35
|
+
it "nil when no matching line is found (all values lower)" do
|
36
|
+
log_slice = LogSlice.new enumerable_to_file(1..100), :exact_match => true
|
37
37
|
file = log_slice.find do |line|
|
38
38
|
1
|
39
39
|
end
|
40
40
|
file.should be_nil
|
41
41
|
end
|
42
42
|
|
43
|
-
it "nil when no matching line is found (
|
44
|
-
log_slice = LogSlice.new(
|
43
|
+
it "nil when no matching line is found (all values higher)" do
|
44
|
+
log_slice = LogSlice.new enumerable_to_file(1..100), :exact_match => true
|
45
45
|
file = log_slice.find do |line|
|
46
46
|
-1
|
47
47
|
end
|
48
48
|
file.should be_nil
|
49
49
|
end
|
50
50
|
|
51
|
+
it "exact_match nil when no matching line is found (higher and lower values)" do
|
52
|
+
log_slice = LogSlice.new enumerable_to_file((1..100).to_a-[42]), :exact_match => true
|
53
|
+
file = log_slice.find do |line|
|
54
|
+
42 <=> line.to_i
|
55
|
+
end
|
56
|
+
file.should be_nil
|
57
|
+
end
|
58
|
+
|
59
|
+
it "non-exact_match find when no matching line is found (higher and lower values)" do
|
60
|
+
log_slice = LogSlice.new enumerable_to_file((1..100).to_a-[42])
|
61
|
+
file = log_slice.find do |line|
|
62
|
+
42 <=> line.to_i
|
63
|
+
end
|
64
|
+
file.readline.should == "43\n"
|
65
|
+
end
|
66
|
+
|
67
|
+
it "non-exact_match find when no matching line is found, many dups" do
|
68
|
+
[0,2,4,6].each do |n|
|
69
|
+
log_slice = LogSlice.new enumerable_to_file([1,1,1,3,3,3,5,5,5,7,7,7])
|
70
|
+
file = log_slice.find do |line|
|
71
|
+
n <=> line.to_i
|
72
|
+
end
|
73
|
+
3.times do
|
74
|
+
file.readline.strip.should == "#{n+1}"
|
75
|
+
end
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
79
|
+
it "non-exact_match find when no matching line is found, beyond" do
|
80
|
+
log_slice = LogSlice.new enumerable_to_file([1,1,1,3,3,3,5,5,5,7,7,7])
|
81
|
+
file = log_slice.find do |line|
|
82
|
+
8 <=> line.to_i
|
83
|
+
end
|
84
|
+
file.should be_nil
|
85
|
+
end
|
86
|
+
|
87
|
+
it "nil when lines are not sorted" do
|
88
|
+
unsorted = [1,99,4,96,7,70,15,67,24,45,30,40]
|
89
|
+
log_slice = LogSlice.new enumerable_to_file(unsorted), :exact_match => true
|
90
|
+
file = log_slice.find do |line|
|
91
|
+
42 <=> line.to_i
|
92
|
+
end
|
93
|
+
file.should be_nil
|
94
|
+
end
|
95
|
+
|
51
96
|
it "nil when acting on an empty file" do
|
52
|
-
log_slice = LogSlice.new(
|
97
|
+
log_slice = LogSlice.new string_to_file(""), :exact_match => true
|
53
98
|
file = log_slice.find do |line|
|
54
99
|
42 <=> line.to_i
|
55
100
|
end
|
@@ -57,8 +102,8 @@ describe LogSlice do
|
|
57
102
|
end
|
58
103
|
|
59
104
|
it "#each_line_reverse" do
|
60
|
-
log_slice = LogSlice.new(
|
61
|
-
log_slice.instance_eval { @line_cursor = @size }
|
105
|
+
log_slice = LogSlice.new enumerable_to_file(1..10000), :exact_match => true
|
106
|
+
log_slice.instance_eval { @line_cursor = @file.stat.size }
|
62
107
|
lines = []
|
63
108
|
file = log_slice.send(:each_line_reverse) do |line|
|
64
109
|
lines << line.strip.to_i
|
@@ -67,8 +112,8 @@ describe LogSlice do
|
|
67
112
|
end
|
68
113
|
|
69
114
|
it "#each_line_reverse when file is empty" do
|
70
|
-
log_slice = LogSlice.new(
|
71
|
-
log_slice.instance_eval { @line_cursor = @size }
|
115
|
+
log_slice = LogSlice.new string_to_file(""), :exact_match => true
|
116
|
+
log_slice.instance_eval { @line_cursor = @file.stat.size }
|
72
117
|
lines = []
|
73
118
|
file = log_slice.send(:each_line_reverse) do |line|
|
74
119
|
lines << line.strip.to_i
|
@@ -77,8 +122,8 @@ describe LogSlice do
|
|
77
122
|
end
|
78
123
|
|
79
124
|
it "#each_line_reverse when file has single newline char" do
|
80
|
-
log_slice = LogSlice.new(
|
81
|
-
log_slice.instance_eval { @line_cursor = @size }
|
125
|
+
log_slice = LogSlice.new string_to_file("\n"), :exact_match => true
|
126
|
+
log_slice.instance_eval { @line_cursor = @file.stat.size }
|
82
127
|
lines = []
|
83
128
|
file = log_slice.send(:each_line_reverse) do |line|
|
84
129
|
lines << line.strip.to_i
|
metadata
CHANGED
@@ -1,12 +1,12 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: log_slice
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 15
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
|
-
-
|
9
|
-
version: "0.
|
8
|
+
- 2
|
9
|
+
version: "0.2"
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Joel Plane
|
@@ -44,6 +44,7 @@ files:
|
|
44
44
|
- Gemfile
|
45
45
|
- README.md
|
46
46
|
- lib/log_slice.rb
|
47
|
+
- lib/log_slice/search_boundary.rb
|
47
48
|
- log_slice.gemspec
|
48
49
|
- spec/helper.rb
|
49
50
|
- spec/log_slice_spec.rb
|