log_slice 0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +2 -0
- data/Gemfile +2 -0
- data/README.md +51 -0
- data/lib/log_slice.rb +129 -0
- data/log_slice.gemspec +15 -0
- data/spec/helper.rb +27 -0
- data/spec/log_slice_spec.rb +90 -0
- metadata +84 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/README.md
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
LogSlice
|
2
|
+
========
|
3
|
+
|
4
|
+
Uses binary search to find a line quickly in a large log file. O(log2(n))
|
5
|
+
|
6
|
+
Can only search sorted data, which means it's probably only useful for searching by timestamp.
|
7
|
+
|
8
|
+
## Example
|
9
|
+
|
10
|
+
something-interesting.log:
|
11
|
+
|
12
|
+
[2012-08-29 18:41:12] (9640) something something something else 1
|
13
|
+
[2012-08-29 18:41:14] (9640) something something something else 2
|
14
|
+
[2012-08-29 18:41:14] (9640) something something something else 3
|
15
|
+
[2012-08-29 18:41:14] (9640) something something something else 4
|
16
|
+
[2012-08-29 18:42:18] (9640) something something something else 5
|
17
|
+
[2012-08-29 18:42:18] (9640) something something something else 6
|
18
|
+
[2012-08-29 18:42:18] (9640) something something something else 7
|
19
|
+
[2012-08-29 18:42:20] (9640) something something something else 8
|
20
|
+
[2012-08-29 18:42:20] (9640) something something something else 9
|
21
|
+
[2012-08-29 18:42:20] (9640) something something something else 10
|
22
|
+
|
23
|
+
extract everything that happened at or after 18:42:18:
|
24
|
+
|
25
|
+
find_date = DateTime.parse("2012-08-29 18:42:18")
|
26
|
+
file = LogSlice.new("something-interesting.log").find do |line|
|
27
|
+
date_string = line.match(/^\[([^\]]+)\]/)[1]
|
28
|
+
find_date <=> DateTime.parse(date_string)
|
29
|
+
end
|
30
|
+
|
31
|
+
# this will return an instance of File
|
32
|
+
# the position in the file is the first byte of the found line
|
33
|
+
|
34
|
+
file.readline
|
35
|
+
#=> "[2012-08-29 18:42:18] (9640) something something something else 5"
|
36
|
+
|
37
|
+
file.readline
|
38
|
+
#=> "[2012-08-29 18:42:18] (9640) something something something else 6"
|
39
|
+
|
40
|
+
LogSlice.new takes a File or file path, and LogSlice#find takes a block. When passed a line,
|
41
|
+
the block must return -1 if the value represented by the line is too high,
|
42
|
+
1 if it's too low, or 0 if it's just right.
|
43
|
+
|
44
|
+
## Limitations
|
45
|
+
|
46
|
+
* Can only search sorted data. At the moment, if the data isn't sorted, it won't detect it and it will loop forever.
|
47
|
+
* Can only search for a known value. For example, searching for 18:42:19 in the example above will yield nothing.
|
48
|
+
|
49
|
+
## Disclaimer
|
50
|
+
|
51
|
+
Use this at your own risk. Better yet, don't use it, it probably doesn't work.
|
data/lib/log_slice.rb
ADDED
@@ -0,0 +1,129 @@
|
|
1
|
+
# Binary search over the lines of a sorted, line-oriented file (typically a
# log file ordered by timestamp).
#
# The search bisects on byte offsets: each probe seeks to an offset, skips the
# remainder of the line containing that offset and examines the line that
# starts right after it. All offsets kept by this class are byte offsets.
class LogSlice

  # Number of bytes read per step while scanning backwards through the file.
  REVERSE_CHUNK_SIZE = 512

  # Byte value of "\n".
  NEWLINE_BYTE = 10

  # @param log_file [File, String] an open, seekable file, or a path that is
  #   opened read-only
  def initialize(log_file)
    @file = log_file.respond_to?(:seek) ? log_file : File.open(log_file, 'r')
    @size = @file.stat.size
    reset_search
  end

  # Finds the first line for which the block returns 0.
  # Depends on lines being sorted.
  #
  # The block receives a line and must return -1 if the value represented by
  # the line is too high, 1 if it is too low, or 0 if it matches
  # (i.e. `target <=> value_of(line)`).
  #
  # @yieldparam line [String] a candidate line
  # @yieldreturn [Integer] -1, 0 or 1
  # @return [File, nil] file after seeking to start of the first matching
  #   line, or nil when no line matches
  # @raise [ArgumentError] when no block is given or the block returns
  #   something other than -1, 0 or 1
  def find(&compare)
    raise ArgumentError, 'a comparison block is required' unless compare

    reset_search # allow the same instance to be searched more than once
    direction = :forward
    judged_line = nil      # start offset of the most recently compared line
    judged_direction = nil # direction that comparison told us to move in
    loop do
      probe = @char_cursor
      line = next_line(direction)
      # The probe did not move: the interval between @lower and @upper is
      # exhausted, so every reachable line has been considered.
      break if @char_cursor == probe

      if line.nil?
        # No line starts after the probe (we are inside the last line), so
        # anything still worth looking at lies earlier in the file.
        direction = :back
      elsif @line_cursor == judged_line
        # Two probes inside one long line read the same following line; reuse
        # the earlier verdict instead of comparing again or giving up.
        direction = judged_direction
      else
        case compare.call(line)
        when 0 # found
          walk_up_to_first_match(compare)
          return @file
        when -1
          direction = :back
        when 1
          direction = :forward
        else
          raise ArgumentError, 'comparison block must return -1, 0 or 1'
        end
        judged_line = @line_cursor
        judged_direction = direction
      end
    end
    first_line_match(compare)
  end

  private

  # Puts the bisection state back to "nothing examined yet".
  def reset_search
    @lower = 0
    @upper = @size
    @char_cursor = nil
    @line_cursor = nil
  end

  # @param direction [Symbol] direction in file to move, :forward or :back
  # @return [String, nil] the line starting after the new probe position, or
  #   nil when there is none
  def next_line(direction)
    move_char_cursor(direction)
    find_next_newline
  end

  # The bisection only ever reads lines that follow a newline, so the very
  # first line of the file is never probed. When the search never moved
  # forward (every examined line was too high), the first line is the one
  # remaining candidate and is checked here.
  #
  # @return [File, nil] file positioned at offset 0 if the first line matches
  def first_line_match(compare)
    return nil unless @lower.zero?

    @file.seek(0)
    line = @file.gets
    return nil if line.nil? || compare.call(line) != 0

    @line_cursor = 0
    @file.seek(0)
    @file
  end

  # once the line has been found, we must check the lines above it -
  # if a line above also matches, we should seek to it.
  # (this makes the search linear in the number of consecutive matching
  # lines above the one that was hit, instead of O(log2(n)))
  def walk_up_to_first_match(compare)
    move_to_previous_line(compare)
    @file.seek(@line_cursor)
  end

  # Moves @line_cursor up to the start of the earliest line in the run of
  # matching lines that ends at the current @line_cursor.
  def move_to_previous_line(compare)
    last_cursor_position = @line_cursor
    each_line_reverse do |line|
      if compare.call(line) != 0
        @line_cursor = last_cursor_position
        break
      end
      last_cursor_position = @line_cursor
    end
  end

  # Yields the lines that precede @line_cursor, last one first, without their
  # trailing newline. Before each yield @line_cursor is set to the byte offset
  # at which the yielded line starts. An empty first line is not yielded.
  #
  # @yieldparam line [String]
  def each_line_reverse
    buffer = String.new    # binary; holds file bytes starting at +offset+
    offset = @line_cursor  # file offset of the buffer's first byte
    limit = 0              # buffer[0, limit] has not been yielded yet
    loop do
      # The newline terminating the line about to be emitted is not part of
      # its content.
      content_end = limit
      content_end -= 1 if limit > 0 && buffer.getbyte(limit - 1) == NEWLINE_BYTE
      newline_at = content_end > 0 ? buffer.rindex("\n", content_end - 1) : nil
      if newline_at
        @line_cursor = offset + newline_at + 1
        limit = newline_at + 1
        yield decode(buffer.byteslice(limit, content_end - limit))
      elsif offset > 0
        # The pending line starts before the buffered region: read more.
        step = [REVERSE_CHUNK_SIZE, offset].min
        offset -= step
        @file.seek(offset)
        buffer = @file.read(step) + buffer.byteslice(0, limit)
        limit += step
      else
        # Start of file reached: what is left is the first line.
        unless content_end.zero?
          @line_cursor = 0
          yield decode(buffer.byteslice(0, content_end))
        end
        break
      end
    end
  end

  # Tags raw bytes read by #each_line_reverse with the encoding used for lines
  # read directly from the file, so the comparison block sees consistent strings.
  def decode(bytes)
    bytes.force_encoding(@file.external_encoding || Encoding.default_external)
  end

  # Skips the rest of the line containing @char_cursor, records the start of
  # the following line in @line_cursor and reads that line.
  #
  # @return [String, nil] the following line (including its newline, if it
  #   has one), or nil when the probe is inside the last line
  def find_next_newline
    @file.seek(@char_cursor)
    @file.gets # discard up to and including the next newline
    # IO#pos is a byte offset, so this stays correct for multibyte content.
    @line_cursor = @file.pos
    @file.gets
  end

  # Bisects the byte interval between @lower and @upper.
  #
  # @param direction [Symbol] direction in file to move the cursor, :forward or :back
  def move_char_cursor(direction)
    if @char_cursor.nil?
      @char_cursor = @size / 2
    elsif direction == :forward
      @lower = @char_cursor
      @char_cursor += (@upper - @char_cursor) / 2
    else
      @upper = @char_cursor
      @char_cursor -= (@char_cursor - @lower) / 2
    end
  end

end
|
data/log_slice.gemspec
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
# Gem specification for log_slice. File lists are taken from git, so the gem
# must be built from inside a git checkout.
Gem::Specification.new do |s|
  s.name = 'log_slice'
  s.version = '0.1'
  s.authors = ["Joel Plane"]
  s.email = ["joel.plane@gmail.com"]
  s.homepage = 'https://github.com/joelplane/log_slice'
  s.date = '2012-08-29'
  s.summary = "Find a line in a log file"
  s.description = "Find a line in a log file. Uses binary search to find the line quickly in a large log file. Can only search sorted data - which in the case of log file is the timestamp, and probably not much else."
  s.files = `git ls-files`.split("\n")
  # A single-element brace group such as `{spec}` is not expanded by the
  # shell, so it matched nothing and test_files was always empty.
  s.test_files = `git ls-files -- spec/*`.split("\n")
  s.executables = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
  s.require_paths = ["lib"]
  s.add_development_dependency 'rspec'
end
|
data/spec/helper.rb
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
require 'tempfile'
|
2
|
+
|
3
|
+
# Mixes temp-file fixtures and a small math helper into every example group.
RSpec.configure do |config|
  helpers = Module.new do

    # Writes the elements of +range+, one per line (no trailing newline),
    # to a temp file.
    # @param range [Range]
    # @return [Tempfile] rewound to the beginning
    def range_to_file(range)
      Tempfile.new("test-#{range}").tap do |tmp|
        tmp.write(range.to_a.join("\n"))
        tmp.flush
        tmp.seek(0)
      end
    end

    # Writes +string+ verbatim to a temp file.
    # @param string [String]
    # @return [Tempfile] rewound to the beginning
    def string_to_file(string)
      Tempfile.new("test-string").tap do |tmp|
        tmp.write(string)
        tmp.flush
        tmp.seek(0)
      end
    end

    # @param n [Numeric]
    # @return [Float] base-2 logarithm of +n+
    def log2(n)
      numerator = Math.log(n)
      denominator = Math.log(2)
      numerator / denominator
    end

  end

  config.include(helpers)
end
|
@@ -0,0 +1,90 @@
|
|
1
|
+
require File.expand_path('../lib/log_slice', File.dirname(__FILE__))
|
2
|
+
require 'helper'
|
3
|
+
|
4
|
+
# Behavioural specs for LogSlice#find and the private #each_line_reverse helper.
describe LogSlice do

  it "finds the line" do
    found = LogSlice.new(range_to_file(1..100)).find { |line| 42 <=> line.to_i }
    found.readline.should == "42\n"
  end

  it "finds the first line when there are many matching lines" do
    content = (["1", "2"] + ["3"] * 20).join("\n")
    found = LogSlice.new(string_to_file(content)).find { |line| 3 <=> line.to_i }
    found.pos.should == "1\n2\n".length
    found.readline.should == "3\n"
  end

  it "finds a matching line with log2(lines)+1 calls to comparison function" do
    [100, 10_000, 100_000].each do |total_lines|
      slice = LogSlice.new(range_to_file(1..total_lines))
      comparisons_count = 0
      slice.find do |line|
        comparisons_count += 1
        42 <=> line.to_i
      end
      comparisons_count.should <= log2(total_lines).ceil + 1
    end
  end

  it "nil when no matching line is found (1)" do
    # a block that always answers "too low" never matches
    found = LogSlice.new(range_to_file(1..100)).find { |_line| 1 }
    found.should be_nil
  end

  it "nil when no matching line is found (2)" do
    # a block that always answers "too high" never matches
    found = LogSlice.new(range_to_file(1..100)).find { |_line| -1 }
    found.should be_nil
  end

  it "nil when acting on an empty file" do
    found = LogSlice.new(string_to_file("")).find { |line| 42 <=> line.to_i }
    found.should be_nil
  end

  it "#each_line_reverse" do
    slice = LogSlice.new(range_to_file(1..10000))
    # start the backwards scan from the end of the file
    slice.instance_eval { @line_cursor = @size }
    collected = []
    slice.send(:each_line_reverse) { |line| collected << line.strip.to_i }
    collected.should == Array(1..10000).reverse
  end

  it "#each_line_reverse when file is empty" do
    slice = LogSlice.new(string_to_file(""))
    slice.instance_eval { @line_cursor = @size }
    collected = []
    slice.send(:each_line_reverse) { |line| collected << line.strip.to_i }
    collected.should == []
  end

  it "#each_line_reverse when file has single newline char" do
    slice = LogSlice.new(string_to_file("\n"))
    slice.instance_eval { @line_cursor = @size }
    collected = []
    slice.send(:each_line_reverse) { |line| collected << line.strip.to_i }
    collected.should == []
  end

end
|
metadata
ADDED
@@ -0,0 +1,84 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: log_slice
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 9
|
5
|
+
prerelease:
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 1
|
9
|
+
version: "0.1"
|
10
|
+
platform: ruby
|
11
|
+
authors:
|
12
|
+
- Joel Plane
|
13
|
+
autorequire:
|
14
|
+
bindir: bin
|
15
|
+
cert_chain: []
|
16
|
+
|
17
|
+
date: 2012-08-29 00:00:00 Z
|
18
|
+
dependencies:
|
19
|
+
- !ruby/object:Gem::Dependency
|
20
|
+
name: rspec
|
21
|
+
prerelease: false
|
22
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
23
|
+
none: false
|
24
|
+
requirements:
|
25
|
+
- - ">="
|
26
|
+
- !ruby/object:Gem::Version
|
27
|
+
hash: 3
|
28
|
+
segments:
|
29
|
+
- 0
|
30
|
+
version: "0"
|
31
|
+
type: :development
|
32
|
+
version_requirements: *id001
|
33
|
+
description: Find a line in a log file. Uses binary search to find the line quickly in a large log file. Can only search sorted data - which in the case of log file is the timestamp, and probably not much else.
|
34
|
+
email:
|
35
|
+
- joel.plane@gmail.com
|
36
|
+
executables: []
|
37
|
+
|
38
|
+
extensions: []
|
39
|
+
|
40
|
+
extra_rdoc_files: []
|
41
|
+
|
42
|
+
files:
|
43
|
+
- .gitignore
|
44
|
+
- Gemfile
|
45
|
+
- README.md
|
46
|
+
- lib/log_slice.rb
|
47
|
+
- log_slice.gemspec
|
48
|
+
- spec/helper.rb
|
49
|
+
- spec/log_slice_spec.rb
|
50
|
+
homepage: https://github.com/joelplane/log_slice
|
51
|
+
licenses: []
|
52
|
+
|
53
|
+
post_install_message:
|
54
|
+
rdoc_options: []
|
55
|
+
|
56
|
+
require_paths:
|
57
|
+
- lib
|
58
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
59
|
+
none: false
|
60
|
+
requirements:
|
61
|
+
- - ">="
|
62
|
+
- !ruby/object:Gem::Version
|
63
|
+
hash: 3
|
64
|
+
segments:
|
65
|
+
- 0
|
66
|
+
version: "0"
|
67
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
68
|
+
none: false
|
69
|
+
requirements:
|
70
|
+
- - ">="
|
71
|
+
- !ruby/object:Gem::Version
|
72
|
+
hash: 3
|
73
|
+
segments:
|
74
|
+
- 0
|
75
|
+
version: "0"
|
76
|
+
requirements: []
|
77
|
+
|
78
|
+
rubyforge_project:
|
79
|
+
rubygems_version: 1.7.2
|
80
|
+
signing_key:
|
81
|
+
specification_version: 3
|
82
|
+
summary: Find a line in a log file
|
83
|
+
test_files: []
|
84
|
+
|