log_slice 0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +2 -0
- data/Gemfile +2 -0
- data/README.md +51 -0
- data/lib/log_slice.rb +129 -0
- data/log_slice.gemspec +15 -0
- data/spec/helper.rb +27 -0
- data/spec/log_slice_spec.rb +90 -0
- metadata +84 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/README.md
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
LogSlice
|
2
|
+
========
|
3
|
+
|
4
|
+
Uses binary search to find a line quickly in a large log file. O(log2(n))
|
5
|
+
|
6
|
+
Can only search sorted data, which means it's probably only useful for searching by timestamp.
|
7
|
+
|
8
|
+
## Example
|
9
|
+
|
10
|
+
something-interesting.log:
|
11
|
+
|
12
|
+
[2012-08-29 18:41:12] (9640) something something something else 1
|
13
|
+
[2012-08-29 18:41:14] (9640) something something something else 2
|
14
|
+
[2012-08-29 18:41:14] (9640) something something something else 3
|
15
|
+
[2012-08-29 18:41:14] (9640) something something something else 4
|
16
|
+
[2012-08-29 18:42:18] (9640) something something something else 5
|
17
|
+
[2012-08-29 18:42:18] (9640) something something something else 6
|
18
|
+
[2012-08-29 18:42:18] (9640) something something something else 7
|
19
|
+
[2012-08-29 18:42:20] (9640) something something something else 8
|
20
|
+
[2012-08-29 18:42:20] (9640) something something something else 9
|
21
|
+
[2012-08-29 18:42:20] (9640) something something something else 10
|
22
|
+
|
23
|
+
extract everything that happened at or after 18:42:18:
|
24
|
+
|
25
|
+
find_date = DateTime.parse("2012-08-29 18:42:18")
|
26
|
+
file = LogSlice.new("something-interesting.log").find do |line|
|
27
|
+
date_string = line.match(/^\[([^\]]+)\]/)[1]
|
28
|
+
find_date <=> DateTime.parse(date_string)
|
29
|
+
end
|
30
|
+
|
31
|
+
# this will yield an instance of File
|
32
|
+
# the position in the file is the first byte of the found line
|
33
|
+
|
34
|
+
file.readline
|
35
|
+
#=> "[2012-08-29 18:42:18] (9640) something something something else 5"
|
36
|
+
|
37
|
+
file.readline
|
38
|
+
#=> "[2012-08-29 18:42:18] (9640) something something something else 6"
|
39
|
+
|
40
|
+
LogSlice.new takes a File or file path; #find takes a block. When passed a line,
|
41
|
+
the block must return -1 if the value represented by the line is too high,
|
42
|
+
1 if it's too low, or 0 if it's just right.
|
43
|
+
|
44
|
+
## Limitations
|
45
|
+
|
46
|
+
* Can only search sorted data. At the moment, if the data isn't sorted, it won't detect it and it will loop forever.
|
47
|
+
* Can only search for a known value. For example, searching for 18:42:19 in the example above will yield nothing.
|
48
|
+
|
49
|
+
## Disclaimer
|
50
|
+
|
51
|
+
Use this at your own risk. Better yet, don't use it, it probably doesn't work.
|
data/lib/log_slice.rb
ADDED
@@ -0,0 +1,129 @@
|
|
1
|
+
# Finds a line in a large file whose lines are sorted (typically a log file
# ordered by timestamp) using a binary search over byte offsets.
#
# The caller supplies a comparison block to #find. The block receives one line
# and must return -1 if the line's value is too high (the wanted line is
# earlier in the file), 1 if it is too low (the wanted line is later), or 0 if
# the line is the one being looked for.
class LogSlice

  # @param log_file [File, String] an already open, seekable IO object (File,
  #   Tempfile, StringIO, ...) or the path of a file to open for reading
  def initialize log_file
    @file = log_file.respond_to?(:seek) ? log_file : File.open(log_file, 'r')
    # Prefer #size so seekable in-memory IO objects without #stat also work.
    @size = @file.respond_to?(:size) ? @file.size : @file.stat.size
    reset_search
  end

  # Searches for the first line for which the block returns 0.
  # Depends on lines being sorted.
  #
  # Lines handed to the block by the binary search keep their trailing
  # newline; lines handed over while walking back to the first match do not.
  #
  # @yieldparam line [String] a line of the file
  # @yieldreturn [Integer] -1 (line too high), 0 (match) or 1 (line too low)
  # @return [File, nil] file after seeking to start of the first matching
  #   line, or nil when no line matches
  # @raise [ArgumentError] if the block returns anything other than -1, 0 or 1
  def find &compare
    reset_search # every call starts from scratch, so an instance is reusable
    direction = :forward
    probed_line_cursor = nil
    scan_from = 0     # every line starting before this offset is too low
    scan_to = @size   # every line starting at or after this offset is too high
    loop do
      char_cursor = @char_cursor
      line = next_line direction
      # The byte range cannot be halved any further.
      break if char_cursor == @char_cursor
      # Same line as the previous probe: the verdict is already known, so keep
      # halving in the same direction without asking the block again.
      next if probed_line_cursor == @line_cursor
      probed_line_cursor = @line_cursor
      if line.empty?
        # The probe ran off the end of the file; the target can only be earlier.
        direction = :back
        next
      end
      case verdict(compare, line)
      when 0 # found
        walk_up_to_first_match compare
        return @file
      when -1
        direction = :back
        scan_to = [scan_to, @line_cursor].min
      when 1
        direction = :forward
        scan_from = [scan_from, @line_cursor + line.bytesize].max
      end
    end
    # Probing only ever sees the line *after* the probed byte, so a few lines
    # (notably the first line of the file) may never have been compared.
    scan_for_first_match scan_from, scan_to, compare
  end

  private

  # Forget the state of any previous search.
  def reset_search
    @lower = 0
    @upper = @size
    @char_cursor = nil
    @line_cursor = nil
  end

  # @return [Integer] the block's answer for line
  # @raise [ArgumentError] if the answer is not -1, 0 or 1
  def verdict compare, line
    result = compare.call(line)
    return result if [-1, 0, 1].include?(result)
    raise ArgumentError, "comparison block must return -1, 0 or 1, got #{result.inspect}"
  end

  # @param direction [Symbol] direction in file to move, :forward or :back
  # @return [String] line, or "" when there is no line after the new position
  def next_line direction
    move_char_cursor direction
    find_next_newline
  end

  # Reads the lines starting in the byte range from...to in order and stops at
  # the first match.
  # @return [File, nil] file after seeking to the matching line, or nil
  def scan_for_first_match from, to, compare
    @file.seek(from)
    while (position = @file.pos) < to && (line = @file.gets)
      case verdict(compare, line)
      when 0
        @line_cursor = position
        @file.seek(position)
        return @file
      when -1
        return nil # already past where the target would have to be
      end
    end
    nil
  end

  # once the line has been found, we must check the lines above it -
  # if a line above also matches, we should seek to it.
  # (this makes the search on some files O(n/2) instead of O(log2(n)))
  def walk_up_to_first_match compare
    move_to_previous_line compare
    @file.seek(@line_cursor)
  end

  # Moves @line_cursor up to the start of the earliest line of the run of
  # matching lines that ends at the current line.
  def move_to_previous_line compare
    last_cursor_position = @line_cursor
    each_line_reverse do |line|
      if compare.call(line) != 0
        @line_cursor = last_cursor_position
        break
      end
      last_cursor_position = @line_cursor
    end
  end

  # Yields the lines that end before @line_cursor, last one first, without
  # their trailing newline. Before each yield @line_cursor is moved to the
  # byte offset at which the yielded line starts.
  def each_line_reverse
    chunk_size = 512
    carry = ""                # head-less tail of the line cut by the previous chunk
    region_end = @line_cursor # exclusive end of the bytes not read yet
    first_chunk = true
    while region_end > 0
      region_start = [region_end - chunk_size, 0].max
      @file.seek(region_start)
      # read(length) returns a binary string, so #length counts bytes here
      text = @file.read(region_end - region_start).to_s
      if first_chunk && text[-1, 1] == "\n"
        # this newline only terminates the line directly above the cursor
        text = text[0, text.length - 1]
      end
      first_chunk = false
      combined = text + carry
      # -1 keeps empty pieces so blank lines do not skew the offsets
      pieces = combined.split("\n", -1)
      line_start = region_start + combined.length
      while pieces.length > 1
        line = pieces.pop
        line_start -= line.length
        @line_cursor = line_start
        yield line
        line_start -= 1 # step over the newline in front of this line
      end
      carry = pieces.first || ""
      region_end = region_start
    end
    return if carry.empty?
    # what is left over is the first line of the file
    @line_cursor = 0
    yield carry
  end

  # Skips the rest of the line @char_cursor points into, sets @line_cursor to
  # the byte offset of the line after it and reads that line.
  # @return [String] the line, or "" when the end of the file was reached
  def find_next_newline
    @file.seek(@char_cursor)
    skipped = @file.gets
    # pos is a byte offset, so multi-byte characters cannot skew the cursor
    @line_cursor = @file.pos
    return "" unless skipped && skipped[-1, 1] == "\n"
    @file.gets || ""
  end

  # Halves the distance between the cursor and the bound in the given
  # direction; the first call places the cursor in the middle of the file.
  # @param direction [Symbol] direction in file to move the cursor, :forward or :back
  def move_char_cursor direction
    if @char_cursor
      if direction == :forward
        distance = (@upper - @char_cursor) / 2
        old_cursor = @char_cursor
        @char_cursor = @char_cursor + distance
        @lower = old_cursor
      else
        distance = (@char_cursor - @lower) / 2
        old_cursor = @char_cursor
        @char_cursor = @char_cursor - distance
        @upper = old_cursor
      end
    else
      @char_cursor = @size / 2
    end
  end

end
|
data/log_slice.gemspec
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
# Gem specification for log_slice. The file lists are taken from git, so the
# gem must be built from inside a git checkout.
Gem::Specification.new do |s|
  s.name = 'log_slice'
  s.version = '0.1'
  s.authors = ["Joel Plane"]
  s.email = ["joel.plane@gmail.com"]
  s.homepage = 'https://github.com/joelplane/log_slice'
  s.date = '2012-08-29'
  s.summary = "Find a line in a log file"
  s.description = "Find a line in a log file. Uses binary search to find the line quickly in a large log file. Can only search sorted data - which in the case of log file is the timestamp, and probably not much else."
  s.files = `git ls-files`.split("\n")
  # A brace group with a single item (`{spec}`) is not expanded by the shell,
  # so the previous pattern `{spec}/*` matched nothing and test_files was empty.
  s.test_files = `git ls-files -- spec/*`.split("\n")
  s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]
  s.add_development_dependency 'rspec'
end
|
data/spec/helper.rb
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
require 'tempfile'
|
2
|
+
|
3
|
+
RSpec.configure do |config|
  spec_helpers = Module.new do

    # Writes the members of range, one per line (no trailing newline), to a
    # new temp file.
    # @param range [Range]
    # @return [Tempfile] rewound to the start
    def range_to_file range
      Tempfile.new("test-#{range}").tap do |tempfile|
        tempfile.write(range.to_a.join("\n"))
        tempfile.flush
        tempfile.seek(0)
      end
    end

    # Writes string verbatim to a new temp file.
    # @param string [String]
    # @return [Tempfile] rewound to the start
    def string_to_file string
      Tempfile.new("test-string").tap do |tempfile|
        tempfile.write(string)
        tempfile.flush
        tempfile.seek(0)
      end
    end

    # @param n [Numeric]
    # @return [Float] base 2 logarithm of n
    def log2 n
      numerator = Math.log(n)
      denominator = Math.log(2)
      numerator / denominator
    end

  end

  config.include(spec_helpers)
end
|
@@ -0,0 +1,90 @@
|
|
1
|
+
require File.expand_path('../lib/log_slice', File.dirname(__FILE__))
|
2
|
+
require 'helper'
|
3
|
+
|
4
|
+
describe LogSlice do

  # Runs the private #each_line_reverse from the end of the file and collects
  # every yielded line as an integer.
  def lines_read_backwards slicer
    slicer.instance_variable_set(:@line_cursor, slicer.instance_variable_get(:@size))
    collected = []
    slicer.send(:each_line_reverse) { |text| collected << text.strip.to_i }
    collected
  end

  it "finds the line" do
    slicer = LogSlice.new(range_to_file(1..100))
    found = slicer.find { |text| 42 <=> text.to_i }
    found.readline.should == "42\n"
  end

  it "finds the first line when there are many matching lines" do
    content = (["1", "2"] + ["3"] * 20).join("\n")
    slicer = LogSlice.new(string_to_file(content))
    found = slicer.find { |text| 3 <=> text.to_i }
    found.pos.should == "1\n2\n".length
    found.readline.should == "3\n"
  end

  it "finds a matching line with log2(lines)+1 calls to comparison function" do
    [100, 10_000, 100_000].each do |total_lines|
      slicer = LogSlice.new(range_to_file(1..total_lines))
      calls = 0
      slicer.find do |text|
        calls += 1
        42 <=> text.to_i
      end
      calls.should <= log2(total_lines).ceil + 1
    end
  end

  it "nil when no matching line is found (1)" do
    # the block always claims the wanted line is further on
    slicer = LogSlice.new(range_to_file(1..100))
    found = slicer.find { |_text| 1 }
    found.should be_nil
  end

  it "nil when no matching line is found (2)" do
    # the block always claims the wanted line is further back
    slicer = LogSlice.new(range_to_file(1..100))
    found = slicer.find { |_text| -1 }
    found.should be_nil
  end

  it "nil when acting on an empty file" do
    slicer = LogSlice.new(string_to_file(""))
    found = slicer.find { |text| 42 <=> text.to_i }
    found.should be_nil
  end

  it "#each_line_reverse" do
    slicer = LogSlice.new(range_to_file(1..10000))
    lines_read_backwards(slicer).should == Array(1..10000).reverse
  end

  it "#each_line_reverse when file is empty" do
    slicer = LogSlice.new(string_to_file(""))
    lines_read_backwards(slicer).should == []
  end

  it "#each_line_reverse when file has single newline char" do
    slicer = LogSlice.new(string_to_file("\n"))
    lines_read_backwards(slicer).should == []
  end

end
|
metadata
ADDED
@@ -0,0 +1,84 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: log_slice
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 9
|
5
|
+
prerelease:
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 1
|
9
|
+
version: "0.1"
|
10
|
+
platform: ruby
|
11
|
+
authors:
|
12
|
+
- Joel Plane
|
13
|
+
autorequire:
|
14
|
+
bindir: bin
|
15
|
+
cert_chain: []
|
16
|
+
|
17
|
+
date: 2012-08-29 00:00:00 Z
|
18
|
+
dependencies:
|
19
|
+
- !ruby/object:Gem::Dependency
|
20
|
+
name: rspec
|
21
|
+
prerelease: false
|
22
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
23
|
+
none: false
|
24
|
+
requirements:
|
25
|
+
- - ">="
|
26
|
+
- !ruby/object:Gem::Version
|
27
|
+
hash: 3
|
28
|
+
segments:
|
29
|
+
- 0
|
30
|
+
version: "0"
|
31
|
+
type: :development
|
32
|
+
version_requirements: *id001
|
33
|
+
description: Find a line in a log file. Uses binary search to find the line quickly in a large log file. Can only search sorted data - which in the case of log file is the timestamp, and probably not much else.
|
34
|
+
email:
|
35
|
+
- joel.plane@gmail.com
|
36
|
+
executables: []
|
37
|
+
|
38
|
+
extensions: []
|
39
|
+
|
40
|
+
extra_rdoc_files: []
|
41
|
+
|
42
|
+
files:
|
43
|
+
- .gitignore
|
44
|
+
- Gemfile
|
45
|
+
- README.md
|
46
|
+
- lib/log_slice.rb
|
47
|
+
- log_slice.gemspec
|
48
|
+
- spec/helper.rb
|
49
|
+
- spec/log_slice_spec.rb
|
50
|
+
homepage: https://github.com/joelplane/log_slice
|
51
|
+
licenses: []
|
52
|
+
|
53
|
+
post_install_message:
|
54
|
+
rdoc_options: []
|
55
|
+
|
56
|
+
require_paths:
|
57
|
+
- lib
|
58
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
59
|
+
none: false
|
60
|
+
requirements:
|
61
|
+
- - ">="
|
62
|
+
- !ruby/object:Gem::Version
|
63
|
+
hash: 3
|
64
|
+
segments:
|
65
|
+
- 0
|
66
|
+
version: "0"
|
67
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
68
|
+
none: false
|
69
|
+
requirements:
|
70
|
+
- - ">="
|
71
|
+
- !ruby/object:Gem::Version
|
72
|
+
hash: 3
|
73
|
+
segments:
|
74
|
+
- 0
|
75
|
+
version: "0"
|
76
|
+
requirements: []
|
77
|
+
|
78
|
+
rubyforge_project:
|
79
|
+
rubygems_version: 1.7.2
|
80
|
+
signing_key:
|
81
|
+
specification_version: 3
|
82
|
+
summary: Find a line in a log file
|
83
|
+
test_files: []
|
84
|
+
|