log_slice 0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,2 @@
1
+ Gemfile.lock
2
+
data/Gemfile ADDED
@@ -0,0 +1,2 @@
# Bundler needs a gem source to resolve the dependencies below.
source 'https://rubygems.org'

# Dependencies are declared in log_slice.gemspec.
gemspec
data/README.md ADDED
@@ -0,0 +1,51 @@
1
+ LogSlice
2
+ ========
3
+
4
+ Uses binary search to find a line quickly in a large log file. O(log2(n))
5
+
6
+ Can only search sorted data, which means it's probably only useful for searching by timestamp.
7
+
8
+ ## Example
9
+
10
+ something-interesting.log:
11
+
12
+ [2012-08-29 18:41:12] (9640) something something something else 1
13
+ [2012-08-29 18:41:14] (9640) something something something else 2
14
+ [2012-08-29 18:41:14] (9640) something something something else 3
15
+ [2012-08-29 18:41:14] (9640) something something something else 4
16
+ [2012-08-29 18:42:18] (9640) something something something else 5
17
+ [2012-08-29 18:42:18] (9640) something something something else 6
18
+ [2012-08-29 18:42:18] (9640) something something something else 7
19
+ [2012-08-29 18:42:20] (9640) something something something else 8
20
+ [2012-08-29 18:42:20] (9640) something something something else 9
21
+ [2012-08-29 18:42:20] (9640) something something something else 10
22
+
23
+ extract everything that happened at or after 18:42:18:
24
+
25
+ find_date = DateTime.parse("2012-08-29 18:42:18")
26
+ file = LogSlice.new("something-interesting.log").find do |line|
27
+ date_string = line.match(/^\[([^\]]+)\]/)[1]
28
+ find_date <=> DateTime.parse(date_string)
29
+ end
30
+
31
+ # find returns an instance of File (or nil when no line matches)
32
+ # the position in the file is the first byte of the found line
33
+
34
+ file.readline
35
+ #=> "[2012-08-29 18:42:18] (9640) something something something else 5"
36
+
37
+ file.readline
38
+ #=> "[2012-08-29 18:42:18] (9640) something something something else 6"
39
+
40
+ LogSlice.new takes a File or file path; LogSlice#find takes a block. When passed a line,
41
+ the block must return -1 if the value represented by the line is too high,
42
+ 1 if it's too low, or 0 if it's just right.
43
+
44
+ ## Limitations
45
+
46
+ * Can only search sorted data. At the moment, if the data isn't sorted, it won't detect it and it will loop forever.
47
+ * Can only search for a known value. For example, searching for 18:42:19 in the example above will yield nothing.
48
+
49
+ ## Disclaimer
50
+
51
+ Use this at your own risk. Better yet, don't use it, it probably doesn't work.
data/lib/log_slice.rb ADDED
@@ -0,0 +1,129 @@
# Finds a line in a large, sorted, line-oriented file (typically a log ordered
# by timestamp) by bisecting byte offsets instead of reading every line.
class LogSlice

  # Bytes read per step when scanning backwards for the previous newline.
  REVERSE_CHUNK_SIZE = 512

  # @param log_file [File, String] an open, seekable IO, or the path of a file
  #   to open read-only. A file opened here is the object #find hands back;
  #   closing it is left to the caller.
  def initialize(log_file)
    @file = log_file.respond_to?(:seek) ? log_file : File.open(log_file, 'r')
    # IO-likes such as StringIO answer #size but not #stat.
    @size = @file.respond_to?(:size) ? @file.size : @file.stat.size
    reset_search
  end

  # Searches for a line the block reports as a match. Depends on the lines
  # being sorted by whatever the block compares.
  #
  # @yieldparam line [String] a candidate line, including its trailing newline
  #   when the file has one there
  # @yieldreturn [Numeric] negative if the line is too high (the target lies
  #   earlier in the file), positive if it is too low, zero if it matches
  # @return [File, nil] the file, positioned at the first byte of the first
  #   matching line, or nil when no line matches
  # @raise [ArgumentError] if no block is given, or the block returns
  #   something that is not a number while bisecting
  def find(&compare)
    raise ArgumentError, 'a comparison block is required' unless compare

    reset_search # every call bisects the whole file again
    return nil unless bisect_to_match(compare) || match_at_file_start(compare)

    walk_up_to_first_match(compare)
    @file
  end

  private

  # Puts the bisection bounds and cursors back to their initial state.
  def reset_search
    @lower = 0
    @upper = @size
    @char_cursor = nil
    @line_cursor = nil
  end

  # Bisects byte offsets, probing the line that starts after each offset.
  # On success @line_cursor is the offset of a matching line.
  #
  # @return [Boolean] whether a matching line was found
  def bisect_to_match(compare)
    direction = :forward
    probed = nil
    while move_char_cursor(direction)
      line = find_next_newline
      # Same line as the previous probe: the verdict cannot differ, so keep
      # moving the same way without asking the block again.
      next if @line_cursor == probed

      probed = @line_cursor
      # No line starts after the cursor, so the target can only be earlier.
      sign = line.nil? ? -1 : comparison_sign(compare.call(line))
      return true if sign.zero?

      direction = sign < 0 ? :back : :forward
    end
    false
  end

  # Probes only ever read the line *after* the cursor, so the line at offset 0
  # (and the line after an empty first line) is never reached by bisecting.
  # Checks those lines directly once the bisection is exhausted.
  #
  # @return [Boolean] whether a matching line was found
  def match_at_file_start(compare)
    # A non-zero lower bound means a later line already compared as too low.
    return false unless @lower.zero?

    offset = 0
    2.times do
      @file.seek(offset)
      line = @file.gets("\n")
      return false if line.nil?

      sign = comparison_sign(compare.call(line))
      if sign.zero?
        @line_cursor = offset
        return true
      end
      return false if sign < 0

      offset += line.bytesize
    end
    false
  end

  # Reduces a comparison result to -1, 0 or 1.
  #
  # @raise [ArgumentError] if the result is not an ordered number
  def comparison_sign(result)
    sign = result.is_a?(Numeric) ? (result <=> 0) : nil
    return sign unless sign.nil?

    raise ArgumentError,
          "comparison block must return a negative, zero or positive number, got #{result.inspect}"
  end

  # Once a matching line has been found, the lines above it must be checked -
  # if a line above also matches, we should seek to it.
  # (This makes the search on some files O(n/2) instead of O(log2(n)).)
  def walk_up_to_first_match(compare)
    move_to_previous_line(compare)
    @file.seek(@line_cursor)
  end

  # Moves @line_cursor up to the earliest line of the unbroken run of matching
  # lines that ends at the current @line_cursor.
  def move_to_previous_line(compare)
    first_match = @line_cursor
    each_line_reverse do |line|
      # Every line above a line start ends in a newline; put it back so the
      # block sees the same text it sees while bisecting.
      break unless compare.call(line + "\n") == 0

      first_match = @line_cursor
    end
    @line_cursor = first_match
  end

  # Yields the lines that end before @line_cursor, last to first, without
  # their trailing newline. @line_cursor is set to the start offset of each
  # line before it is yielded. An empty line at the very start of the file is
  # not yielded.
  def each_line_reverse
    encoding = @file.external_encoding || Encoding.default_external
    line_end = @line_cursor
    while line_end > 0
      line_start = start_of_line_ending_at(line_end)
      @file.seek(line_start)
      text = @file.read(line_end - line_start) || String.new
      # Drop only the "\n" terminator; chomp would also eat a "\r".
      text = text[0, text.bytesize - 1] if text.end_with?("\n")
      text.force_encoding(encoding) # sized reads come back as binary
      @line_cursor = line_start
      yield text unless line_start.zero? && text.empty?
      line_end = line_start
    end
  end

  # @param line_end [Integer] offset just past a line (and its newline, if any)
  # @return [Integer] offset of the first byte of that line
  def start_of_line_ending_at(line_end)
    scan_end = line_end - 1 # skip the newline that terminates the line itself
    while scan_end > 0
      scan_start = [scan_end - REVERSE_CHUNK_SIZE, 0].max
      @file.seek(scan_start)
      newline_at = @file.read(scan_end - scan_start).to_s.rindex("\n")
      return scan_start + newline_at + 1 if newline_at

      scan_end = scan_start
    end
    0
  end

  # Reads the first line that starts after @char_cursor and records its start
  # offset in @line_cursor (the file size when there is no such line).
  #
  # @return [String, nil] the line, or nil when no line starts after the cursor
  def find_next_newline
    @file.seek(@char_cursor)
    skipped = @file.gets("\n") # rest of the line the cursor landed in
    @line_cursor = @file.pos
    return nil unless skipped && skipped.end_with?("\n")

    @file.gets("\n") # nil at end of file rather than raising EOFError
  end

  # Moves @char_cursor halfway towards the bound in the given direction and
  # pulls the opposite bound up to the old cursor position.
  #
  # @param direction [Symbol] direction in file to move the cursor, :forward or :back
  # @return [Boolean] false once the cursor can no longer move
  def move_char_cursor(direction)
    if @char_cursor.nil?
      @char_cursor = @size / 2
      return true
    end

    previous = @char_cursor
    if direction == :forward
      @char_cursor = previous + (@upper - previous) / 2
      @lower = previous
    else
      @char_cursor = previous - (previous - @lower) / 2
      @upper = previous
    end
    @char_cursor != previous
  end

end
data/log_slice.gemspec ADDED
@@ -0,0 +1,15 @@
# Gem specification for log_slice. File lists come from `git ls-files`, so the
# gem must be built from inside the git checkout.
Gem::Specification.new do |s|
  s.name = 'log_slice'
  s.version = '0.1'
  s.authors = ["Joel Plane"]
  s.email = ["joel.plane@gmail.com"]
  s.homepage = 'https://github.com/joelplane/log_slice'
  s.date = '2012-08-29'
  s.summary = "Find a line in a log file"
  s.description = "Find a line in a log file. Uses binary search to find the line quickly in a large log file. Can only search sorted data - which in the case of log file is the timestamp, and probably not much else."
  s.files = `git ls-files`.split("\n")
  # A single-element brace ("{spec}/*") is not expanded by the shell and
  # matches no path, which left test_files empty; name the directory directly.
  s.test_files = `git ls-files -- spec/*`.split("\n")
  s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]
  s.add_development_dependency 'rspec'
end
data/spec/helper.rb ADDED
@@ -0,0 +1,27 @@
require 'tempfile'

# Helpers mixed into every example group.
RSpec.configure do |c|
  c.include(Module.new do

    # @param range [Range] numbers to write, one per line, with no trailing newline
    # @return [Tempfile] the file, rewound to the start
    def range_to_file range
      write_tempfile("test-#{range}", range.to_a.join("\n"))
    end

    # @param string [String] exact file content
    # @return [Tempfile] the file, rewound to the start
    def string_to_file string
      write_tempfile("test-string", string)
    end

    # @param n [Numeric]
    # @return [Float] base-2 logarithm of n
    def log2 n
      # Math.log(n) / Math.log(2) can land just above an integer and push a
      # following .ceil one too high.
      Math.log2(n)
    end

    private

    # Creates a tempfile holding +content+, flushed and rewound for reading.
    def write_tempfile basename, content
      file = Tempfile.new(basename)
      file.write(content)
      file.flush
      file.seek(0)
      file
    end

  end)
end
@@ -0,0 +1,90 @@
require File.expand_path('../lib/log_slice', File.dirname(__FILE__))
# Resolve the helper relative to this file, like the library above, rather
# than relying on the load path.
require File.expand_path('helper', File.dirname(__FILE__))

describe LogSlice do

  it "finds the line" do
    log_slice = LogSlice.new(range_to_file(1..100))
    file = log_slice.find do |line|
      42 <=> line.to_i
    end
    file.readline.should == "42\n"
  end

  it "finds the first line when there are many matching lines" do
    log_slice = LogSlice.new(string_to_file((["1", "2"] + ["3"]*20).join("\n")))
    file = log_slice.find do |line|
      3 <=> line.to_i
    end
    file.pos.should == "1\n2\n".length
    file.readline.should == "3\n"
  end

  it "finds a matching line with log2(lines)+1 calls to comparison function" do
    [100, 10_000, 100_000].each do |total_lines|
      log_slice = LogSlice.new(range_to_file(1..total_lines))
      comparisons_count = 0
      log_slice.find do |line|
        comparisons_count = comparisons_count + 1
        42 <=> line.to_i
      end
      comparisons_count.should <= log2(total_lines).ceil + 1
    end
  end

  it "nil when no matching line is found (1)" do
    log_slice = LogSlice.new(range_to_file(1..100))
    file = log_slice.find do |line|
      1
    end
    file.should be_nil
  end

  it "nil when no matching line is found (2)" do
    log_slice = LogSlice.new(range_to_file(1..100))
    file = log_slice.find do |line|
      -1
    end
    file.should be_nil
  end

  it "nil when acting on an empty file" do
    log_slice = LogSlice.new(string_to_file(""))
    file = log_slice.find do |line|
      42 <=> line.to_i
    end
    file.should be_nil
  end

  # The examples below reach into private state: they start the reverse walk
  # from the end of the file.

  it "#each_line_reverse" do
    log_slice = LogSlice.new(range_to_file(1..10000))
    log_slice.instance_eval { @line_cursor = @size }
    lines = []
    log_slice.send(:each_line_reverse) do |line|
      lines << line.strip.to_i
    end
    lines.should == Array(1..10000).reverse
  end

  it "#each_line_reverse when file is empty" do
    log_slice = LogSlice.new(string_to_file(""))
    log_slice.instance_eval { @line_cursor = @size }
    lines = []
    log_slice.send(:each_line_reverse) do |line|
      lines << line.strip.to_i
    end
    lines.should == []
  end

  it "#each_line_reverse when file has single newline char" do
    log_slice = LogSlice.new(string_to_file("\n"))
    log_slice.instance_eval { @line_cursor = @size }
    lines = []
    log_slice.send(:each_line_reverse) do |line|
      lines << line.strip.to_i
    end
    lines.should == []
  end

end
metadata ADDED
@@ -0,0 +1,84 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: log_slice
3
+ version: !ruby/object:Gem::Version
4
+ hash: 9
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 1
9
+ version: "0.1"
10
+ platform: ruby
11
+ authors:
12
+ - Joel Plane
13
+ autorequire:
14
+ bindir: bin
15
+ cert_chain: []
16
+
17
+ date: 2012-08-29 00:00:00 Z
18
+ dependencies:
19
+ - !ruby/object:Gem::Dependency
20
+ name: rspec
21
+ prerelease: false
22
+ requirement: &id001 !ruby/object:Gem::Requirement
23
+ none: false
24
+ requirements:
25
+ - - ">="
26
+ - !ruby/object:Gem::Version
27
+ hash: 3
28
+ segments:
29
+ - 0
30
+ version: "0"
31
+ type: :development
32
+ version_requirements: *id001
33
+ description: Find a line in a log file. Uses binary search to find the line quickly in a large log file. Can only search sorted data - which in the case of log file is the timestamp, and probably not much else.
34
+ email:
35
+ - joel.plane@gmail.com
36
+ executables: []
37
+
38
+ extensions: []
39
+
40
+ extra_rdoc_files: []
41
+
42
+ files:
43
+ - .gitignore
44
+ - Gemfile
45
+ - README.md
46
+ - lib/log_slice.rb
47
+ - log_slice.gemspec
48
+ - spec/helper.rb
49
+ - spec/log_slice_spec.rb
50
+ homepage: https://github.com/joelplane/log_slice
51
+ licenses: []
52
+
53
+ post_install_message:
54
+ rdoc_options: []
55
+
56
+ require_paths:
57
+ - lib
58
+ required_ruby_version: !ruby/object:Gem::Requirement
59
+ none: false
60
+ requirements:
61
+ - - ">="
62
+ - !ruby/object:Gem::Version
63
+ hash: 3
64
+ segments:
65
+ - 0
66
+ version: "0"
67
+ required_rubygems_version: !ruby/object:Gem::Requirement
68
+ none: false
69
+ requirements:
70
+ - - ">="
71
+ - !ruby/object:Gem::Version
72
+ hash: 3
73
+ segments:
74
+ - 0
75
+ version: "0"
76
+ requirements: []
77
+
78
+ rubyforge_project:
79
+ rubygems_version: 1.7.2
80
+ signing_key:
81
+ specification_version: 3
82
+ summary: Find a line in a log file
83
+ test_files: []
84
+