log_slice 0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,2 @@
1
+ Gemfile.lock
2
+
data/Gemfile ADDED
@@ -0,0 +1,2 @@
# Bundler needs at least one gem source to resolve the development
# dependencies declared in the gemspec.
source 'https://rubygems.org'

# Pull runtime and development dependencies from log_slice.gemspec.
gemspec
data/README.md ADDED
@@ -0,0 +1,51 @@
1
+ LogSlice
2
+ ========
3
+
4
+ Uses binary search to find a line quickly in a large log file. O(log2(n))
5
+
6
+ Can only search sorted data, which means it's probably only useful for searching by timestamp.
7
+
8
+ ## Example
9
+
10
+ something-interesting.log:
11
+
12
+ [2012-08-29 18:41:12] (9640) something something something else 1
13
+ [2012-08-29 18:41:14] (9640) something something something else 2
14
+ [2012-08-29 18:41:14] (9640) something something something else 3
15
+ [2012-08-29 18:41:14] (9640) something something something else 4
16
+ [2012-08-29 18:42:18] (9640) something something something else 5
17
+ [2012-08-29 18:42:18] (9640) something something something else 6
18
+ [2012-08-29 18:42:18] (9640) something something something else 7
19
+ [2012-08-29 18:42:20] (9640) something something something else 8
20
+ [2012-08-29 18:42:20] (9640) something something something else 9
21
+ [2012-08-29 18:42:20] (9640) something something something else 10
22
+
23
+ extract everything that happened at or after 18:42:18:
24
+
25
+ find_date = DateTime.parse("2012-08-29 18:42:18")
26
+ file = LogSlice.new("something-interesting.log").find do |line|
27
+ date_string = line.match(/^\[([^\]]+)\]/)[1]
28
+ find_date <=> DateTime.parse(date_string)
29
+ end
30
+
31
+ # this will yield an instance of File
32
+ # the position in the file is the first byte of the found line
33
+
34
+ file.readline
35
+ #=> "[2012-08-29 18:42:18] (9640) something something something else 5"
36
+
37
+ file.readline
38
+ #=> "[2012-08-29 18:42:18] (9640) something something something else 6"
39
+
40
+ LogSlice.new takes a File or file path; LogSlice#find takes a block. When passed a line,
41
+ the block must return -1 if the value represented by the line is too high,
42
+ 1 if it's too low, or 0 if it's just right.
43
+
44
+ ## Limitations
45
+
46
+ * Can only search sorted data. At the moment, if the data isn't sorted, it won't detect it and it will loop forever.
47
+ * Can only search for a known value. For example, searching for 18:42:19 in the example above will yield nothing.
48
+
49
+ ## Disclaimer
50
+
51
+ Use this at your own risk. Better yet, don't use it, it probably doesn't work.
data/lib/log_slice.rb ADDED
@@ -0,0 +1,129 @@
# Locates a line in a large, sorted, newline-delimited file (typically a log
# ordered by timestamp) using a binary search over byte offsets.
#
# All cursors held by this class (@char_cursor, @line_cursor, @lower, @upper)
# are byte offsets into the file.
class LogSlice

  # @param log_file [File, String] an already-open seekable IO, or a path that
  #   is opened read-only
  def initialize log_file
    @file = log_file.respond_to?(:seek) ? log_file : File.open(log_file, 'r')
    # IO-likes without #stat (e.g. StringIO) report their byte length via #size
    @size = @file.respond_to?(:stat) ? @file.stat.size : @file.size
    reset_search
  end

  # Depends on lines being sorted.
  #
  # The block receives a line and must return -1 if the value represented by
  # the line is too high, 1 if it is too low, or 0 if it matches.
  #
  # @return [File, nil] the file, positioned at the first byte of the first
  #   matching line, or nil when no line matches
  # @raise [ArgumentError] if the block returns anything other than -1, 0 or 1
  def find &compare
    reset_search # make repeated calls on one instance independent
    direction = :forward
    previous_line_cursor = nil
    loop do
      previous_char_cursor = @char_cursor
      line = next_line direction
      if @char_cursor == previous_char_cursor
        # The probe can no longer move: the interval is exhausted. Probing
        # always inspects the line *after* the next newline, so the line at
        # byte 0 is never reached; check it here, but only if no probe ever
        # reported "too low" (otherwise, in sorted data, it cannot match).
        return @lower == 0 ? seek_to_first_line_if_match(compare) : nil
      end
      if line.nil?
        # Probed inside the last line: nothing follows it, so look earlier.
        direction = :back
      elsif @line_cursor != previous_line_cursor
        case compare.call(line)
        when 0 # found
          walk_up_to_first_match compare
          return @file
        when -1
          direction = :back
        when 1
          direction = :forward
        else
          raise ArgumentError, "comparison block must return -1, 0 or 1"
        end
      end
      # When the probe resolved to the same line as the previous probe the
      # answer is already known, so the previous direction is kept and the
      # comparison is not repeated.
      previous_line_cursor = @line_cursor
    end
  end

  private

  # Puts the binary-search bounds back to "whole file, nothing probed yet".
  def reset_search
    @lower = 0
    @upper = @size
    @char_cursor = nil
    @line_cursor = nil
  end

  # @param direction [Symbol] direction in file to move, :forward or :back
  # @return [String, nil] the line following the probe position, or nil if the
  #   probe landed in the last line of the file
  def next_line direction
    move_char_cursor direction
    find_next_newline
  end

  # Fallback for the one line a probe cannot reach: the line at byte 0.
  # @return [File, nil] the file rewound to byte 0 if the first line matches
  def seek_to_first_line_if_match compare
    @file.seek(0)
    first_line = @file.gets("\n")
    return nil if first_line.nil?
    return nil unless compare.call(first_line) == 0
    @line_cursor = 0
    @file.seek(0)
    @file
  end

  # Once a matching line has been found, the lines above it must be checked:
  # if a line above also matches, we should seek to it.
  # (This makes the search on files with long runs of matching lines linear in
  # the length of the run rather than logarithmic.)
  def walk_up_to_first_match compare
    move_to_previous_line compare
    @file.seek(@line_cursor)
  end

  # Moves @line_cursor up to the start of the earliest line in the run of
  # consecutive matching lines that ends just above the current line.
  def move_to_previous_line compare
    last_cursor_position = @line_cursor
    each_line_reverse do |line|
      if compare.call(line) != 0
        @line_cursor = last_cursor_position
        break
      end
      last_cursor_position = @line_cursor
    end
  end

  # Yields the lines that precede @line_cursor, last line first, without their
  # trailing newline. Before each yield @line_cursor is moved to the start of
  # the yielded line.
  #
  # NOTE(review): String#split drops trailing empty fields, so a blank line
  # directly above the cursor is neither yielded nor counted - confirm whether
  # blank lines need to be supported.
  def each_line_reverse
    chunk_size = 512
    left_over = ""
    cursor = @line_cursor
    loop do
      cursor = cursor - chunk_size
      if cursor < 0
        # fewer than chunk_size bytes remain before the start of the file
        chunk_size = chunk_size + cursor
        cursor = 0
      end
      break if chunk_size == 0
      @file.seek(cursor)
      chunk = @file.read(chunk_size) + left_over
      lines = chunk.split("\n")
      while lines.length > 1
        line = lines.pop || ""
        # +1 for the newline that terminated the line; cursors are byte offsets
        @line_cursor = @line_cursor - (line.bytesize + 1)
        yield(line)
      end
      # the first piece may be the tail of a line that began in an earlier chunk
      left_over = lines[0] || ""
    end
    unless left_over == ''
      # whatever remains once the start of the file is reached is the first line
      @line_cursor = 0
      yield left_over
    end
  end

  # Skips the remainder of the line containing @char_cursor, records the byte
  # offset of the following line in @line_cursor and reads that line.
  # @return [String, nil] the following line, or nil when there is none
  def find_next_newline
    @file.seek(@char_cursor)
    @file.gets("\n") # discard the rest of the (possibly partial) current line
    @line_cursor = @file.pos # byte-accurate, unlike counting characters
    @file.gets("\n")
  end

  # Bisects the remaining interval in the given direction.
  # @param direction [Symbol] direction in file to move the cursor, :forward or :back
  def move_char_cursor direction
    if @char_cursor
      if direction == :forward
        distance = (@upper - @char_cursor) / 2
        old_cursor = @char_cursor
        @char_cursor = @char_cursor + distance
        @lower = old_cursor
      else
        distance = (@char_cursor - @lower) / 2
        old_cursor = @char_cursor
        @char_cursor = @char_cursor - distance
        @upper = old_cursor
      end
    else
      # first probe: start in the middle of the file
      @char_cursor = @size / 2
    end
  end

end
data/log_slice.gemspec ADDED
@@ -0,0 +1,15 @@
# Gem specification for log_slice. The file lists are taken from git, so the
# gem must be built from inside a git checkout.
Gem::Specification.new do |s|
  s.name          = 'log_slice'
  s.version       = '0.1'
  s.authors       = ["Joel Plane"]
  s.email         = ["joel.plane@gmail.com"]
  s.homepage      = 'https://github.com/joelplane/log_slice'
  s.date          = '2012-08-29'
  s.summary       = "Find a line in a log file"
  s.description   = "Find a line in a log file. Uses binary search to find the line quickly in a large log file. Can only search sorted data - which in the case of log file is the timestamp, and probably not much else."
  s.files         = `git ls-files`.split("\n")
  # A single-element brace group such as {spec} is not expanded, so the
  # previous pathspec "{spec}/*" matched nothing and test_files was empty.
  s.test_files    = `git ls-files -- spec/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]
  s.add_development_dependency 'rspec'
end
data/spec/helper.rb ADDED
@@ -0,0 +1,27 @@
require 'tempfile'

# Fixture helpers mixed into every example group.
spec_helpers = Module.new do

  # Writes the members of +range+, one per line (no trailing newline), to a
  # temp file and returns it rewound to the start.
  def range_to_file range
    Tempfile.new("test-#{range}").tap do |tmp|
      tmp.write(range.to_a.join("\n"))
      tmp.flush
      tmp.seek(0)
    end
  end

  # Writes +string+ verbatim to a temp file and returns it rewound to the start.
  def string_to_file string
    Tempfile.new("test-string").tap do |tmp|
      tmp.write(string)
      tmp.flush
      tmp.seek(0)
    end
  end

  # Base-2 logarithm of +n+.
  def log2 n
    Math.log(n) / Math.log(2)
  end

end

RSpec.configure { |config| config.include(spec_helpers) }
@@ -0,0 +1,90 @@
require File.expand_path('../lib/log_slice', File.dirname(__FILE__))
require 'helper'

describe LogSlice do

  # Runs the private each_line_reverse from the end of +file+ and returns
  # every yielded line converted to an integer, in the order yielded.
  def lines_read_backwards file
    slicer = LogSlice.new(file)
    slicer.instance_eval { @line_cursor = @size }
    seen = []
    slicer.send(:each_line_reverse) { |line| seen << line.strip.to_i }
    seen
  end

  it "finds the line" do
    found = LogSlice.new(range_to_file(1..100)).find { |line| 42 <=> line.to_i }
    found.readline.should == "42\n"
  end

  it "finds the first line when there are many matching lines" do
    content = (["1", "2"] + ["3"] * 20).join("\n")
    found = LogSlice.new(string_to_file(content)).find { |line| 3 <=> line.to_i }
    found.pos.should == "1\n2\n".length
    found.readline.should == "3\n"
  end

  it "finds a matching line with log2(lines)+1 calls to comparison function" do
    [100, 10_000, 100_000].each do |total_lines|
      calls = 0
      LogSlice.new(range_to_file(1..total_lines)).find do |line|
        calls += 1
        42 <=> line.to_i
      end
      calls.should <= log2(total_lines).ceil + 1
    end
  end

  it "nil when no matching line is found (1)" do
    # the comparison always claims the line is too low
    found = LogSlice.new(range_to_file(1..100)).find { |line| 1 }
    found.should be_nil
  end

  it "nil when no matching line is found (2)" do
    # the comparison always claims the line is too high
    found = LogSlice.new(range_to_file(1..100)).find { |line| -1 }
    found.should be_nil
  end

  it "nil when acting on an empty file" do
    found = LogSlice.new(string_to_file("")).find { |line| 42 <=> line.to_i }
    found.should be_nil
  end

  it "#each_line_reverse" do
    lines_read_backwards(range_to_file(1..10000)).should == Array(1..10000).reverse
  end

  it "#each_line_reverse when file is empty" do
    lines_read_backwards(string_to_file("")).should == []
  end

  it "#each_line_reverse when file has single newline char" do
    lines_read_backwards(string_to_file("\n")).should == []
  end

end
metadata ADDED
@@ -0,0 +1,84 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: log_slice
3
+ version: !ruby/object:Gem::Version
4
+ hash: 9
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 1
9
+ version: "0.1"
10
+ platform: ruby
11
+ authors:
12
+ - Joel Plane
13
+ autorequire:
14
+ bindir: bin
15
+ cert_chain: []
16
+
17
+ date: 2012-08-29 00:00:00 Z
18
+ dependencies:
19
+ - !ruby/object:Gem::Dependency
20
+ name: rspec
21
+ prerelease: false
22
+ requirement: &id001 !ruby/object:Gem::Requirement
23
+ none: false
24
+ requirements:
25
+ - - ">="
26
+ - !ruby/object:Gem::Version
27
+ hash: 3
28
+ segments:
29
+ - 0
30
+ version: "0"
31
+ type: :development
32
+ version_requirements: *id001
33
+ description: Find a line in a log file. Uses binary search to find the line quickly in a large log file. Can only search sorted data - which in the case of log file is the timestamp, and probably not much else.
34
+ email:
35
+ - joel.plane@gmail.com
36
+ executables: []
37
+
38
+ extensions: []
39
+
40
+ extra_rdoc_files: []
41
+
42
+ files:
43
+ - .gitignore
44
+ - Gemfile
45
+ - README.md
46
+ - lib/log_slice.rb
47
+ - log_slice.gemspec
48
+ - spec/helper.rb
49
+ - spec/log_slice_spec.rb
50
+ homepage: https://github.com/joelplane/log_slice
51
+ licenses: []
52
+
53
+ post_install_message:
54
+ rdoc_options: []
55
+
56
+ require_paths:
57
+ - lib
58
+ required_ruby_version: !ruby/object:Gem::Requirement
59
+ none: false
60
+ requirements:
61
+ - - ">="
62
+ - !ruby/object:Gem::Version
63
+ hash: 3
64
+ segments:
65
+ - 0
66
+ version: "0"
67
+ required_rubygems_version: !ruby/object:Gem::Requirement
68
+ none: false
69
+ requirements:
70
+ - - ">="
71
+ - !ruby/object:Gem::Version
72
+ hash: 3
73
+ segments:
74
+ - 0
75
+ version: "0"
76
+ requirements: []
77
+
78
+ rubyforge_project:
79
+ rubygems_version: 1.7.2
80
+ signing_key:
81
+ specification_version: 3
82
+ summary: Find a line in a log file
83
+ test_files: []
84
+