garru-distributed_logreader 0.6.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
- data/distributed_logreader.gemspec +5 -2
- data/lib/distributed_logreader/distributed_log_reader.rb +5 -3
- data/lib/distributed_logreader/distributer/mutex_counter.rb +48 -0
- data/lib/distributed_logreader/distributer/pandemic_processor.rb +143 -0
- data/lib/distributed_logreader/distributer/simple_forked_process.rb +51 -0
- data/lib/distributed_logreader/distributer/simple_thread_pool.rb +1 -1
- data/lib/distributed_logreader/log_reader.rb +3 -3
- metadata +5 -2
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.6.0
|
1
|
+
0.7.0
|
@@ -2,11 +2,11 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |s|
|
4
4
|
s.name = %q{distributed_logreader}
|
5
|
-
s.version = "0.6.0"
|
5
|
+
s.version = "0.7.0"
|
6
6
|
|
7
7
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
8
8
|
s.authors = ["Gary Tsang"]
|
9
|
-
s.date = %q{2009-08-
|
9
|
+
s.date = %q{2009-08-10}
|
10
10
|
s.email = %q{gary@garru.com}
|
11
11
|
s.extra_rdoc_files = [
|
12
12
|
"LICENSE",
|
@@ -26,6 +26,9 @@ Gem::Specification.new do |s|
|
|
26
26
|
"lib/distributed_logreader/distributed_log_reader/rotater_reader.rb",
|
27
27
|
"lib/distributed_logreader/distributed_log_reader/scribe_reader.rb",
|
28
28
|
"lib/distributed_logreader/distributer.rb",
|
29
|
+
"lib/distributed_logreader/distributer/mutex_counter.rb",
|
30
|
+
"lib/distributed_logreader/distributer/pandemic_processor.rb",
|
31
|
+
"lib/distributed_logreader/distributer/simple_forked_process.rb",
|
29
32
|
"lib/distributed_logreader/distributer/simple_thread_pool.rb",
|
30
33
|
"lib/distributed_logreader/log_reader.rb",
|
31
34
|
"lib/distributed_logreader/selector.rb",
|
@@ -1,4 +1,5 @@
|
|
1
1
|
require File.join(File.dirname(__FILE__), 'distributer', 'simple_thread_pool')
|
2
|
+
require File.join(File.dirname(__FILE__), 'distributer', 'simple_forked_process')
|
2
3
|
require File.join(File.dirname(__FILE__), 'selector', 'rotating_log')
|
3
4
|
require File.join(File.dirname(__FILE__), 'archiver', 'date_dir')
|
4
5
|
require File.join(File.dirname(__FILE__), 'log_reader')
|
@@ -7,9 +8,10 @@ module DLogReader
|
|
7
8
|
class DistributedLogReader
|
8
9
|
attr_accessor :distributer, :filename
|
9
10
|
attr_reader :log_reader
|
10
|
-
def initialize(filename, worker, num_threads =
|
11
|
+
def initialize(filename, worker, num_threads = 100)
|
11
12
|
self.filename = filename
|
12
|
-
self.distributer =
|
13
|
+
# self.distributer = SimpleForked.new(worker, 5, num_threads)
|
14
|
+
self.distributer = SimpleThreadPool.new(worker, 500)
|
13
15
|
end
|
14
16
|
|
15
17
|
# selector/archiver seem to be strongly connected. it's possible it
|
@@ -34,7 +36,7 @@ module DLogReader
|
|
34
36
|
end
|
35
37
|
|
36
38
|
#predefined hooks
|
37
|
-
def pre_process
|
39
|
+
def pre_process
|
38
40
|
end
|
39
41
|
|
40
42
|
#predefined hooks
|
@@ -0,0 +1,48 @@
|
|
1
|
+
# Thread-safe counter whose working value is kept inside a bounded range.
#
# The count is stored as two integers: @counter, which #inc wraps back to
# zero once it reaches +max+, and @resets, the number of wraps so far. The
# logical value is <tt>@resets * max + @counter</tt> (see #real_total).
# Every read and write goes through a single Mutex.
class MutexCounter
  # Default wrap-around point for @counter.
  MAX = (2**30) - 1

  # max:: value at which @counter wraps and @resets is bumped.
  def initialize(max = MAX)
    @mutex = Mutex.new
    @counter = 0
    @resets = 0
    @max = max
  end

  # Returns the logical total, combining the wrap count and the working value.
  def real_total
    @mutex.synchronize { (@resets * @max) + @counter }
  end
  alias_method :to_i, :real_total

  # Returns only the working value (the part that has not wrapped).
  def value
    @mutex.synchronize { @counter }
  end

  # Adds one to the logical total and returns the new working value.
  def inc
    @mutex.synchronize do
      if @counter >= @max
        @counter = 0 # to avoid Bignum, it's about 4x slower
        @resets += 1
      end
      @counter += 1
    end
  end
  alias_method :next, :inc
  alias_method :succ, :inc

  # Subtracts one from the logical total, never going below zero, and
  # returns the new working value.
  def decr
    @mutex.synchronize do
      if @counter > 0
        @counter -= 1
      elsif @resets > 0
        # Borrow one wrap: resets * max + 0 becomes
        # (resets - 1) * max + (max - 1), i.e. exactly one less.
        @resets -= 1
        @counter = @max - 1
      end
      @counter
    end
  end
  alias_method :pred, :decr
  alias_method :prev, :decr
end
|
@@ -0,0 +1,143 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
#stolen from pandemic
|
3
|
+
module DLogReader
  # Runs a handler inside a forked child process that hosts a pool of worker
  # threads.
  #
  # Constructing a Processor forks. The parent gets an object whose #process
  # writes one line per job to a pipe; the child reads those lines, hands them
  # to its worker threads, and writes a single "|" byte back for every
  # finished job. The parent counts outstanding jobs with a MutexCounter
  # (incremented on submit, decremented on each acknowledgement byte).
  #
  # In the child, +initialize+ does not return: it loops in #wait_for_jobs
  # until the pipe reaches EOF and then exits the process.
  class Processor
    # handler::     object responding to +call(line)+; invoked in the child.
    # num_threads:: number of worker threads started in the child.
    def initialize(handler, num_threads = 10)
      read_from_parent, write_to_child = IO.pipe
      read_from_child, write_to_parent = IO.pipe

      @child_process_id = fork
      if @child_process_id
        # Parent: keep the ends that talk to the child, close the others.
        write_to_parent.close
        read_from_parent.close
        @out = write_to_child
        @in = read_from_child
        @max_queue_size = 100
        @counter = MutexCounter.new
        wait_for_responses
      else
        $dlog_logger.debug("Forked")
        # Child: keep the ends that talk to the parent, close the others.
        write_to_child.close
        read_from_child.close
        @out = write_to_parent
        @in = read_from_parent
        @handler = handler
        @job_queue = Queue.new
        @response_queue = Queue.new
        wait_for_job_completion
        num_threads.times { create_thread }
        wait_for_jobs
      end
    end

    # Number of jobs submitted but not yet acknowledged by the child.
    # Returns nil when called in the child.
    def num_jobs
      @counter.real_total if parent?
    end

    # In the parent: blocks while more than @max_queue_size jobs are
    # outstanding, then sends +body+ (newline-terminated) to the child.
    # In the child: runs the handler on +body+ and returns its result.
    def process(body)
      if parent?
        sleep(0.01) while @counter.real_total > @max_queue_size
        body = body.chomp + "\n"
        $dlog_logger.debug("Parent: writing #{body.inspect}")
        # Count the job before it is written. If the child acknowledged it
        # before the increment, the decrement would be lost (the counter
        # never goes below zero) and the job would stay "outstanding" forever.
        queued = @counter.inc
        begin
          @out.write(body)
        rescue StandardError
          @counter.decr # the job never reached the child
          raise
        end
        queued
      else
        $dlog_logger.debug("Child Processing #{body}")
        @handler.call(body)
      end
    end

    # In the parent: sends +status+ as a final line and closes both pipe
    # ends. Calling it again once closed is a no-op.
    # In the child: exits the process immediately with +status+.
    #
    # NOTE(review): the child queues every line it reads as a job, so the
    # status line written here is also passed to the handler -- confirm that
    # this is intended.
    def close(status = 0)
      if parent?
        # Already closed: do nothing. Falling through to exit! here would
        # terminate the parent process.
        return unless child_alive?
        Process.detach(@child_process_id)
        @out.puts(status.to_s)
        @out.close
        @in.close
      else
        Process.exit!(status)
      end
    end

    # True once the parent has closed its pipe to the child (always true in
    # the child itself).
    def closed?
      !child_alive?
    end

    private

    # Child: starts one worker thread that pops lines off the job queue and
    # processes them forever.
    def create_thread
      Thread.new do
        loop do
          line = @job_queue.pop
          begin
            process(line)
            $dlog_logger.debug("Child: Finished #{line.inspect}")
          rescue StandardError => e
            $dlog_logger.warn("Child: Exception processing #{line.inspect} -- #{e.message}")
          ensure
            # Always acknowledge, even on failure, so the parent's
            # outstanding-job count drains and this worker keeps running.
            @response_queue << :a
          end
        end
      end
    end

    # Parent: starts a thread that consumes one acknowledgement byte per
    # finished job and decrements the outstanding-job counter.
    def wait_for_responses
      Thread.new do
        begin
          loop do
            ready, = IO.select([@in], nil, nil)
            next unless ready
            $dlog_logger.debug("Parent: Reading Response")
            @in.readchar
            @counter.decr
          end
        rescue IOError, Errno::EBADF
          # The pipe reached EOF or was closed by #close; nothing more to read.
        end
      end
    end

    # Child: reads job lines from the parent until EOF, then exits using the
    # last line read as the exit status.
    def wait_for_jobs
      return unless child?
      line = nil
      loop do
        $dlog_logger.debug("Child waiting")
        ready, = IO.select([@in], nil, nil)
        if ready && !@in.eof?
          line = @in.gets
          $dlog_logger.debug("Child: #{line.inspect}")
          @job_queue << line
        else
          close(line.to_i)
          break
        end
      end
    end

    # Child: starts a thread that writes one "|" byte to the parent for every
    # entry the workers push onto the response queue.
    def wait_for_job_completion
      return unless child?
      Thread.new do
        loop do
          @response_queue.pop
          $dlog_logger.debug("Child: Writing To Parent")
          @out.write("|")
        end
      end
    end

    # True in the process that called fork (it holds the child's pid).
    def parent?
      !!@child_process_id
    end

    def child?
      !parent?
    end

    # Parent-side view of the child: considered alive until #close has closed
    # the read end of the pipe.
    def child_alive?
      parent? && !@in.closed?
    end
  end
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
require 'thread'
|
2
|
+
require File.join(File.dirname(__FILE__), 'pandemic_processor')
|
3
|
+
require File.join(File.dirname(__FILE__), 'mutex_counter')
|
4
|
+
module DLogReader
  # Distributes lines across several forked Processor instances.
  #
  # Lines handed to #process go onto a shared queue. Each Processor has one
  # feeder thread in this process that pops lines off the queue and forwards
  # them to that Processor. #join blocks until every submitted line has been
  # forwarded and every Processor reports no outstanding jobs.
  class SimpleForked
    attr_accessor :num_threads_per_process, :worker, :thread_pool, :queue, :max_queue_size, :processors

    # worker::                  handler passed through to each Processor.
    # num_processes::           number of Processor instances to create.
    # num_threads_per_process:: thread count passed to each Processor.
    def initialize(worker, num_processes = 3, num_threads_per_process = 10)
      self.worker = worker
      self.num_threads_per_process = num_threads_per_process
      self.queue = Queue.new
      self.max_queue_size = 100
      self.processors = []
      # Lines accepted by #process that a feeder thread has not finished
      # handing to a Processor yet. Set up before any feeder thread starts.
      @pending = 0
      @pending_lock = Mutex.new
      num_processes.times do |x|
        $dlog_logger.debug("Forking #{x} process")
        processors << create_process
      end
    end

    # Queues +line+ for processing, blocking while the queue holds more than
    # max_queue_size entries.
    def process(line)
      sleep(0.1) while queue.size > max_queue_size
      @pending_lock.synchronize { @pending += 1 }
      queue << line
    end

    # Blocks until all submitted lines have been handed off and processed.
    def join
      loop do
        # Read the in-flight count BEFORE the processors' counts: a line is
        # counted by its Processor before it leaves @pending, so reading in
        # this order can never miss a line that is moving between the two.
        in_flight = @pending_lock.synchronize { @pending }
        num_jobs_outstanding = processors.inject(0) { |sum, processor| sum + processor.num_jobs }
        break unless queue.size > 0 || in_flight > 0 || num_jobs_outstanding > 0
        $dlog_logger.debug("Shutting down #{num_jobs_outstanding} left")
        sleep 0.1
      end
    end

    protected

    # Creates one Processor plus the feeder thread that drains the shared
    # queue into it. Returns the Processor.
    def create_process
      processor = Processor.new(worker, num_threads_per_process)
      Thread.new do
        loop do
          line = queue.pop
          begin
            processor.process(line)
          rescue StandardError => e
            # Log and keep the feeder alive; one bad line must not stop it.
            $dlog_logger.warn("Exception in processing thread #{line} -- #{e.message}")
          ensure
            @pending_lock.synchronize { @pending -= 1 }
          end
        end
      end
      processor
    end
  end
end
|
@@ -10,11 +10,11 @@ module DLogReader
|
|
10
10
|
end
|
11
11
|
|
12
12
|
def run
|
13
|
-
raise IOError.new("no file given") if filename.nil?
|
13
|
+
# raise IOError.new("no file given") if filename.nil?
|
14
14
|
raise IOError.new("File not readable") unless File.readable?(filename)
|
15
15
|
f = File.open(filename, "r+")
|
16
16
|
load_saved_state(f)
|
17
|
-
raise IOError.new("File is locked") unless f.flock(File::LOCK_EX | File::LOCK_NB)
|
17
|
+
# raise IOError.new("File is locked") unless f.flock(File::LOCK_EX | File::LOCK_NB)
|
18
18
|
unless f.eof?
|
19
19
|
line_count = 0
|
20
20
|
f.each_line do |line|
|
@@ -27,7 +27,7 @@ module DLogReader
|
|
27
27
|
end
|
28
28
|
save_state(f)
|
29
29
|
end
|
30
|
-
f.flock(File::LOCK_UN)
|
30
|
+
# f.flock(File::LOCK_UN)
|
31
31
|
end
|
32
32
|
|
33
33
|
def statefile
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: garru-distributed_logreader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.0
|
4
|
+
version: 0.7.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gary Tsang
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-08-
|
12
|
+
date: 2009-08-10 00:00:00 -07:00
|
13
13
|
default_executable:
|
14
14
|
dependencies: []
|
15
15
|
|
@@ -36,6 +36,9 @@ files:
|
|
36
36
|
- lib/distributed_logreader/distributed_log_reader/rotater_reader.rb
|
37
37
|
- lib/distributed_logreader/distributed_log_reader/scribe_reader.rb
|
38
38
|
- lib/distributed_logreader/distributer.rb
|
39
|
+
- lib/distributed_logreader/distributer/mutex_counter.rb
|
40
|
+
- lib/distributed_logreader/distributer/pandemic_processor.rb
|
41
|
+
- lib/distributed_logreader/distributer/simple_forked_process.rb
|
39
42
|
- lib/distributed_logreader/distributer/simple_thread_pool.rb
|
40
43
|
- lib/distributed_logreader/log_reader.rb
|
41
44
|
- lib/distributed_logreader/selector.rb
|