distributed_logreader 0.11.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (43) hide show
  1. data/.gitignore +7 -0
  2. data/LICENSE +20 -0
  3. data/README.rdoc +8 -0
  4. data/Rakefile +48 -0
  5. data/VERSION +1 -0
  6. data/distributed_logreader.gemspec +90 -0
  7. data/lib/distributed_logreader.rb +11 -0
  8. data/lib/distributed_logreader/achiver.rb +10 -0
  9. data/lib/distributed_logreader/archiver/date_dir.rb +36 -0
  10. data/lib/distributed_logreader/distributed_log_reader.rb +46 -0
  11. data/lib/distributed_logreader/distributed_log_reader/rotater_reader.rb +21 -0
  12. data/lib/distributed_logreader/distributed_log_reader/scribe_reader.rb +53 -0
  13. data/lib/distributed_logreader/distributer.rb +13 -0
  14. data/lib/distributed_logreader/distributer/mutex_counter.rb +48 -0
  15. data/lib/distributed_logreader/distributer/pandemic_processor.rb +149 -0
  16. data/lib/distributed_logreader/distributer/simple_forked_process.rb +47 -0
  17. data/lib/distributed_logreader/distributer/simple_thread_pool.rb +43 -0
  18. data/lib/distributed_logreader/log_reader.rb +67 -0
  19. data/lib/distributed_logreader/selector.rb +12 -0
  20. data/lib/distributed_logreader/selector/rotating_log.rb +43 -0
  21. data/lib/distributed_logreader/util.rb +7 -0
  22. data/spec/archiver/date_dir_spec.rb +25 -0
  23. data/spec/archiver_spec.rb +13 -0
  24. data/spec/distributed_log_reader/rotater_reader_spec.rb +26 -0
  25. data/spec/distributed_log_reader/scribe_reader_spec.rb +15 -0
  26. data/spec/distributed_log_reader_spec.rb +20 -0
  27. data/spec/distributer/simple_thread_pool_spec.rb +17 -0
  28. data/spec/distributer_spec.rb +13 -0
  29. data/spec/fixtures/copytruncate/test +0 -0
  30. data/spec/fixtures/copytruncate/test.1 +0 -0
  31. data/spec/fixtures/copytruncate/test_current +0 -0
  32. data/spec/fixtures/logrotate/test-20090101 +0 -0
  33. data/spec/fixtures/logrotate/test-20090102 +0 -0
  34. data/spec/fixtures/symlink/test +0 -0
  35. data/spec/fixtures/symlink/test_older_sym +0 -0
  36. data/spec/fixtures/test_file +4 -0
  37. data/spec/fixtures/virality_metrics/test +7 -0
  38. data/spec/fixtures/virality_metrics/virality_metrics_current +7 -0
  39. data/spec/log_reader_spec.rb +57 -0
  40. data/spec/selector/rotating_log_spec.rb +24 -0
  41. data/spec/selector_spec.rb +13 -0
  42. data/spec/spec_helper.rb +9 -0
  43. metadata +107 -0
@@ -0,0 +1,7 @@
1
+ *.sw?
2
+ .DS_Store
3
+ coverage
4
+ rdoc
5
+ pkg
6
+ *log
7
+ temp_backup_dir/*
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 Gary Tsang
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,8 @@
1
+ = Distributed Log Reader
2
+
3
+ Distributed Log Reader is a log reader that allows for parallel processing. It is highly configurable,
4
+ allowing you to choose different parallel distribution and archiving strategies.
5
+
6
+ == Copyright
7
+
8
+ Copyright (c) 2009 Gary Tsang. See LICENSE for details.
@@ -0,0 +1,48 @@
1
require 'rubygems'
require 'rake'

# Gem packaging tasks are provided by jeweler when it is installed.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "distributed_logreader"
    gem.summary = %Q{incomplete distribute log reader. planned to use pandemic to distribute work}
    gem.email = "gary@garru.com"
    gem.homepage = "http://github.com/garru/distributed_logreader"
    gem.authors = ["Gary Tsang"]
    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
  end

rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
end

require 'spec/rake/spectask'
# rake spec -- runs every *_spec.rb under spec/
Spec::Rake::SpecTask.new(:spec) do |spec|
  spec.libs << 'lib' << 'spec'
  spec.spec_files = FileList['spec/**/*_spec.rb']
end

# rake rcov -- same specs, with rcov coverage enabled
Spec::Rake::SpecTask.new(:rcov) do |spec|
  spec.libs << 'lib' << 'spec'
  spec.pattern = 'spec/**/*_spec.rb'
  spec.rcov = true
end


task :default => :spec

require 'rake/rdoctask'
Rake::RDocTask.new do |rdoc|
  # The project ships a plain-text VERSION file; VERSION.yml is still honoured
  # for checkouts that use the older layout. Without either, the title simply
  # carries no version.
  version =
    if File.exist?('VERSION')
      File.read('VERSION').strip
    elsif File.exist?('VERSION.yml')
      require 'yaml' # only needed for the legacy layout
      config = YAML.load(File.read('VERSION.yml'))
      "#{config[:major]}.#{config[:minor]}.#{config[:patch]}"
    else
      ""
    end

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "distributed_logreader #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
48
+
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.11.0
@@ -0,0 +1,90 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run `rake gemspec`
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{distributed_logreader}
8
+ s.version = "0.11.0"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["Gary Tsang"]
12
+ s.date = %q{2009-10-27}
13
+ s.email = %q{gary@garru.com}
14
+ s.extra_rdoc_files = [
15
+ "LICENSE",
16
+ "README.rdoc"
17
+ ]
18
+ s.files = [
19
+ ".gitignore",
20
+ "LICENSE",
21
+ "README.rdoc",
22
+ "Rakefile",
23
+ "VERSION",
24
+ "distributed_logreader.gemspec",
25
+ "lib/distributed_logreader.rb",
26
+ "lib/distributed_logreader/achiver.rb",
27
+ "lib/distributed_logreader/archiver/date_dir.rb",
28
+ "lib/distributed_logreader/distributed_log_reader.rb",
29
+ "lib/distributed_logreader/distributed_log_reader/rotater_reader.rb",
30
+ "lib/distributed_logreader/distributed_log_reader/scribe_reader.rb",
31
+ "lib/distributed_logreader/distributer.rb",
32
+ "lib/distributed_logreader/distributer/mutex_counter.rb",
33
+ "lib/distributed_logreader/distributer/pandemic_processor.rb",
34
+ "lib/distributed_logreader/distributer/simple_forked_process.rb",
35
+ "lib/distributed_logreader/distributer/simple_thread_pool.rb",
36
+ "lib/distributed_logreader/log_reader.rb",
37
+ "lib/distributed_logreader/selector.rb",
38
+ "lib/distributed_logreader/selector/rotating_log.rb",
39
+ "lib/distributed_logreader/util.rb",
40
+ "spec/archiver/date_dir_spec.rb",
41
+ "spec/archiver_spec.rb",
42
+ "spec/distributed_log_reader/rotater_reader_spec.rb",
43
+ "spec/distributed_log_reader/scribe_reader_spec.rb",
44
+ "spec/distributed_log_reader_spec.rb",
45
+ "spec/distributer/simple_thread_pool_spec.rb",
46
+ "spec/distributer_spec.rb",
47
+ "spec/fixtures/copytruncate/test",
48
+ "spec/fixtures/copytruncate/test.1",
49
+ "spec/fixtures/copytruncate/test_current",
50
+ "spec/fixtures/logrotate/test-20090101",
51
+ "spec/fixtures/logrotate/test-20090102",
52
+ "spec/fixtures/symlink/test",
53
+ "spec/fixtures/symlink/test_older_sym",
54
+ "spec/fixtures/test_file",
55
+ "spec/fixtures/virality_metrics/test",
56
+ "spec/fixtures/virality_metrics/virality_metrics_current",
57
+ "spec/log_reader_spec.rb",
58
+ "spec/selector/rotating_log_spec.rb",
59
+ "spec/selector_spec.rb",
60
+ "spec/spec_helper.rb"
61
+ ]
62
+ s.homepage = %q{http://github.com/garru/distributed_logreader}
63
+ s.rdoc_options = ["--charset=UTF-8"]
64
+ s.require_paths = ["lib"]
65
+ s.rubygems_version = %q{1.3.5}
66
+ s.summary = %q{incomplete distributed log reader. plan to use pandemic to distribute}
67
+ s.test_files = [
68
+ "spec/archiver/date_dir_spec.rb",
69
+ "spec/archiver_spec.rb",
70
+ "spec/distributed_log_reader/rotater_reader_spec.rb",
71
+ "spec/distributed_log_reader/scribe_reader_spec.rb",
72
+ "spec/distributed_log_reader_spec.rb",
73
+ "spec/distributer/simple_thread_pool_spec.rb",
74
+ "spec/distributer_spec.rb",
75
+ "spec/log_reader_spec.rb",
76
+ "spec/selector/rotating_log_spec.rb",
77
+ "spec/selector_spec.rb",
78
+ "spec/spec_helper.rb"
79
+ ]
80
+
81
+ if s.respond_to? :specification_version then
82
+ current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
83
+ s.specification_version = 3
84
+
85
+ if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
86
+ else
87
+ end
88
+ else
89
+ end
90
+ end
@@ -0,0 +1,11 @@
1
+ require 'distributed_logreader/selector.rb'
2
+ require 'distributed_logreader/achiver.rb'
3
+ require 'distributed_logreader/util.rb'
4
+ require 'distributed_logreader/distributed_log_reader'
5
+ require 'distributed_logreader/distributed_log_reader/rotater_reader'
6
+ require 'distributed_logreader/distributed_log_reader/scribe_reader'
7
+ require 'logger'
8
+
9
# Process-wide logger used by every DLogReader component.
# The system log file is preferred, but opening it can fail (for example when
# running unprivileged); fall back to STDERR so that merely requiring the
# library never raises.
$dlog_logger = begin
  Logger.new("/var/log/distributed_logreader.log")
rescue SystemCallError
  Logger.new(STDERR)
end
$dlog_logger.level = Logger::DEBUG
$dlog_logger.datetime_format = "%Y-%m-%d %H:%M:%S "
@@ -0,0 +1,10 @@
1
+ # This abstract class defines the interface that handles log file archiving
2
+
3
module DLogReader
  # Abstract base for archiving strategies. Concrete archivers subclass this
  # and override #archive.
  class Archiver
    # Archives +filename+ however the concrete strategy sees fit.
    # The base implementation always raises NotImplementedError.
    def archive(filename)
      raise NotImplementedError, "archive not implemented. Are you sure you created a conrete class?"
    end
  end
end
@@ -0,0 +1,36 @@
1
+ require 'fileutils'
2
module DLogReader
  # Archiver that moves a log file into a dated directory
  # (<base_backup_dir>/<year>/<month>/<day>) and compresses it with bzip2.
  class DateDir < Archiver
    include FileUtils
    attr_accessor :base_backup_dir

    # backup_dir:: root directory under which the dated directories are
    #              created; when nil, #archive does nothing.
    def initialize(backup_dir)
      self.base_backup_dir = backup_dir
    end

    # Moves +file+ into today's backup directory and bzip2-compresses it there.
    def archive(file)
      return if base_backup_dir.nil?

      # Resolve the target once so the move and the compression agree even if
      # the date rolls over in between.
      target_dir = backup_dir
      mv(file, target_dir)
      # Argument-list form of system: no shell is involved, so file names with
      # spaces or shell metacharacters are passed to bzip2 untouched.
      system('bzip2', File.join(target_dir, File.basename(file)))
    end

    protected

    # Returns today's backup directory, creating it and any missing parents.
    def backup_dir
      today = Time.now
      dated_dir = File.join(base_backup_dir, today.year.to_s, today.month.to_s, today.day.to_s)
      mkdir_p(dated_dir)
      dated_dir
    end
  end
end
@@ -0,0 +1,46 @@
1
+ require File.join(File.dirname(__FILE__), 'distributer', 'simple_thread_pool')
2
+ require File.join(File.dirname(__FILE__), 'distributer', 'simple_forked_process')
3
+ require File.join(File.dirname(__FILE__), 'selector', 'rotating_log')
4
+ require File.join(File.dirname(__FILE__), 'archiver', 'date_dir')
5
+ require File.join(File.dirname(__FILE__), 'log_reader')
6
+
7
module DLogReader
  # Reads a log file with a LogReader and hands every line to a distributer
  # (a SimpleThreadPool built around +worker+).
  class DistributedLogReader
    attr_accessor :distributer, :filename
    attr_reader :log_reader

    # filename::    log file to read
    # worker::      callable handed to the distributer
    # num_threads:: size of the thread pool
    def initialize(filename, worker, num_threads = 100)
      self.filename = filename
      # self.distributer = SimpleForked.new(worker, 5, num_threads)
      self.distributer = SimpleThreadPool.new(worker, num_threads)
    end

    # selector/archiver seem to be strongly connected. it's possible it
    # needs to be moved into LogReader
    #
    # Runs pre_process, dispatches every line of log_file to the distributer,
    # joins the distributer, then runs post_process.
    def process
      pre_process

      $dlog_logger.info("Started #{log_file}:")
      dispatched = 0
      @log_reader = LogReader.new(log_file) do |line|
        distributer.process(line)
        dispatched += 1
      end
      @log_reader.run
      distributer.join
      $dlog_logger.info("Finished #{log_file}: Processed (#{dispatched}) lines")
      post_process
    end

    # File that #process reads; subclasses may override.
    def log_file
      filename
    end

    # Hook called before reading starts; no-op here.
    def pre_process
    end

    # Hook called after every line has been dispatched; no-op here.
    def post_process
    end
  end
end
@@ -0,0 +1,21 @@
1
module DLogReader
  # DistributedLogReader that picks its input with a RotatingLog selector and
  # archives the file with a DateDir archiver once it has been processed.
  class RotaterReader < DistributedLogReader
    attr_accessor :selector, :archiver
    attr_reader :log_reader

    # filename::    file or directory handed to the selector
    # backupdir::   base directory handed to the DateDir archiver
    # worker::      callable handed to the distributer
    # num_threads:: size of the thread pool
    def initialize(filename, backupdir, worker, num_threads = 10)
      super(filename, worker, num_threads)
      self.selector = RotatingLog.new
      self.archiver = DateDir.new(backupdir)
    end

    # File chosen by the selector for +filename+ (memoized once non-nil).
    def log_file
      @log_file ||= selector.file_to_process(filename)
    end

    # Archives the file that was just processed.
    def post_process
      archiver.archive(log_file)
    end
  end
end
@@ -0,0 +1,53 @@
1
module DLogReader
  # Reads the oldest log file selected from +filename+, calling the worker
  # inline for every line, and archives the file afterwards unless it is the
  # one the directory's "current" entry refers to.
  class ScribeReader
    attr_accessor :selector, :archiver, :filename
    attr_reader :log_reader

    # filename::    file or directory handed to the selector
    # backupdir::   base directory handed to the DateDir archiver
    # worker::      callable invoked with each line
    # num_threads:: accepted for signature compatibility; not used here
    def initialize(filename, backupdir, worker, num_threads = 10)
      self.filename = filename
      self.selector = RotatingLog.new
      # never pick scribe's own stats files
      self.selector.ignore_conditions << lambda { |x| !x.match(/scribe_stats/).nil? }
      @worker = worker
      self.archiver = DateDir.new(backupdir)
    end

    # Feeds every line of log_file to the worker, then runs post_process.
    # A worker failure is logged and the line still counts as processed.
    def process
      $dlog_logger.info("Started #{log_file}:")
      lines_processed = 0
      @log_reader = LogReader.new(log_file) do |line|
        begin
          @worker.call(line)
        rescue StandardError => e
          # StandardError only: Interrupt/SystemExit must still be able to
          # stop the run.
          $dlog_logger.warn("Exception thrown in worker #{e.message}")
        end
        lines_processed += 1
      end
      @log_reader.run
      $dlog_logger.info("Finished #{log_file}: Processed (#{lines_processed}) lines")
      post_process
    end

    # File chosen by the selector for +filename+ (memoized once non-nil).
    def log_file
      @log_file ||= selector.file_to_process(filename)
    end

    # Archives the processed file and drops its saved read state, unless the
    # file is still the current one.
    def post_process
      return if current?

      archiver.archive(log_file)
      begin
        File.delete(@log_reader.statefile)
      rescue StandardError
        nil # best effort: a missing state file is not an error
      end
    end

    # True when log_file is the same file as the entry named *current* in its
    # directory.
    def current?
      directory = File.dirname(log_file)
      # Match on the entry name only, so a directory whose own path contains
      # "current" does not make every file look like the current one.
      current_file = Dir[File.join(directory, "*")].detect { |path| File.basename(path).match(/current/) }
      !current_file.nil? && File.exist?(current_file) && File.identical?(current_file, log_file)
    end
  end
end
@@ -0,0 +1,13 @@
1
module DLogReader
  # Simplest distribution strategy: the worker is called inline by #process.
  class Distributer
    attr_accessor :worker

    # worker:: callable invoked with each line
    def initialize(worker)
      self.worker = worker
    end

    # Calls the worker with +line+ and returns whatever the worker returns.
    def process(line)
      handler = worker
      handler.call(line)
    end
  end
end
@@ -0,0 +1,48 @@
1
# Counter guarded by a mutex. The running value is kept at or below +max+;
# every time it would exceed +max+ it wraps and the wrap is recorded in
# @resets, so the true total is (@resets * @max) + @counter.
class MutexCounter
  MAX = (2 ** 30) - 1

  # max:: value at which the internal counter wraps
  def initialize(max = MAX)
    @mutex = Mutex.new
    @counter = 0
    @resets = 0
    @max = max
  end

  # Total number of increments minus decrements, including wrapped rounds.
  def real_total
    @mutex.synchronize { (@resets * @max) + @counter }
  end
  alias_method :to_i, :real_total

  # Current value of the wrapped counter (at most +max+).
  def value
    @mutex.synchronize { @counter }
  end

  # Increments the counter and returns the new wrapped value.
  def inc
    @mutex.synchronize do
      if @counter >= @max
        @counter = 0 # to avoid Bignum, it's about 4x slower
        @resets += 1
      end
      @counter += 1
    end
  end
  alias_method :next, :inc
  alias_method :succ, :inc

  # Decrements the counter, never going below a total of zero, and returns
  # the new wrapped value.
  def decr
    @mutex.synchronize do
      if @counter > 0
        @counter -= 1
      elsif @resets > 0
        # Borrow one wrapped round. (resets, 0) and (resets - 1, max) are the
        # same total, so the counter lands on max - 1 for a net change of -1.
        @resets -= 1
        @counter = @max - 1
      end
      @counter
    end
  end
  alias_method :pred, :decr
  alias_method :prev, :decr

end
@@ -0,0 +1,149 @@
1
+ require 'rubygems'
2
+ #stolen from pandemic
3
module DLogReader
  # Forks a child process and talks to it over two pipes. The parent writes
  # one job per line; the child queues each line for its worker threads and
  # writes one "|" back per finished job, which the parent uses to keep a
  # count of outstanding jobs.
  class Processor
    # handler::     callable invoked in the child with each job line
    # num_threads:: number of worker threads started in the child
    #
    # In the child this call does not return: it loops in wait_for_jobs until
    # the pipe closes and then exits the process.
    def initialize(handler, num_threads = 10)
      read_from_parent, write_to_child = IO.pipe
      read_from_child, write_to_parent = IO.pipe
      # Both sides synchronize on @job_mutex (the parent in #process, the
      # child's workers in #create_thread), so it must exist before the fork;
      # otherwise the child's worker threads fail on a nil mutex.
      @job_mutex = Mutex.new

      @child_process_id = fork
      if @child_process_id
        # I'm the parent
        write_to_parent.close
        read_from_parent.close
        @out = write_to_child
        @in = read_from_child
        @max_queue_size = 100
        @counter = MutexCounter.new
        wait_for_responses
      else
        $dlog_logger.debug("Forked")
        # I'm the child
        write_to_child.close
        read_from_child.close
        @out = write_to_parent
        @in = read_from_parent
        @handler = handler
        @job_queue = Queue.new
        @response_queue = Queue.new
        wait_for_job_completion
        num_threads.times do
          create_thread
        end
        wait_for_jobs
      end
    end

    # Parent only: number of jobs written to the child and not yet
    # acknowledged. Returns nil in the child.
    def num_jobs
      if parent?
        @counter.real_total
      end
    end

    # In the parent: waits until fewer than @max_queue_size jobs are
    # outstanding, then writes +body+ to the child as one line.
    # In the child: runs the handler on +body+ and returns its result.
    def process(body)
      if parent?
        while(@counter.real_total > @max_queue_size)
          $dlog_logger.debug("Max process queue size: #{@counter.real_total}")
          sleep(0.01)
        end
        body = (body.chomp + "\n")
        @job_mutex.synchronize do
          @out.write(body)
          in_queue = @counter.inc
          $dlog_logger.debug("Parent: writing #{body.inspect} - #{in_queue}")
        end
      else
        $dlog_logger.debug("Child Processing #{body}")
        return @handler.call(body)
      end
    end

    # In the parent: detaches the child, sends +status+ as the final line and
    # closes both pipes; a no-op when the child has already been closed.
    # In the child: exits the process immediately with +status+.
    def close(status = 0)
      if parent?
        # Closing twice must not fall through to Process.exit! and take the
        # parent process down.
        return unless child_alive?

        Process.detach(@child_process_id)
        @out.puts(status.to_s)
        @out.close
        @in.close
      else
        Process.exit!(status)
      end
    end

    # True once the parent has closed its pipes to the child.
    def closed?
      !child_alive?
    end

    private

    # Child: starts one worker thread that pops job lines and processes them.
    # NOTE(review): the handler runs while holding @job_mutex, so the worker
    # threads process jobs one at a time -- confirm this is intended.
    def create_thread
      Thread.new do
        loop do
          line = @job_queue.pop
          @job_mutex.synchronize do
            process(line)
            @response_queue << :a
            $dlog_logger.debug("Child: Finished #{line.inspect}")
          end
        end
      end
    end

    # Parent: background thread that consumes one character per finished job
    # and decrements the outstanding-job counter.
    def wait_for_responses
      Thread.new do
        loop do
          ready, = IO.select([@in], nil, nil)
          if ready
            @in.readchar
            in_queue = @counter.decr
            $dlog_logger.debug("Parent: Reading Response #{in_queue}")
          end
        end
      end
    end

    #child process
    # Reads job lines from the parent and queues them until the pipe reaches
    # EOF, then exits. The parent's #close writes the status as the final
    # line, so the last line read is used as the exit status.
    # NOTE(review): that status line is also queued as a job -- confirm.
    def wait_for_jobs
      return unless child?

      line = nil
      loop do
        $dlog_logger.debug("Child waiting")
        ready, = IO.select([@in], nil, nil)
        if ready && !@in.eof?
          line = @in.gets
          $dlog_logger.debug("Child: #{line.inspect}")
          @job_queue << line
        else
          self.close(line.to_i)
          break
        end
      end
    end

    #child process
    # Background thread that writes one "|" to the parent per finished job.
    def wait_for_job_completion
      if child?
        Thread.new do
          while true
            @response_queue.pop
            $dlog_logger.debug("Child: Writing To Parent")
            @out.write("|")
          end
        end
      end
    end


    # fork returned a pid, so this is the parent process
    def parent?
      !!@child_process_id
    end

    def child?
      !parent?
    end

    # Parent-side view: the child counts as alive until #close closes @in.
    def child_alive?
      parent? && !@in.closed?
    end
  end
end
@@ -0,0 +1,47 @@
1
+ require 'thread'
2
+ require File.join(File.dirname(__FILE__), 'pandemic_processor')
3
+ require File.join(File.dirname(__FILE__), 'mutex_counter')
4
module DLogReader
  # Distributer that starts several Processor instances and feeds them lines
  # from one shared queue, one feeder thread per processor.
  class SimpleForked
    attr_accessor :num_threads_per_process, :worker, :thread_pool, :queue, :processors

    # worker::                  callable handed to every Processor
    # num_processes::           number of Processor instances to create
    # num_threads_per_process:: threads per Processor (nil means 10)
    def initialize(worker, num_processes = 3, num_threads_per_process = 10)
      self.worker = worker
      self.num_threads_per_process = (num_threads_per_process || 10)
      self.queue = Queue.new
      self.processors = []
      num_processes.times do |index|
        $dlog_logger.debug("Forking #{index} process")
        processors << create_process
      end
    end

    # Queues +line+ for whichever feeder thread picks it up first.
    def process(line)
      queue << line
    end

    # Blocks until the queue is empty and no processor reports outstanding
    # jobs, polling every 0.1s.
    def join
      count_outstanding = lambda do
        processors.inject(0) { |total, processor| total + processor.num_jobs }
      end

      remaining = count_outstanding.call
      until queue.size <= 0 && remaining <= 0
        sleep 0.1
        remaining = count_outstanding.call
        $dlog_logger.debug("Shutting down #{remaining} left")
      end
    end

    protected

    # Creates a Processor plus the thread that feeds it from the shared queue;
    # returns the Processor.
    def create_process
      forked = Processor.new(worker, num_threads_per_process)
      Thread.new do
        loop do
          line = queue.pop
          begin
            forked.process(line)
          rescue Exception => e
            $dlog_logger.warn("Exception in processing thread #{line} -- #{e.message}")
          end
        end
      end
      forked
    end
  end
end
@@ -0,0 +1,43 @@
1
+ require 'thread'
2
+
3
module DLogReader
  # Distributer backed by a fixed pool of threads that pop lines from a
  # shared queue and call the worker with each one.
  class SimpleThreadPool
    attr_accessor :num_threads, :worker, :thread_pool, :queue, :max_queue_size

    # worker::      callable invoked with each line
    # num_threads:: number of pool threads to start
    def initialize(worker, num_threads = 5)
      self.worker = worker
      self.num_threads = num_threads
      self.queue = Queue.new
      self.max_queue_size = 100
      # Jobs accepted by #process that have not finished yet. The queue size
      # alone cannot tell us this: a job leaves the queue when a thread picks
      # it up, not when the worker returns.
      @pending = 0
      @pending_lock = Mutex.new
      @all_done = ConditionVariable.new
      num_threads.times do
        create_thread
      end
    end

    # Queues +line+, waiting first while more than max_queue_size lines are
    # already queued.
    def process(line)
      sleep(0.01) while queue.size > max_queue_size
      @pending_lock.synchronize { @pending += 1 }
      queue << line
    end

    # Blocks until every line passed to #process has been handled by a worker
    # (successfully or not), not merely removed from the queue.
    def join
      @pending_lock.synchronize do
        @all_done.wait(@pending_lock) until @pending <= 0
      end
    end

    protected

    # Starts one pool thread.
    def create_thread
      Thread.new do
        loop do
          line = queue.pop
          begin
            worker.call(line)
          rescue Exception => e
            # Deliberately broad: one misbehaving line must not kill the pool
            # thread, or queued work would never drain.
            $dlog_logger.warn("Exception in processing thread #{line} -- #{e.message}")
          ensure
            job_finished
          end
        end
      end
    end

    # Records one finished job and wakes #join when nothing is left.
    def job_finished
      @pending_lock.synchronize do
        @pending -= 1
        @all_done.broadcast if @pending <= 0
      end
    end
  end
end
@@ -0,0 +1,67 @@
1
+ require 'digest/md5'
2
module DLogReader
  # Reads a log file line by line, yielding each line to the block given at
  # construction. The read position is persisted in a state file so a later
  # run resumes where the previous one stopped, provided the beginning of the
  # file still has the same content.
  class LogReader
    attr_accessor :filename
    attr_writer :statefile

    # filename:: path of the log file to read
    # b::        block called with every line
    def initialize(filename, &b)
      self.filename = filename
      @b = b
    end

    # Reads from the saved position (or the beginning) to the end of the
    # file, saving state every 100 lines and once more at the end.
    # Raises IOError when the file is not readable.
    def run
      # raise IOError.new("no file given") if filename.nil?
      raise IOError.new("File not readable") unless File.readable?(filename)
      # Read-only, block form: the log is never written to, and the handle is
      # closed even when the block raises.
      File.open(filename, "r") do |f|
        load_saved_state(f)
        # raise IOError.new("File is locked") unless f.flock(File::LOCK_EX | File::LOCK_NB)
        unless f.eof?
          last_report = Time.now
          line_count = 0
          f.each_line do |line|
            @b.call(line)
            line_count += 1
            next unless line_count % 100 == 0

            time_passed = Time.now - last_report
            # guard against a zero interval (Infinity#to_i raises)
            rate = time_passed > 0 ? (100.0 / time_passed.to_f).to_i : 0
            $dlog_logger.info( "#{Time.now.to_s} #{filename}: Processed (#{line_count}) lines [#{rate} lines/s]")
            last_report = Time.now
            save_state(f)
          end
          save_state(f)
        end
        # f.flock(File::LOCK_UN)
      end
    end

    # Path of the state file; defaults to /tmp/log_state_<log basename>.
    def statefile
      @statefile ||= begin
        log_basename = File.basename(filename)
        File.join("/tmp", "log_state_#{log_basename}")
      end
    end

    protected

    # Positions +log_filehandle+ at the saved offset when the state file
    # exists, the log is at least that long, and the digest of its beginning
    # still matches. In every other case reading starts at the beginning.
    def load_saved_state(log_filehandle)
      return unless File.exist?(statefile)

      state = File.read(statefile)
      return if state.empty?

      # NOTE(review): Marshal.load must only ever see trusted data, and the
      # default state file lives in /tmp -- consider a safer location/format.
      pos, l_digest = Marshal.load(state)
      return if log_filehandle.stat.size < pos

      if digest(log_filehandle, pos) == l_digest
        log_filehandle.pos = pos
      else
        # digest leaves the handle at +pos+; a different file must be read
        # from the start.
        log_filehandle.pos = 0
      end
    end

    # Writes the current position and the digest for it to the state file.
    def save_state(log_filehandle)
      File.open(statefile, "w") do |f|
        f.write(Marshal.dump([log_filehandle.pos, digest(log_filehandle, log_filehandle.pos)]))
      end
    end

    # MD5 hex digest of the first min(position, 50) bytes of the file.
    # Side effect: the handle is left positioned at +position+.
    def digest(log_filehandle, position)
      log_filehandle.pos = 0
      read_length = [position, 50].min
      l = log_filehandle.read(read_length)
      f_digest = Digest::MD5.hexdigest(l)
      log_filehandle.pos = position
      f_digest
    end
  end
end
@@ -0,0 +1,12 @@
1
module DLogReader
  # Abstract base for strategies that decide which log file to read.
  # The identity strategy is the simplest: return the file that was passed
  # in. Rotating log files need more complex strategies.
  class Selector
    # Determines the file to process from a file or directory path.
    # The base implementation always raises NotImplementedError.
    def file_to_process(file_or_dir)
      raise NotImplementedError, "file_to_process not implemented. Are you sure you created a conrete class?"
    end
  end
end
@@ -0,0 +1,43 @@
1
module DLogReader
  # This class chooses the oldest log file in the directory that matches the
  # input filename. This should work with a variety of log rotating schemes:
  # including copytruncate and date suffix.
  class RotatingLog < Selector

    # Callables taking a path; a file is skipped when any of them returns a
    # truthy value.
    attr_accessor :ignore_conditions

    def initialize
      self.ignore_conditions = []
      # a symlink to a file in the same directory would be a duplicate
      self.ignore_conditions << lambda { |x| symlink_file_in_dir?(x) }
    end

    # Returns the least recently modified candidate, or nil when none match.
    # For a directory every entry is a candidate; for a file path, every
    # entry of its directory whose name starts with the file's basename.
    def file_to_process(file_or_dir)
      if File.directory?(file_or_dir)
        directory = file_or_dir
        basename = '/' # File.join collapses the separator: pattern is "<dir>/*"
      else
        directory = File.dirname(file_or_dir)
        basename = File.basename(file_or_dir)
      end
      oldest_logfile(directory, basename)
    end

    protected

    # Oldest (by mtime) non-rejected entry matching "<directory>/<basename>*".
    def oldest_logfile(directory, basename)
      candidates = Dir[File.join(directory, "#{basename}*")].reject { |path| reject?(path) }
      # File.mtime does not open the file, so no handles are leaked.
      candidates.min_by { |path| File.mtime(path) }
    end

    # True when any ignore condition matches +filename+.
    def reject?(filename)
      ignore_conditions.any? { |condition| condition.call(filename) }
    end

    # returns true if filename is a symlink and its referring to a file already inside the current directory
    def symlink_file_in_dir?(filename)
      return false unless File.symlink?(filename)

      link_dir = File.expand_path(File.dirname(filename))
      # A relative link target is relative to the link's own directory;
      # resolve it before comparing so "x_current -> x_00001" is recognised.
      target = File.expand_path(File.readlink(filename), link_dir)
      File.dirname(target) == link_dir
    end
  end
end
@@ -0,0 +1,7 @@
1
module DLogReader
  # Mixin giving access to the shared logger.
  module Util
    # Returns the global $dlog_logger.
    def logger
      shared_logger = $dlog_logger
      shared_logger
    end
  end
end
@@ -0,0 +1,25 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
+ require 'distributed_logreader/archiver/date_dir'
3
+ require 'fileutils'
4
+
5
describe "DLogReader::DateDir" do
  before(:all) do
    fixtures = File.join(File.dirname(__FILE__), '..', 'fixtures')
    # work on a copy so the fixture itself is never moved away
    @file_path = File.join(fixtures, 'test_file2')
    FileUtils.cp(File.join(fixtures, 'test_file'), @file_path)
    @base_backup_dir = File.join(fixtures, 'temp_backup_dir')
    time = Time.now
    @backup_dir = File.join([@base_backup_dir, time.year.to_s, time.month.to_s, time.day.to_s])
    @archiver = DLogReader::DateDir.new(@base_backup_dir)
  end

  describe "backup" do
    it "should move file into Y/M/D backup directory" do
      @archiver.archive(@file_path)
      File.exist?(@backup_dir).should == true
      # the original must be gone, otherwise nothing was actually moved
      File.exist?(@file_path).should == false
    end
  end

  after(:all) do
    FileUtils.rm_r(@base_backup_dir)
  end
end
@@ -0,0 +1,13 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
# The class under test is DLogReader::Archiver; the label names it as such.
describe "DLogReader::Archiver" do
  before(:all) do
    @archiver = DLogReader::Archiver.new
  end

  describe "archive" do
    it "should raise NotImplementedError" do
      lambda { @archiver.archive('dummy_file') }.should raise_error(NotImplementedError)
    end
  end
end
@@ -0,0 +1,26 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
+ require 'fileutils'
3
+
4
# The class under test is DLogReader::RotaterReader.
describe "DLogReader::RotaterReader" do
  before(:all) do
    fixtures = File.join(File.dirname(__FILE__), '..', 'fixtures')
    # work on a copy so the fixture itself is never archived
    @file_path = File.join(fixtures, 'test_file2')
    FileUtils.cp(File.join(fixtures, 'test_file'), @file_path)
    @base_backup_dir = File.join(fixtures, 'temp_backup_dir')
    time = Time.now
    @backup_dir = File.join([@base_backup_dir, time.year.to_s, time.month.to_s, time.day.to_s])
    @logreader = DLogReader::RotaterReader.new(@file_path, @base_backup_dir, lambda { |x| puts x })
  end

  describe "process" do
    it 'should archive the log file into the dated backup directory' do
      @logreader.process
      File.exist?(@backup_dir).should == true
      File.exist?(@file_path).should == false
    end
  end

  after(:all) do
    FileUtils.rm_r(@base_backup_dir)
    FileUtils.rm_r(@logreader.log_reader.statefile)
  end
end
@@ -0,0 +1,15 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
+ require 'fileutils'
3
+
4
# The class under test is DLogReader::ScribeReader.
describe "DLogReader::ScribeReader" do
  before(:all) do
    @file_path = File.join(File.dirname(__FILE__), '..', 'fixtures', 'virality_metrics')
    @logreader = DLogReader::ScribeReader.new(@file_path, 'tmp', lambda { |x| puts x })
  end

  describe "current" do
    it 'should say that the log_file is the currently rotated one' do
      @logreader.current?.should == true
    end
  end
end
@@ -0,0 +1,20 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+ require 'fileutils'
3
+
4
# The class under test is DLogReader::DistributedLogReader.
describe "DLogReader::DistributedLogReader" do
  before(:all) do
    fixtures = File.join(File.dirname(__FILE__), 'fixtures')
    @file_path = File.join(fixtures, 'test_file2')
    FileUtils.cp(File.join(fixtures, 'test_file'), @file_path)
    @logreader = DLogReader::DistributedLogReader.new(@file_path, lambda { |x| puts x })
  end

  describe "process" do
    it 'should process the file without raising' do
      lambda { @logreader.process }.should_not raise_error
    end
  end

  after(:all) do
    FileUtils.rm_r(@logreader.log_reader.statefile)
    # this reader does not archive, so the copied fixture has to be removed here
    FileUtils.rm_f(@file_path)
  end
end
@@ -0,0 +1,17 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
+ require 'distributed_logreader/distributer/simple_thread_pool'
3
+
4
describe "DLogReader::SimpleThreadPool" do
  before(:all) do
    identity = lambda { |x| x }
    @thread_pool = DLogReader::SimpleThreadPool.new(identity, 10)
  end

  describe "process" do
    # pushes 100 lines through a 10-thread pool and waits for it to drain
    it 'should process with lots of threads' do
      (0...100).each { |n| @thread_pool.process(n.to_s) }
      @thread_pool.join
    end
  end
end
@@ -0,0 +1,13 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+ require 'distributed_logreader/distributer'
3
+
4
describe "DLogReader::Distributer" do
  before(:all) do
    @distributer = DLogReader::Distributer.new(lambda { |x| x })
  end

  describe "process" do
    # Distributer#process calls the worker inline; it does not raise.
    it "should call the worker with the line and return its result" do
      @distributer.process('a line').should == 'a line'
    end
  end
end
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
@@ -0,0 +1,4 @@
1
+ This is a test log file.
2
+ With many lines.
3
+ Lots and Lots of lines.
4
+ Just kidding this is the last line.
@@ -0,0 +1,7 @@
1
+ asdf
2
+ asdf
3
+ asdf
4
+ asdf
5
+ asdfa
6
+ sdfa
7
+ sdfas
@@ -0,0 +1,7 @@
1
+ asdf
2
+ asdf
3
+ asdf
4
+ asdf
5
+ asdfa
6
+ sdfa
7
+ sdfas
@@ -0,0 +1,57 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+ require 'distributed_logreader/log_reader'
3
+ require 'fileutils'
4
+
5
describe "DLogReader::LogReader" do
  before(:all) do
    test_file = File.join(File.dirname(__FILE__), 'fixtures', 'test_file')
    FileUtils.mkdir(File.join(File.dirname(__FILE__), 'fixtures', 'logreading')) rescue nil
    @test_cp = File.join(File.dirname(__FILE__), 'fixtures', 'logreading', 'test')
    FileUtils.cp(test_file, @test_cp)
    test_fh = File.open(test_file)
    @test_fh = test_fh # kept so after(:all) can close it

    @reader = DLogReader::LogReader.new(@test_cp) do |line|
      unless test_fh.readline == line
        raise RuntimeError.new, 'you messed up bud'
      end
    end
    @test_line = "this is an added line. this should be read first\n"
    @state_writer = DLogReader::LogReader.new(@test_cp){|line| line;}
    @state_reader = DLogReader::LogReader.new(@test_cp) do |line|
      unless line == @test_line
        raise RuntimeError.new, 'you messed up worse'
      end
    end
    # All three readers share one state file; a copy left over from an
    # earlier run would make the first example resume mid-file.
    FileUtils.rm_f(@reader.statefile)
  end

  describe "run" do
    it 'should read log files' do
      lambda{@reader.run}.should_not raise_error
    end

    it 'should resume from last access' do
      #lets read to the end of file and write state
      lambda{@state_writer.run}.should_not raise_error
      File.open(@test_cp, 'a') { |fh| fh.write(@test_line) }
      lambda{@state_reader.run}.should_not raise_error
    end

    it 'should detect if log is different from last and to start from beg of file' do
      lambda{@state_writer.run}.should_not raise_error
      File.open(@test_cp, 'w') { |fh| fh.write(@test_line) }
      lambda{@state_reader.run}.should_not raise_error
      File.open(@test_cp, 'w') { |fh| fh.write('') }
      lambda{@state_reader.run}.should_not raise_error
    end
  end

  after(:all) do
    @test_fh.close
    FileUtils.rm_f(@reader.statefile)
    FileUtils.rm_r(File.dirname(@test_cp))
  end
end
@@ -0,0 +1,24 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
+ require 'distributed_logreader/selector/rotating_log'
3
+ require 'fileutils'
4
+
5
+ describe "DLogReader::RotatingLog" do
6
+ before(:all) do
7
+ @chooser = DLogReader::RotatingLog.new
8
+ end
9
+
10
+ describe "file_to_process" do
11
+ it "should pick the oldest file for logs in copytruncate format (file)" do
12
+ @chooser.file_to_process(File.join(File.dirname(__FILE__), '..', 'fixtures', 'copytruncate', 'test')).should == File.join(File.dirname(__FILE__), '..', 'fixtures', 'copytruncate', 'test.1')
13
+ end
14
+
15
+ it "should pick the oldest file in timestamp suffix log format (dirname)" do
16
+ @chooser.file_to_process(File.join(File.dirname(__FILE__), '..', 'fixtures', 'logrotate')).should == File.join(File.dirname(__FILE__), '..', 'fixtures', 'logrotate', 'test-20090101')
17
+ end
18
+
19
+ it "should pick the oldest file ignoring symlinks pointing to files already in dir" do
20
+ @chooser.file_to_process(File.join(File.dirname(__FILE__), '..', 'fixtures', 'symlink')).should == File.join(File.dirname(__FILE__), '..', 'fixtures', 'symlink', 'test')
21
+ end
22
+ end
23
+
24
+ end
@@ -0,0 +1,13 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
+ describe "DLogReader::Selector" do
4
+ before(:all) do
5
+ @chooser = DLogReader::Selector.new
6
+ end
7
+
8
+ describe "file_to_process" do
9
+ it "should raise NotImplementedError" do
10
+ lambda{ @chooser.file_to_process('dummy_file') }.should raise_error(NotImplementedError)
11
+ end
12
+ end
13
+ end
@@ -0,0 +1,9 @@
1
+ require 'spec'
2
+
3
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
4
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
5
+ require 'distributed_logreader'
6
+
7
+ Spec::Runner.configure do |config|
8
+
9
+ end
metadata ADDED
@@ -0,0 +1,107 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: distributed_logreader
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.11.0
5
+ platform: ruby
6
+ authors:
7
+ - Gary Tsang
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-10-27 00:00:00 -07:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description:
17
+ email: gary@garru.com
18
+ executables: []
19
+
20
+ extensions: []
21
+
22
+ extra_rdoc_files:
23
+ - LICENSE
24
+ - README.rdoc
25
+ files:
26
+ - .gitignore
27
+ - LICENSE
28
+ - README.rdoc
29
+ - Rakefile
30
+ - VERSION
31
+ - distributed_logreader.gemspec
32
+ - lib/distributed_logreader.rb
33
+ - lib/distributed_logreader/achiver.rb
34
+ - lib/distributed_logreader/archiver/date_dir.rb
35
+ - lib/distributed_logreader/distributed_log_reader.rb
36
+ - lib/distributed_logreader/distributed_log_reader/rotater_reader.rb
37
+ - lib/distributed_logreader/distributed_log_reader/scribe_reader.rb
38
+ - lib/distributed_logreader/distributer.rb
39
+ - lib/distributed_logreader/distributer/mutex_counter.rb
40
+ - lib/distributed_logreader/distributer/pandemic_processor.rb
41
+ - lib/distributed_logreader/distributer/simple_forked_process.rb
42
+ - lib/distributed_logreader/distributer/simple_thread_pool.rb
43
+ - lib/distributed_logreader/log_reader.rb
44
+ - lib/distributed_logreader/selector.rb
45
+ - lib/distributed_logreader/selector/rotating_log.rb
46
+ - lib/distributed_logreader/util.rb
47
+ - spec/archiver/date_dir_spec.rb
48
+ - spec/archiver_spec.rb
49
+ - spec/distributed_log_reader/rotater_reader_spec.rb
50
+ - spec/distributed_log_reader/scribe_reader_spec.rb
51
+ - spec/distributed_log_reader_spec.rb
52
+ - spec/distributer/simple_thread_pool_spec.rb
53
+ - spec/distributer_spec.rb
54
+ - spec/fixtures/copytruncate/test
55
+ - spec/fixtures/copytruncate/test.1
56
+ - spec/fixtures/copytruncate/test_current
57
+ - spec/fixtures/logrotate/test-20090101
58
+ - spec/fixtures/logrotate/test-20090102
59
+ - spec/fixtures/symlink/test
60
+ - spec/fixtures/symlink/test_older_sym
61
+ - spec/fixtures/test_file
62
+ - spec/fixtures/virality_metrics/test
63
+ - spec/fixtures/virality_metrics/virality_metrics_current
64
+ - spec/log_reader_spec.rb
65
+ - spec/selector/rotating_log_spec.rb
66
+ - spec/selector_spec.rb
67
+ - spec/spec_helper.rb
68
+ has_rdoc: true
69
+ homepage: http://github.com/garru/distributed_logreader
70
+ licenses: []
71
+
72
+ post_install_message:
73
+ rdoc_options:
74
+ - --charset=UTF-8
75
+ require_paths:
76
+ - lib
77
+ required_ruby_version: !ruby/object:Gem::Requirement
78
+ requirements:
79
+ - - ">="
80
+ - !ruby/object:Gem::Version
81
+ version: "0"
82
+ version:
83
+ required_rubygems_version: !ruby/object:Gem::Requirement
84
+ requirements:
85
+ - - ">="
86
+ - !ruby/object:Gem::Version
87
+ version: "0"
88
+ version:
89
+ requirements: []
90
+
91
+ rubyforge_project:
92
+ rubygems_version: 1.3.5
93
+ signing_key:
94
+ specification_version: 3
95
+ summary: incomplete distributed log reader. plan to use pandemic to distribute
96
+ test_files:
97
+ - spec/archiver/date_dir_spec.rb
98
+ - spec/archiver_spec.rb
99
+ - spec/distributed_log_reader/rotater_reader_spec.rb
100
+ - spec/distributed_log_reader/scribe_reader_spec.rb
101
+ - spec/distributed_log_reader_spec.rb
102
+ - spec/distributer/simple_thread_pool_spec.rb
103
+ - spec/distributer_spec.rb
104
+ - spec/log_reader_spec.rb
105
+ - spec/selector/rotating_log_spec.rb
106
+ - spec/selector_spec.rb
107
+ - spec/spec_helper.rb