distributed_logreader 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. data/.gitignore +7 -0
  2. data/LICENSE +20 -0
  3. data/README.rdoc +8 -0
  4. data/Rakefile +48 -0
  5. data/VERSION +1 -0
  6. data/distributed_logreader.gemspec +90 -0
  7. data/lib/distributed_logreader.rb +11 -0
  8. data/lib/distributed_logreader/achiver.rb +10 -0
  9. data/lib/distributed_logreader/archiver/date_dir.rb +36 -0
  10. data/lib/distributed_logreader/distributed_log_reader.rb +46 -0
  11. data/lib/distributed_logreader/distributed_log_reader/rotater_reader.rb +21 -0
  12. data/lib/distributed_logreader/distributed_log_reader/scribe_reader.rb +53 -0
  13. data/lib/distributed_logreader/distributer.rb +13 -0
  14. data/lib/distributed_logreader/distributer/mutex_counter.rb +48 -0
  15. data/lib/distributed_logreader/distributer/pandemic_processor.rb +149 -0
  16. data/lib/distributed_logreader/distributer/simple_forked_process.rb +47 -0
  17. data/lib/distributed_logreader/distributer/simple_thread_pool.rb +43 -0
  18. data/lib/distributed_logreader/log_reader.rb +67 -0
  19. data/lib/distributed_logreader/selector.rb +12 -0
  20. data/lib/distributed_logreader/selector/rotating_log.rb +43 -0
  21. data/lib/distributed_logreader/util.rb +7 -0
  22. data/spec/archiver/date_dir_spec.rb +25 -0
  23. data/spec/archiver_spec.rb +13 -0
  24. data/spec/distributed_log_reader/rotater_reader_spec.rb +26 -0
  25. data/spec/distributed_log_reader/scribe_reader_spec.rb +15 -0
  26. data/spec/distributed_log_reader_spec.rb +20 -0
  27. data/spec/distributer/simple_thread_pool_spec.rb +17 -0
  28. data/spec/distributer_spec.rb +13 -0
  29. data/spec/fixtures/copytruncate/test +0 -0
  30. data/spec/fixtures/copytruncate/test.1 +0 -0
  31. data/spec/fixtures/copytruncate/test_current +0 -0
  32. data/spec/fixtures/logrotate/test-20090101 +0 -0
  33. data/spec/fixtures/logrotate/test-20090102 +0 -0
  34. data/spec/fixtures/symlink/test +0 -0
  35. data/spec/fixtures/symlink/test_older_sym +0 -0
  36. data/spec/fixtures/test_file +4 -0
  37. data/spec/fixtures/virality_metrics/test +7 -0
  38. data/spec/fixtures/virality_metrics/virality_metrics_current +7 -0
  39. data/spec/log_reader_spec.rb +57 -0
  40. data/spec/selector/rotating_log_spec.rb +24 -0
  41. data/spec/selector_spec.rb +13 -0
  42. data/spec/spec_helper.rb +9 -0
  43. metadata +107 -0
@@ -0,0 +1,7 @@
1
+ *.sw?
2
+ .DS_Store
3
+ coverage
4
+ rdoc
5
+ pkg
6
+ *log
7
+ temp_backup_dir/*
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 Gary Tsang
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,8 @@
1
+ = Distributed Log Reader
2
+
3
+ Distributed Log Reader is a log reader that allows for parallel processing. It is highly configurable,
4
+ allowing you to choose different parallel distribution and archiving strategies.
5
+
6
+ == Copyright
7
+
8
+ Copyright (c) 2009 Gary Tsang. See LICENSE for details.
@@ -0,0 +1,48 @@
1
+ require 'rubygems'
2
+ require 'rake'
3
+
4
+ begin
5
+ require 'jeweler'
6
+ Jeweler::Tasks.new do |gem|
7
+ gem.name = "distributed_logreader"
8
+ gem.summary = %Q{incomplete distribute log reader. planned to use pandemic to distribute work}
9
+ gem.email = "gary@garru.com"
10
+ gem.homepage = "http://github.com/garru/distributed_logreader"
11
+ gem.authors = ["Gary Tsang"]
12
+ # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
13
+ end
14
+
15
+ rescue LoadError
16
+ puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
17
+ end
18
+
19
+ require 'spec/rake/spectask'
20
+ Spec::Rake::SpecTask.new(:spec) do |spec|
21
+ spec.libs << 'lib' << 'spec'
22
+ spec.spec_files = FileList['spec/**/*_spec.rb']
23
+ end
24
+
25
+ Spec::Rake::SpecTask.new(:rcov) do |spec|
26
+ spec.libs << 'lib' << 'spec'
27
+ spec.pattern = 'spec/**/*_spec.rb'
28
+ spec.rcov = true
29
+ end
30
+
31
+
32
+ task :default => :spec
33
+
34
+ require 'rake/rdoctask'
35
+ Rake::RDocTask.new do |rdoc|
36
+ if File.exist?('VERSION.yml')
37
+ config = YAML.load(File.read('VERSION.yml'))
38
+ version = "#{config[:major]}.#{config[:minor]}.#{config[:patch]}"
39
+ else
40
+ version = ""
41
+ end
42
+
43
+ rdoc.rdoc_dir = 'rdoc'
44
+ rdoc.title = "distributed_logreader #{version}"
45
+ rdoc.rdoc_files.include('README*')
46
+ rdoc.rdoc_files.include('lib/**/*.rb')
47
+ end
48
+
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.11.0
@@ -0,0 +1,90 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run `rake gemspec`
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{distributed_logreader}
8
+ s.version = "0.11.0"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["Gary Tsang"]
12
+ s.date = %q{2009-10-27}
13
+ s.email = %q{gary@garru.com}
14
+ s.extra_rdoc_files = [
15
+ "LICENSE",
16
+ "README.rdoc"
17
+ ]
18
+ s.files = [
19
+ ".gitignore",
20
+ "LICENSE",
21
+ "README.rdoc",
22
+ "Rakefile",
23
+ "VERSION",
24
+ "distributed_logreader.gemspec",
25
+ "lib/distributed_logreader.rb",
26
+ "lib/distributed_logreader/achiver.rb",
27
+ "lib/distributed_logreader/archiver/date_dir.rb",
28
+ "lib/distributed_logreader/distributed_log_reader.rb",
29
+ "lib/distributed_logreader/distributed_log_reader/rotater_reader.rb",
30
+ "lib/distributed_logreader/distributed_log_reader/scribe_reader.rb",
31
+ "lib/distributed_logreader/distributer.rb",
32
+ "lib/distributed_logreader/distributer/mutex_counter.rb",
33
+ "lib/distributed_logreader/distributer/pandemic_processor.rb",
34
+ "lib/distributed_logreader/distributer/simple_forked_process.rb",
35
+ "lib/distributed_logreader/distributer/simple_thread_pool.rb",
36
+ "lib/distributed_logreader/log_reader.rb",
37
+ "lib/distributed_logreader/selector.rb",
38
+ "lib/distributed_logreader/selector/rotating_log.rb",
39
+ "lib/distributed_logreader/util.rb",
40
+ "spec/archiver/date_dir_spec.rb",
41
+ "spec/archiver_spec.rb",
42
+ "spec/distributed_log_reader/rotater_reader_spec.rb",
43
+ "spec/distributed_log_reader/scribe_reader_spec.rb",
44
+ "spec/distributed_log_reader_spec.rb",
45
+ "spec/distributer/simple_thread_pool_spec.rb",
46
+ "spec/distributer_spec.rb",
47
+ "spec/fixtures/copytruncate/test",
48
+ "spec/fixtures/copytruncate/test.1",
49
+ "spec/fixtures/copytruncate/test_current",
50
+ "spec/fixtures/logrotate/test-20090101",
51
+ "spec/fixtures/logrotate/test-20090102",
52
+ "spec/fixtures/symlink/test",
53
+ "spec/fixtures/symlink/test_older_sym",
54
+ "spec/fixtures/test_file",
55
+ "spec/fixtures/virality_metrics/test",
56
+ "spec/fixtures/virality_metrics/virality_metrics_current",
57
+ "spec/log_reader_spec.rb",
58
+ "spec/selector/rotating_log_spec.rb",
59
+ "spec/selector_spec.rb",
60
+ "spec/spec_helper.rb"
61
+ ]
62
+ s.homepage = %q{http://github.com/garru/distributed_logreader}
63
+ s.rdoc_options = ["--charset=UTF-8"]
64
+ s.require_paths = ["lib"]
65
+ s.rubygems_version = %q{1.3.5}
66
+ s.summary = %q{incomplete distributed log reader. plan to use pandemic to distribute}
67
+ s.test_files = [
68
+ "spec/archiver/date_dir_spec.rb",
69
+ "spec/archiver_spec.rb",
70
+ "spec/distributed_log_reader/rotater_reader_spec.rb",
71
+ "spec/distributed_log_reader/scribe_reader_spec.rb",
72
+ "spec/distributed_log_reader_spec.rb",
73
+ "spec/distributer/simple_thread_pool_spec.rb",
74
+ "spec/distributer_spec.rb",
75
+ "spec/log_reader_spec.rb",
76
+ "spec/selector/rotating_log_spec.rb",
77
+ "spec/selector_spec.rb",
78
+ "spec/spec_helper.rb"
79
+ ]
80
+
81
+ if s.respond_to? :specification_version then
82
+ current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
83
+ s.specification_version = 3
84
+
85
+ if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
86
+ else
87
+ end
88
+ else
89
+ end
90
+ end
@@ -0,0 +1,11 @@
1
+ require 'distributed_logreader/selector.rb'
2
+ require 'distributed_logreader/achiver.rb'
3
+ require 'distributed_logreader/util.rb'
4
+ require 'distributed_logreader/distributed_log_reader'
5
+ require 'distributed_logreader/distributed_log_reader/rotater_reader'
6
+ require 'distributed_logreader/distributed_log_reader/scribe_reader'
7
+ require 'logger'
8
+
9
# Process-wide logger used by every DLogReader component through the
# $dlog_logger global. The system log file is preferred, but if it cannot be
# opened (for example when running without permission to write /var/log) we
# fall back to STDERR so that merely requiring the library never raises.
$dlog_logger = begin
  Logger.new("/var/log/distributed_logreader.log")
rescue SystemCallError, IOError
  Logger.new($stderr)
end
$dlog_logger.level = Logger::DEBUG
$dlog_logger.datetime_format = "%Y-%m-%d %H:%M:%S "
@@ -0,0 +1,10 @@
1
+ # This abstract class defines the interface that handles log file archiving
2
+
3
module DLogReader
  # Abstract base class for log archiving strategies. Concrete subclasses
  # override #archive to decide what happens to a log file once it has been
  # processed.
  class Archiver
    # Archive +filename+ however the concrete strategy sees fit.
    #
    # Raises NotImplementedError unless overridden by a subclass.
    def archive(filename)
      raise NotImplementedError, "archive not implemented. Are you sure you created a concrete class?"
    end
  end
end
@@ -0,0 +1,36 @@
1
+ require 'fileutils'
2
module DLogReader
  # Archiver that moves a processed log file into
  # <base_backup_dir>/<year>/<month>/<day>/ and then compresses it with bzip2.
  class DateDir < Archiver
    include FileUtils
    attr_accessor :base_backup_dir

    # backup_dir:: root directory under which the dated directories are
    #              created; when nil, #archive does nothing.
    def initialize(backup_dir)
      self.base_backup_dir = backup_dir
    end

    # Move +file+ into today's backup directory and bzip2 it there.
    # Does nothing when no base backup directory is configured.
    def archive(file)
      return if base_backup_dir.nil?

      # Resolve the destination once so the move and the compression agree on
      # the directory even if the date changes between the two steps.
      destination = backup_dir
      mv(file, destination)
      # Argument-list form of system: the path is never parsed by a shell, so
      # spaces or shell metacharacters in the file name are harmless.
      system('bzip2', File.join(destination, File.basename(file)))
    end

    protected

    # Returns the path of today's backup directory (year/month/day without
    # zero padding), creating any missing directories on the way.
    def backup_dir
      now = Time.now
      dir = File.join(base_backup_dir, now.year.to_s, now.month.to_s, now.day.to_s)
      mkdir_p(dir)
      dir
    end
  end
end
@@ -0,0 +1,46 @@
1
+ require File.join(File.dirname(__FILE__), 'distributer', 'simple_thread_pool')
2
+ require File.join(File.dirname(__FILE__), 'distributer', 'simple_forked_process')
3
+ require File.join(File.dirname(__FILE__), 'selector', 'rotating_log')
4
+ require File.join(File.dirname(__FILE__), 'archiver', 'date_dir')
5
+ require File.join(File.dirname(__FILE__), 'log_reader')
6
+
7
module DLogReader
  # Reads a log file line by line and hands every line to a distributer,
  # which runs the worker on it. Subclasses customise behaviour through
  # #log_file, #pre_process and #post_process.
  class DistributedLogReader
    attr_accessor :distributer, :filename
    attr_reader :log_reader

    # filename::    path handed to #log_file
    # worker::      callable invoked with each line
    # num_threads:: size of the thread pool used by the distributer
    def initialize(filename, worker, num_threads = 100)
      self.filename = filename
      # self.distributer = SimpleForked.new(worker, 5, num_threads)
      self.distributer = SimpleThreadPool.new(worker, num_threads)
    end

    # selector/archiver seem to be strongly connected. it's possible it
    # needs to be moved into LogReader
    #
    # Runs the hooks around a full pass over #log_file, logging the start and
    # the number of lines handed to the distributer.
    def process
      pre_process

      $dlog_logger.info("Started #{log_file}:")
      lines_processed = 0
      dispatch = lambda do |line|
        distributer.process(line)
        lines_processed += 1
      end
      @log_reader = LogReader.new(log_file, &dispatch)
      @log_reader.run
      distributer.join
      $dlog_logger.info("Finished #{log_file}: Processed (#{lines_processed}) lines")
      post_process
    end

    # The file that will actually be read; here simply the configured name.
    def log_file
      filename
    end

    # Hook invoked before reading starts. No-op by default.
    def pre_process; end

    # Hook invoked after all lines were dispatched. No-op by default.
    def post_process; end
  end
end
@@ -0,0 +1,21 @@
1
module DLogReader
  # DistributedLogReader that lets a RotatingLog selector pick the file to
  # read and archives that file with a DateDir archiver afterwards.
  class RotaterReader < DistributedLogReader
    attr_accessor :selector, :archiver
    attr_reader :log_reader

    # backupdir:: base directory handed to the DateDir archiver
    def initialize(filename, backupdir, worker, num_threads = 10)
      super(filename, worker, num_threads)
      self.selector = RotatingLog.new
      self.archiver = DateDir.new(backupdir)
    end

    # File chosen by the selector for +filename+; the choice is made on the
    # first call and remembered for later calls.
    def log_file
      return @log_file if @log_file

      @log_file = selector.file_to_process(filename)
    end

    # After processing, hand the selected file to the archiver.
    def post_process
      archiver.archive(log_file)
    end
  end
end
@@ -0,0 +1,53 @@
1
module DLogReader
  # Reads scribe-style log directories: a RotatingLog selector picks the
  # file to read (skipping scribe_stats files), every line is passed straight
  # to the worker, and the file is archived afterwards unless it is the
  # "current" file still being written to.
  class ScribeReader
    attr_accessor :selector, :archiver, :filename
    attr_reader :log_reader

    # filename::    file or directory handed to the selector
    # backupdir::   base directory for the DateDir archiver
    # worker::      callable invoked with each line
    # num_threads:: accepted for signature compatibility; not used here
    def initialize(filename, backupdir, worker, num_threads = 10)
      self.filename = filename
      self.selector = RotatingLog.new
      self.selector.ignore_conditions << lambda { |x| !x.match(/scribe_stats/).nil? }
      @worker = worker
      self.archiver = DateDir.new(backupdir)
    end

    # Run the worker on every unread line of #log_file, then #post_process.
    # A failing worker call is logged and does not stop the run.
    def process
      $dlog_logger.info("Started #{log_file}:")
      lines_processed = 0
      @log_reader = LogReader.new(log_file) do |line|
        begin
          @worker.call(line)
        rescue StandardError => e
          # StandardError only: interrupts and exit requests must propagate.
          $dlog_logger.warn("Exception thrown in worker #{e.message}")
        end
        lines_processed += 1
      end
      @log_reader.run
      $dlog_logger.info("Finished #{log_file}: Processed (#{lines_processed}) lines")
      post_process
    end

    # File chosen by the selector; chosen once and then remembered.
    def log_file
      @log_file ||= selector.file_to_process(filename)
    end

    # Archive the processed file and drop its state file, unless the file is
    # the one currently being written to.
    def post_process
      return if current?

      archiver.archive(log_file)
      statefile = @log_reader && @log_reader.statefile
      return unless statefile

      begin
        File.delete(statefile)
      rescue SystemCallError
        # Best effort: a missing or undeletable state file is not an error.
        nil
      end
    end

    # True when #log_file is the same file as the first entry in its
    # directory whose file name contains "current".
    def current?
      directory = File.dirname(log_file)
      # Match on the file name only, so a directory whose own path happens to
      # contain "current" does not make every entry look current.
      current_file = Dir[File.join(directory, "*")].detect { |x| File.basename(x).match(/current/) }
      !current_file.nil? && File.exist?(current_file) && File.identical?(current_file, log_file)
    end
  end
end
@@ -0,0 +1,13 @@
1
module DLogReader
  # Minimal distributer: runs the worker on each line directly, in the
  # calling thread.
  class Distributer
    attr_accessor :worker

    # worker:: any object responding to #call with one argument
    def initialize(worker)
      self.worker = worker
    end

    # Invoke the worker with +line+ and return whatever it returns.
    def process(line)
      handler = worker
      handler.call(line)
    end
  end
end
@@ -0,0 +1,48 @@
1
# Thread-safe counter. The running value is kept in @counter, which wraps
# back to 0 each time it would exceed @max; the number of wraps is kept in
# @resets so that #real_total can still report the full count.
class MutexCounter
  MAX = (2 ** 30) - 1

  # max:: value at which the internal counter wraps (defaults to MAX)
  def initialize(max = MAX)
    @mutex = Mutex.new
    @counter = 0
    @resets = 0
    @max = max
  end

  # Full count, including everything accumulated before wrap-arounds.
  def real_total
    @mutex.synchronize { (@resets * @max) + @counter }
  end
  alias_method :to_i, :real_total

  # Current value of the wrapping counter only (wraps not included).
  def value
    @mutex.synchronize { @counter }
  end

  # Increment by one and return the new value of the wrapping counter.
  def inc
    @mutex.synchronize do
      if @counter >= @max
        @counter = 0 # to avoid Bignum, it's about 4x slower
        @resets += 1
      end
      @counter += 1
    end
  end
  alias_method :next, :inc
  alias_method :succ, :inc

  # Decrement by one, never going below a total of zero, and return the new
  # value of the wrapping counter.
  def decr
    @mutex.synchronize do
      if @counter > 0
        @counter -= 1
      elsif @resets > 0
        # Borrow from the wrap count: a total of (resets * max) becomes
        # ((resets - 1) * max) + (max - 1), i.e. exactly one less.
        @resets -= 1
        @counter = @max - 1
      end
      @counter
    end
  end
  alias_method :pred, :decr
  alias_method :prev, :decr
end
@@ -0,0 +1,149 @@
1
+ require 'rubygems'
2
+ #stolen from pandemic
3
module DLogReader
  # Forks a child process and feeds it jobs over a pipe. Constructing a
  # Processor forks: in the parent, .new returns an object used to submit
  # lines; in the child, the constructor starts worker threads and then blocks
  # reading jobs until the pipe is closed, at which point the child exits.
  #
  # Protocol as implemented below: the parent writes one newline-terminated
  # line per job; the child writes a single "|" character back for every job
  # it has finished. The parent counts outstanding jobs with a MutexCounter.
  class Processor
    # handler::     callable run in the child for every line
    # num_threads:: number of worker threads started in the child
    def initialize(handler, num_threads = 10)
      read_from_parent, write_to_child = IO.pipe
      read_from_child, write_to_parent = IO.pipe

      @child_process_id = fork
      if @child_process_id
        # I'm the parent
        write_to_parent.close
        read_from_parent.close
        @out = write_to_child
        @in = read_from_child
        @max_queue_size = 100
        @counter = MutexCounter.new
        @job_mutex = Mutex.new
        wait_for_responses
      else
        $dlog_logger.debug("Forked")
        # I'm the child
        write_to_child.close
        read_from_child.close
        @out = write_to_parent
        @in = read_from_parent
        @handler = handler
        @job_queue = Queue.new
        @response_queue = Queue.new
        wait_for_job_completion
        num_threads.times { create_thread }
        wait_for_jobs
      end
    end

    # Number of jobs sent to the child that have not been acknowledged yet.
    # Returns nil when called in the child.
    def num_jobs
      @counter.real_total if parent?
    end

    # In the parent: send +body+ to the child as one job, waiting while more
    # than @max_queue_size jobs are outstanding. In the child: run the handler
    # on +body+ and return its result.
    def process(body)
      if parent?
        while @counter.real_total > @max_queue_size
          $dlog_logger.debug("Max process queue size: #{@counter.real_total}")
          sleep(0.01)
        end
        body = (body.chomp + "\n")
        # Serialise the write and the count so concurrent callers cannot
        # interleave their lines on the pipe.
        @job_mutex.synchronize do
          @out.write(body)
          in_queue = @counter.inc
          $dlog_logger.debug("Parent: writing #{body.inspect} - #{in_queue}")
        end
      else
        $dlog_logger.debug("Child Processing #{body}")
        return @handler.call(body)
      end
    end

    # In the parent: write +status+ to the child and close both pipe ends; a
    # second call is a no-op. In the child: exit immediately with +status+.
    def close(status = 0)
      if parent?
        # Already closed: do nothing. Previously this case fell through to
        # Process.exit! and terminated the parent process.
        return unless child_alive?

        Process.detach(@child_process_id)
        @out.puts(status.to_s)
        @out.close
        @in.close
      else
        Process.exit!(status)
      end
    end

    # True once the parent has closed its pipe to the child. Always true in
    # the child, where child_alive? is false.
    def closed?
      !child_alive?
    end

    private

    # Child: start one worker thread that pops lines off the job queue, runs
    # the handler and queues an acknowledgement for the parent.
    def create_thread
      Thread.new do
        loop do
          line = @job_queue.pop
          begin
            # No mutex here: @job_mutex only exists in the parent, and Queue
            # already synchronises access between worker threads.
            process(line)
          rescue StandardError => e
            $dlog_logger.warn("Exception thrown in worker #{e.message}")
          ensure
            # Always acknowledge, otherwise the parent's outstanding-job
            # count would never return to zero after a failed job.
            @response_queue << :a
          end
          $dlog_logger.debug("Child: Finished #{line.inspect}")
        end
      end
    end

    # Parent: start a thread that consumes one acknowledgement character per
    # finished job and decrements the outstanding-job counter.
    def wait_for_responses
      Thread.new do
        begin
          loop do
            ready, = IO.select([@in], nil, nil)
            if ready
              @in.readchar
              in_queue = @counter.decr
              $dlog_logger.debug("Parent: Reading Response #{in_queue}")
            end
          end
        rescue EOFError, IOError
          # The pipe was closed (child exited or #close was called); nothing
          # more to read, so let the thread finish.
          nil
        end
      end
    end

    # Child: block reading job lines from the parent and queue them for the
    # worker threads. When the pipe reaches EOF, exit the child using the
    # integer value of the last line read (0 if none was read).
    def wait_for_jobs
      if child?
        line = nil
        while true
          $dlog_logger.debug("Child waiting")
          ready, = IO.select([@in], nil, nil)
          if ready && !@in.eof?
            line = @in.gets
            $dlog_logger.debug("Child: #{line.inspect}")
            @job_queue << line
          else
            self.close(line.to_i)
            break
          end
        end
      end
    end

    # Child: start a thread that writes one "|" to the parent for every
    # acknowledgement queued by the worker threads.
    def wait_for_job_completion
      if child?
        Thread.new do
          while true
            @response_queue.pop
            $dlog_logger.debug("Child: Writing To Parent")
            @out.write("|")
          end
        end
      end
    end

    # fork returns the child's pid in the parent and nil in the child.
    def parent?
      !!@child_process_id
    end

    def child?
      !parent?
    end

    # Parent only: true until #close has closed the read end of the pipe.
    def child_alive?
      parent? && !@in.closed?
    end
  end
end
@@ -0,0 +1,47 @@
1
+ require 'thread'
2
+ require File.join(File.dirname(__FILE__), 'pandemic_processor')
3
+ require File.join(File.dirname(__FILE__), 'mutex_counter')
4
module DLogReader
  # Distributer that spreads lines over several Processor instances. Lines
  # are placed on a shared queue; one feeder thread per processor pops lines
  # off that queue and passes them to its processor.
  class SimpleForked
    attr_accessor :num_threads_per_process, :worker, :thread_pool, :queue, :processors

    # worker::                  callable handed to every Processor
    # num_processes::           how many Processors to create
    # num_threads_per_process:: threads per Processor (nil means 10)
    def initialize(worker, num_processes = 3, num_threads_per_process = 10)
      self.worker = worker
      self.num_threads_per_process = (num_threads_per_process || 10)
      self.queue = Queue.new
      self.processors = []
      num_processes.times do |index|
        $dlog_logger.debug("Forking #{index} process")
        processors << create_process
      end
    end

    # Queue +line+ for processing by whichever feeder thread pops it first.
    def process(line)
      queue << line
    end

    # Poll until the shared queue is empty and no processor reports
    # outstanding jobs.
    def join
      outstanding = processors.inject(0) { |total, processor| total + processor.num_jobs }
      until queue.size <= 0 && outstanding <= 0
        sleep 0.1
        outstanding = processors.inject(0) { |total, processor| total + processor.num_jobs }
        $dlog_logger.debug("Shutting down #{outstanding} left")
      end
    end

    protected

    # Create one Processor plus the feeder thread that keeps handing it
    # lines from the shared queue; returns the Processor.
    def create_process
      Processor.new(worker, num_threads_per_process).tap do |processor|
        Thread.new do
          loop do
            line = queue.pop
            begin
              processor.process(line)
            rescue Exception => e
              $dlog_logger.warn("Exception in processing thread #{line} -- #{e.message}")
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,43 @@
1
+ require 'thread'
2
+
3
module DLogReader
  # Distributer backed by a fixed pool of threads. Lines are pushed on a
  # queue and each pool thread pops a line and runs the worker on it.
  class SimpleThreadPool
    attr_accessor :num_threads, :worker, :thread_pool, :queue, :max_queue_size

    # worker::      callable invoked with each line
    # num_threads:: number of threads started immediately
    def initialize(worker, num_threads = 5)
      self.worker = worker
      self.num_threads = num_threads
      self.queue = Queue.new
      self.max_queue_size = 100
      # Lines accepted by #process whose worker call has not finished yet.
      @in_flight = 0
      @in_flight_lock = Mutex.new
      self.thread_pool = Array.new(num_threads) { create_thread }
    end

    # Queue +line+ for a pool thread, waiting while the queue holds more
    # than max_queue_size entries.
    def process(line)
      sleep(0.01) while queue.size > max_queue_size
      @in_flight_lock.synchronize { @in_flight += 1 }
      queue << line
    end

    # Block until every line passed to #process has been fully handled.
    # Checking the queue size alone is not enough: a line leaves the queue
    # when a thread picks it up, before the worker has run on it.
    def join
      sleep 0.1 while busy?
    end

    protected

    # Start one pool thread that handles queued lines forever. A failing
    # worker call is logged and the thread moves on to the next line.
    def create_thread
      Thread.new do
        loop do
          line = queue.pop
          begin
            worker.call(line)
          rescue StandardError => e
            $dlog_logger.warn("Exception in processing thread #{line} -- #{e.message}")
          ensure
            @in_flight_lock.synchronize { @in_flight -= 1 }
          end
        end
      end
    end

    # True while lines are still queued or still being worked on.
    def busy?
      queue.size > 0 || @in_flight_lock.synchronize { @in_flight > 0 }
    end
  end
end
@@ -0,0 +1,67 @@
1
+ require 'digest/md5'
2
module DLogReader
  # Reads a log file line by line, passing each line to the block given at
  # construction. The read position is stored in a state file, together with
  # an MD5 of the start of the log, so that a later run resumes where the
  # previous one stopped, or starts over when the log was replaced.
  class LogReader
    attr_accessor :filename
    attr_writer :statefile

    # filename:: path of the log file to read
    # b::        block called with every line read
    def initialize(filename, &b)
      self.filename = filename
      @b = b
    end

    # Process all lines that have not been processed by a previous run.
    #
    # Raises IOError when the file is not readable.
    def run
      # raise IOError.new("no file given") if filename.nil?
      raise IOError, "File not readable" unless File.readable?(filename)

      # Block form closes the handle even if the callback raises. The file is
      # only ever read, so it is opened read-only.
      File.open(filename, "r") do |f|
        load_saved_state(f)
        # raise IOError.new("File is locked") unless f.flock(File::LOCK_EX | File::LOCK_NB)
        process_lines(f) unless f.eof?
        # f.flock(File::LOCK_UN)
      end
    end

    # Path of the state file; defaults to /tmp/log_state_<log basename>.
    # NOTE(review): the default is a predictable name in a shared directory;
    # callers that care can set a private location through #statefile=.
    def statefile
      @statefile ||= File.join("/tmp", "log_state_#{File.basename(filename)}")
    end

    protected

    # Feed every remaining line to the callback. Progress is logged and the
    # state saved every 100 lines, and the state is saved once more at the end.
    def process_lines(log_filehandle)
      last_report = Time.now
      line_count = 0
      log_filehandle.each_line do |line|
        @b.call(line)
        line_count += 1
        next unless (line_count % 100).zero?

        time_passed = (Time.now - last_report).to_f
        # Guard against a zero interval: Infinity#to_i raises FloatDomainError.
        rate = time_passed > 0 ? (100.0 / time_passed).to_i : 0
        $dlog_logger.info("#{Time.now.to_s} #{filename}: Processed (#{line_count}) lines [#{rate} lines/s]")
        last_report = Time.now
        save_state(log_filehandle)
      end
      save_state(log_filehandle)
    end

    # Position +log_filehandle+ at the saved offset when the state file is
    # usable and still describes this log; otherwise leave it at the start.
    def load_saved_state(log_filehandle)
      return unless File.exist?(statefile)

      pos, l_digest = read_state
      return unless pos.is_a?(Integer) && pos >= 0
      return if log_filehandle.stat.size < pos

      if digest(log_filehandle, pos) == l_digest
        log_filehandle.pos = pos
      else
        # digest leaves the handle at +pos+, so a mismatch must explicitly
        # go back to the beginning of the (different) file.
        log_filehandle.pos = 0
      end
    end

    # Returns the [position, digest] pair stored in the state file, or nil
    # when the file content cannot be decoded.
    def read_state
      # NOTE(security): Marshal.load must only be used on trusted data. The
      # format is kept so state files written by earlier runs stay readable;
      # keep the state file somewhere other users cannot write to.
      Marshal.load(File.binread(statefile))
    rescue ArgumentError, TypeError, EOFError
      nil
    end

    # Store the current position of +log_filehandle+ and the matching digest.
    def save_state(log_filehandle)
      File.open(statefile, "wb") do |f|
        f.write(Marshal.dump([log_filehandle.pos, digest(log_filehandle, log_filehandle.pos)]))
      end
    end

    # MD5 hex digest of the first min(position, 50) bytes of the file. The
    # handle is moved to +position+ before returning.
    def digest(log_filehandle, position)
      log_filehandle.pos = 0
      read_length = [position, 50].min
      head = log_filehandle.read(read_length).to_s
      log_filehandle.pos = position
      Digest::MD5.hexdigest(head)
    end
  end
end
@@ -0,0 +1,12 @@
1
module DLogReader
  # This abstract class defines the interface to decide which log
  # file to read. The identity strategy is the simplest: return the file
  # that was passed in. However, to handle rotating log files, we'll need
  # some more complex strategies.
  class Selector
    # Determines the file to process from the given file or directory path.
    #
    # Raises NotImplementedError unless overridden by a subclass.
    def file_to_process(file_or_dir)
      raise NotImplementedError, "file_to_process not implemented. Are you sure you created a concrete class?"
    end
  end
end
@@ -0,0 +1,43 @@
1
module DLogReader
  # This class chooses the oldest log file in the directory that matches the
  # input filename. This should work with a variety of log rotating schemes:
  # including copytruncate and date suffix.
  class RotatingLog < Selector
    # Callables taking a path; a candidate is skipped when any returns true.
    attr_accessor :ignore_conditions

    def initialize
      self.ignore_conditions = []
      self.ignore_conditions << lambda { |x| symlink_file_in_dir?(x) }
      # self.ignore_conditions << lambda{|x| true}
    end

    # Returns the oldest non-ignored file. For a directory every entry in it
    # is a candidate; for a file path, every entry in the same directory whose
    # name starts with that file's name. Returns nil when nothing qualifies.
    def file_to_process(file_or_dir)
      if File.directory?(file_or_dir)
        directory = file_or_dir
        basename = '/'
      else
        directory = File.dirname(file_or_dir)
        basename = File.basename(file_or_dir)
      end
      oldest_logfile(directory, basename)
    end

    protected

    # Candidate with the earliest modification time, or nil if there is none.
    def oldest_logfile(directory, basename)
      candidates = Dir[File.join(directory, "#{basename}*")].reject { |x| reject?(x) }
      # File.mtime reads the timestamp without opening (and leaking) a handle.
      candidates.min_by { |path| File.mtime(path) }
    end

    # True when any ignore condition matches +filename+.
    def reject?(filename)
      ignore_conditions.any? { |condition| condition.call(filename) }
    end

    # returns true if filename is a symlink and its referring to a file already inside the current directory
    def symlink_file_in_dir?(filename)
      return false unless File.symlink?(filename)

      # Resolve a relative link target against the link's own directory, so
      # that a link such as "test_sym -> test" is recognised as pointing into
      # the same directory.
      link_dir = File.expand_path(File.dirname(filename))
      target = File.expand_path(File.readlink(filename), link_dir)
      File.dirname(target) == link_dir
    end
  end
end
@@ -0,0 +1,7 @@
1
module DLogReader
  # Mixin giving including classes a #logger shortcut.
  module Util
    # Returns the library-wide logger held in the $dlog_logger global.
    def logger; $dlog_logger; end
  end
end
@@ -0,0 +1,25 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
+ require 'distributed_logreader/archiver/date_dir'
3
+ require 'fileutils'
4
+
5
+ describe "DLogReader::DateDir" do
6
+ before(:all) do
7
+ FileUtils.cp(File.join(File.dirname(__FILE__), '..', 'fixtures', 'test_file'), File.join(File.dirname(__FILE__), '..', 'fixtures', 'test_file2'))
8
+ @file_path = File.join(File.dirname(__FILE__), '..', 'fixtures', 'test_file2')
9
+ @base_backup_dir = File.join(File.dirname(__FILE__), '..', 'fixtures', 'temp_backup_dir')
10
+ time = Time.now
11
+ @backup_dir = File.join([@base_backup_dir, time.year.to_s, time.month.to_s, time.day.to_s])
12
+ @archiver = DLogReader::DateDir.new(@base_backup_dir)
13
+ end
14
+
15
+ describe "backup" do
16
+ it "should move file into Y/M/D backup directory" do
17
+ @archiver.archive(@file_path)
18
+ File.exist?(@backup_dir).should == true
19
+ end
20
+ end
21
+
22
+ after(:all) do
23
+ FileUtils.rm_r(@base_backup_dir)
24
+ end
25
+ end
@@ -0,0 +1,13 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
+ describe "DLogReader::LogArchiver" do
4
+ before(:all) do
5
+ @archiver = DLogReader::Archiver.new
6
+ end
7
+
8
+ describe "archive" do
9
+ it "should raise NotImplementedError" do
10
+ lambda{ @archiver.archive('dummy_file') }.should raise_error(NotImplementedError)
11
+ end
12
+ end
13
+ end
@@ -0,0 +1,26 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
+ require 'fileutils'
3
+
4
+ describe "DLogReader::RotaterLogreader" do
5
+ before(:all) do
6
+ FileUtils.cp(File.join(File.dirname(__FILE__), '..', 'fixtures', 'test_file'), File.join(File.dirname(__FILE__), '..', 'fixtures', 'test_file2'))
7
+ @file_path = File.join(File.dirname(__FILE__), '..', 'fixtures', 'test_file2')
8
+ @base_backup_dir = File.join(File.dirname(__FILE__), '..', 'fixtures', 'temp_backup_dir')
9
+ time = Time.now
10
+ @backup_dir = File.join([@base_backup_dir, time.year.to_s, time.month.to_s, time.day.to_s])
11
+ @logreader = DLogReader::RotaterReader.new(@file_path, @base_backup_dir, lambda{|x| puts x})
12
+ end
13
+
14
+ describe "process" do
15
+ it 'should' do
16
+ @logreader.process
17
+ File.exist?(@backup_dir).should == true
18
+ File.exist?(@file_path).should == false
19
+ end
20
+ end
21
+
22
+ after(:all) do
23
+ FileUtils.rm_r(@base_backup_dir)
24
+ FileUtils.rm_r(@logreader.log_reader.statefile)
25
+ end
26
+ end
@@ -0,0 +1,15 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
+ require 'fileutils'
3
+
4
+ describe "DLogReader::RotaterLogreader" do
5
+ before(:all) do
6
+ @file_path = File.join(File.dirname(__FILE__), '..', 'fixtures', 'virality_metrics')
7
+ @logreader = DLogReader::ScribeReader.new(@file_path, 'tmp', lambda{|x| puts x})
8
+ end
9
+
10
+ describe "current" do
11
+ it 'should say that the log_file is the currently rotated one' do
12
+ @logreader.current?.should == true
13
+ end
14
+ end
15
+ end
@@ -0,0 +1,20 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+ require 'fileutils'
3
+
4
+ describe "DLogReader::DistributedLogreader" do
5
+ before(:all) do
6
+ FileUtils.cp(File.join(File.dirname(__FILE__), 'fixtures', 'test_file'), File.join(File.dirname(__FILE__), 'fixtures', 'test_file2'))
7
+ @file_path = File.join(File.dirname(__FILE__), 'fixtures', 'test_file2')
8
+ @logreader = DLogReader::DistributedLogReader.new(@file_path, lambda{|x| puts x})
9
+ end
10
+
11
+ describe "process" do
12
+ it 'should' do
13
+ @logreader.process
14
+ end
15
+ end
16
+
17
+ after(:all) do
18
+ FileUtils.rm_r(@logreader.log_reader.statefile)
19
+ end
20
+ end
@@ -0,0 +1,17 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
+ require 'distributed_logreader/distributer/simple_thread_pool'
3
+
4
+ describe "DLogReader::SimpleThreadPool" do
5
+ before(:all) do
6
+ @thread_pool = DLogReader::SimpleThreadPool.new(lambda{|x| x}, 10)
7
+ end
8
+
9
+ describe "process" do
10
+ it 'should process with lots of threads' do
11
+ 100.times do |x|
12
+ @thread_pool.process(x.to_s)
13
+ end
14
+ @thread_pool.join
15
+ end
16
+ end
17
+ end
@@ -0,0 +1,13 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+ require 'distributed_logreader/distributer'
3
+
4
+ describe "DLogReader::Distributer" do
5
+ before(:all) do
6
+ @distributer = DLogReader::Distributer.new(lambda{|x| x})
7
+ end
8
+
9
+ describe "process" do
10
+ it "should raise NotImplementedError" do
11
+ end
12
+ end
13
+ end
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
@@ -0,0 +1,4 @@
1
+ This is a test log file.
2
+ With many lines.
3
+ Lots and Lots of lines.
4
+ Just kidding this is the last line.
@@ -0,0 +1,7 @@
1
+ asdf
2
+ asdf
3
+ asdf
4
+ asdf
5
+ asdfa
6
+ sdfa
7
+ sdfas
@@ -0,0 +1,7 @@
1
+ asdf
2
+ asdf
3
+ asdf
4
+ asdf
5
+ asdfa
6
+ sdfa
7
+ sdfas
@@ -0,0 +1,57 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+ require 'distributed_logreader/log_reader'
3
+ require 'fileutils'
4
+
5
+ describe "DLogReader::LogReader" do
6
+ before(:all) do
7
+ test_file = File.join(File.dirname(__FILE__), 'fixtures', 'test_file')
8
+ FileUtils.mkdir(File.join(File.dirname(__FILE__), 'fixtures', 'logreading')) rescue nil
9
+ @test_cp = File.join(File.dirname(__FILE__), 'fixtures', 'logreading', 'test')
10
+ FileUtils.cp(test_file, @test_cp)
11
+ test_fh = File.open(test_file)
12
+
13
+ @reader = DLogReader::LogReader.new(@test_cp) do |line|
14
+ unless test_fh.readline == line
15
+ raise RuntimeError.new, 'you messed up bud'
16
+ end
17
+ end
18
+ @test_line = "this is an added line. this should be read first\n"
19
+ @state_writer = DLogReader::LogReader.new(@test_cp){|line| line;}
20
+ @state_reader = DLogReader::LogReader.new(@test_cp) do |line|
21
+ unless line == @test_line
22
+ raise RuntimeError.new, 'you messed up worse'
23
+ end
24
+ end
25
+ end
26
+
27
+ describe "run" do
28
+ it 'should read log files' do
29
+ lambda{@reader.run}.should_not raise_error
30
+ end
31
+
32
+ it 'should resume from last access' do
33
+ #lets read to the end of file and write state
34
+ lambda{@state_writer.run}.should_not raise_error
35
+ fh = File.open(@test_cp, 'a')
36
+ fh.write(@test_line)
37
+ fh.close
38
+ lambda{@state_reader.run}.should_not raise_error
39
+ end
40
+
41
+ it 'should detect if log is different from last and to start from beg of file' do
42
+ lambda{@state_writer.run}.should_not raise_error
43
+ fh = File.open(@test_cp, 'w')
44
+ fh.write(@test_line)
45
+ fh.close
46
+ lambda{@state_reader.run}.should_not raise_error
47
+ fh = File.open(@test_cp, 'w')
48
+ fh.write('')
49
+ fh.close
50
+ lambda{@state_reader.run}.should_not raise_error
51
+ end
52
+ end
53
+
54
+ after(:all) do
55
+ FileUtils.rm_r(File.dirname(@test_cp))
56
+ end
57
+ end
@@ -0,0 +1,24 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
+ require 'distributed_logreader/selector/rotating_log'
3
+ require 'fileutils'
4
+
5
+ describe "DLogReader::RotatingLog" do
6
+ before(:all) do
7
+ @chooser = DLogReader::RotatingLog.new
8
+ end
9
+
10
+ describe "file_to_process" do
11
+ it "should pick the oldest file for logs in copytruncate format (file)" do
12
+ @chooser.file_to_process(File.join(File.dirname(__FILE__), '..', 'fixtures', 'copytruncate', 'test')).should == File.join(File.dirname(__FILE__), '..', 'fixtures', 'copytruncate', 'test.1')
13
+ end
14
+
15
+ it "should pick the oldest file in timestamp suffix log format (dirname)" do
16
+ @chooser.file_to_process(File.join(File.dirname(__FILE__), '..', 'fixtures', 'logrotate')).should == File.join(File.dirname(__FILE__), '..', 'fixtures', 'logrotate', 'test-20090101')
17
+ end
18
+
19
+ it "should pick the oldest file ignoring symlinks pointing to files already in dir" do
20
+ @chooser.file_to_process(File.join(File.dirname(__FILE__), '..', 'fixtures', 'symlink')).should == File.join(File.dirname(__FILE__), '..', 'fixtures', 'symlink', 'test')
21
+ end
22
+ end
23
+
24
+ end
@@ -0,0 +1,13 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
+ describe "DLogReader::Selector" do
4
+ before(:all) do
5
+ @chooser = DLogReader::Selector.new
6
+ end
7
+
8
+ describe "file_to_process" do
9
+ it "should raise NotImplementedError" do
10
+ lambda{ @chooser.file_to_process('dummy_file') }.should raise_error(NotImplementedError)
11
+ end
12
+ end
13
+ end
@@ -0,0 +1,9 @@
1
+ require 'spec'
2
+
3
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
4
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
5
+ require 'distributed_logreader'
6
+
7
+ Spec::Runner.configure do |config|
8
+
9
+ end
metadata ADDED
@@ -0,0 +1,107 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: distributed_logreader
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.11.0
5
+ platform: ruby
6
+ authors:
7
+ - Gary Tsang
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-10-27 00:00:00 -07:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description:
17
+ email: gary@garru.com
18
+ executables: []
19
+
20
+ extensions: []
21
+
22
+ extra_rdoc_files:
23
+ - LICENSE
24
+ - README.rdoc
25
+ files:
26
+ - .gitignore
27
+ - LICENSE
28
+ - README.rdoc
29
+ - Rakefile
30
+ - VERSION
31
+ - distributed_logreader.gemspec
32
+ - lib/distributed_logreader.rb
33
+ - lib/distributed_logreader/achiver.rb
34
+ - lib/distributed_logreader/archiver/date_dir.rb
35
+ - lib/distributed_logreader/distributed_log_reader.rb
36
+ - lib/distributed_logreader/distributed_log_reader/rotater_reader.rb
37
+ - lib/distributed_logreader/distributed_log_reader/scribe_reader.rb
38
+ - lib/distributed_logreader/distributer.rb
39
+ - lib/distributed_logreader/distributer/mutex_counter.rb
40
+ - lib/distributed_logreader/distributer/pandemic_processor.rb
41
+ - lib/distributed_logreader/distributer/simple_forked_process.rb
42
+ - lib/distributed_logreader/distributer/simple_thread_pool.rb
43
+ - lib/distributed_logreader/log_reader.rb
44
+ - lib/distributed_logreader/selector.rb
45
+ - lib/distributed_logreader/selector/rotating_log.rb
46
+ - lib/distributed_logreader/util.rb
47
+ - spec/archiver/date_dir_spec.rb
48
+ - spec/archiver_spec.rb
49
+ - spec/distributed_log_reader/rotater_reader_spec.rb
50
+ - spec/distributed_log_reader/scribe_reader_spec.rb
51
+ - spec/distributed_log_reader_spec.rb
52
+ - spec/distributer/simple_thread_pool_spec.rb
53
+ - spec/distributer_spec.rb
54
+ - spec/fixtures/copytruncate/test
55
+ - spec/fixtures/copytruncate/test.1
56
+ - spec/fixtures/copytruncate/test_current
57
+ - spec/fixtures/logrotate/test-20090101
58
+ - spec/fixtures/logrotate/test-20090102
59
+ - spec/fixtures/symlink/test
60
+ - spec/fixtures/symlink/test_older_sym
61
+ - spec/fixtures/test_file
62
+ - spec/fixtures/virality_metrics/test
63
+ - spec/fixtures/virality_metrics/virality_metrics_current
64
+ - spec/log_reader_spec.rb
65
+ - spec/selector/rotating_log_spec.rb
66
+ - spec/selector_spec.rb
67
+ - spec/spec_helper.rb
68
+ has_rdoc: true
69
+ homepage: http://github.com/garru/distributed_logreader
70
+ licenses: []
71
+
72
+ post_install_message:
73
+ rdoc_options:
74
+ - --charset=UTF-8
75
+ require_paths:
76
+ - lib
77
+ required_ruby_version: !ruby/object:Gem::Requirement
78
+ requirements:
79
+ - - ">="
80
+ - !ruby/object:Gem::Version
81
+ version: "0"
82
+ version:
83
+ required_rubygems_version: !ruby/object:Gem::Requirement
84
+ requirements:
85
+ - - ">="
86
+ - !ruby/object:Gem::Version
87
+ version: "0"
88
+ version:
89
+ requirements: []
90
+
91
+ rubyforge_project:
92
+ rubygems_version: 1.3.5
93
+ signing_key:
94
+ specification_version: 3
95
+ summary: incomplete distributed log reader. plan to use pandemic to distribute
96
+ test_files:
97
+ - spec/archiver/date_dir_spec.rb
98
+ - spec/archiver_spec.rb
99
+ - spec/distributed_log_reader/rotater_reader_spec.rb
100
+ - spec/distributed_log_reader/scribe_reader_spec.rb
101
+ - spec/distributed_log_reader_spec.rb
102
+ - spec/distributer/simple_thread_pool_spec.rb
103
+ - spec/distributer_spec.rb
104
+ - spec/log_reader_spec.rb
105
+ - spec/selector/rotating_log_spec.rb
106
+ - spec/selector_spec.rb
107
+ - spec/spec_helper.rb