garru-distributed_logreader 0.2.0 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
data/README.rdoc CHANGED
@@ -1,6 +1,7 @@
1
- = distributed_logreader
1
+ = Distributed Log Reader
2
2
 
3
- Description goes here.
3
+ Distributed Log Reader is a log reader that allows for parallel processing. It is highly configurable,
4
+ allowing you to choose different parallel distribution and archiving strategies.
4
5
 
5
6
  == Copyright
6
7
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.2.0
1
+ 0.5.0
@@ -2,11 +2,11 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = %q{distributed_logreader}
5
- s.version = "0.2.0"
5
+ s.version = "0.5.0"
6
6
 
7
7
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
8
8
  s.authors = ["Gary Tsang"]
9
- s.date = %q{2009-07-30}
9
+ s.date = %q{2009-08-05}
10
10
  s.email = %q{gary@garru.com}
11
11
  s.extra_rdoc_files = [
12
12
  "LICENSE",
@@ -24,6 +24,7 @@ Gem::Specification.new do |s|
24
24
  "lib/distributed_logreader/archiver/date_dir.rb",
25
25
  "lib/distributed_logreader/distributed_log_reader.rb",
26
26
  "lib/distributed_logreader/distributed_log_reader/rotater_reader.rb",
27
+ "lib/distributed_logreader/distributed_log_reader/scribe_reader.rb",
27
28
  "lib/distributed_logreader/distributer.rb",
28
29
  "lib/distributed_logreader/distributer/simple_thread_pool.rb",
29
30
  "lib/distributed_logreader/log_reader.rb",
@@ -33,6 +34,7 @@ Gem::Specification.new do |s|
33
34
  "spec/archiver/date_dir_spec.rb",
34
35
  "spec/archiver_spec.rb",
35
36
  "spec/distributed_log_reader/rotater_reader_spec.rb",
37
+ "spec/distributed_log_reader/scribe_reader_spec.rb",
36
38
  "spec/distributed_log_reader_spec.rb",
37
39
  "spec/distributer/simple_thread_pool_spec.rb",
38
40
  "spec/distributer_spec.rb",
@@ -44,6 +46,8 @@ Gem::Specification.new do |s|
44
46
  "spec/fixtures/symlink/test",
45
47
  "spec/fixtures/symlink/test_older_sym",
46
48
  "spec/fixtures/test_file",
49
+ "spec/fixtures/virality_metrics/test",
50
+ "spec/fixtures/virality_metrics/virality_metrics_current",
47
51
  "spec/log_reader_spec.rb",
48
52
  "spec/selector/rotating_log_spec.rb",
49
53
  "spec/selector_spec.rb",
@@ -59,6 +63,7 @@ Gem::Specification.new do |s|
59
63
  "spec/archiver/date_dir_spec.rb",
60
64
  "spec/archiver_spec.rb",
61
65
  "spec/distributed_log_reader/rotater_reader_spec.rb",
66
+ "spec/distributed_log_reader/scribe_reader_spec.rb",
62
67
  "spec/distributed_log_reader_spec.rb",
63
68
  "spec/distributer/simple_thread_pool_spec.rb",
64
69
  "spec/distributer_spec.rb",
@@ -4,8 +4,9 @@ require 'distributed_logreader/achiver.rb'
4
4
  require 'distributed_logreader/util.rb'
5
5
  require 'distributed_logreader/distributed_log_reader'
6
6
  require 'distributed_logreader/distributed_log_reader/rotater_reader'
7
+ require 'distributed_logreader/distributed_log_reader/scribe_reader'
7
8
  require 'logger'
8
9
 
9
- $dlog_logger = Logger.new("distributed_logreader.log")
10
+ $dlog_logger = Logger.new("/var/log/distributed_logreader.log")
10
11
  $dlog_logger.level = Logger::INFO
11
12
  $dlog_logger.datetime_format = "%Y-%m-%d %H:%M:%S "
@@ -8,7 +8,7 @@ module DLogReader
8
8
  self.base_backup_dir = backup_dir
9
9
  end
10
10
 
11
- def backup(file)
11
+ def archive(file)
12
12
  mv(file, backup_dir) unless base_backup_dir.nil?
13
13
 
14
14
  end
@@ -16,11 +16,20 @@ module DLogReader
16
16
  # needs to be moved into LogReader
17
17
  def process
18
18
  pre_process
19
+
20
+ $dlog_logger.info("Started #{log_file}:")
21
+ lines_processed = 0
19
22
  @log_reader = LogReader.new(log_file) do |line|
20
- self.distributer.process(line)
23
+ begin
24
+ self.distributer.process(line)
25
+ rescue Exception => e
26
+ $dlog_logger.warn("Exception in processing thread #{log_file}:#{log_file.pos} -- #{line} -- #{e.message}")
27
+ end
28
+ lines_processed += 1
21
29
  end
22
30
  @log_reader.run
23
31
  self.distributer.join
32
+ $dlog_logger.info("Finished #{log_file}: Processed (#{lines_processed}) lines")
24
33
  post_process
25
34
  end
26
35
 
@@ -15,7 +15,7 @@ module DLogReader
15
15
  end
16
16
 
17
17
  def post_process
18
- self.archiver.backup(log_file)
18
+ self.archiver.archive(log_file)
19
19
  end
20
20
  end
21
21
  end
@@ -0,0 +1,29 @@
1
+ module DLogReader
2
+ class ScribeReader < DistributedLogReader
3
+ attr_accessor :selector, :archiver
4
+ attr_reader :log_reader
5
+ def initialize(filename, backupdir, worker, num_threads = 10)
6
+ super(filename, worker, num_threads)
7
+ self.selector = RotatingLog.new
8
+ self.selector.ignore_conditions << lambda{|x| x.match(/scribe_stats/)}
9
+ self.archiver = DateDir.new(backupdir)
10
+ end
11
+
12
+ def log_file
13
+ @log_file ||= begin
14
+ selector.file_to_process(filename)
15
+ end
16
+ end
17
+
18
+ def post_process
19
+ self.archiver.archive(log_file) unless current?
20
+ end
21
+
22
+ def current?
23
+ directory = File.dirname(log_file)
24
+ basename = File.basename(directory)
25
+ current_file = File.join(directory, "#{basename}_current")
26
+ File.identical?(current_file, log_file)
27
+ end
28
+ end
29
+ end
@@ -3,6 +3,14 @@ module DLogReader
3
3
  # input filename. This should work with a variety of log rotating schemes:
4
4
  # including copytruncate and date suffix.
5
5
  class RotatingLog < Selector
6
+
7
+ attr_accessor :ignore_conditions
8
+
9
+ def initialize
10
+ self.ignore_conditions = []
11
+ self.ignore_conditions << lambda{|x| symlink_file_in_dir?(x)}
12
+ # self.ignore_conditions << lambda{|x| true}
13
+ end
6
14
 
7
15
  def file_to_process(file_or_dir)
8
16
  if File.directory?(file_or_dir)
@@ -15,14 +23,18 @@ module DLogReader
15
23
  oldest_logfile(directory, basename)
16
24
  end
17
25
 
18
- protected
26
+ protected
19
27
 
20
28
  def oldest_logfile(directory, basename)
21
29
  file_list = Dir[File.join(directory, "#{basename}*")]
22
- file_list.reject!{|x| symlink_file_in_dir?(x)}
30
+ file_list.reject!{|x| reject?(x)}
23
31
  file = file_list.size > 0 ? file_list.sort_by{|a| File.new(a).mtime}.first : nil
24
32
  end
25
-
33
+
34
+ def reject?(filename)
35
+ self.ignore_conditions.inject(false){|candidate, condition| candidate || condition.call(filename)}
36
+ end
37
+
26
38
  # returns true if filename is a symlink and it refers to a file already inside the current directory
27
39
  def symlink_file_in_dir?(filename)
28
40
  File.symlink?(filename) && File.dirname(File.readlink(filename)) == File.dirname(filename)
@@ -14,7 +14,7 @@ describe "DLogReader::DateDir" do
14
14
 
15
15
  describe "backup" do
16
16
  it "should move file into Y/M/D backup directory" do
17
- @archiver.backup(@file_path)
17
+ @archiver.archive(@file_path)
18
18
  File.exist?(@backup_dir).should == true
19
19
  end
20
20
  end
@@ -0,0 +1,15 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
+ require 'fileutils'
3
+
4
+ describe "DLogReader::RotaterLogreader" do
5
+ before(:all) do
6
+ @file_path = File.join(File.dirname(__FILE__), '..', 'fixtures', 'virality_metrics')
7
+ @logreader = DLogReader::ScribeReader.new(@file_path, 'tmp', lambda{|x| puts x})
8
+ end
9
+
10
+ describe "current" do
11
+ it 'should say that the log_file is the currently rotated one' do
12
+ @logreader.current?.should == true
13
+ end
14
+ end
15
+ end
@@ -0,0 +1,7 @@
1
+ asdf
2
+ asdf
3
+ asdf
4
+ asdf
5
+ asdfa
6
+ sdfa
7
+ sdfas
@@ -0,0 +1,7 @@
1
+ asdf
2
+ asdf
3
+ asdf
4
+ asdf
5
+ asdfa
6
+ sdfa
7
+ sdfas
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: garru-distributed_logreader
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gary Tsang
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-07-30 00:00:00 -07:00
12
+ date: 2009-08-05 00:00:00 -07:00
13
13
  default_executable:
14
14
  dependencies: []
15
15
 
@@ -34,6 +34,7 @@ files:
34
34
  - lib/distributed_logreader/archiver/date_dir.rb
35
35
  - lib/distributed_logreader/distributed_log_reader.rb
36
36
  - lib/distributed_logreader/distributed_log_reader/rotater_reader.rb
37
+ - lib/distributed_logreader/distributed_log_reader/scribe_reader.rb
37
38
  - lib/distributed_logreader/distributer.rb
38
39
  - lib/distributed_logreader/distributer/simple_thread_pool.rb
39
40
  - lib/distributed_logreader/log_reader.rb
@@ -43,6 +44,7 @@ files:
43
44
  - spec/archiver/date_dir_spec.rb
44
45
  - spec/archiver_spec.rb
45
46
  - spec/distributed_log_reader/rotater_reader_spec.rb
47
+ - spec/distributed_log_reader/scribe_reader_spec.rb
46
48
  - spec/distributed_log_reader_spec.rb
47
49
  - spec/distributer/simple_thread_pool_spec.rb
48
50
  - spec/distributer_spec.rb
@@ -54,6 +56,8 @@ files:
54
56
  - spec/fixtures/symlink/test
55
57
  - spec/fixtures/symlink/test_older_sym
56
58
  - spec/fixtures/test_file
59
+ - spec/fixtures/virality_metrics/test
60
+ - spec/fixtures/virality_metrics/virality_metrics_current
57
61
  - spec/log_reader_spec.rb
58
62
  - spec/selector/rotating_log_spec.rb
59
63
  - spec/selector_spec.rb
@@ -89,6 +93,7 @@ test_files:
89
93
  - spec/archiver/date_dir_spec.rb
90
94
  - spec/archiver_spec.rb
91
95
  - spec/distributed_log_reader/rotater_reader_spec.rb
96
+ - spec/distributed_log_reader/scribe_reader_spec.rb
92
97
  - spec/distributed_log_reader_spec.rb
93
98
  - spec/distributer/simple_thread_pool_spec.rb
94
99
  - spec/distributer_spec.rb