garru-distributed_logreader 0.2.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.rdoc CHANGED
@@ -1,6 +1,7 @@
1
- = distributed_logreader
1
+ = Distributed Log Reader
2
2
 
3
- Description goes here.
3
+ Distributed Log Reader is a log reader that allows for parallel processing. It is highly configurable,
4
+ allowing you to choose different parallel distribution and archiving strategies.
4
5
 
5
6
  == Copyright
6
7
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.2.0
1
+ 0.5.0
@@ -2,11 +2,11 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = %q{distributed_logreader}
5
- s.version = "0.2.0"
5
+ s.version = "0.5.0"
6
6
 
7
7
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
8
8
  s.authors = ["Gary Tsang"]
9
- s.date = %q{2009-07-30}
9
+ s.date = %q{2009-08-05}
10
10
  s.email = %q{gary@garru.com}
11
11
  s.extra_rdoc_files = [
12
12
  "LICENSE",
@@ -24,6 +24,7 @@ Gem::Specification.new do |s|
24
24
  "lib/distributed_logreader/archiver/date_dir.rb",
25
25
  "lib/distributed_logreader/distributed_log_reader.rb",
26
26
  "lib/distributed_logreader/distributed_log_reader/rotater_reader.rb",
27
+ "lib/distributed_logreader/distributed_log_reader/scribe_reader.rb",
27
28
  "lib/distributed_logreader/distributer.rb",
28
29
  "lib/distributed_logreader/distributer/simple_thread_pool.rb",
29
30
  "lib/distributed_logreader/log_reader.rb",
@@ -33,6 +34,7 @@ Gem::Specification.new do |s|
33
34
  "spec/archiver/date_dir_spec.rb",
34
35
  "spec/archiver_spec.rb",
35
36
  "spec/distributed_log_reader/rotater_reader_spec.rb",
37
+ "spec/distributed_log_reader/scribe_reader_spec.rb",
36
38
  "spec/distributed_log_reader_spec.rb",
37
39
  "spec/distributer/simple_thread_pool_spec.rb",
38
40
  "spec/distributer_spec.rb",
@@ -44,6 +46,8 @@ Gem::Specification.new do |s|
44
46
  "spec/fixtures/symlink/test",
45
47
  "spec/fixtures/symlink/test_older_sym",
46
48
  "spec/fixtures/test_file",
49
+ "spec/fixtures/virality_metrics/test",
50
+ "spec/fixtures/virality_metrics/virality_metrics_current",
47
51
  "spec/log_reader_spec.rb",
48
52
  "spec/selector/rotating_log_spec.rb",
49
53
  "spec/selector_spec.rb",
@@ -59,6 +63,7 @@ Gem::Specification.new do |s|
59
63
  "spec/archiver/date_dir_spec.rb",
60
64
  "spec/archiver_spec.rb",
61
65
  "spec/distributed_log_reader/rotater_reader_spec.rb",
66
+ "spec/distributed_log_reader/scribe_reader_spec.rb",
62
67
  "spec/distributed_log_reader_spec.rb",
63
68
  "spec/distributer/simple_thread_pool_spec.rb",
64
69
  "spec/distributer_spec.rb",
@@ -4,8 +4,9 @@ require 'distributed_logreader/achiver.rb'
4
4
  require 'distributed_logreader/util.rb'
5
5
  require 'distributed_logreader/distributed_log_reader'
6
6
  require 'distributed_logreader/distributed_log_reader/rotater_reader'
7
+ require 'distributed_logreader/distributed_log_reader/scribe_reader'
7
8
  require 'logger'
8
9
 
9
- $dlog_logger = Logger.new("distributed_logreader.log")
10
+ $dlog_logger = Logger.new("/var/log/distributed_logreader.log")
10
11
  $dlog_logger.level = Logger::INFO
11
12
  $dlog_logger.datetime_format = "%Y-%m-%d %H:%M:%S "
@@ -8,7 +8,7 @@ module DLogReader
8
8
  self.base_backup_dir = backup_dir
9
9
  end
10
10
 
11
- def backup(file)
11
+ def archive(file)
12
12
  mv(file, backup_dir) unless base_backup_dir.nil?
13
13
 
14
14
  end
@@ -16,11 +16,20 @@ module DLogReader
16
16
  # needs to be moved into LogReader
17
17
  def process
18
18
  pre_process
19
+
20
+ $dlog_logger.info("Started #{log_file}:")
21
+ lines_processed = 0
19
22
  @log_reader = LogReader.new(log_file) do |line|
20
- self.distributer.process(line)
23
+ begin
24
+ self.distributer.process(line)
25
+ rescue Exception => e
26
+ $dlog_logger.warn("Exception in processing thread #{log_file}:#{log_file.pos} -- #{line} -- #{e.message}")
27
+ end
28
+ lines_processed += 1
21
29
  end
22
30
  @log_reader.run
23
31
  self.distributer.join
32
+ $dlog_logger.info("Finished #{log_file}: Processed (#{lines_processed}) lines")
24
33
  post_process
25
34
  end
26
35
 
@@ -15,7 +15,7 @@ module DLogReader
15
15
  end
16
16
 
17
17
  def post_process
18
- self.archiver.backup(log_file)
18
+ self.archiver.archive(log_file)
19
19
  end
20
20
  end
21
21
  end
@@ -0,0 +1,29 @@
1
+ module DLogReader
2
+ class ScribeReader < DistributedLogReader
3
+ attr_accessor :selector, :archiver
4
+ attr_reader :log_reader
5
+ def initialize(filename, backupdir, worker, num_threads = 10)
6
+ super(filename, worker, num_threads)
7
+ self.selector = RotatingLog.new
8
+ self.selector.ignore_conditions << lambda{|x| x.match(/scribe_stats/)}
9
+ self.archiver = DateDir.new(backupdir)
10
+ end
11
+
12
+ def log_file
13
+ @log_file ||= begin
14
+ selector.file_to_process(filename)
15
+ end
16
+ end
17
+
18
+ def post_process
19
+ self.archiver.archive(log_file) unless current?
20
+ end
21
+
22
+ def current?
23
+ directory = File.dirname(log_file)
24
+ basename = File.basename(directory)
25
+ current_file = File.join(directory, "#{basename}_current")
26
+ File.identical?(current_file, log_file)
27
+ end
28
+ end
29
+ end
@@ -3,6 +3,14 @@ module DLogReader
3
3
  # input filename. This should work with a variety of log rotating schemes:
4
4
  # including copytruncate and date suffix.
5
5
  class RotatingLog < Selector
6
+
7
+ attr_accessor :ignore_conditions
8
+
9
+ def initialize
10
+ self.ignore_conditions = []
11
+ self.ignore_conditions << lambda{|x| symlink_file_in_dir?(x)}
12
+ # self.ignore_conditions << lambda{|x| true}
13
+ end
6
14
 
7
15
  def file_to_process(file_or_dir)
8
16
  if File.directory?(file_or_dir)
@@ -15,14 +23,18 @@ module DLogReader
15
23
  oldest_logfile(directory, basename)
16
24
  end
17
25
 
18
- protected
26
+ protected
19
27
 
20
28
  def oldest_logfile(directory, basename)
21
29
  file_list = Dir[File.join(directory, "#{basename}*")]
22
- file_list.reject!{|x| symlink_file_in_dir?(x)}
30
+ file_list.reject!{|x| reject?(x)}
23
31
  file = file_list.size > 0 ? file_list.sort_by{|a| File.new(a).mtime}.first : nil
24
32
  end
25
-
33
+
34
+ def reject?(filename)
35
+ self.ignore_conditions.inject(false){|candidate, condition| candidate || condition.call(filename)}
36
+ end
37
+
26
38
  # returns true if filename is a symlink and it refers to a file already inside the current directory
27
39
  def symlink_file_in_dir?(filename)
28
40
  File.symlink?(filename) && File.dirname(File.readlink(filename)) == File.dirname(filename)
@@ -14,7 +14,7 @@ describe "DLogReader::DateDir" do
14
14
 
15
15
  describe "backup" do
16
16
  it "should move file into Y/M/D backup directory" do
17
- @archiver.backup(@file_path)
17
+ @archiver.archive(@file_path)
18
18
  File.exist?(@backup_dir).should == true
19
19
  end
20
20
  end
@@ -0,0 +1,15 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
+ require 'fileutils'
3
+
4
+ describe "DLogReader::RotaterLogreader" do
5
+ before(:all) do
6
+ @file_path = File.join(File.dirname(__FILE__), '..', 'fixtures', 'virality_metrics')
7
+ @logreader = DLogReader::ScribeReader.new(@file_path, 'tmp', lambda{|x| puts x})
8
+ end
9
+
10
+ describe "current" do
11
+ it 'should say that the log_file is the currently rotated one' do
12
+ @logreader.current?.should == true
13
+ end
14
+ end
15
+ end
@@ -0,0 +1,7 @@
1
+ asdf
2
+ asdf
3
+ asdf
4
+ asdf
5
+ asdfa
6
+ sdfa
7
+ sdfas
@@ -0,0 +1,7 @@
1
+ asdf
2
+ asdf
3
+ asdf
4
+ asdf
5
+ asdfa
6
+ sdfa
7
+ sdfas
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: garru-distributed_logreader
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gary Tsang
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-07-30 00:00:00 -07:00
12
+ date: 2009-08-05 00:00:00 -07:00
13
13
  default_executable:
14
14
  dependencies: []
15
15
 
@@ -34,6 +34,7 @@ files:
34
34
  - lib/distributed_logreader/archiver/date_dir.rb
35
35
  - lib/distributed_logreader/distributed_log_reader.rb
36
36
  - lib/distributed_logreader/distributed_log_reader/rotater_reader.rb
37
+ - lib/distributed_logreader/distributed_log_reader/scribe_reader.rb
37
38
  - lib/distributed_logreader/distributer.rb
38
39
  - lib/distributed_logreader/distributer/simple_thread_pool.rb
39
40
  - lib/distributed_logreader/log_reader.rb
@@ -43,6 +44,7 @@ files:
43
44
  - spec/archiver/date_dir_spec.rb
44
45
  - spec/archiver_spec.rb
45
46
  - spec/distributed_log_reader/rotater_reader_spec.rb
47
+ - spec/distributed_log_reader/scribe_reader_spec.rb
46
48
  - spec/distributed_log_reader_spec.rb
47
49
  - spec/distributer/simple_thread_pool_spec.rb
48
50
  - spec/distributer_spec.rb
@@ -54,6 +56,8 @@ files:
54
56
  - spec/fixtures/symlink/test
55
57
  - spec/fixtures/symlink/test_older_sym
56
58
  - spec/fixtures/test_file
59
+ - spec/fixtures/virality_metrics/test
60
+ - spec/fixtures/virality_metrics/virality_metrics_current
57
61
  - spec/log_reader_spec.rb
58
62
  - spec/selector/rotating_log_spec.rb
59
63
  - spec/selector_spec.rb
@@ -89,6 +93,7 @@ test_files:
89
93
  - spec/archiver/date_dir_spec.rb
90
94
  - spec/archiver_spec.rb
91
95
  - spec/distributed_log_reader/rotater_reader_spec.rb
96
+ - spec/distributed_log_reader/scribe_reader_spec.rb
92
97
  - spec/distributed_log_reader_spec.rb
93
98
  - spec/distributer/simple_thread_pool_spec.rb
94
99
  - spec/distributer_spec.rb