ralf 0.1.5 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +4 -0
- data/README.rdoc +126 -0
- data/Rakefile +57 -0
- data/VERSION +1 -0
- data/bin/ralf +41 -0
- data/lib/ralf/bucket.rb +66 -0
- data/lib/ralf/config.rb +176 -0
- data/lib/ralf/interpolation.rb +48 -0
- data/lib/ralf/log.rb +18 -0
- data/lib/ralf/option_parser.rb +149 -0
- data/lib/ralf.rb +173 -0
- data/spec/fixtures/apache.log +7 -0
- data/spec/fixtures/example_buckets.yaml +48 -0
- data/spec/ralf/bucket_spec.rb +119 -0
- data/spec/ralf/config_spec.rb +131 -0
- data/spec/ralf/interpolation_spec.rb +45 -0
- data/spec/ralf/log_spec.rb +63 -0
- data/spec/ralf/option_parser_spec.rb +97 -0
- data/spec/ralf_spec.rb +274 -206
- data/spec/spec.opts +5 -0
- data/spec/spec_helper.rb +24 -0
- data/spec/support/fakeweb.rb +4 -0
- metadata +105 -27
data/.gitignore
ADDED
data/README.rdoc
ADDED
@@ -0,0 +1,126 @@
|
|
1
|
+
= Synopsis
|
2
|
+
|
3
|
+
Download, merge and convert Amazon S3 bucket log files for a specified date or date range.
|
4
|
+
|
5
|
+
* Download S3 bucket log files produced by Amazon S3. Log files are downloaded once and cached locally.
|
6
|
+
* Merge those log files together into a single logfile per bucket (sorting on ascending timestamp)
|
7
|
+
* Convert the log file from Amazon Server Access Log Format to Apache Common Log Format
|
8
|
+
|
9
|
+
Ralf is an acronym for Retrieve Amazon Log Files. Ralf does the following things:
|
10
|
+
|
11
|
+
= Usage
|
12
|
+
|
13
|
+
Usage: ./bin/ralf [options]
|
14
|
+
|
15
|
+
Download and merge Amazon S3 bucket log files for a specified date range and
|
16
|
+
output a Common Log File. Ralf is an acronym for Retrieve Amazon Log Files.
|
17
|
+
|
18
|
+
Ralf downloads bucket log files to local cache directories, merges the Amazon Log
|
19
|
+
Files and converts them to Common Log Format.
|
20
|
+
|
21
|
+
Example: ./bin/ralf --range month --now yesterday --output-file '/var/log/amazon/:year/:month/:bucket.log'
|
22
|
+
|
23
|
+
AWS credentials (Access Key Id and Secret Access Key) are required to access
|
24
|
+
S3 buckets. For security reasons these credentials can only be specified in a
|
25
|
+
configuration file (see --config-file) or through the environment using the
|
26
|
+
AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment variables.
|
27
|
+
|
28
|
+
Log selection options:
|
29
|
+
-l, --[no-]list List buckets that have logging enabled. Does not process log files.
|
30
|
+
-b, --buckets x,y,z Buckets for which to process log files. Defaults to all log-enabled buckets.
|
31
|
+
-r, --range BEGIN[,END] Date or date range to process. Defaults to 'today'.
|
32
|
+
-t, --now TIME Date to use as base for range. Defaults to 'today'.
|
33
|
+
|
34
|
+
You can use Chronic expressions for '--range' and '--now'. See http://chronic.rubyforge.org.
|
35
|
+
|
36
|
+
Example: --range 'last week'
|
37
|
+
All days of previous week.
|
38
|
+
Example: --range 'this week'
|
39
|
+
Beginning of this week (Sunday) up to and including today.
|
40
|
+
Example: --range '2010-01-01','2010-04-30'
|
41
|
+
First four months of 2010.
|
42
|
+
Example: --range 'this month' --now yesterday
|
43
|
+
This will select log files from the beginning of yesterday's month up to and including yesterday.
|
44
|
+
|
45
|
+
The --buckets, --range and --now options are optional. If unspecified, (incomplete)
|
46
|
+
logging for today will be processed for all buckets (that have logging enabled).
|
47
|
+
This is equivalent to specifying "--range 'today'" and "--now 'today'".
|
48
|
+
|
49
|
+
Output options:
|
50
|
+
-o, --output-file FORMAT Output file, e.g. '/var/log/s3/:year/:month/:bucket.log'. Required.
|
51
|
+
|
52
|
+
The --output-file format uses the last day of the range specified by (--range)
|
53
|
+
to determine the filename. E.g. when the format contains ':year/:month/:day' and
|
54
|
+
the range is 2010-01-15..2010-02-14, then the output file will be '2010/02/14'.
|
55
|
+
|
56
|
+
-x, --cache-dir FORMAT Directory name(s) in which to cache downloaded log files. Optional.
|
57
|
+
|
58
|
+
The --cache-dir format expands to as many directory names as needed for the
|
59
|
+
range specified by --range. E.g. "/var/run/s3_cache/:year/:month/:day/:bucket"
|
60
|
+
expands to 31 directories for range 2010-01-01..2010-01-31.
|
61
|
+
|
62
|
+
Defaults to '~/.ralf/:bucket' or '/var/log/ralf/:bucket' (when running as root).
|
63
|
+
|
64
|
+
Config file options:
|
65
|
+
-c, --config-file [FILE] Path to file with configuration settings (in YAML format).
|
66
|
+
|
67
|
+
Configuration settings are read from the (-c) specified configuration file
|
68
|
+
or from ~/.ralf.conf or from /etc/ralf.conf (when running as root).
|
69
|
+
Command-line options override settings read from the configuration file.
|
70
|
+
|
71
|
+
The configuration file must be in YAML format. Each command-line option has an
|
72
|
+
equivalent setting in a configuration file, replacing dash (-) by underscore (_).
|
73
|
+
|
74
|
+
The Amazon Access Key Id and Secret Access Key can only be specified in the configuration file or through the environment, not on the command line.
|
75
|
+
|
76
|
+
Example:
|
77
|
+
output_file: /var/log/amazon_s3/:year:month/:bucket.log
|
78
|
+
aws_access_key_id: my_access_key_id
|
79
|
+
aws_secret_access_key: my_secret_access_key
|
80
|
+
|
81
|
+
To only use command-line options simply specify -c or --config-file without
|
82
|
+
an argument.
|
83
|
+
|
84
|
+
Debug options:
|
85
|
+
-d, --[no-]debug [aws] Show debug messages.
|
86
|
+
|
87
|
+
Common options:
|
88
|
+
-h, --help Show this message.
|
89
|
+
-v, --version Show version.
|
90
|
+
|
91
|
+
= Library
|
92
|
+
|
93
|
+
You can also use Ralf from within your own ruby code. Each command-line option
|
94
|
+
has a corresponding option in the options hash passed to Ralf.new and Ralf.run.
|
95
|
+
Replace a dash (-) by an underscore (_) in the names:
|
96
|
+
|
97
|
+
options = { :output_file => '/var/log/s3/:bucket.log' }
|
98
|
+
|
99
|
+
require 'rubygems'
|
100
|
+
require 'ralf'
|
101
|
+
r = Ralf.new({ :config_file => '/Users/me/ralf.yaml' }.merge(options))
|
102
|
+
r.run
|
103
|
+
|
104
|
+
Or run it in one go:
|
105
|
+
|
106
|
+
Ralf.run({ :config_file => '/Users/me/ralf.yaml' }.merge(options))
|
107
|
+
|
108
|
+
= Requirements
|
109
|
+
|
110
|
+
* Credentials for an Amazon S3 account
|
111
|
+
* Enable logging on S3
|
112
|
+
You can use Cyberduck[http://cyberduck.ch/] for example.
|
113
|
+
|
114
|
+
= Gem dependencies
|
115
|
+
|
116
|
+
Ralf depends on the following gems, which will be automatically installed when you
|
117
|
+
install the ralf gem.
|
118
|
+
|
119
|
+
* chronic
|
120
|
+
* right_aws
|
121
|
+
* logmerge
|
122
|
+
|
123
|
+
= Authors
|
124
|
+
Authors: {Leon Berenschot}[http://github.com/LeipeLeon] and {K.J. Wierenga}[http://github.com/kjwierenga]
|
125
|
+
|
126
|
+
This program is used for {kerkdienstgemist.nl}[http://kerkdienstgemist.nl] {Amazon S3}[http://aws.amazon.com/s3/] log file processing.
|
data/Rakefile
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'rake'
|
3
|
+
|
4
|
+
begin
|
5
|
+
require 'jeweler'
|
6
|
+
Jeweler::Tasks.new do |gem|
|
7
|
+
# gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
|
8
|
+
|
9
|
+
gem.name = "ralf"
|
10
|
+
gem.summary = "Retrieve Amazon Log Files"
|
11
|
+
gem.description = <<-EOF
|
12
|
+
Download logfiles from Amazon S3 buckets to local disk and combine them in one Apache CLF per bucket
|
13
|
+
EOF
|
14
|
+
gem.email = [ "k.j.wierenga@gmail.com", "leonb@beriedata.nl" ]
|
15
|
+
gem.homepage = "http://github.com/kjwierenga/ralf"
|
16
|
+
gem.authors = ["Klaas Jan Wierenga", "Leon Berenschot"]
|
17
|
+
|
18
|
+
gem.add_development_dependency 'rspec', '>= 1.3.0'
|
19
|
+
gem.add_development_dependency 'fakeweb', '>= 1.2.8'
|
20
|
+
|
21
|
+
gem.add_dependency 'right_aws', '>= 1.10.0'
|
22
|
+
gem.add_dependency 'logmerge', '>= 1.0.2'
|
23
|
+
gem.add_dependency 'chronic', '>= 0.2.3'
|
24
|
+
|
25
|
+
gem.rdoc_options << '--exclude' << '.'
|
26
|
+
gem.has_rdoc = false
|
27
|
+
end
|
28
|
+
Jeweler::GemcutterTasks.new
|
29
|
+
rescue LoadError
|
30
|
+
puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
|
31
|
+
end
|
32
|
+
|
33
|
+
require 'spec/rake/spectask'
|
34
|
+
Spec::Rake::SpecTask.new(:spec) do |spec|
|
35
|
+
spec.libs << 'lib' << 'spec'
|
36
|
+
spec.spec_files = FileList['spec/**/*_spec.rb']
|
37
|
+
end
|
38
|
+
|
39
|
+
Spec::Rake::SpecTask.new(:rcov) do |spec|
|
40
|
+
spec.libs << 'lib' << 'spec'
|
41
|
+
spec.pattern = 'spec/**/*_spec.rb'
|
42
|
+
spec.rcov = true
|
43
|
+
end
|
44
|
+
|
45
|
+
task :spec => :check_dependencies
|
46
|
+
|
47
|
+
task :default => :spec
|
48
|
+
|
49
|
+
require 'rake/rdoctask'
|
50
|
+
Rake::RDocTask.new do |rdoc|
|
51
|
+
version = File.exist?('VERSION') ? File.read('VERSION') : ""
|
52
|
+
|
53
|
+
rdoc.rdoc_dir = 'rdoc'
|
54
|
+
rdoc.title = "ralf #{version}"
|
55
|
+
rdoc.rdoc_files.include('README*')
|
56
|
+
rdoc.rdoc_files.include('lib/**/*.rb')
|
57
|
+
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
1.0.0
|
data/bin/ralf
ADDED
@@ -0,0 +1,41 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
3
|
+
|
4
|
+
require 'ralf'
|
5
|
+
require 'ralf/option_parser'
|
6
|
+
|
7
|
+
# Class to suppress writing of a particular string to an IO instance
|
8
|
+
# IO subclass that swallows one specific message, together with the newline
# written directly after it, and passes every other write through unchanged.
class IOSuppress < IO

  # suppress:: the exact string whose output should be dropped
  # args::     forwarded unchanged to IO.new (file descriptor, mode, ...)
  def initialize(suppress, *args)
    # State lives on the instance; assigning it in the class body would only
    # create class-level instance variables that instances never see.
    @suppress       = suppress
    @expect_newline = false
    super(*args)
  end

  # Writes +str+ unless it is the suppressed message or the newline that
  # immediately follows it. Returns the number reported as written.
  def write(str)
    case str
    when "\n"
      if @expect_newline
        @expect_newline = false
        return 1
      end
    when @suppress
      @expect_newline = true
      return @suppress.size
    else
      # Any other output ends the suppressed message; a newline written later
      # belongs to that other output and must not be swallowed.
      @expect_newline = false
    end
    super(str)
  end
end
|
33
|
+
$stderr = IOSuppress.new("warning: peer certificate won't be verified in this SSL session", $stderr.fileno, "w")
|
34
|
+
|
35
|
+
# Parse the command line and run Ralf. The option parser returns nil after
# printing help or the version, in which case nothing is run.
# Any StandardError is reported on stderr and turned into a non-zero exit
# status so that calling scripts can detect the failure.
begin
  options = Ralf::OptionParser.parse(ARGV)
  Ralf.run(options) if options
rescue => e
  $stderr.puts "Error: #{e}"
  exit 1
end
|
data/lib/ralf/bucket.rb
ADDED
@@ -0,0 +1,66 @@
|
|
1
|
+
require 'ralf/log'
|
2
|
+
|
3
|
+
class Ralf

  # Wraps an S3 bucket object together with its logging configuration.
  #
  # All instances share one S3 connection, which must be assigned through
  # Bucket.s3= before buckets are created or enumerated.
  class Bucket

    # Shared S3 connection. Defined up front so that reading it before
    # Bucket.s3= was called yields nil instead of raising NameError.
    @@s3 = nil unless defined?(@@s3)

    # bucket:: S3 bucket object; must respond to +name+ and +logging_info+.
    #
    # Raises ArgumentError when no S3 connection has been assigned yet.
    def initialize(bucket)
      raise ArgumentError, "Bucket.s3 not assigned yet" if @@s3.nil?

      @bucket       = bucket
      @logging_info = @bucket.logging_info
      # Logs may be delivered to a bucket other than the one being logged.
      if @logging_info[:enabled] and @bucket.name != @logging_info[:targetbucket]
        @targetbucket = @@s3.bucket(@logging_info[:targetbucket])
      else
        @targetbucket = @bucket
      end
    end

    # Assign the S3 connection shared by all Bucket instances.
    def self.s3=(s3)
      @@s3 = s3
    end

    # Yields a Bucket for each S3 bucket.
    #
    # names::        bucket names to look up; nil means every bucket of the connection
    # with_logging:: when true, only buckets with logging enabled are yielded
    #
    # A name for which the connection returns no bucket produces a warning
    # on stdout and is skipped.
    def self.each(names = nil, with_logging = true)
      if names
        names.map do |name|
          if s3_bucket = @@s3.bucket(name)
            bucket = Bucket.new(s3_bucket)
            yield bucket if !with_logging or bucket.logging_enabled?
          else
            puts("Warning: bucket '#{name}' not found.")
          end
        end
      else
        @@s3.buckets.each do |s3_bucket|
          bucket = Bucket.new(s3_bucket)
          yield bucket if !with_logging or bucket.logging_enabled?
        end
      end
    end

    # Name of the wrapped bucket.
    def name
      @bucket.name
    end

    # True when the bucket's logging info marks logging as enabled.
    def logging_enabled?
      !!@logging_info[:enabled]
    end

    # Name of the bucket that receives the log files.
    def targetbucket
      @logging_info[:targetbucket]
    end

    # Key prefix under which log files are stored in the target bucket.
    def targetprefix
      @logging_info[:targetprefix]
    end

    # Yields a Log for every key in the target bucket whose name starts with
    # the target prefix followed by +date+.
    def each_log(date)
      search_string = "%s%s" % [@logging_info[:targetprefix], date]
      @targetbucket.keys(:prefix => search_string).each do |key|
        yield Log.new(key, @logging_info[:targetprefix])
      end
    end

  end

end
|
data/lib/ralf/config.rb
ADDED
@@ -0,0 +1,176 @@
|
|
1
|
+
require 'ralf/interpolation'
|
2
|
+
|
3
|
+
class Ralf::Config
|
4
|
+
|
5
|
+
USER_DEFAULT_CACHE_DIR = '~/.ralf/:bucket'
|
6
|
+
ROOT_DEFAULT_CACHE_DIR = '/var/log/ralf/:bucket'
|
7
|
+
|
8
|
+
class ConfigurationError < StandardError ; end
|
9
|
+
class RangeError < StandardError ; end
|
10
|
+
|
11
|
+
attr_accessor \
|
12
|
+
:buckets,
|
13
|
+
:now,
|
14
|
+
# :range,
|
15
|
+
:aws_access_key_id,
|
16
|
+
:aws_secret_access_key
|
17
|
+
|
18
|
+
attr_writer \
|
19
|
+
:debug, # reader is debug?
|
20
|
+
:output_file, # reader interpolates format
|
21
|
+
:cache_dir # reader interpolates format
|
22
|
+
|
23
|
+
attr_reader :errors
|
24
|
+
|
25
|
+
protected
|
26
|
+
|
27
|
+
attr_accessor :options
|
28
|
+
|
29
|
+
public
|
30
|
+
|
31
|
+
def self.load_file(filepath)
|
32
|
+
self.new(YAML.load_file(filepath))
|
33
|
+
end
|
34
|
+
|
35
|
+
def initialize(options = {})
|
36
|
+
@options = options.dup
|
37
|
+
|
38
|
+
# assign defaults
|
39
|
+
@options[:now] ||= nil
|
40
|
+
@options[:range] ||= 'today'
|
41
|
+
@options[:cache_dir] ||= (0 == Process.uid ? ROOT_DEFAULT_CACHE_DIR : File.expand_path(USER_DEFAULT_CACHE_DIR))
|
42
|
+
|
43
|
+
assign_options(@options)
|
44
|
+
end
|
45
|
+
|
46
|
+
def merge!(options)
|
47
|
+
@options.merge!(options)
|
48
|
+
|
49
|
+
assign_options(options)
|
50
|
+
end
|
51
|
+
|
52
|
+
def debug?
|
53
|
+
@debug || false
|
54
|
+
end
|
55
|
+
|
56
|
+
# compare two configurations
|
57
|
+
def ==(other)
|
58
|
+
@options == other.options
|
59
|
+
end
|
60
|
+
|
61
|
+
# return the range
|
62
|
+
def range
|
63
|
+
raise ArgumentError unless 2 == @range.size
|
64
|
+
Range.new(time_to_date(@range.first), time_to_date(@range.last)) # inclusive
|
65
|
+
end
|
66
|
+
|
67
|
+
# set a range by a single Chronic expression or an array of 1 or 2 Chronic expressions
|
68
|
+
def range=(args)
|
69
|
+
args ||= []
|
70
|
+
args = [args] unless args.is_a?(Array)
|
71
|
+
|
72
|
+
@range_value = args
|
73
|
+
|
74
|
+
raise ArgumentError.new("too many range items") if args.size > 2
|
75
|
+
|
76
|
+
range = []
|
77
|
+
args.each_with_index do |expr, i|
|
78
|
+
raise RangeError if i > 1 # this should have been caught by ArgumentError before the loop
|
79
|
+
|
80
|
+
chronic_options = { :context => :past, :guess => false }
|
81
|
+
if self.now
|
82
|
+
chronic_options.merge!(:now => Chronic.parse(self.now, :context => :past))
|
83
|
+
end
|
84
|
+
|
85
|
+
if span = Chronic.parse(expr, chronic_options)
|
86
|
+
if span.width <= 24 * 3600 # on same date
|
87
|
+
range << span.begin
|
88
|
+
else
|
89
|
+
raise RangeError, "range end '#{expr}' is not a single date" if i > 0
|
90
|
+
range << span.begin
|
91
|
+
range << span.end + (self.now ? 0 : -1)
|
92
|
+
end
|
93
|
+
else
|
94
|
+
raise RangeError, "invalid expression '#{expr}'"
|
95
|
+
end
|
96
|
+
end
|
97
|
+
|
98
|
+
range = [ Date.today ] if range.empty? # empty range means today
|
99
|
+
range = range*2 if 1 == range.size # single day has begin == end
|
100
|
+
|
101
|
+
@range = range
|
102
|
+
end
|
103
|
+
|
104
|
+
def output_file(variables)
|
105
|
+
Ralf::Interpolation.interpolate(@output_file, variables)
|
106
|
+
end
|
107
|
+
|
108
|
+
def output_file_format
|
109
|
+
@output_file
|
110
|
+
end
|
111
|
+
|
112
|
+
def cache_dir(variables)
|
113
|
+
Ralf::Interpolation.interpolate(@cache_dir, variables, [:bucket])
|
114
|
+
end
|
115
|
+
|
116
|
+
def cache_dir_format
|
117
|
+
@cache_dir
|
118
|
+
end
|
119
|
+
|
120
|
+
def empty?
|
121
|
+
@options.empty?
|
122
|
+
end
|
123
|
+
|
124
|
+
# Checks that AWS credentials are available, either as configuration
# settings or through the AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY
# environment variables. The problems found are stored in @errors.
#
# Returns true when no problems were found, false otherwise.
def valid?
  @errors = []

  unless @aws_access_key_id || ENV['AWS_ACCESS_KEY_ID']
    @errors << 'aws_access_key_id missing'
  end

  unless @aws_secret_access_key || ENV['AWS_SECRET_ACCESS_KEY']
    @errors << 'aws_secret_access_key missing'
  end

  # Explicit result: without it the method returns the value of the last
  # 'unless', which is nil when valid and the errors array when invalid.
  @errors.empty?
end
|
134
|
+
|
135
|
+
def validate!
|
136
|
+
valid?
|
137
|
+
unless @errors.empty?
|
138
|
+
raise ConfigurationError.new(@errors.join(', '))
|
139
|
+
end
|
140
|
+
end
|
141
|
+
|
142
|
+
def output_file_missing?
|
143
|
+
!@output_file
|
144
|
+
end
|
145
|
+
|
146
|
+
private
|
147
|
+
|
148
|
+
def time_to_date(time)
|
149
|
+
Date.new(time.year, time.month, time.day)
|
150
|
+
end
|
151
|
+
|
152
|
+
def assign_options(new_options)
|
153
|
+
options = new_options.dup
|
154
|
+
|
155
|
+
# always re-assign range in case now has changed
|
156
|
+
if options.has_key?(:now)
|
157
|
+
self.now = options.delete(:now)
|
158
|
+
self.range = options.delete(:range) || @range_value
|
159
|
+
end
|
160
|
+
options.each do |attr, val|
|
161
|
+
begin
|
162
|
+
self.send("#{attr.to_s}=", val)
|
163
|
+
rescue NoMethodError => e
|
164
|
+
puts "Warning: invalid configuration variable: #{method_name(e)}"
|
165
|
+
end
|
166
|
+
end
|
167
|
+
end
|
168
|
+
|
169
|
+
# Take NoMethodException string and extract the method name,
|
170
|
+
# e.g. "undefined method `out_path=' for #<Ralf::Config:0x17931b8>"
|
171
|
+
# should return 'out_path'
|
172
|
+
# Returns the option name behind a failed setter call, e.g. 'out_path' for a
# NoMethodError raised by calling out_path=.
#
# The name is taken from the exception itself (NameError#name) instead of
# being parsed out of the message text, whose quoting is not stable across
# Ruby versions.
def method_name(e)
  e.name.to_s.chomp('=')
end
|
175
|
+
|
176
|
+
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
class Ralf

  # Expands ':tag' placeholders in a format string. Every public instance
  # method defined in this class (except the +result+ reader) is a tag:
  # :bucket, :week, :day, :month and :year.
  class Interpolation
    class NotAllInterpolationsSatisfied < StandardError ; end
    class VariableMissing < StandardError ; end

    # Interpolates +string+ using +variables+ (:bucket and/or :date).
    #
    # required_variables:: tag names that must occur in +string+
    #
    # Raises VariableMissing when a required tag is absent from +string+ and
    # NotAllInterpolationsSatisfied when the result still contains a colon.
    def self.interpolate(string, variables, required_variables = [])
      required_variables.each do |name|
        raise VariableMissing, ":#{name.to_s} variable missing" unless string.match(/:#{name.to_s}/)
      end
      processor = Ralf::Interpolation.new(string, variables)
      raise NotAllInterpolationsSatisfied, "Not all keys are interpolated: '#{string}'" if processor.result.match(/:/)
      processor.result
    end

    attr_reader :result

    # Builds the interpolated result for +string+; tags whose value is nil
    # are left in place.
    def initialize(string, variables)
      @variables = variables
      @result    = string.dup

      # Method names are symbols on Ruby >= 1.9 and strings on 1.8; normalise
      # to strings so the 'result' reader is reliably excluded from the tags.
      tags = Ralf::Interpolation.public_instance_methods(false).map { |m| m.to_s } - ['result']
      tags.each do |tag|
        value = send(tag)
        next if value.nil?
        # Block form inserts the value literally; the string form would
        # interpret backslash sequences in the value.
        @result.gsub!(/:#{tag}/) { value }
      end
    end

    # Value for the :bucket tag.
    def bucket
      @variables[:bucket]
    end

    # Two-digit commercial week number of the :date variable, nil without a date.
    def week
      "%02d" % @variables[:date].cweek if @variables[:date]
    end

    # Two-digit day of month of the :date variable, nil without a date.
    def day
      "%02d" % @variables[:date].day if @variables[:date]
    end

    # Two-digit month of the :date variable, nil without a date.
    def month
      "%02d" % @variables[:date].month if @variables[:date]
    end

    # Four-digit year of the :date variable, nil without a date.
    def year
      "%04d" % @variables[:date].year if @variables[:date]
    end

  end

end
|
data/lib/ralf/log.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
class Ralf
  # A single log file stored under a key in the log target bucket.
  class Log
    # key::          key object; must respond to +name+ and +data+
    # targetprefix:: prefix under which log keys are stored
    def initialize(key, targetprefix)
      @key = key
      @targetprefix = targetprefix
    end

    # Key name without the leading target prefix. Only a prefix at the very
    # start is removed; the same text further on in the name is kept.
    def name
      @key.name.sub(/\A#{Regexp.escape(@targetprefix.to_s)}/, '')
    end

    # Writes the key's data to +dir+ under #name and returns the file path.
    # With +use_cache+ an already existing file is left untouched.
    def save_to_dir(dir, use_cache = true)
      file = File.join(dir, name)
      File.open(file, 'w') { |f| f.write(@key.data) } unless use_cache and File.exist?(file)
      file # return saved filename
    end
  end
end
|
@@ -0,0 +1,149 @@
|
|
1
|
+
require 'optparse'
|
2
|
+
|
3
|
+
class Ralf::OptionParser
|
4
|
+
|
5
|
+
def self.parse(args, output = $stdout)
|
6
|
+
options = {}
|
7
|
+
|
8
|
+
opts = ::OptionParser.new do |opts|
|
9
|
+
opts.banner = <<USAGE_END
|
10
|
+
Usage: #{$0} [options]
|
11
|
+
|
12
|
+
Download and merge Amazon S3 bucket log files for a specified date range and
|
13
|
+
output a Common Log File. Ralf is an acronym for Retrieve Amazon Log Files.
|
14
|
+
|
15
|
+
Ralf downloads bucket log files to local cache directories, merges the Amazon Log
|
16
|
+
Files and converts them to Common Log Format.
|
17
|
+
|
18
|
+
Example: #{$0} --range month --now yesterday --output-file '/var/log/amazon/:year/:month/:bucket.log'
|
19
|
+
|
20
|
+
AWS credentials (Access Key Id and Secret Access Key) are required to access
|
21
|
+
S3 buckets. For security reasons these credentials can only be specified in a
|
22
|
+
configuration file (see --config-file) or through the environment using the
|
23
|
+
AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment variables.
|
24
|
+
USAGE_END
|
25
|
+
|
26
|
+
opts.separator ""
|
27
|
+
opts.separator "Log selection options:"
|
28
|
+
opts.on("-l", "--[no-]list", "List buckets that have logging enabled. Does not process log files.") do |value|
|
29
|
+
options[:list] = value
|
30
|
+
end
|
31
|
+
opts.on("-b", "--buckets x,y,z", Array, "Buckets for which to process log files. Defaults to all log-enabled buckets.") do |buckets|
|
32
|
+
options[:buckets] = buckets.compact
|
33
|
+
end
|
34
|
+
opts.on("-r", "--range BEGIN[,END]", Array, "Date or date range to process. Defaults to 'today'.") do |range|
|
35
|
+
options[:range] = range.compact
|
36
|
+
end
|
37
|
+
log_selection_help =<<LOG_SELECTION_HELP
|
38
|
+
Date to use as base for range. Defaults to 'today'.
|
39
|
+
|
40
|
+
You can use Chronic expressions for '--range' and '--now'. See http://chronic.rubyforge.org.
|
41
|
+
|
42
|
+
Example: --range 'last week'
|
43
|
+
All days of previous week.
|
44
|
+
Example: --range 'this week'
|
45
|
+
Beginning of this week (sunday) upto and including today.
|
46
|
+
Example: --range '2010-01-01','2010-04-30'
|
47
|
+
First four months of this year.
|
48
|
+
Example: --range 'this month' --now yesterday
|
49
|
+
This will select log files from the beginning of yesterday's month upto and including yesterday.
|
50
|
+
|
51
|
+
The --buckets, --range and --now options are optional. If unspecified, (incomplete)
|
52
|
+
logging for today will be processed for all buckets (that have logging enabled).
|
53
|
+
This is equivalent to specifying "--range 'today'" and "--now 'today'".
|
54
|
+
LOG_SELECTION_HELP
|
55
|
+
opts.on("-t", "--now TIME", log_selection_help) do |now|
|
56
|
+
options[:now] = now
|
57
|
+
end
|
58
|
+
|
59
|
+
# opts.on("-m", "--[no-]rename-bucket-keys", "Rename original log files on Amazon using format from '--cache-dir' option.") do |value|
|
60
|
+
# options[:rename_bucket_keys] = value
|
61
|
+
# end
|
62
|
+
|
63
|
+
# opts.separator ""
|
64
|
+
opts.separator "Output options:"
|
65
|
+
|
66
|
+
output_file_help =<<OUTPUT_FILE_HELP
|
67
|
+
Output file, e.g. '/var/log/s3/:year/:month/:bucket.log'. Required.
|
68
|
+
|
69
|
+
The --output-file format uses the last day of the range specified by (--range)
|
70
|
+
to determine the filename. E.g. when the format contains ':year/:month/:day' and
|
71
|
+
the range is 2010-01-15..2010-02-14, then the output file will be '2010/02/14'.
|
72
|
+
OUTPUT_FILE_HELP
|
73
|
+
opts.on("-o", "--output-file FORMAT", output_file_help) do |format|
|
74
|
+
options[:output_file] = format
|
75
|
+
end
|
76
|
+
|
77
|
+
cache_dir_help =<<CACHE_DIR_HELP
|
78
|
+
Directory name(s) in which to cache downloaded log files. Optional.
|
79
|
+
|
80
|
+
The --cache-dir format expands to as many directory names as needed for the
|
81
|
+
range specified by --range. E.g. "/var/run/s3_cache/:year/:month/:day/:bucket"
|
82
|
+
expands to 31 directories for range 2010-01-01..2010-01-31.
|
83
|
+
|
84
|
+
Defaults to '~/.ralf/:bucket' or '/var/log/ralf/:bucket' (when running as root).
|
85
|
+
CACHE_DIR_HELP
|
86
|
+
opts.on("-x", "--cache-dir FORMAT", cache_dir_help) do |format|
|
87
|
+
options[:cache_dir] = format
|
88
|
+
end
|
89
|
+
|
90
|
+
# opts.on("-f", "--output-dir-format FORMAT", "Output directory format, e.g. ':year/:month/:day'") do |format|
|
91
|
+
# options[:output_dir_format] = format
|
92
|
+
# end
|
93
|
+
|
94
|
+
# opts.on("-o", "--output-basedir DIR", "Base directory for output files.") do |dir|
|
95
|
+
# options[:output_basedir] = dir
|
96
|
+
# end
|
97
|
+
|
98
|
+
# opts.on("-p", "--output-prefix STRING", "Prefix string for output files.") do |string|
|
99
|
+
# options[:output_prefix] = string
|
100
|
+
# end
|
101
|
+
|
102
|
+
# opts.separator ""
|
103
|
+
opts.separator "Config file options:"
|
104
|
+
config_file_help =<<CONFIG_FILE_HELP
|
105
|
+
Path to file with configuration settings (in YAML format).
|
106
|
+
|
107
|
+
Configuration settings are read from the (-c) specified configuration file
|
108
|
+
or from ~/.ralf.conf or from /etc/ralf.conf (when running as root).
|
109
|
+
Command-line options override settings read from the configuration file.
|
110
|
+
|
111
|
+
The configuration file must be in YAML format. Each command-line options has an
|
112
|
+
equivalent setting in a configuration file replacing dash (-) by underscore(_).
|
113
|
+
|
114
|
+
The Amazon Access Key Id and Secret Access Key can only be specified in the
|
115
|
+
|
116
|
+
Example:
|
117
|
+
output_file: /var/log/amazon_s3/:year:month/:bucket.log
|
118
|
+
aws_access_key_id: my_access_key_id
|
119
|
+
aws_secret_access_key: my_secret_access_key
|
120
|
+
|
121
|
+
To only use command-line options simply specify -c or --config-file without
|
122
|
+
an argument.
|
123
|
+
CONFIG_FILE_HELP
|
124
|
+
opts.on("-c", "--config-file [FILE]", config_file_help) do |file|
|
125
|
+
options[:config_file] = file
|
126
|
+
end
|
127
|
+
|
128
|
+
opts.separator "Debug options:"
|
129
|
+
opts.on("-d", "--[no-]debug [aws]", "Show debug messages.") do |aws|
|
130
|
+
options[:debug] = aws || true
|
131
|
+
end
|
132
|
+
|
133
|
+
opts.separator ""
|
134
|
+
opts.separator "Common options:"
|
135
|
+
opts.on_tail("-h", "--help", "Show this message.") do
|
136
|
+
output.puts opts
|
137
|
+
return nil
|
138
|
+
end
|
139
|
+
opts.on_tail("-v", "--version", "Show version.") do
|
140
|
+
output.print File.read(File.join(File.dirname(__FILE__), '..', '..', 'VERSION'))
|
141
|
+
return nil
|
142
|
+
end
|
143
|
+
end
|
144
|
+
remaining = opts.parse!(args)
|
145
|
+
opts.warn "Warning: unused arguments: #{remaining.join(' ')}" unless remaining.empty?
|
146
|
+
options
|
147
|
+
end
|
148
|
+
|
149
|
+
end
|