ralf 0.1.5 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +4 -0
- data/README.rdoc +126 -0
- data/Rakefile +57 -0
- data/VERSION +1 -0
- data/bin/ralf +41 -0
- data/lib/ralf/bucket.rb +66 -0
- data/lib/ralf/config.rb +176 -0
- data/lib/ralf/interpolation.rb +48 -0
- data/lib/ralf/log.rb +18 -0
- data/lib/ralf/option_parser.rb +149 -0
- data/lib/ralf.rb +173 -0
- data/spec/fixtures/apache.log +7 -0
- data/spec/fixtures/example_buckets.yaml +48 -0
- data/spec/ralf/bucket_spec.rb +119 -0
- data/spec/ralf/config_spec.rb +131 -0
- data/spec/ralf/interpolation_spec.rb +45 -0
- data/spec/ralf/log_spec.rb +63 -0
- data/spec/ralf/option_parser_spec.rb +97 -0
- data/spec/ralf_spec.rb +274 -206
- data/spec/spec.opts +5 -0
- data/spec/spec_helper.rb +24 -0
- data/spec/support/fakeweb.rb +4 -0
- metadata +105 -27
data/.gitignore
ADDED
data/README.rdoc
ADDED
@@ -0,0 +1,126 @@
|
|
1
|
+
= Synopsis
|
2
|
+
|
3
|
+
Download, merge and convert Amazon S3 bucket log files for a specified date or date range.
|
4
|
+
|
5
|
+
* Download S3 bucket log files produced by Amazon S3. Log files are downloaded once and cached locally.
|
6
|
+
* Merge those log files together into a single logfile per bucket (sorting on ascending timestamp)
|
7
|
+
* Convert the log file from Amazon Server Access Log Format to Apache Common Log Format
|
8
|
+
|
9
|
+
Ralf is an acronym for Retrieve Amazon Log Files. Ralf does the following things:
|
10
|
+
|
11
|
+
= Usage
|
12
|
+
|
13
|
+
Usage: ./bin/ralf [options]
|
14
|
+
|
15
|
+
Download and merge Amazon S3 bucket log files for a specified date range and
|
16
|
+
output a Common Log File. Ralf is an acronym for Retrieve Amazon Log Files.
|
17
|
+
|
18
|
+
Ralf downloads bucket log files to local cache directories, merges the Amazon Log
|
19
|
+
Files and converts them to Common Log Format.
|
20
|
+
|
21
|
+
Example: ./bin/ralf --range month --now yesterday --output-file '/var/log/amazon/:year/:month/:bucket.log'
|
22
|
+
|
23
|
+
AWS credentials (Access Key Id and Secret Access Key) are required to access
|
24
|
+
S3 buckets. For security reasons these credentials can only be specified in a
|
25
|
+
configuration file (see --config-file) or through the environment using the
|
26
|
+
AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment variables.
|
27
|
+
|
28
|
+
Log selection options:
|
29
|
+
-l, --[no-]list List buckets that have logging enabled. Does not process log files.
|
30
|
+
-b, --buckets x,y,z Buckets for which to process log files. Defaults to all log-enabled buckets.
|
31
|
+
-r, --range BEGIN[,END] Date or date range to process. Defaults to 'today'.
|
32
|
+
-t, --now TIME Date to use as base for range. Defaults to 'today'.
|
33
|
+
|
34
|
+
You can use Chronic expressions for '--range' and '--now'. See http://chronic.rubyforge.org.
|
35
|
+
|
36
|
+
Example: --range 'last week'
|
37
|
+
All days of previous week.
|
38
|
+
Example: --range 'this week'
|
39
|
+
Beginning of this week (Sunday) up to and including today.
|
40
|
+
Example: --range '2010-01-01','2010-04-30'
|
41
|
+
First four months of this year.
|
42
|
+
Example: --range 'this month' --now yesterday
|
43
|
+
This will select log files from the beginning of yesterday's month up to and including yesterday.
|
44
|
+
|
45
|
+
The --buckets, --range and --now options are optional. If unspecified, (incomplete)
|
46
|
+
logging for today will be processed for all buckets (that have logging enabled).
|
47
|
+
This is equivalent to specifying "--range 'today'" and "--now 'today'".
|
48
|
+
|
49
|
+
Output options:
|
50
|
+
-o, --output-file FORMAT Output file, e.g. '/var/log/s3/:year/:month/:bucket.log'. Required.
|
51
|
+
|
52
|
+
The --output-file format uses the last day of the range specified by (--range)
|
53
|
+
to determine the filename. E.g. when the format contains ':year/:month/:day' and
|
54
|
+
the range is 2010-01-15..2010-02-14, then the output file will be '2010/02/14'.
|
55
|
+
|
56
|
+
-x, --cache-dir FORMAT Directory name(s) in which to cache downloaded log files. Optional.
|
57
|
+
|
58
|
+
The --cache-dir format expands to as many directory names as needed for the
|
59
|
+
range specified by --range. E.g. "/var/run/s3_cache/:year/:month/:day/:bucket"
|
60
|
+
expands to 31 directories for range 2010-01-01..2010-01-31.
|
61
|
+
|
62
|
+
Defaults to '~/.ralf/:bucket' or '/var/log/ralf/:bucket' (when running as root).
|
63
|
+
|
64
|
+
Config file options:
|
65
|
+
-c, --config-file [FILE] Path to file with configuration settings (in YAML format).
|
66
|
+
|
67
|
+
Configuration settings are read from the (-c) specified configuration file
|
68
|
+
or from ~/.ralf.conf or from /etc/ralf.conf (when running as root).
|
69
|
+
Command-line options override settings read from the configuration file.
|
70
|
+
|
71
|
+
The configuration file must be in YAML format. Each command-line option has an
|
72
|
+
equivalent setting in a configuration file replacing dash (-) by underscore(_).
|
73
|
+
|
74
|
+
The Amazon Access Key Id and Secret Access Key can only be specified in the configuration file or through the environment.
|
75
|
+
|
76
|
+
Example:
|
77
|
+
output_file: /var/log/amazon_s3/:year:month/:bucket.log
|
78
|
+
aws_access_key_id: my_access_key_id
|
79
|
+
aws_secret_access_key: my_secret_access_key
|
80
|
+
|
81
|
+
To only use command-line options simply specify -c or --config-file without
|
82
|
+
an argument.
|
83
|
+
|
84
|
+
Debug options:
|
85
|
+
-d, --[no-]debug [aws] Show debug messages.
|
86
|
+
|
87
|
+
Common options:
|
88
|
+
-h, --help Show this message.
|
89
|
+
-v, --version Show version.
|
90
|
+
|
91
|
+
= Library
|
92
|
+
|
93
|
+
You can also use Ralf from within your own ruby code. Each command-line option
|
94
|
+
has a corresponding option in the options hash passed to Ralf.new and Ralf.run.
|
95
|
+
Replace a dash (-) by an underscore (_) in the names:
|
96
|
+
|
97
|
+
options = { :output_file => '/var/log/s3/:bucket.log' }
|
98
|
+
|
99
|
+
require 'rubygems'
|
100
|
+
require 'ralf'
|
101
|
+
r = Ralf.new({ :config_file => '/Users/me/ralf.yaml' }.merge(options))
|
102
|
+
r.run
|
103
|
+
|
104
|
+
Or run it in one go:
|
105
|
+
|
106
|
+
Ralf.run({ :config_file => '/Users/me/ralf.yaml' }.merge(options))
|
107
|
+
|
108
|
+
= Requirements
|
109
|
+
|
110
|
+
* Credentials for an Amazon S3 account
|
111
|
+
* Enable logging on S3
|
112
|
+
You can use Cyberduck[http://cyberduck.ch/] for example.
|
113
|
+
|
114
|
+
= Gem dependencies
|
115
|
+
|
116
|
+
Ralf depends on the following gems which will be automatically installed when you
|
117
|
+
install the ralf gem.
|
118
|
+
|
119
|
+
* chronic
|
120
|
+
* right_aws
|
121
|
+
* logmerge
|
122
|
+
|
123
|
+
= Authors
|
124
|
+
Authors: {Leon Berenschot}[http://github.com/LeipeLeon] and {K.J. Wierenga}[http://github.com/kjwierenga]
|
125
|
+
|
126
|
+
This program is used for {kerkdienstgemist.nl}[http://kerkdienstgemist.nl] {Amazon S3}[http://aws.amazon.com/s3/] log file processing.
|
data/Rakefile
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'rake'
|
3
|
+
|
4
|
+
# Gem packaging tasks. Jeweler is optional: when it cannot be loaded a hint is
# printed and the remaining tasks (spec, rcov, rdoc) are still defined.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings

    gem.name        = "ralf"
    gem.summary     = "Retrieve Amazon Log Files"
    gem.description = <<-EOF
      Download logfiles from Amazon S3 buckets to local disk and combine them in one Apache CLF per bucket
    EOF
    gem.email    = [ "k.j.wierenga@gmail.com", "leonb@beriedata.nl" ]
    gem.homepage = "http://github.com/kjwierenga/ralf"
    gem.authors  = ["Klaas Jan Wierenga", "Leon Berenschot"]

    gem.add_development_dependency 'rspec',   '>= 1.3.0'
    gem.add_development_dependency 'fakeweb', '>= 1.2.8'

    gem.add_dependency 'right_aws', '>= 1.10.0'
    gem.add_dependency 'logmerge',  '>= 1.0.2'
    gem.add_dependency 'chronic',   '>= 0.2.3'

    gem.rdoc_options << '--exclude' << '.'
    gem.has_rdoc = false
  end
  Jeweler::GemcutterTasks.new

  # check_dependencies is not defined anywhere in this Rakefile (presumably it
  # is provided by the Jeweler tasks above), so only make :spec depend on it
  # when Jeweler actually loaded. Otherwise `rake spec` would abort on an
  # unknown prerequisite.
  task :spec => :check_dependencies
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
end

# Run all specs under spec/.
require 'spec/rake/spectask'
Spec::Rake::SpecTask.new(:spec) do |spec|
  spec.libs << 'lib' << 'spec'
  spec.spec_files = FileList['spec/**/*_spec.rb']
end

# Run all specs under spec/ with rcov enabled.
Spec::Rake::SpecTask.new(:rcov) do |spec|
  spec.libs << 'lib' << 'spec'
  spec.pattern = 'spec/**/*_spec.rb'
  spec.rcov = true
end

task :default => :spec

# Generate RDoc for the README and everything under lib/.
require 'rake/rdoctask'
Rake::RDocTask.new do |rdoc|
  # strip the trailing newline of the VERSION file so it does not end up in the title
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "ralf #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
1.0.0
|
data/bin/ralf
ADDED
@@ -0,0 +1,41 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
3
|
+
|
4
|
+
require 'ralf'
|
5
|
+
require 'ralf/option_parser'
|
6
|
+
|
7
|
+
# Class to suppress writing of a particular string to an IO instance.
#
# Every write of the suppressed string is dropped, together with a single
# "\n" written directly after it. All other output is passed on to IO#write.
class IOSuppress < IO

  # suppress - the string that must not be written (matched with ===)
  # args     - passed on unchanged to IO.new (e.g. file descriptor and mode)
  def initialize(suppress, *args)
    @suppress       = suppress
    # per-instance state; true directly after the suppressed string was seen
    @expect_newline = false
    super(*args)
  end

  # Override write to suppress the configured string.
  #
  # Accepts any number of strings, because IO#puts on newer Rubies hands the
  # line and its newline to #write in one call. Each string is filtered
  # individually and in order.
  #
  # Returns the number of bytes accounted for; suppressed data is counted as
  # if it had been written so callers see no short write.
  def write(*strings)
    strings.inject(0) do |written, str|
      case str
      when "\n"
        if @expect_newline
          # newline that terminates the suppressed message: drop it too
          @expect_newline = false
          written + 1
        else
          written + super(str)
        end
      when @suppress
        @expect_newline = true
        written + str.to_s.bytesize
      else
        # Anything else ends the "suppressed message" state, so the newline of
        # an unrelated later message is never swallowed.
        @expect_newline = false
        written + super(str)
      end
    end
  end
end
|
33
|
+
# Route $stderr through IOSuppress so this specific SSL warning line is not shown.
$stderr = IOSuppress.new("warning: peer certificate won't be verified in this SSL session", $stderr.fileno, "w")

# Parse the command line and run Ralf. OptionParser.parse returns nil for
# --help/--version, in which case there is nothing left to do.
begin
  options = Ralf::OptionParser.parse(ARGV)
  Ralf.run(options) if options
rescue => e
  $stderr.puts "Error: #{e}"
  # signal the failure to the calling shell/cron job instead of exiting with 0
  exit 1
end
|
data/lib/ralf/bucket.rb
ADDED
@@ -0,0 +1,66 @@
|
|
1
|
+
require 'ralf/log'
|
2
|
+
|
3
|
+
class Ralf

  # Wraps an S3 bucket object together with its logging information.
  #
  # The S3 interface must be assigned once with Bucket.s3= before any Bucket
  # is created. As used here, that interface responds to #bucket(name) and
  # #buckets; the wrapped bucket objects respond to #name, #logging_info and
  # #keys(:prefix => ...).
  class Bucket

    # bucket - S3 bucket object to wrap.
    #
    # Raises ArgumentError when Bucket.s3 has not been assigned yet.
    def initialize(bucket)
      # defined? guards against the NameError that reading an unassigned
      # class variable would raise
      raise ArgumentError.new("Bucket.s3 not assigned yet") if !defined?(@@s3) || @@s3.nil?

      @bucket       = bucket
      @logging_info = @bucket.logging_info
      if @logging_info[:enabled] && @bucket.name != @logging_info[:targetbucket]
        # logs are delivered to another bucket: look that one up
        @targetbucket = @@s3.bucket(@logging_info[:targetbucket])
      else
        @targetbucket = @bucket
      end
    end

    # Assign the S3 interface used by all Bucket instances.
    def self.s3=(s3)
      @@s3 = s3
    end

    # Yield a Bucket for each of the named buckets, or for every bucket
    # returned by the S3 interface when names is nil.
    #
    # names        - Array of bucket names or nil
    # with_logging - when true (default) only buckets with logging enabled
    #                are yielded
    #
    # Prints a warning on stdout for every name for which no bucket is found.
    def self.each(names = nil, with_logging = true)
      if names
        # find specified buckets (map is kept so the return value is unchanged)
        names.map do |name|
          if s3_bucket = @@s3.bucket(name)
            bucket = Bucket.new(s3_bucket)
            yield bucket if !with_logging || bucket.logging_enabled?
          else
            puts("Warning: bucket '#{name}' not found.")
          end
        end
      else
        @@s3.buckets.each do |s3_bucket|
          bucket = Bucket.new(s3_bucket)
          yield bucket if !with_logging || bucket.logging_enabled?
        end
      end
    end

    # Name of the wrapped bucket.
    def name
      @bucket.name
    end

    # true when the logging information marks logging as enabled.
    def logging_enabled?
      !!@logging_info[:enabled]
    end

    # Name of the bucket the logs are written to (from the logging information).
    def targetbucket
      @logging_info[:targetbucket]
    end

    # Key prefix of the log files (from the logging information).
    def targetprefix
      @logging_info[:targetprefix]
    end

    # Yield a Log for every key in the target bucket whose name starts with
    # the target prefix followed by date.
    def each_log(date)
      search_string = "%s%s" % [@logging_info[:targetprefix], date]
      @targetbucket.keys(:prefix => search_string).each do |key|
        yield Log.new(key, @logging_info[:targetprefix])
      end
    end

  end

end
|
data/lib/ralf/config.rb
ADDED
@@ -0,0 +1,176 @@
|
|
1
|
+
require 'ralf/interpolation'
|
2
|
+
|
3
|
+
# Holds the configuration for a Ralf run: buckets, date range, output file
# format, cache directory format and AWS credentials.
#
# Options are given as a Hash; every key is assigned through the writer of
# the same name. Unknown keys produce a warning on stdout.
class Ralf::Config

  USER_DEFAULT_CACHE_DIR = '~/.ralf/:bucket'
  ROOT_DEFAULT_CACHE_DIR = '/var/log/ralf/:bucket'

  class ConfigurationError < StandardError ; end
  class RangeError         < StandardError ; end

  # :range is not listed here; it has hand-written accessors below
  attr_accessor \
    :buckets,
    :now,
    :aws_access_key_id,
    :aws_secret_access_key

  attr_writer \
    :debug,       # reader is debug?
    :output_file, # reader interpolates format
    :cache_dir    # reader interpolates format

  # Array of messages filled by valid?
  attr_reader :errors

  protected

  # the raw options hash; only exposed to other Config instances (see ==)
  attr_accessor :options

  public

  # Build a configuration from the YAML file at filepath.
  # A file for which YAML.load_file returns nil or false (e.g. an empty file)
  # results in a configuration with only the defaults.
  def self.load_file(filepath)
    self.new(YAML.load_file(filepath) || {})
  end

  # options - Hash of settings; it is duplicated, the caller's hash is not modified.
  def initialize(options = {})
    @options = options.dup

    # assign defaults
    @options[:now]       ||= nil
    @options[:range]     ||= 'today'
    @options[:cache_dir] ||= (0 == Process.uid ? ROOT_DEFAULT_CACHE_DIR : File.expand_path(USER_DEFAULT_CACHE_DIR))

    assign_options(@options)
  end

  # Merge options into this configuration and assign the new values.
  def merge!(options)
    @options.merge!(options)

    assign_options(options)
  end

  # The debug setting, or false when it was never set.
  def debug?
    @debug || false
  end

  # compare two configurations (by their options hashes)
  def ==(other)
    @options == other.options
  end

  # return the range as an inclusive Range of Date objects
  #
  # Raises ArgumentError unless exactly two boundaries were stored by range=.
  # NOTE(review): range= stores three items for a multi-day expression followed
  # by a single-day one, which ends up here as a bare ArgumentError - confirm
  # whether that combination is meant to be supported.
  def range
    raise ArgumentError unless 2 == @range.size
    Range.new(time_to_date(@range.first), time_to_date(@range.last)) # inclusive
  end

  # set a range by a single Chronic expression or an array of 1 or 2 Chronic expressions
  #
  # Raises ArgumentError for more than two expressions and RangeError when an
  # expression cannot be parsed or when the second expression spans more than
  # one day.
  def range=(args)
    args ||= []
    args = [args] unless args.is_a?(Array)

    # remember the raw value so the range can be re-evaluated when 'now' changes
    @range_value = args

    raise ArgumentError.new("too many range items") if args.size > 2

    range = []
    args.each_with_index do |expr, i|
      chronic_options = { :context => :past, :guess => false }
      if self.now
        chronic_options.merge!(:now => Chronic.parse(self.now, :context => :past))
      end

      if span = Chronic.parse(expr, chronic_options)
        if span.width <= 24 * 3600 # on same date
          range << span.begin
        else
          raise RangeError, "range end '#{expr}' is not a single date" if i > 0
          range << span.begin
          range << span.end + (self.now ? 0 : -1)
        end
      else
        raise RangeError, "invalid expression '#{expr}'"
      end
    end

    range = [ Date.today ] if range.empty? # empty range means today
    range = range*2 if 1 == range.size     # single day has begin == end

    @range = range
  end

  # The output file name: the output_file format interpolated with variables.
  def output_file(variables)
    Ralf::Interpolation.interpolate(@output_file, variables)
  end

  # The output file format as it was assigned.
  def output_file_format
    @output_file
  end

  # The cache directory: the cache_dir format interpolated with variables.
  # The format is required to contain :bucket.
  def cache_dir(variables)
    Ralf::Interpolation.interpolate(@cache_dir, variables, [:bucket])
  end

  # The cache directory format as it was assigned.
  def cache_dir_format
    @cache_dir
  end

  def empty?
    @options.empty?
  end

  # Check that AWS credentials are available, either in this configuration or
  # in the environment. Collects messages in errors.
  #
  # Returns true when no errors were found, false otherwise.
  def valid?
    @errors = []
    unless (@aws_access_key_id || ENV['AWS_ACCESS_KEY_ID'])
      @errors << 'aws_access_key_id missing'
    end

    unless (@aws_secret_access_key || ENV['AWS_SECRET_ACCESS_KEY'])
      @errors << 'aws_secret_access_key missing'
    end

    @errors.empty?
  end

  # Raises ConfigurationError listing all errors when the configuration is not valid.
  def validate!
    valid?
    unless @errors.empty?
      raise ConfigurationError.new(@errors.join(', '))
    end
  end

  def output_file_missing?
    !@output_file
  end

  private

  # Reduce a time-like object (anything with year, month and day) to a Date.
  def time_to_date(time)
    Date.new(time.year, time.month, time.day)
  end

  # Assign every entry of new_options through the writer of the same name.
  def assign_options(new_options)
    options = new_options.dup

    # always re-assign range in case now has changed
    if options.has_key?(:now)
      self.now   = options.delete(:now)
      self.range = options.delete(:range) || @range_value
    end
    options.each do |attr, val|
      begin
        self.send("#{attr.to_s}=", val)
      rescue NoMethodError => e
        puts "Warning: invalid configuration variable: #{method_name(e)}"
      end
    end
  end

  # Take a NoMethodError and extract the attribute name,
  # e.g. for the missing method `out_path=' this returns 'out_path'.
  #
  # The name is taken from the exception itself rather than parsed out of its
  # message, because the message format differs between Ruby versions.
  def method_name(e)
    e.name.to_s.sub(/=\z/, '')
  end

end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
class Ralf

  # Replaces :tag placeholders in a format string.
  #
  # Every public instance method of this class except #result is a tag:
  # :bucket, :week, :day, :month and :year. The date based tags are taken from
  # variables[:date]; a tag whose value is nil is left untouched.
  class Interpolation
    class NotAllInterpolationsSatisfied < StandardError ; end
    class VariableMissing               < StandardError ; end

    # Interpolate string with variables and return the result.
    #
    # string             - format string, e.g. '/var/log/:year/:bucket.log'
    # variables          - Hash with :bucket and/or :date
    # required_variables - names of tags that must occur in string
    #
    # Raises VariableMissing when a required tag does not occur in string and
    # NotAllInterpolationsSatisfied when the result still contains a colon.
    def self.interpolate(string, variables, required_variables = [])
      required_variables.each do |name|
        raise VariableMissing, ":#{name.to_s} variable missing" unless string.match(/:#{name.to_s}/)
      end
      processor = Ralf::Interpolation.new(string, variables)
      raise NotAllInterpolationsSatisfied, "Not all keys are interpolated: '#{string}'" if processor.result.match(/:/)
      processor.result
    end

    # The interpolated string.
    attr_reader :result

    def initialize(string, variables)
      @variables = variables
      @result    = string.dup

      # public_instance_methods returns Strings on Ruby 1.8 and Symbols on
      # later versions; normalise so that 'result' is really excluded.
      tags = Ralf::Interpolation.public_instance_methods(false).map { |m| m.to_s } - ['result']
      tags.each do |tag|
        value = self.send(tag)
        next if value.nil?
        # block form: the value is inserted literally, backslash sequences in
        # it are not interpreted as back-references
        @result.gsub!(/:#{tag}/) { value }
      end
    end

    def bucket
      @variables[:bucket]
    end

    # two digit week number (cweek of the date)
    def week
      "%02d" % @variables[:date].cweek if @variables[:date]
    end

    # two digit day of the month
    def day
      "%02d" % @variables[:date].day if @variables[:date]
    end

    # two digit month
    def month
      "%02d" % @variables[:date].month if @variables[:date]
    end

    # four digit year
    def year
      "%04d" % @variables[:date].year if @variables[:date]
    end

  end

end
|
data/lib/ralf/log.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
class Ralf

  # A single log file stored under a key in a bucket.
  class Log

    # key          - key object; #name and #data are used
    # targetprefix - prefix of the key name that is not part of the log name
    def initialize(key, targetprefix)
      @key          = key
      @targetprefix = targetprefix
    end

    # The key name without the leading target prefix.
    #
    # Only a prefix at the start of the key name is removed; later occurrences
    # of the same text are part of the log name and are kept.
    def name
      key_name = @key.name
      prefix   = @targetprefix.to_s
      if !prefix.empty? && key_name.start_with?(prefix)
        key_name[prefix.length..-1]
      else
        key_name
      end
    end

    # Write the data of the key to a file called #name in dir.
    #
    # dir       - existing directory to save to
    # use_cache - when true (default) an already existing file is left
    #             untouched and the key data is not read
    #
    # Returns the name of the file.
    def save_to_dir(dir, use_cache = true)
      file = File.join(dir, name)
      unless use_cache && File.exist?(file)
        File.open(file, 'w') { |f| f.write(@key.data) }
      end
      file # return saved filename
    end
  end
end
|
@@ -0,0 +1,149 @@
|
|
1
|
+
require 'optparse'
|
2
|
+
|
3
|
+
# Command-line option parser for ralf.
class Ralf::OptionParser

  # Parse command-line arguments into an options Hash.
  #
  # args   - Array of arguments (e.g. ARGV); recognised options are removed from it
  # output - IO on which the help text and version are printed (default $stdout)
  #
  # Returns the options Hash, or nil when --help or --version was given.
  # Arguments that are left over after parsing are reported as a warning.
  def self.parse(args, output = $stdout)
    options = {}

    parser = ::OptionParser.new do |opts|
      opts.banner = <<USAGE_END
Usage: #{$0} [options]

Download and merge Amazon S3 bucket log files for a specified date range and
output a Common Log File. Ralf is an acronym for Retrieve Amazon Log Files.

Ralf downloads bucket log files to local cache directories, merges the Amazon Log
Files and converts them to Common Log Format.

Example: #{$0} --range month --now yesterday --output-file '/var/log/amazon/:year/:month/:bucket.log'

AWS credentials (Access Key Id and Secret Access Key) are required to access
S3 buckets. For security reasons these credentials can only be specified in a
configuration file (see --config-file) or through the environment using the
AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment variables.
USAGE_END

      opts.separator ""
      opts.separator "Log selection options:"
      opts.on("-l", "--[no-]list", "List buckets that have logging enabled. Does not process log files.") do |value|
        options[:list] = value
      end
      opts.on("-b", "--buckets x,y,z", Array, "Buckets for which to process log files. Defaults to all log-enabled buckets.") do |buckets|
        options[:buckets] = buckets.compact
      end
      opts.on("-r", "--range BEGIN[,END]", Array, "Date or date range to process. Defaults to 'today'.") do |range|
        options[:range] = range.compact
      end
      log_selection_help =<<LOG_SELECTION_HELP
Date to use as base for range. Defaults to 'today'.

You can use Chronic expressions for '--range' and '--now'. See http://chronic.rubyforge.org.

Example: --range 'last week'
All days of previous week.
Example: --range 'this week'
Beginning of this week (sunday) upto and including today.
Example: --range '2010-01-01','2010-04-30'
First four months of this year.
Example: --range 'this month' --now yesterday
This will select log files from the beginning of yesterday's month upto and including yesterday.

The --buckets, --range and --now options are optional. If unspecified, (incomplete)
logging for today will be processed for all buckets (that have logging enabled).
This is equivalent to specifying "--range 'today'" and "--now 'today'".
LOG_SELECTION_HELP
      opts.on("-t", "--now TIME", log_selection_help) do |now|
        options[:now] = now
      end

      # opts.on("-m", "--[no-]rename-bucket-keys", "Rename original log files on Amazon using format from '--cache-dir' option.") do |value|
      #   options[:rename_bucket_keys] = value
      # end

      # opts.separator ""
      opts.separator "Output options:"

      output_file_help =<<OUTPUT_FILE_HELP
Output file, e.g. '/var/log/s3/:year/:month/:bucket.log'. Required.

The --output-file format uses the last day of the range specified by (--range)
to determine the filename. E.g. when the format contains ':year/:month/:day' and
the range is 2010-01-15..2010-02-14, then the output file will be '2010/02/14'.
OUTPUT_FILE_HELP
      opts.on("-o", "--output-file FORMAT", output_file_help) do |format|
        options[:output_file] = format
      end

      cache_dir_help =<<CACHE_DIR_HELP
Directory name(s) in which to cache downloaded log files. Optional.

The --cache-dir format expands to as many directory names as needed for the
range specified by --range. E.g. "/var/run/s3_cache/:year/:month/:day/:bucket"
expands to 31 directories for range 2010-01-01..2010-01-31.

Defaults to '~/.ralf/:bucket' or '/var/log/ralf/:bucket' (when running as root).
CACHE_DIR_HELP
      opts.on("-x", "--cache-dir FORMAT", cache_dir_help) do |format|
        options[:cache_dir] = format
      end

      # opts.on("-f", "--output-dir-format FORMAT", "Output directory format, e.g. ':year/:month/:day'") do |format|
      #   options[:output_dir_format] = format
      # end

      # opts.on("-o", "--output-basedir DIR", "Base directory for output files.") do |dir|
      #   options[:output_basedir] = dir
      # end

      # opts.on("-p", "--output-prefix STRING", "Prefix string for output files.") do |string|
      #   options[:output_prefix] = string
      # end

      # opts.separator ""
      opts.separator "Config file options:"
      config_file_help =<<CONFIG_FILE_HELP
Path to file with configuration settings (in YAML format).

Configuration settings are read from the (-c) specified configuration file
or from ~/.ralf.conf or from /etc/ralf.conf (when running as root).
Command-line options override settings read from the configuration file.

The configuration file must be in YAML format. Each command-line options has an
equivalent setting in a configuration file replacing dash (-) by underscore(_).

The Amazon Access Key Id and Secret Access Key can only be specified in the
configuration file or through the environment.

Example:
output_file: /var/log/amazon_s3/:year:month/:bucket.log
aws_access_key_id: my_access_key_id
aws_secret_access_key: my_secret_access_key

To only use command-line options simply specify -c or --config-file without
an argument.
CONFIG_FILE_HELP
      opts.on("-c", "--config-file [FILE]", config_file_help) do |file|
        options[:config_file] = file
      end

      opts.separator "Debug options:"
      opts.on("-d", "--[no-]debug [aws]", "Show debug messages.") do |aws|
        # --debug      yields nil   => true
        # --debug aws  yields "aws" => "aws"
        # --no-debug   yields false => false (must not be turned into true)
        options[:debug] = aws.nil? ? true : aws
      end

      opts.separator ""
      opts.separator "Common options:"
      opts.on_tail("-h", "--help", "Show this message.") do
        output.puts opts
        return nil # leaves parse: nothing to run after showing help
      end
      opts.on_tail("-v", "--version", "Show version.") do
        output.print File.read(File.join(File.dirname(__FILE__), '..', '..', 'VERSION'))
        return nil # leaves parse: nothing to run after showing the version
      end
    end
    remaining = parser.parse!(args)
    parser.warn "Warning: unused arguments: #{remaining.join(' ')}" unless remaining.empty?
    options
  end

end
|