alblogs 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +52 -0
- data/README.md +27 -0
- data/alblogs.gemspec +14 -0
- data/bin/alblogs +315 -0
- metadata +46 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 40a4878aed41640c3d06e5734de49c18c96121fecb4bfb981b360aae7c19e70a
|
4
|
+
data.tar.gz: 9150dbb3f1d1fb9085c75431c8756d2d318a36b49b8005179fab72baccc7f71f
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: a35fc4c33ceeb1be95779ec968f4d3fb74378a4f7978b9f19fe9bf9969ceac662a49feb8038823a09661214ad15b00459053b21ce6230ea0808dc41d68202ba1
|
7
|
+
data.tar.gz: ab6d34862e7a22a125fe323530ebfc058ec33d69e5ff65d7ca874a98e781853880749c1e4b56c248b82e71c88d3c874baed2b2425223b8330c737711fd84d1d5
|
data/.gitignore
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
*.gem
|
2
|
+
*.rbc
|
3
|
+
/.config
|
4
|
+
/coverage/
|
5
|
+
/InstalledFiles
|
6
|
+
/pkg/
|
7
|
+
/spec/reports/
|
8
|
+
/spec/examples.txt
|
9
|
+
/test/tmp/
|
10
|
+
/test/version_tmp/
|
11
|
+
/tmp/
|
12
|
+
|
13
|
+
# Used by dotenv library to load environment variables.
|
14
|
+
# .env
|
15
|
+
|
16
|
+
## Specific to RubyMotion:
|
17
|
+
.dat*
|
18
|
+
.repl_history
|
19
|
+
build/
|
20
|
+
*.bridgesupport
|
21
|
+
build-iPhoneOS/
|
22
|
+
build-iPhoneSimulator/
|
23
|
+
|
24
|
+
## Specific to RubyMotion (use of CocoaPods):
|
25
|
+
#
|
26
|
+
# We recommend against adding the Pods directory to your .gitignore. However
|
27
|
+
# you should judge for yourself, the pros and cons are mentioned at:
|
28
|
+
# https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control
|
29
|
+
#
|
30
|
+
# vendor/Pods/
|
31
|
+
|
32
|
+
## Documentation cache and generated files:
|
33
|
+
/.yardoc/
|
34
|
+
/_yardoc/
|
35
|
+
/doc/
|
36
|
+
/rdoc/
|
37
|
+
|
38
|
+
## Environment normalization:
|
39
|
+
/.bundle/
|
40
|
+
/vendor/bundle
|
41
|
+
/lib/bundler/man/
|
42
|
+
|
43
|
+
# for a library or gem, you might want to ignore these files since the code is
|
44
|
+
# intended to run in multiple environments; otherwise, check them in:
|
45
|
+
# Gemfile.lock
|
46
|
+
# .ruby-version
|
47
|
+
# .ruby-gemset
|
48
|
+
|
49
|
+
# unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
|
50
|
+
.rvmrc
|
51
|
+
|
52
|
+
*~
|
data/README.md
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
# alblogs
|
2
|
+
|
3
|
+
Utility script for processing ALB access logs over a given time range
|
4
|
+
|
5
|
+
### Usage
|
6
|
+
|
7
|
+
```
|
8
|
+
Usage: alblogs [options]
|
9
|
+
-s, --start=TIME_EXP Start time
|
10
|
+
-e, --end=TIME_EXP End time
|
11
|
+
--include=REGEX Include filter
|
12
|
+
--exclude=REGEX Exclude filter
|
13
|
+
-p, --profile=PROFILE AWS profile
|
14
|
+
-b, --bucket=ALB_S3_BUCKET ALB S3 Bucket and Path
|
15
|
+
-o, --output=OUTPUT_FILE File to stream matching ALB log entries to
|
16
|
+
--stats Display Stats
|
17
|
+
--request-times-over=SECONDS Find requests that took over X seconds
|
18
|
+
```
|
19
|
+
|
20
|
+
### Example
|
21
|
+
|
22
|
+
Find all requests that took over 500ms to process in the last 12 hours.
|
23
|
+
|
24
|
+
```
|
25
|
+
alblogs -b 's3://<my-aws-alb-bucket-name>/access_logs/AWSLogs/<aws-account-id>/elasticloadbalancing/<aws-region>' -s '12 hours' -o slow-requests.log --request-times-over 0.5
|
26
|
+
```
|
27
|
+
|
data/alblogs.gemspec
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
Gem::Specification.new do |s|
  s.name        = 'alblogs'
  s.version     = '0.0.1'
  s.summary     = 'ALB access log processing'
  s.description = 'Utility script for processing ALB access logs over a given time range'
  s.authors     = ['Doug Youch']
  s.email       = 'dougyouch@gmail.com'
  s.homepage    = 'https://github.com/dougyouch/alblogs'

  # Ship everything tracked by git except test/spec/feature files.
  s.files       = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
  s.bindir      = 'bin'
  s.executables = s.files.grep(%r{^bin/}) { |f| File.basename(f) }
end
|
data/bin/alblogs
ADDED
@@ -0,0 +1,315 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'optparse'
|
4
|
+
require 'time'
|
5
|
+
require 'shellwords'
|
6
|
+
require 'json'
|
7
|
+
|
8
|
+
# Runs a shell command and returns its stdout.
# Raises a RuntimeError if the command exits non-zero.
def run_or_die(cmd)
  output = `#{cmd}`
  return output if $?.success?

  raise("command failed with #{$?}, #{cmd}")
end
|
13
|
+
|
14
|
+
# Converts a relative time expression ("30 min", "2 hours", "1 day")
# into a number of seconds. Returns nil when the string does not
# contain a recognized unit.
def parse_time_offset(str)
  case str
  when /min/  then str.sub(/ *min.*/, '').to_i * 60
  when /hour/ then str.sub(/ *hour.*/, '').to_i * 3600
  when /day/  then str.sub(/ *day.*/, '').to_i * 86_400
  end
end
|
25
|
+
|
26
|
+
# Resolves +str+ into a Time. Relative expressions ("30 min") are
# subtracted from +now+ and rounded down to the whole minute; anything
# else is parsed as an absolute time and converted to UTC.
def time_ago(now, str)
  offset = parse_time_offset(str)
  return Time.parse(str).utc if offset.nil?

  time = now - offset
  time - (time.to_i % 60) # round to the start of the minute
end
|
34
|
+
|
35
|
+
# Lists ALB log files under bucket/date_path via `aws s3 ls`.
#
# bucket    - s3:// URL prefix of the ALB access-log location
# date_path - "YYYY/MM/DD" partition to list
# profile   - optional AWS CLI profile name
#
# Returns an Array of S3File. Listing lines that do not look like file
# entries (e.g. "PRE <dir>/" prefix lines) are skipped instead of
# crashing on a nil capture group.
def get_s3_files(bucket, date_path, profile)
  s3_url = "#{bucket}/#{date_path}/"
  cmd = "aws"
  cmd << " --profile #{Shellwords.escape(profile)}" if profile
  cmd << " s3 ls #{Shellwords.escape(s3_url)}"
  output = run_or_die(cmd)
  output.split("\n").map do |line|
    # Expected shape: "<date> <time> <size> <file name>"
    next unless line =~ /(\d{4}\-\d{2}\-\d{2} \d{2}:\d{2}:\d{2}) +(\d+) +(.+)/
    last_modified_at = Time.parse($1).utc
    file_size = $2.to_i
    file = $3
    S3File.new("#{s3_url}#{file}", file_size, last_modified_at)
  end.compact
end
|
49
|
+
|
50
|
+
# Collects the S3 log files whose 5-minute windows overlap +range+,
# walking one day partition at a time. Returns a Hash keyed by file URL
# so the same file is never listed twice.
def get_s3_files_in_range(range, bucket, profile)
  found = {}
  day = range.begin
  until day >= range.end
    get_s3_files(bucket, day.strftime('%Y/%m/%d'), profile).each do |s3_file|
      found[s3_file.file] ||= s3_file if s3_file.in_range?(range)
    end
    day += 86_400
  end
  found
end
|
63
|
+
|
64
|
+
# Downloads a gzipped ALB log file from S3 and decompresses it in place,
# leaving the plain-text log at +dest+.
def download_s3_file(s3_file, dest, profile)
  gz_dest = "#{Shellwords.escape(dest)}.gz"
  cmd = "aws"
  cmd << " --profile #{Shellwords.escape(profile)}" if profile
  cmd << " s3 cp #{Shellwords.escape(s3_file.file)} #{gz_dest}"
  run_or_die(cmd)
  run_or_die("gzip -f -d #{gz_dest}")
end
|
72
|
+
|
73
|
+
# Ordered map of ALB access-log field name => regex fragment that
# captures it. The order matters: it drives both the combined line
# regex (alb_log_fields_regex) and the AlbLogEntry struct members.
# Memoized after the first call.
def alb_log_fields
  return @alb_log_fields if @alb_log_fields

  unquoted = '([^ ]+)'
  quoted = '"(.*?)"'

  @alb_log_fields = {
    type: unquoted,
    timestamp: unquoted,
    elb: unquoted,
    client_port: unquoted,
    target_port: unquoted,
    request_processing_time: unquoted,
    target_processing_time: unquoted,
    response_processing_time: unquoted,
    elb_status_code: unquoted,
    target_status_code: unquoted,
    received_bytes: unquoted,
    sent_bytes: unquoted,
    request: quoted,
    user_agent: quoted,
    ssl_cipher: unquoted,
    ssl_protocol: unquoted,
    target_group_arn: unquoted,
    trace_id: quoted,
    domain_name: quoted,
    chosen_cert_arn: quoted,
    matched_rule_priority: unquoted,
    request_creation_time: unquoted,
    actions_executed: quoted,
    redirect_url: quoted,
    error_reason: quoted
  }
end
|
108
|
+
|
109
|
+
# Combined regex matching one full ALB access-log line, built from the
# space-separated field fragments. Memoized.
def alb_log_fields_regex
  @alb_log_fields_regex ||= Regexp.new(alb_log_fields.values.join(' '))
end
|
115
|
+
|
116
|
+
# Returns the captured field values for +line+, in alb_log_fields
# order. Returns an empty array when the line does not match.
def get_alb_log_fields(line)
  alb_log_fields_regex.match(line).to_a.drop(1)
end
|
121
|
+
|
122
|
+
# Parses +line+ into an AlbLogEntry, keeping the raw line on the entry
# for filtering and output.
def get_alb_log_entry(line)
  AlbLogEntry.new(*get_alb_log_fields(line)).tap { |entry| entry.line = line }
end
|
127
|
+
|
128
|
+
# Yields the block and returns the wall-clock seconds it took.
def measure
  started = Time.now
  yield
  Time.now - started
end
|
133
|
+
|
134
|
+
# Records the elapsed run time into +stats+ and dumps the whole stats
# hash to stderr (stdout is reserved for matching log lines).
def display_stats(stats)
  elapsed = Time.now.utc - stats[:started_at]
  stats[:elapsed_time] = elapsed
  $stderr.puts(stats.inspect)
end
|
138
|
+
|
139
|
+
# One ALB access-log object in S3, with the time window it covers.
class S3File
  # Each ALB log file covers a 5-minute window ending at the timestamp
  # embedded in its name.
  MINUTES_5 = 5 * 60

  attr_reader :file, :file_size, :last_modified_at

  def initialize(file, file_size, last_modified_at)
    @file = file
    @file_size = file_size
    @last_modified_at = last_modified_at
  end

  # End of the window, parsed from the "_YYYYMMDDThhmmZ_" token in the
  # file name (UTC). Raises when the token is missing. Memoized.
  def end_time
    @end_time ||= begin
      match = @file.match(/_(\d{4})(\d{2})(\d{2})T(\d{2})(\d{2})Z_/)
      raise("unable to find time stamp in #{@file}") unless match
      Time.new(match[1], match[2], match[3], match[4], match[5], 0, 0)
    end
  end

  # Start of the window: 5 minutes before end_time. Memoized.
  def start_time
    @start_time ||= (end_time - MINUTES_5)
  end

  # True when this file's window overlaps +range+ at all.
  def in_range?(range)
    end_time >= range.begin && start_time <= range.end
  end
end
|
172
|
+
|
173
|
+
# One parsed ALB access-log line. The struct members (and their order)
# come from alb_log_fields, so every captured field is addressable by
# name; raw values are all strings.
class AlbLogEntry < Struct.new(*alb_log_fields.keys)
  # The raw, unparsed log line this entry was built from.
  attr_accessor :line

  # Shadows the struct accessor: returns the timestamp as a Time,
  # memoized. The raw string remains reachable via self[:timestamp].
  def timestamp
    @timestamp ||= Time.iso8601(self[:timestamp])
  end

  # Shadows the struct accessor: target processing time as a Float,
  # memoized. Raw string via self[:target_processing_time].
  def target_processing_time
    @target_processing_time ||= self[:target_processing_time].to_f
  end
end
|
184
|
+
|
185
|
+
# Decides whether a parsed log entry falls inside the requested time
# range and passes the include/exclude/slow-request filters.
class RequestMatcher
  attr_reader :range

  # options - hash with :start_time, :end_time (Times), optional
  #           :include_filter / :exclude_filter (Regexps) and
  #           :request_times_over (Float seconds).
  def initialize(options)
    @range = options[:start_time]..options[:end_time]
    @include_filter, @exclude_filter, @request_times_over =
      options.values_at(:include_filter, :exclude_filter, :request_times_over)
  end

  # True when +entry+ survives every configured filter.
  def match?(entry)
    return false unless @range.cover?(entry.timestamp)
    return false if @include_filter && !@include_filter.match?(entry.line)
    return false if @exclude_filter&.match?(entry.line)
    return false if @request_times_over && entry.target_processing_time < @request_times_over

    true
  end
end
|
203
|
+
|
204
|
+
started_at = Time.now.utc

# Defaults: scan the last 30 minutes, no filters, stream to stdout.
options = {
  start_time: time_ago(started_at, '30 min'),
  end_time: started_at,
  include_filter: nil,
  exclude_filter: nil,
  alb_s3_bucket: nil,
  aws_profile: nil,
  log_file: $stdout,
  display_stats: false,
  request_times_over: nil
}
OptionParser.new do |opts|
  opts.banner = "Usage: alblogs [options]"

  opts.on("-s", "--start=TIME_EXP", "Start time") do |v|
    options[:start_time] = time_ago(started_at, v)
  end

  opts.on("-e", "--end=TIME_EXP", "End time") do |v|
    options[:end_time] = time_ago(started_at, v)
  end

  opts.on("--include=REGEX", "Include filter") do |v|
    options[:include_filter] = Regexp.new(v)
  end

  opts.on("--exclude=REGEX", "Exclude filter") do |v|
    options[:exclude_filter] = Regexp.new(v)
  end

  opts.on("-p", "--profile=PROFILE", "AWS profile") do |v|
    options[:aws_profile] = v
  end

  opts.on("-b", "--bucket=ALB_S3_BUCKET", "ALB S3 Bucket and Path") do |v|
    options[:alb_s3_bucket] = v
  end

  opts.on('-o', "--output=OUTPUT_FILE", 'File to stream matching ALB log entries to') do |v|
    f = File.open(v, 'wb')
    f.sync = true # flush each matching line as it is found
    options[:log_file] = f
  end

  opts.on("--stats", "Display Stats") do
    options[:display_stats] = true
  end

  opts.on('--request-times-over=SECONDS', 'Find requests that took over X seconds') do |v|
    options[:request_times_over] = v.to_f
  end
end.parse!

raise("no bucket specified") unless options[:alb_s3_bucket]

# just forgive the user and swap the values
if options[:end_time] && options[:end_time] < options[:start_time]
  $stderr.puts 'swapping start/end times'
  options[:start_time], options[:end_time] = options[:end_time], options[:start_time]
end

request_matcher = RequestMatcher.new options

# Hash.new(0) lets the counters below use += without initialization.
stats = Hash.new(0)
stats[:started_at] = started_at
stats[:range_starts_at] = request_matcher.range.begin
stats[:range_ends_at] = request_matcher.range.end
stats[:min_log_time] = nil
stats[:max_log_time] = nil
stats[:min_matched_log_time] = nil
stats[:max_matched_log_time] = nil

# Scratch file reused for every download; clear leftovers from a
# previous aborted run. (File.exists? was removed in Ruby 3.2 —
# File.exist? is the supported form.)
tmp_file = '.download.alblogs.log'
File.unlink(tmp_file) if File.exist?(tmp_file)
File.unlink("#{tmp_file}.gz") if File.exist?("#{tmp_file}.gz")

# Let Ctrl-C finish the current line/file and exit cleanly.
$stop = false
trap("INT") { $stop = true }

get_s3_files_in_range(request_matcher.range, options[:alb_s3_bucket], options[:aws_profile]).values.each do |s3_file|
  stats[:files] += 1

  stats[:total_download_time] += measure do
    download_s3_file(s3_file, tmp_file, options[:aws_profile])
  end

  stats[:total_file_processing_time] += measure do
    File.open(tmp_file, 'rb') do |f|
      while !f.eof? && !$stop
        stats[:lines] += 1
        line = f.readline
        entry = get_alb_log_entry(line)
        # Track observed log-time bounds across all lines...
        stats[:min_log_time] = entry.timestamp if !stats[:min_log_time] || entry.timestamp < stats[:min_log_time]
        stats[:max_log_time] = entry.timestamp if !stats[:max_log_time] || entry.timestamp > stats[:max_log_time]
        next unless request_matcher.match?(entry)
        stats[:matching_lines] += 1
        # ...and separately for lines that passed every filter.
        stats[:min_matched_log_time] = entry.timestamp if !stats[:min_matched_log_time] || entry.timestamp < stats[:min_matched_log_time]
        stats[:max_matched_log_time] = entry.timestamp if !stats[:max_matched_log_time] || entry.timestamp > stats[:max_matched_log_time]
        options[:log_file].puts line
      end
    end
  end

  File.unlink(tmp_file)

  display_stats(stats) if options[:display_stats]
  break if $stop
end

options[:log_file].close
|
metadata
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: alblogs
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Doug Youch
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2019-06-06 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: Utility script for processing ALB access logs over a given time range
|
14
|
+
email: dougyouch@gmail.com
|
15
|
+
executables:
|
16
|
+
- alblogs
|
17
|
+
extensions: []
|
18
|
+
extra_rdoc_files: []
|
19
|
+
files:
|
20
|
+
- ".gitignore"
|
21
|
+
- README.md
|
22
|
+
- alblogs.gemspec
|
23
|
+
- bin/alblogs
|
24
|
+
homepage: https://github.com/dougyouch/alblogs
|
25
|
+
licenses: []
|
26
|
+
metadata: {}
|
27
|
+
post_install_message:
|
28
|
+
rdoc_options: []
|
29
|
+
require_paths:
|
30
|
+
- lib
|
31
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
32
|
+
requirements:
|
33
|
+
- - ">="
|
34
|
+
- !ruby/object:Gem::Version
|
35
|
+
version: '0'
|
36
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
requirements: []
|
42
|
+
rubygems_version: 3.0.3
|
43
|
+
signing_key:
|
44
|
+
specification_version: 4
|
45
|
+
summary: ALB access log processing
|
46
|
+
test_files: []
|