alblogs 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +52 -0
- data/README.md +27 -0
- data/alblogs.gemspec +14 -0
- data/bin/alblogs +315 -0
- metadata +46 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
+---
+SHA256:
+  metadata.gz: 40a4878aed41640c3d06e5734de49c18c96121fecb4bfb981b360aae7c19e70a
+  data.tar.gz: 9150dbb3f1d1fb9085c75431c8756d2d318a36b49b8005179fab72baccc7f71f
+SHA512:
+  metadata.gz: a35fc4c33ceeb1be95779ec968f4d3fb74378a4f7978b9f19fe9bf9969ceac662a49feb8038823a09661214ad15b00459053b21ce6230ea0808dc41d68202ba1
+  data.tar.gz: ab6d34862e7a22a125fe323530ebfc058ec33d69e5ff65d7ca874a98e781853880749c1e4b56c248b82e71c88d3c874baed2b2425223b8330c737711fd84d1d5
data/.gitignore
ADDED
@@ -0,0 +1,52 @@
+*.gem
+*.rbc
+/.config
+/coverage/
+/InstalledFiles
+/pkg/
+/spec/reports/
+/spec/examples.txt
+/test/tmp/
+/test/version_tmp/
+/tmp/
+
+# Used by dotenv library to load environment variables.
+# .env
+
+## Specific to RubyMotion:
+.dat*
+.repl_history
+build/
+*.bridgesupport
+build-iPhoneOS/
+build-iPhoneSimulator/
+
+## Specific to RubyMotion (use of CocoaPods):
+#
+# We recommend against adding the Pods directory to your .gitignore. However
+# you should judge for yourself, the pros and cons are mentioned at:
+# https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control
+#
+# vendor/Pods/
+
+## Documentation cache and generated files:
+/.yardoc/
+/_yardoc/
+/doc/
+/rdoc/
+
+## Environment normalization:
+/.bundle/
+/vendor/bundle
+/lib/bundler/man/
+
+# for a library or gem, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# Gemfile.lock
+# .ruby-version
+# .ruby-gemset
+
+# unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
+.rvmrc
+
+*~
data/README.md
ADDED
@@ -0,0 +1,27 @@
+# alblogs
+
+Utility script for processing ALB access logs over a given time range
+
+### Usage
+
+```
+Usage: alblogs [options]
+    -s, --start=TIME_EXP             Start time
+    -e, --end=TIME_EXP               End time
+        --include=REGEX              Include filter
+        --exclude=REGEX              Exclude filter
+    -p, --profile=PROFILE            AWS profile
+    -b, --bucket=ALB_S3_BUCKET       ALB S3 Bucket and Path
+    -o, --output=OUTPUT_FILE         File to stream matching ALB log entries to
+        --stats                      Display Stats
+        --request-times-over=SECONDS Find requests that took over X seconds
+```
+
+### Example
+
+Find all requests that took over 500ms to process in the last 12 hours.
+
+```
+alblogs -b 's3://<my-aws-alb-bucket-name>/access_logs/AWSLogs/<aws-account-id>/elasticloadbalancing/<aws-region>' -s '12 hours' -o slow-requests.log --request-times-over 0.5
+```
+
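Not part of the gem, and purely illustrative: a minimal Ruby sketch of one way to post-process the slow-requests.log written by the example above. It assumes the file contains raw ALB access-log lines, where the 7th space-separated field is the target processing time and the first quoted field is the request line.

# Hypothetical helper, not shipped with alblogs: summarize the ten slowest
# requests captured in slow-requests.log (raw ALB access-log lines).
entries = File.foreach('slow-requests.log').map do |line|
  target_time = line.split(' ')[6].to_f # target_processing_time field
  request     = line[/"([^"]*)"/, 1]    # first quoted field is the request
  [target_time, request]
end

entries.sort_by { |time, _| -time }.first(10).each do |time, request|
  printf("%8.3fs  %s\n", time, request)
end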
data/alblogs.gemspec
ADDED
@@ -0,0 +1,14 @@
+# frozen_string_literal: true
+
+Gem::Specification.new do |s|
+  s.name = 'alblogs'
+  s.version = '0.0.1'
+  s.summary = 'ALB access log processing'
+  s.description = 'Utility script for processing ALB access logs over a given time range'
+  s.authors = ['Doug Youch']
+  s.email = 'dougyouch@gmail.com'
+  s.homepage = 'https://github.com/dougyouch/alblogs'
+  s.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
+  s.bindir = 'bin'
+  s.executables = s.files.grep(%r{^bin/}) { |f| File.basename(f) }
+end
data/bin/alblogs
ADDED
@@ -0,0 +1,315 @@
+#!/usr/bin/env ruby
+
+require 'optparse'
+require 'time'
+require 'shellwords'
+require 'json'
+
+def run_or_die(cmd)
+  res = `#{cmd}`
+  raise("command failed with #{$?}, #{cmd}") unless $?.success?
+  res
+end
+
+def parse_time_offset(str)
+  if str =~ /min/
+    str.sub(/ *min.*/, '').to_i * 60
+  elsif str =~ /hour/
+    str.sub(/ *hour.*/, '').to_i * 3600
+  elsif str =~ /day/
+    str.sub(/ *day.*/, '').to_i * 86400
+  else
+    nil
+  end
+end
+
+def time_ago(now, str)
+  if offset = parse_time_offset(str)
+    time = now - offset
+    time - (time.to_i % 60) # round to the start of the minute
+  else
+    Time.parse(str).utc
+  end
+end
+
+def get_s3_files(bucket, date_path, profile)
+  s3_url = "#{bucket}/#{date_path}/"
+  cmd = "aws"
+  cmd << " --profile #{Shellwords.escape(profile)}" if profile
+  cmd << " s3 ls #{Shellwords.escape(s3_url)}"
+  output = run_or_die(cmd)
+  output.split("\n").map do |line|
+    line =~ /(\d{4}\-\d{2}\-\d{2} \d{2}:\d{2}:\d{2}) +(\d+) +(.+)/
+    last_modified_at = Time.parse($1).utc
+    file_size = $2.to_i
+    file = $3
+    S3File.new("#{s3_url}#{file}", file_size, last_modified_at)
+  end
+end
+
+def get_s3_files_in_range(range, bucket, profile)
+  s3_files = {}
+  time = range.begin
+  while time < range.end
+    date_path = time.strftime('%Y/%m/%d')
+    get_s3_files(bucket, date_path, profile).each do |s3_file|
+      next unless s3_file.in_range?(range)
+      s3_files[s3_file.file] ||= s3_file
+    end
+    time += 86_400
+  end
+  s3_files
+end
+
+def download_s3_file(s3_file, dest, profile)
+  cmd = "aws"
+  cmd << " --profile #{Shellwords.escape(profile)}" if profile
+  cmd << " s3 cp #{Shellwords.escape(s3_file.file)} #{Shellwords.escape(dest)}.gz"
+  run_or_die(cmd)
+  cmd = "gzip -f -d #{Shellwords.escape(dest)}.gz"
+  run_or_die(cmd)
+end
+
+def alb_log_fields
+  @alb_log_fields ||=
+    begin
+      not_a_space = '([^ ]+)'
+      in_quotes = '"(.*?)"'
+
+      {
+        type: not_a_space,
+        timestamp: not_a_space,
+        elb: not_a_space,
+        client_port: not_a_space,
+        target_port: not_a_space,
+        request_processing_time: not_a_space,
+        target_processing_time: not_a_space,
+        response_processing_time: not_a_space,
+        elb_status_code: not_a_space,
+        target_status_code: not_a_space,
+        received_bytes: not_a_space,
+        sent_bytes: not_a_space,
+        request: in_quotes,
+        user_agent: in_quotes,
+        ssl_cipher: not_a_space,
+        ssl_protocol: not_a_space,
+        target_group_arn: not_a_space,
+        trace_id: in_quotes,
+        domain_name: in_quotes,
+        chosen_cert_arn: in_quotes,
+        matched_rule_priority: not_a_space,
+        request_creation_time: not_a_space,
+        actions_executed: in_quotes,
+        redirect_url: in_quotes,
+        error_reason: in_quotes
+      }
+    end
+end
+
+def alb_log_fields_regex
+  @alb_log_fields_regex ||=
+    begin
+      Regexp.new alb_log_fields.values.join(' ')
+    end
+end
+
+def get_alb_log_fields(line)
+  matches = alb_log_fields_regex.match(line).to_a
+  matches.shift
+  matches
+end
+
+def get_alb_log_entry(line)
+  entry = AlbLogEntry.new(*get_alb_log_fields(line))
+  entry.line = line
+  entry
+end
+
+def measure
+  start = Time.now
+  yield
+  Time.now - start
+end
+
+def display_stats(stats)
+  stats[:elapsed_time] = Time.now.utc - stats[:started_at]
+  $stderr.puts stats.inspect
+end
+
+class S3File
+  MINUTES_5 = 5 * 60
+
+  attr_reader :file,
+              :file_size,
+              :last_modified_at
+
+  def initialize(file, file_size, last_modified_at)
+    @file = file
+    @file_size = file_size
+    @last_modified_at = last_modified_at
+  end
+
+  def end_time
+    @end_time ||=
+      begin
+        unless @file =~ /_(\d{4})(\d{2})(\d{2})T(\d{2})(\d{2})Z_/
+          raise("unable to find time stamp in #{@file}")
+        end
+        Time.new($1, $2, $3, $4, $5, 0, 0)
+      end
+  end
+
+  def start_time
+    @start_time ||= (end_time - MINUTES_5)
+  end
+
+  def in_range?(range)
+    return false if end_time < range.begin
+    return false if start_time > range.end
+    true
+  end
+end
+
+class AlbLogEntry < Struct.new(*alb_log_fields.keys)
+  attr_accessor :line
+
+  def timestamp
+    @timestamp ||= Time.iso8601(self[:timestamp])
+  end
+
+  def target_processing_time
+    @target_processing_time ||= self[:target_processing_time].to_f
+  end
+end
+
+class RequestMatcher
+  attr_reader :range
+
+  def initialize(options)
+    @range = options[:start_time]..options[:end_time]
+    @exclude_filter = options[:exclude_filter]
+    @include_filter = options[:include_filter]
+    @request_times_over = options[:request_times_over]
+  end
+
+  def match?(entry)
+    return false unless @range.cover?(entry.timestamp)
+    return false if @include_filter && ! @include_filter.match?(entry.line)
+    return false if @exclude_filter && @exclude_filter.match?(entry.line)
+    return false if @request_times_over && @request_times_over > entry.target_processing_time
+    true
+  end
+end
+
+started_at = Time.now.utc
+
+options = {
+  start_time: time_ago(started_at, '30 min'),
+  end_time: started_at,
+  include_filter: nil,
+  exclude_filter: nil,
+  alb_s3_bucket: nil,
+  aws_profile: nil,
+  log_file: $stdout,
+  display_stats: false,
+  request_times_over: nil
+}
+OptionParser.new do |opts|
+  opts.banner = "Usage: alblogs [options]"
+
+  opts.on("-s", "--start=TIME_EXP", "Start time") do |v|
+    options[:start_time] = time_ago(started_at, v)
+  end
+
+  opts.on("-e", "--end=TIME_EXP", "End time") do |v|
+    options[:end_time] = time_ago(started_at, v)
+  end
+
+  opts.on("--include=REGEX", "Include filter") do |v|
+    options[:include_filter] = Regexp.new(v)
+  end
+
+  opts.on("--exclude=REGEX", "Exclude filter") do |v|
+    options[:exclude_filter] = Regexp.new(v)
+  end
+
+  opts.on("-p", "--profile=PROFILE", "AWS profile") do |v|
+    options[:aws_profile] = v
+  end
+
+  opts.on("-b", "--bucket=ALB_S3_BUCKET", "ALB S3 Bucket and Path") do |v|
+    options[:alb_s3_bucket] = v
+  end
+
+  opts.on('-o', "--output=OUTPUT_FILE", 'File to stream matching ALB log entries to') do |v|
+    f = File.open(v, 'wb')
+    f.sync = true
+    options[:log_file] = f
+  end
+
+  opts.on("--stats", "Display Stats") do
+    options[:display_stats] = true
+  end
+
+  opts.on('--request-times-over=SECONDS', 'Find requests that took over X seconds') do |v|
+    options[:request_times_over] = v.to_f
+  end
+end.parse!
+
+raise("no bucket specified") unless options[:alb_s3_bucket]
+
+# just forgive the user and swap the values
+if options[:end_time] && options[:end_time] < options[:start_time]
+  $stderr.puts 'swapping start/end times'
+  options[:start_time], options[:end_time] = options[:end_time], options[:start_time]
+end
+
+request_matcher = RequestMatcher.new options
+
+stats = Hash.new(0)
+stats[:started_at] = started_at
+stats[:range_starts_at] = request_matcher.range.begin
+stats[:range_ends_at] = request_matcher.range.end
+stats[:min_log_time] = nil
+stats[:max_log_time] = nil
+stats[:min_matched_log_time] = nil
+stats[:max_matched_log_time] = nil
+
+tmp_file = '.download.alblogs.log'
+File.unlink(tmp_file) if File.exists?(tmp_file)
+File.unlink("#{tmp_file}.gz") if File.exists?("#{tmp_file}.gz")
+
+$stop = false
+trap("INT") { $stop = true }
+
+get_s3_files_in_range(request_matcher.range, options[:alb_s3_bucket], options[:aws_profile]).values.each do |s3_file|
+  stats[:files] += 1
+
+  stats[:total_download_time] += measure do
+    download_s3_file(s3_file, tmp_file, options[:aws_profile])
+  end
+
+  stats[:total_file_processing_time] += measure do
+    File.open(tmp_file, 'rb') do |f|
+      while(! f.eof? && ! $stop)
+        stats[:lines] += 1
+        line = f.readline
+        entry = get_alb_log_entry(line)
+        stats[:min_log_time] = ! stats[:min_log_time] || stats[:min_log_time] > entry.timestamp ? entry.timestamp : stats[:min_log_time]
+        stats[:max_log_time] = ! stats[:max_log_time] || stats[:max_log_time] < entry.timestamp ? entry.timestamp : stats[:max_log_time]
+        next unless request_matcher.match?(entry)
+        stats[:matching_lines] += 1
+        stats[:min_matched_log_time] = ! stats[:min_matched_log_time] || stats[:min_matched_log_time] > entry.timestamp ? entry.timestamp : stats[:min_matched_log_time]
+        stats[:max_matched_log_time] = ! stats[:max_matched_log_time] || stats[:max_matched_log_time] < entry.timestamp ? entry.timestamp : stats[:max_matched_log_time]
+        options[:log_file].puts line
+      end
+    end
+  end
+
+  File.unlink(tmp_file)
+
+  display_stats(stats) if options[:display_stats]
+  break if $stop
+end
+
+options[:log_file].close
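For reference only (this block is not part of bin/alblogs): a self-contained Ruby sketch showing how the relative time expressions accepted by -s/--start and -e/--end resolve, mirroring the parse_time_offset and time_ago helpers above. The helper names here are illustrative, not the gem's API.

require 'time'

# Mirrors parse_time_offset: "N min|hour|day" becomes an offset in seconds.
def offset_seconds(str)
  case str
  when /min/  then str.sub(/ *min.*/, '').to_i * 60
  when /hour/ then str.sub(/ *hour.*/, '').to_i * 3600
  when /day/  then str.sub(/ *day.*/, '').to_i * 86_400
  end
end

# Mirrors time_ago: relative expressions are subtracted from "now" and rounded
# down to the start of the minute; anything else falls through to Time.parse.
def resolve(now, str)
  if (offset = offset_seconds(str))
    time = now - offset
    time - (time.to_i % 60)
  else
    Time.parse(str).utc
  end
end

now = Time.utc(2019, 6, 6, 12, 34, 56)
p resolve(now, '12 hours')             # => 2019-06-06 00:34:00 UTC
p resolve(now, '2019-06-06 03:15 UTC') # => 2019-06-06 03:15:00 UTC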
metadata
ADDED
@@ -0,0 +1,46 @@
+--- !ruby/object:Gem::Specification
+name: alblogs
+version: !ruby/object:Gem::Version
+  version: 0.0.1
+platform: ruby
+authors:
+- Doug Youch
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2019-06-06 00:00:00.000000000 Z
+dependencies: []
+description: Utility script for processing ALB access logs over a given time range
+email: dougyouch@gmail.com
+executables:
+- alblogs
+extensions: []
+extra_rdoc_files: []
+files:
+- ".gitignore"
+- README.md
+- alblogs.gemspec
+- bin/alblogs
+homepage: https://github.com/dougyouch/alblogs
+licenses: []
+metadata: {}
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: '0'
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: '0'
+requirements: []
+rubygems_version: 3.0.3
+signing_key:
+specification_version: 4
+summary: ALB access log processing
+test_files: []