alblogs 0.0.1

Files changed (6)
  1. checksums.yaml +7 -0
  2. data/.gitignore +52 -0
  3. data/README.md +27 -0
  4. data/alblogs.gemspec +14 -0
  5. data/bin/alblogs +315 -0
  6. metadata +46 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
+ ---
+ SHA256:
+   metadata.gz: 40a4878aed41640c3d06e5734de49c18c96121fecb4bfb981b360aae7c19e70a
+   data.tar.gz: 9150dbb3f1d1fb9085c75431c8756d2d318a36b49b8005179fab72baccc7f71f
+ SHA512:
+   metadata.gz: a35fc4c33ceeb1be95779ec968f4d3fb74378a4f7978b9f19fe9bf9969ceac662a49feb8038823a09661214ad15b00459053b21ce6230ea0808dc41d68202ba1
+   data.tar.gz: ab6d34862e7a22a125fe323530ebfc058ec33d69e5ff65d7ca874a98e781853880749c1e4b56c248b82e71c88d3c874baed2b2425223b8330c737711fd84d1d5
data/.gitignore ADDED
@@ -0,0 +1,52 @@
+ *.gem
+ *.rbc
+ /.config
+ /coverage/
+ /InstalledFiles
+ /pkg/
+ /spec/reports/
+ /spec/examples.txt
+ /test/tmp/
+ /test/version_tmp/
+ /tmp/
+
+ # Used by dotenv library to load environment variables.
+ # .env
+
+ ## Specific to RubyMotion:
+ .dat*
+ .repl_history
+ build/
+ *.bridgesupport
+ build-iPhoneOS/
+ build-iPhoneSimulator/
+
+ ## Specific to RubyMotion (use of CocoaPods):
+ #
+ # We recommend against adding the Pods directory to your .gitignore. However
+ # you should judge for yourself, the pros and cons are mentioned at:
+ # https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control
+ #
+ # vendor/Pods/
+
+ ## Documentation cache and generated files:
+ /.yardoc/
+ /_yardoc/
+ /doc/
+ /rdoc/
+
+ ## Environment normalization:
+ /.bundle/
+ /vendor/bundle
+ /lib/bundler/man/
+
+ # for a library or gem, you might want to ignore these files since the code is
+ # intended to run in multiple environments; otherwise, check them in:
+ # Gemfile.lock
+ # .ruby-version
+ # .ruby-gemset
+
+ # unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
+ .rvmrc
+
+ *~
data/README.md ADDED
@@ -0,0 +1,27 @@
+ # alblogs
+
+ Utility script for processing ALB access logs over a given time range
+
+ ### Usage
+
+ ```
+ Usage: alblogs [options]
+ -s, --start=TIME_EXP Start time
+ -e, --end=TIME_EXP End time
+ --include=REGEX Include filter
+ --exclude=REGEX Exclude filter
+ -p, --profile=PROFILE AWS profile
+ -b, --bucket=ALB_S3_BUCKET ALB S3 Bucket and Path
+ -o, --output=OUTPUT_FILE File to stream matching ALB log entries to
+ --stats Display Stats
+ --request-times-over=SECONDS Find requests that took over X seconds
+ ```
+
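+ Relative time expressions (`30 min`, `12 hours`, `2 days`) are accepted for `--start`/`--end`; any other value is parsed as an absolute time.
+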
+ ### Example
+
+ Find all requests that took over 500ms to process in the last 12 hours.
+
+ ```
+ alblogs -b 's3://<my-aws-alb-bucket-name>/access_logs/AWSLogs/<aws-account-id>/elasticloadbalancing/<aws-region>' -s '12 hours' -o slow-requests.log --request-times-over 0.5
+ ```
+
data/alblogs.gemspec ADDED
@@ -0,0 +1,14 @@
+ # frozen_string_literal: true
+
+ Gem::Specification.new do |s|
+   s.name = 'alblogs'
+   s.version = '0.0.1'
+   s.summary = 'ALB access log processing'
+   s.description = 'Utility script for processing ALB access logs over a given time range'
+   s.authors = ['Doug Youch']
+   s.email = 'dougyouch@gmail.com'
+   s.homepage = 'https://github.com/dougyouch/alblogs'
+   s.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
+   s.bindir = 'bin'
+   s.executables = s.files.grep(%r{^bin/}) { |f| File.basename(f) }
+ end
data/bin/alblogs ADDED
@@ -0,0 +1,315 @@
+ #!/usr/bin/env ruby
+
+ require 'optparse'
+ require 'time'
+ require 'shellwords'
+ require 'json'
+
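+ # Run a shell command and return its output; raise if it exits with a non-zero status.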
+ def run_or_die(cmd)
+   res = `#{cmd}`
+   raise("command failed with #{$?}, #{cmd}") unless $?.success?
+   res
+ end
+
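+ # Convert a relative time expression ("30 min", "12 hours", "2 days") into seconds;
+ # returns nil when the expression is not relative.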
+ def parse_time_offset(str)
+   if str =~ /min/
+     str.sub(/ *min.*/, '').to_i * 60
+   elsif str =~ /hour/
+     str.sub(/ *hour.*/, '').to_i * 3600
+   elsif str =~ /day/
+     str.sub(/ *day.*/, '').to_i * 86400
+   else
+     nil
+   end
+ end
+
+ def time_ago(now, str)
+   if offset = parse_time_offset(str)
+     time = now - offset
+     time - (time.to_i % 60) # round to the start of the minute
+   else
+     Time.parse(str).utc
+   end
+ end
+
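+ # List the objects under <bucket>/<YYYY/MM/DD>/ via `aws s3 ls` and wrap each listing line in an S3File.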
+ def get_s3_files(bucket, date_path, profile)
+   s3_url = "#{bucket}/#{date_path}/"
+   cmd = "aws"
+   cmd << " --profile #{Shellwords.escape(profile)}" if profile
+   cmd << " s3 ls #{Shellwords.escape(s3_url)}"
+   output = run_or_die(cmd)
+   output.split("\n").map do |line|
+     line =~ /(\d{4}\-\d{2}\-\d{2} \d{2}:\d{2}:\d{2}) +(\d+) +(.+)/
+     last_modified_at = Time.parse($1).utc
+     file_size = $2.to_i
+     file = $3
+     S3File.new("#{s3_url}#{file}", file_size, last_modified_at)
+   end
+ end
+
+ def get_s3_files_in_range(range, bucket, profile)
+   s3_files = {}
+   time = range.begin
+   while time < range.end
+     date_path = time.strftime('%Y/%m/%d')
+     get_s3_files(bucket, date_path, profile).each do |s3_file|
+       next unless s3_file.in_range?(range)
+       s3_files[s3_file.file] ||= s3_file
+     end
+     time += 86_400
+   end
+   s3_files
+ end
+
+ def download_s3_file(s3_file, dest, profile)
+   cmd = "aws"
+   cmd << " --profile #{Shellwords.escape(profile)}" if profile
+   cmd << " s3 cp #{Shellwords.escape(s3_file.file)} #{Shellwords.escape(dest)}.gz"
+   run_or_die(cmd)
+   cmd = "gzip -f -d #{Shellwords.escape(dest)}.gz"
+   run_or_die(cmd)
+ end
+
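+ # Fields of an ALB access log entry, in order, each mapped to the regex fragment used to capture it.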
+ def alb_log_fields
+   @alb_log_fields ||=
+     begin
+       not_a_space = '([^ ]+)'
+       in_quotes = '"(.*?)"'
+
+       {
+         type: not_a_space,
+         timestamp: not_a_space,
+         elb: not_a_space,
+         client_port: not_a_space,
+         target_port: not_a_space,
+         request_processing_time: not_a_space,
+         target_processing_time: not_a_space,
+         response_processing_time: not_a_space,
+         elb_status_code: not_a_space,
+         target_status_code: not_a_space,
+         received_bytes: not_a_space,
+         sent_bytes: not_a_space,
+         request: in_quotes,
+         user_agent: in_quotes,
+         ssl_cipher: not_a_space,
+         ssl_protocol: not_a_space,
+         target_group_arn: not_a_space,
+         trace_id: in_quotes,
+         domain_name: in_quotes,
+         chosen_cert_arn: in_quotes,
+         matched_rule_priority: not_a_space,
+         request_creation_time: not_a_space,
+         actions_executed: in_quotes,
+         redirect_url: in_quotes,
+         error_reason: in_quotes
+       }
+     end
+ end
+
+ def alb_log_fields_regex
+   @alb_log_fields_regex ||=
+     begin
+       Regexp.new alb_log_fields.values.join(' ')
+     end
+ end
+
+ def get_alb_log_fields(line)
+   matches = alb_log_fields_regex.match(line).to_a
+   matches.shift
+   matches
+ end
+
+ def get_alb_log_entry(line)
+   entry = AlbLogEntry.new(*get_alb_log_fields(line))
+   entry.line = line
+   entry
+ end
+
+ def measure
+   start = Time.now
+   yield
+   Time.now - start
+ end
+
+ def display_stats(stats)
+   stats[:elapsed_time] = Time.now.utc - stats[:started_at]
+   $stderr.puts stats.inspect
+ end
+
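+ # Wraps one S3 log object; the timestamp embedded in its file name marks the end
+ # of the (roughly 5-minute) window of log entries it covers.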
+ class S3File
+   MINUTES_5 = 5 * 60
+
+   attr_reader :file,
+               :file_size,
+               :last_modified_at
+
+   def initialize(file, file_size, last_modified_at)
+     @file = file
+     @file_size = file_size
+     @last_modified_at = last_modified_at
+   end
+
+   def end_time
+     @end_time ||=
+       begin
+         unless @file =~ /_(\d{4})(\d{2})(\d{2})T(\d{2})(\d{2})Z_/
+           raise("unable to find time stamp in #{@file}")
+         end
+         Time.new($1, $2, $3, $4, $5, 0, 0)
+       end
+   end
+
+   def start_time
+     @start_time ||= (end_time - MINUTES_5)
+   end
+
+   def in_range?(range)
+     return false if end_time < range.begin
+     return false if start_time > range.end
+     true
+   end
+ end
+
+ class AlbLogEntry < Struct.new(*alb_log_fields.keys)
+   attr_accessor :line
+
+   def timestamp
+     @timestamp ||= Time.iso8601(self[:timestamp])
+   end
+
+   def target_processing_time
+     @target_processing_time ||= self[:target_processing_time].to_f
+   end
+ end
+
+ class RequestMatcher
+   attr_reader :range
+
+   def initialize(options)
+     @range = options[:start_time]..options[:end_time]
+     @exclude_filter = options[:exclude_filter]
+     @include_filter = options[:include_filter]
+     @request_times_over = options[:request_times_over]
+   end
+
+   def match?(entry)
+     return false unless @range.cover?(entry.timestamp)
+     return false if @include_filter && ! @include_filter.match?(entry.line)
+     return false if @exclude_filter && @exclude_filter.match?(entry.line)
+     return false if @request_times_over && @request_times_over > entry.target_processing_time
+     true
+   end
+ end
+
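+ # Option defaults: scan the previous 30 minutes and write matching entries to stdout.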
+ started_at = Time.now.utc
+
+ options = {
+   start_time: time_ago(started_at, '30 min'),
+   end_time: started_at,
+   include_filter: nil,
+   exclude_filter: nil,
+   alb_s3_bucket: nil,
+   aws_profile: nil,
+   log_file: $stdout,
+   display_stats: false,
+   request_times_over: nil
+ }
+ OptionParser.new do |opts|
+   opts.banner = "Usage: alblogs [options]"
+
+   opts.on("-s", "--start=TIME_EXP", "Start time") do |v|
+     options[:start_time] = time_ago(started_at, v)
+   end
+
+   opts.on("-e", "--end=TIME_EXP", "End time") do |v|
+     options[:end_time] = time_ago(started_at, v)
+   end
+
+   opts.on("--include=REGEX", "Include filter") do |v|
+     options[:include_filter] = Regexp.new(v)
+   end
+
+   opts.on("--exclude=REGEX", "Exclude filter") do |v|
+     options[:exclude_filter] = Regexp.new(v)
+   end
+
+   opts.on("-p", "--profile=PROFILE", "AWS profile") do |v|
+     options[:aws_profile] = v
+   end
+
+   opts.on("-b", "--bucket=ALB_S3_BUCKET", "ALB S3 Bucket and Path") do |v|
+     options[:alb_s3_bucket] = v
+   end
+
+   opts.on('-o', "--output=OUTPUT_FILE", 'File to stream matching ALB log entries to') do |v|
+     f = File.open(v, 'wb')
+     f.sync = true
+     options[:log_file] = f
+   end
+
+   opts.on("--stats", "Display Stats") do
+     options[:display_stats] = true
+   end
+
+   opts.on('--request-times-over=SECONDS', 'Find requests that took over X seconds') do |v|
+     options[:request_times_over] = v.to_f
+   end
+ end.parse!
+
+ raise("no bucket specified") unless options[:alb_s3_bucket]
+
+ # just forgive the user and swap the values
+ if options[:end_time] && options[:end_time] < options[:start_time]
+   $stderr.puts 'swapping start/end times'
+   options[:start_time], options[:end_time] = options[:end_time], options[:start_time]
+ end
+
+ request_matcher = RequestMatcher.new options
+
+ stats = Hash.new(0)
+ stats[:started_at] = started_at
+ stats[:range_starts_at] = request_matcher.range.begin
+ stats[:range_ends_at] = request_matcher.range.end
+ stats[:min_log_time] = nil
+ stats[:max_log_time] = nil
+ stats[:min_matched_log_time] = nil
+ stats[:max_matched_log_time] = nil
+
+ tmp_file = '.download.alblogs.log'
+ File.unlink(tmp_file) if File.exists?(tmp_file)
+ File.unlink("#{tmp_file}.gz") if File.exists?("#{tmp_file}.gz")
+
+ $stop = false
+ trap("INT") { $stop = true }
+
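+ # Download each in-range log file, scan it line by line, stream matching entries to the output
+ # and track stats along the way; Ctrl-C stops processing cleanly.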
+ get_s3_files_in_range(request_matcher.range, options[:alb_s3_bucket], options[:aws_profile]).values.each do |s3_file|
+   stats[:files] += 1
+
+   stats[:total_download_time] += measure do
+     download_s3_file(s3_file, tmp_file, options[:aws_profile])
+   end
+
+   stats[:total_file_processing_time] += measure do
+     File.open(tmp_file, 'rb') do |f|
+       while(! f.eof? && ! $stop)
+         stats[:lines] += 1
+         line = f.readline
+         entry = get_alb_log_entry(line)
+         stats[:min_log_time] = ! stats[:min_log_time] || stats[:min_log_time] > entry.timestamp ? entry.timestamp : stats[:min_log_time]
+         stats[:max_log_time] = ! stats[:max_log_time] || stats[:max_log_time] < entry.timestamp ? entry.timestamp : stats[:max_log_time]
+         next unless request_matcher.match?(entry)
+         stats[:matching_lines] += 1
+         stats[:min_matched_log_time] = ! stats[:min_matched_log_time] || stats[:min_matched_log_time] > entry.timestamp ? entry.timestamp : stats[:min_matched_log_time]
+         stats[:max_matched_log_time] = ! stats[:max_matched_log_time] || stats[:max_matched_log_time] < entry.timestamp ? entry.timestamp : stats[:max_matched_log_time]
+         options[:log_file].puts line
+       end
+     end
+   end
+
+   File.unlink(tmp_file)
+
+   display_stats(stats) if options[:display_stats]
+   break if $stop
+ end
+
+ options[:log_file].close
metadata ADDED
@@ -0,0 +1,46 @@
+ --- !ruby/object:Gem::Specification
+ name: alblogs
+ version: !ruby/object:Gem::Version
+   version: 0.0.1
+ platform: ruby
+ authors:
+ - Doug Youch
+ autorequire:
+ bindir: bin
+ cert_chain: []
+ date: 2019-06-06 00:00:00.000000000 Z
+ dependencies: []
+ description: Utility script for processing ALB access logs over a given time range
+ email: dougyouch@gmail.com
+ executables:
+ - alblogs
+ extensions: []
+ extra_rdoc_files: []
+ files:
+ - ".gitignore"
+ - README.md
+ - alblogs.gemspec
+ - bin/alblogs
+ homepage: https://github.com/dougyouch/alblogs
+ licenses: []
+ metadata: {}
+ post_install_message:
+ rdoc_options: []
+ require_paths:
+ - lib
+ required_ruby_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ">="
+     - !ruby/object:Gem::Version
+       version: '0'
+ required_rubygems_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ">="
+     - !ruby/object:Gem::Version
+       version: '0'
+ requirements: []
+ rubygems_version: 3.0.3
+ signing_key:
+ specification_version: 4
+ summary: ALB access log processing
+ test_files: []