alblogs 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (6) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +52 -0
  3. data/README.md +27 -0
  4. data/alblogs.gemspec +14 -0
  5. data/bin/alblogs +315 -0
  6. metadata +46 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 40a4878aed41640c3d06e5734de49c18c96121fecb4bfb981b360aae7c19e70a
4
+ data.tar.gz: 9150dbb3f1d1fb9085c75431c8756d2d318a36b49b8005179fab72baccc7f71f
5
+ SHA512:
6
+ metadata.gz: a35fc4c33ceeb1be95779ec968f4d3fb74378a4f7978b9f19fe9bf9969ceac662a49feb8038823a09661214ad15b00459053b21ce6230ea0808dc41d68202ba1
7
+ data.tar.gz: ab6d34862e7a22a125fe323530ebfc058ec33d69e5ff65d7ca874a98e781853880749c1e4b56c248b82e71c88d3c874baed2b2425223b8330c737711fd84d1d5
data/.gitignore ADDED
@@ -0,0 +1,52 @@
1
+ *.gem
2
+ *.rbc
3
+ /.config
4
+ /coverage/
5
+ /InstalledFiles
6
+ /pkg/
7
+ /spec/reports/
8
+ /spec/examples.txt
9
+ /test/tmp/
10
+ /test/version_tmp/
11
+ /tmp/
12
+
13
+ # Used by dotenv library to load environment variables.
14
+ # .env
15
+
16
+ ## Specific to RubyMotion:
17
+ .dat*
18
+ .repl_history
19
+ build/
20
+ *.bridgesupport
21
+ build-iPhoneOS/
22
+ build-iPhoneSimulator/
23
+
24
+ ## Specific to RubyMotion (use of CocoaPods):
25
+ #
26
+ # We recommend against adding the Pods directory to your .gitignore. However
27
+ # you should judge for yourself, the pros and cons are mentioned at:
28
+ # https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control
29
+ #
30
+ # vendor/Pods/
31
+
32
+ ## Documentation cache and generated files:
33
+ /.yardoc/
34
+ /_yardoc/
35
+ /doc/
36
+ /rdoc/
37
+
38
+ ## Environment normalization:
39
+ /.bundle/
40
+ /vendor/bundle
41
+ /lib/bundler/man/
42
+
43
+ # for a library or gem, you might want to ignore these files since the code is
44
+ # intended to run in multiple environments; otherwise, check them in:
45
+ # Gemfile.lock
46
+ # .ruby-version
47
+ # .ruby-gemset
48
+
49
+ # unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
50
+ .rvmrc
51
+
52
+ *~
data/README.md ADDED
@@ -0,0 +1,27 @@
1
+ # alblogs
2
+
3
+ Utility script for processing ALB access logs over a given time range
4
+
5
+ ### Usage
6
+
7
+ ```
8
+ Usage: alblogs [options]
9
+ -s, --start=TIME_EXP Start time
10
+ -e, --end=TIME_EXP End time
11
+ --include=REGEX Include filter
12
+ --exclude=REGEX Exclude filter
13
+ -p, --profile=PROFILE AWS profile
14
+ -b, --bucket=ALB_S3_BUCKET ALB S3 Bucket and Path
15
+ -o, --output=OUTPUT_FILE File to stream matching ALB log entries to
16
+ --stats Display Stats
17
+ --request-times-over=SECONDS Find requests that took over X seconds
18
+ ```
19
+
20
+ ### Example
21
+
22
+ Find all requests that took over 500ms to process in the last 12 hours.
23
+
24
+ ```
25
+ alblogs -b 's3://<my-aws-alb-bucket-name>/access_logs/AWSLogs/<aws-account-id>/elasticloadbalancing/<aws-region>' -s '12 hours' -o slow-requests.log --request-times-over 0.5
26
+ ```
27
+
data/alblogs.gemspec ADDED
@@ -0,0 +1,14 @@
1
+ # frozen_string_literal: true
2
+
3
# Gem packaging definition for the alblogs command-line utility.
Gem::Specification.new do |spec|
  # Identity and descriptive metadata.
  spec.name = 'alblogs'
  spec.version = '0.0.1'
  spec.summary = 'ALB access log processing'
  spec.description = 'Utility script for processing ALB access logs over a given time range'
  spec.authors = ['Doug Youch']
  spec.email = 'dougyouch@gmail.com'
  spec.homepage = 'https://github.com/dougyouch/alblogs'

  # Package every git-tracked file except those under test/, spec/ or features/.
  tracked_files = `git ls-files -z`.split("\x0")
  spec.files = tracked_files.reject { |path| path.match(%r{^(test|spec|features)/}) }

  # Everything packaged under bin/ is exposed as an executable.
  spec.bindir = 'bin'
  spec.executables = spec.files.grep(%r{^bin/}) { |path| File.basename(path) }
end
data/bin/alblogs ADDED
@@ -0,0 +1,315 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'optparse'
4
+ require 'time'
5
+ require 'shellwords'
6
+ require 'json'
7
+
8
# Runs +cmd+ with backticks and returns what it wrote to stdout.
#
# Raises a RuntimeError that names the exit status and the command when
# the command does not exit successfully.
def run_or_die(cmd)
  output = `#{cmd}`
  status = $?
  return output if status.success?

  raise("command failed with #{status}, #{cmd}")
end
13
+
14
# Converts a relative time expression such as "30 min", "12 hours" or
# "1 day" into a number of seconds.
#
# The expression has to start with a number (optionally negative or
# fractional) followed by a unit beginning with "min", "hour" or "day";
# whitespace between the two is optional and the unit is matched
# case-insensitively.
#
# Returns the offset in whole seconds (Integer), or nil when +str+ is not a
# relative expression. Requiring the leading number keeps absolute times
# that merely contain one of the unit words (e.g. "Monday 10:00") from
# being mistaken for a zero offset.
def parse_time_offset(str)
  match = /\A\s*(-?\d+(?:\.\d+)?)\s*(min|hour|day)/i.match(str.to_s)
  return nil unless match

  seconds_per_unit = { 'min' => 60, 'hour' => 3600, 'day' => 86_400 }
  (match[1].to_f * seconds_per_unit.fetch(match[2].downcase)).round
end
25
+
26
# Resolves a user supplied time expression to a Time.
#
# now - reference Time that relative expressions are subtracted from
# str - either a relative expression understood by parse_time_offset
#       (e.g. "30 min") or anything Time.parse accepts
#
# Relative expressions yield +now+ minus the offset, truncated to the
# start of that minute. Any other string is parsed with Time.parse and
# converted to UTC.
def time_ago(now, str)
  offset = parse_time_offset(str)
  return Time.parse(str).utc unless offset

  time = now - offset
  # to_r keeps the sub-second part, so the result lands exactly on the
  # minute boundary instead of keeping the fractional seconds of +now+.
  time - (time.to_r % 60)
end
34
+
35
# Lists the objects stored under "<bucket>/<date_path>/" with `aws s3 ls`.
#
# bucket    - S3 URL prefix of the ALB logs
# date_path - date portion of the key, e.g. "2019/06/06"
# profile   - AWS CLI profile name, or nil to use the CLI default
#
# Returns an Array of S3File, one per listing row of the form
# "<date> <time> <size> <name>". Rows that do not have that shape (blank
# rows, "PRE <prefix>/" rows, ...) are skipped instead of raising on a
# nil timestamp. Raises (via run_or_die) when the aws command fails.
def get_s3_files(bucket, date_path, profile)
  s3_url = "#{bucket}/#{date_path}/"
  cmd = ['aws']
  cmd << "--profile #{Shellwords.escape(profile)}" if profile
  cmd << "s3 ls #{Shellwords.escape(s3_url)}"
  listing = run_or_die(cmd.join(' '))

  listing.split("\n").each_with_object([]) do |line, files|
    row = /(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}) +(\d+) +(.+)/.match(line)
    next unless row

    last_modified_at = Time.parse(row[1]).utc
    files << S3File.new("#{s3_url}#{row[3]}", row[2].to_i, last_modified_at)
  end
end
49
+
50
# Collects the S3 log files whose time window overlaps +range+.
#
# range   - Range of Time (begin..end) to search
# bucket  - S3 URL prefix of the ALB logs
# profile - AWS CLI profile name, or nil
#
# Lists one "YYYY/MM/DD" prefix per UTC calendar day, from the day of
# range.begin through the day of range.end inclusive. Walking whole
# calendar days (rather than stepping 24h from range.begin) makes sure the
# last day is still listed when the range crosses midnight.
#
# Returns a Hash of file name => S3File; the first occurrence of a file
# name wins.
def get_s3_files_in_range(range, bucket, profile)
  s3_files = {}
  first = range.begin.getutc
  day = Time.utc(first.year, first.month, first.day)
  while day <= range.end
    date_path = day.strftime('%Y/%m/%d')
    get_s3_files(bucket, date_path, profile).each do |s3_file|
      next unless s3_file.in_range?(range)

      s3_files[s3_file.file] ||= s3_file
    end
    day += 86_400 # UTC has no DST, so this is always the next midnight
  end
  s3_files
end
63
+
64
# Copies +s3_file+ from S3 to "<dest>.gz" with the aws CLI and then
# gunzips it in place, leaving the uncompressed log at +dest+.
#
# s3_file - object responding to #file with the S3 URL to fetch
# dest    - local path for the uncompressed log
# profile - AWS CLI profile name, or nil
#
# Raises (via run_or_die) when either command fails.
def download_s3_file(s3_file, dest, profile)
  archive = "#{Shellwords.escape(dest)}.gz"
  profile_flag = profile ? " --profile #{Shellwords.escape(profile)}" : ''

  run_or_die("aws#{profile_flag} s3 cp #{Shellwords.escape(s3_file.file)} #{archive}")
  run_or_die("gzip -f -d #{archive}")
end
72
+
73
# Ordered map of ALB access log field name => regex fragment capturing it.
#
# Fields are listed in the order they appear on a log line. Unquoted
# fields capture a run of non-space characters; quoted fields capture the
# text between double quotes. The hash is built once and memoized.
def alb_log_fields
  return @alb_log_fields if @alb_log_fields

  bare = '([^ ]+)'
  quoted = '"(.*?)"'

  # Consecutive runs of fields sharing the same capture style, in line order.
  layout = [
    [bare, %i[type timestamp elb client_port target_port request_processing_time
              target_processing_time response_processing_time elb_status_code
              target_status_code received_bytes sent_bytes]],
    [quoted, %i[request user_agent]],
    [bare, %i[ssl_cipher ssl_protocol target_group_arn]],
    [quoted, %i[trace_id domain_name chosen_cert_arn]],
    [bare, %i[matched_rule_priority request_creation_time]],
    [quoted, %i[actions_executed redirect_url error_reason]]
  ]

  fields = {}
  layout.each do |fragment, names|
    names.each { |name| fields[name] = fragment }
  end
  @alb_log_fields = fields
end
108
+
109
# Regexp matching one ALB log line: the per-field fragments from
# alb_log_fields joined by single spaces. Compiled once and memoized.
def alb_log_fields_regex
  return @alb_log_fields_regex if @alb_log_fields_regex

  pattern = alb_log_fields.values.join(' ')
  @alb_log_fields_regex = Regexp.new(pattern)
end
115
+
116
# Splits an ALB log +line+ into its captured field values, in field order.
# Returns an empty Array when the line does not match the log regex.
def get_alb_log_fields(line)
  found = alb_log_fields_regex.match(line)
  found ? found.captures : []
end
121
+
122
# Builds an AlbLogEntry from a raw log +line+ and keeps the original line
# on the entry so it can be filtered on and written out later.
def get_alb_log_entry(line)
  AlbLogEntry.new(*get_alb_log_fields(line)).tap { |entry| entry.line = line }
end
127
+
128
# Runs the given block and returns how long it took, in seconds (Float).
#
# Uses the monotonic clock rather than Time.now so that wall-clock
# adjustments during the block cannot produce negative or inflated
# durations.
def measure
  started = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  yield
  Process.clock_gettime(Process::CLOCK_MONOTONIC) - started
end
133
+
134
# Stores the time elapsed since stats[:started_at] under :elapsed_time and
# prints the whole stats hash (inspect form) to stderr.
def display_stats(stats)
  finished_at = Time.now.utc
  stats.store(:elapsed_time, finished_at - stats[:started_at])
  $stderr.puts(stats.inspect)
end
138
+
139
# One ALB log object in S3, together with the time window it covers as
# derived from the timestamp embedded in its file name.
class S3File
  # Length, in seconds, of the window that ends at the file's timestamp.
  MINUTES_5 = 5 * 60

  attr_reader :file, :file_size, :last_modified_at

  # file             - full S3 URL of the object
  # file_size        - size from the listing
  # last_modified_at - modification Time from the listing
  def initialize(file, file_size, last_modified_at)
    @file = file
    @file_size = file_size
    @last_modified_at = last_modified_at
  end

  # Time parsed from the "_YYYYMMDDTHHMMZ_" marker in the file name, built
  # with a zero UTC offset. Memoized.
  #
  # Raises RuntimeError when the file name carries no such marker.
  def end_time
    return @end_time if @end_time

    stamp = /_(\d{4})(\d{2})(\d{2})T(\d{2})(\d{2})Z_/.match(@file)
    raise("unable to find time stamp in #{@file}") unless stamp

    year, month, day, hour, minute = stamp.captures
    @end_time = Time.new(year, month, day, hour, minute, 0, 0)
  end

  # Start of the window: MINUTES_5 seconds before end_time. Memoized.
  def start_time
    @start_time ||= end_time - MINUTES_5
  end

  # True when the window [start_time, end_time] overlaps +range+.
  def in_range?(range)
    end_time >= range.begin && start_time <= range.end
  end
end
172
+
173
# A parsed ALB log line. The struct members are the raw captured strings,
# one per key of alb_log_fields; +line+ holds the original text.
class AlbLogEntry < Struct.new(*alb_log_fields.keys)
  attr_accessor :line

  # The entry's timestamp member parsed as an ISO 8601 Time (memoized).
  def timestamp
    return @timestamp if @timestamp

    @timestamp = Time.iso8601(self[:timestamp])
  end

  # The target_processing_time member converted with to_f (memoized).
  def target_processing_time
    return @target_processing_time if @target_processing_time

    @target_processing_time = self[:target_processing_time].to_f
  end
end
184
+
185
# Decides whether a log entry should be emitted, based on the time range
# and the optional include/exclude/duration filters from the options hash.
class RequestMatcher
  attr_reader :range

  # options - Hash with :start_time and :end_time (range bounds) and the
  #           optional :include_filter / :exclude_filter (Regexp) and
  #           :request_times_over (seconds) entries.
  def initialize(options)
    @range = Range.new(options[:start_time], options[:end_time])
    @exclude_filter, @include_filter, @request_times_over =
      options.values_at(:exclude_filter, :include_filter, :request_times_over)
  end

  # True when +entry+ passes every configured check. Checks run in order
  # (range, include, exclude, duration) and stop at the first failure.
  def match?(entry)
    within_range?(entry) && included?(entry) && !excluded?(entry) && slow_enough?(entry)
  end

  private

  # The entry's timestamp falls inside the configured range.
  def within_range?(entry)
    @range.cover?(entry.timestamp)
  end

  # No include filter is set, or the raw line matches it.
  def included?(entry)
    !@include_filter || @include_filter.match?(entry.line)
  end

  # An exclude filter is set and the raw line matches it.
  def excluded?(entry)
    @exclude_filter ? @exclude_filter.match?(entry.line) : false
  end

  # No threshold is set, or the threshold does not exceed the entry's
  # target processing time.
  def slow_enough?(entry)
    !@request_times_over || !(@request_times_over > entry.target_processing_time)
  end
end
203
+
204
# ---------------------------------------------------------------------------
# Script entry point: parse the command line, find the S3 log files that
# overlap the requested time range, then download each one and stream the
# matching lines to the chosen output.
# ---------------------------------------------------------------------------

started_at = Time.now.utc

# Defaults: the last 30 minutes, no filters, matches written to stdout.
options = {
  start_time: time_ago(started_at, '30 min'),
  end_time: started_at,
  include_filter: nil,
  exclude_filter: nil,
  alb_s3_bucket: nil,
  aws_profile: nil,
  log_file: $stdout,
  display_stats: false,
  request_times_over: nil
}
OptionParser.new do |opts|
  opts.banner = "Usage: alblogs [options]"

  opts.on("-s", "--start=TIME_EXP", "Start time") do |v|
    options[:start_time] = time_ago(started_at, v)
  end

  opts.on("-e", "--end=TIME_EXP", "End time") do |v|
    options[:end_time] = time_ago(started_at, v)
  end

  opts.on("--include=REGEX", "Include filter") do |v|
    options[:include_filter] = Regexp.new(v)
  end

  opts.on("--exclude=REGEX", "Exclude filter") do |v|
    options[:exclude_filter] = Regexp.new(v)
  end

  opts.on("-p", "--profile=PROFILE", "AWS profile") do |v|
    options[:aws_profile] = v
  end

  opts.on("-b", "--bucket=ALB_S3_BUCKET", "ALB S3 Bucket and Path") do |v|
    options[:alb_s3_bucket] = v
  end

  opts.on('-o', "--output=OUTPUT_FILE", 'File to stream matching ALB log entries to') do |v|
    f = File.open(v, 'wb')
    f.sync = true # flush every write so the output can be followed live
    options[:log_file] = f
  end

  opts.on("--stats", "Display Stats") do
    options[:display_stats] = true
  end

  opts.on('--request-times-over=SECONDS', 'Find requests that took over X seconds') do |v|
    options[:request_times_over] = v.to_f
  end
end.parse!

raise("no bucket specified") unless options[:alb_s3_bucket]

# just forgive the user and swap the values
if options[:end_time] && options[:end_time] < options[:start_time]
  $stderr.puts 'swapping start/end times'
  options[:start_time], options[:end_time] = options[:end_time], options[:start_time]
end

request_matcher = RequestMatcher.new options

# Counters default to 0; the min/max timestamps start out unset (nil).
stats = Hash.new(0)
stats[:started_at] = started_at
stats[:range_starts_at] = request_matcher.range.begin
stats[:range_ends_at] = request_matcher.range.end
stats[:min_log_time] = nil
stats[:max_log_time] = nil
stats[:min_matched_log_time] = nil
stats[:max_matched_log_time] = nil

# Remove leftovers from a previous, interrupted run.
# File.exist? is used because File.exists? no longer exists in Ruby 3.2+.
tmp_file = '.download.alblogs.log'
File.unlink(tmp_file) if File.exist?(tmp_file)
File.unlink("#{tmp_file}.gz") if File.exist?("#{tmp_file}.gz")

# Ctrl-C only sets a flag; the loops below check it so the current file is
# cleaned up before the script stops.
$stop = false
trap("INT") { $stop = true }

get_s3_files_in_range(request_matcher.range, options[:alb_s3_bucket], options[:aws_profile]).values.each do |s3_file|
  stats[:files] += 1

  stats[:total_download_time] += measure do
    download_s3_file(s3_file, tmp_file, options[:aws_profile])
  end

  stats[:total_file_processing_time] += measure do
    File.open(tmp_file, 'rb') do |f|
      while !f.eof? && !$stop
        stats[:lines] += 1
        line = f.readline
        entry = get_alb_log_entry(line)
        logged_at = entry.timestamp

        # Track the time span of every line seen ...
        stats[:min_log_time] = logged_at if stats[:min_log_time].nil? || stats[:min_log_time] > logged_at
        stats[:max_log_time] = logged_at if stats[:max_log_time].nil? || stats[:max_log_time] < logged_at
        next unless request_matcher.match?(entry)

        # ... and, separately, of the lines that matched.
        stats[:matching_lines] += 1
        if stats[:min_matched_log_time].nil? || stats[:min_matched_log_time] > logged_at
          stats[:min_matched_log_time] = logged_at
        end
        if stats[:max_matched_log_time].nil? || stats[:max_matched_log_time] < logged_at
          stats[:max_matched_log_time] = logged_at
        end
        options[:log_file].puts line
      end
    end
  end

  File.unlink(tmp_file)

  display_stats(stats) if options[:display_stats]
  break if $stop
end

options[:log_file].close
metadata ADDED
@@ -0,0 +1,46 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: alblogs
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Doug Youch
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2019-06-06 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: Utility script for processing ALB access logs over a given time range
14
+ email: dougyouch@gmail.com
15
+ executables:
16
+ - alblogs
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - ".gitignore"
21
+ - README.md
22
+ - alblogs.gemspec
23
+ - bin/alblogs
24
+ homepage: https://github.com/dougyouch/alblogs
25
+ licenses: []
26
+ metadata: {}
27
+ post_install_message:
28
+ rdoc_options: []
29
+ require_paths:
30
+ - lib
31
+ required_ruby_version: !ruby/object:Gem::Requirement
32
+ requirements:
33
+ - - ">="
34
+ - !ruby/object:Gem::Version
35
+ version: '0'
36
+ required_rubygems_version: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ requirements: []
42
+ rubygems_version: 3.0.3
43
+ signing_key:
44
+ specification_version: 4
45
+ summary: ALB access log processing
46
+ test_files: []