alblogs 0.0.1 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 40a4878aed41640c3d06e5734de49c18c96121fecb4bfb981b360aae7c19e70a
4
- data.tar.gz: 9150dbb3f1d1fb9085c75431c8756d2d318a36b49b8005179fab72baccc7f71f
3
+ metadata.gz: eefbd8b8cac015fc5eee191b6e8a73ff9fde3c465f8d1410b76774e686aa9623
4
+ data.tar.gz: 3dfce14461c67824dd0ad6c5bd1ecee0081767de256191bda00cad1a99c18d1f
5
5
  SHA512:
6
- metadata.gz: a35fc4c33ceeb1be95779ec968f4d3fb74378a4f7978b9f19fe9bf9969ceac662a49feb8038823a09661214ad15b00459053b21ce6230ea0808dc41d68202ba1
7
- data.tar.gz: ab6d34862e7a22a125fe323530ebfc058ec33d69e5ff65d7ca874a98e781853880749c1e4b56c248b82e71c88d3c874baed2b2425223b8330c737711fd84d1d5
6
+ metadata.gz: 9b17f53979c6c56d8909d62e1fcacf468edcd0294fc41928cd62a706d5d206fd8495e16453c339ea5dfd236aef9158c8ad9cfcb00522cc961e18ce82e98f7a4e
7
+ data.tar.gz: d5d792b11d17f82e30a58901f18ac7a1cdf1e3cea5d07201645475a63a255a06769f64772c947653f6ab2533657c23c84bdd2db4517382d484389e506f07079f
data/.gitignore CHANGED
@@ -50,3 +50,4 @@ build-iPhoneSimulator/
50
50
  .rvmrc
51
51
 
52
52
  *~
53
+ *.log
@@ -0,0 +1 @@
1
+ alblogs
@@ -0,0 +1 @@
1
+ 2.6.3
data/README.md CHANGED
@@ -2,6 +2,16 @@
2
2
 
3
3
  Utility script for processing ALB access logs over a given time range
4
4
 
5
+ ### Requirements
6
+
7
+ The AWS CLI must be installed. It can be found at https://aws.amazon.com/cli/
8
+
9
+ ### Install
10
+
11
+ ```
12
+ gem install alblogs
13
+ ```
14
+
5
15
  ### Usage
6
16
 
7
17
  ```
@@ -25,3 +35,8 @@ Find all requests that took over 500ms to process in the last 12 hours.
25
35
  alblogs -b 's3://<my-aws-alb-bucket-name>/access_logs/AWSLogs/<aws-account-id>/elasticloadbalancing/<aws-region>' -s '12 hours' -o slow-requests.log --request-times-over 0.5
26
36
  ```
27
37
 
38
+ ### References
39
+
40
+ AWS Documentation: Access Logs for Your Application Load Balancer
41
+
42
+ https://docs.aws.amazon.com/elasticloadbalancing/latest/application/load-balancer-access-logs.html
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = 'alblogs'
5
- s.version = '0.0.1'
5
+ s.version = '0.1.0'
6
6
  s.summary = 'ALB access log processing'
7
7
  s.description = 'Utility script for processing ALB access logs over a given time range'
8
8
  s.authors = ['Doug Youch']
@@ -4,126 +4,7 @@ require 'optparse'
4
4
  require 'time'
5
5
  require 'shellwords'
6
6
  require 'json'
7
-
8
- def run_or_die(cmd)
9
- res = `#{cmd}`
10
- raise("command failed with #{$?}, #{cmd}") unless $?.success?
11
- res
12
- end
13
-
14
- def parse_time_offset(str)
15
- if str =~ /min/
16
- str.sub(/ *min.*/, '').to_i * 60
17
- elsif str =~ /hour/
18
- str.sub(/ *hour.*/, '').to_i * 3600
19
- elsif str =~ /day/
20
- str.sub(/ *day.*/, '').to_i * 86400
21
- else
22
- nil
23
- end
24
- end
25
-
26
- def time_ago(now, str)
27
- if offset = parse_time_offset(str)
28
- time = now - offset
29
- time - (time.to_i % 60) # round to the start of the minute
30
- else
31
- Time.parse(str).utc
32
- end
33
- end
34
-
35
- def get_s3_files(bucket, date_path, profile)
36
- s3_url = "#{bucket}/#{date_path}/"
37
- cmd = "aws"
38
- cmd << " --profile #{Shellwords.escape(profile)}" if profile
39
- cmd << " s3 ls #{Shellwords.escape(s3_url)}"
40
- output = run_or_die(cmd)
41
- output.split("\n").map do |line|
42
- line =~ /(\d{4}\-\d{2}\-\d{2} \d{2}:\d{2}:\d{2}) +(\d+) +(.+)/
43
- last_modified_at = Time.parse($1).utc
44
- file_size = $2.to_i
45
- file = $3
46
- S3File.new("#{s3_url}#{file}", file_size, last_modified_at)
47
- end
48
- end
49
-
50
- def get_s3_files_in_range(range, bucket, profile)
51
- s3_files = {}
52
- time = range.begin
53
- while time < range.end
54
- date_path = time.strftime('%Y/%m/%d')
55
- get_s3_files(bucket, date_path, profile).each do |s3_file|
56
- next unless s3_file.in_range?(range)
57
- s3_files[s3_file.file] ||= s3_file
58
- end
59
- time += 86_400
60
- end
61
- s3_files
62
- end
63
-
64
- def download_s3_file(s3_file, dest, profile)
65
- cmd = "aws"
66
- cmd << " --profile #{Shellwords.escape(profile)}" if profile
67
- cmd << " s3 cp #{Shellwords.escape(s3_file.file)} #{Shellwords.escape(dest)}.gz"
68
- run_or_die(cmd)
69
- cmd = "gzip -f -d #{Shellwords.escape(dest)}.gz"
70
- run_or_die(cmd)
71
- end
72
-
73
- def alb_log_fields
74
- @alb_log_fields ||=
75
- begin
76
- not_a_space = '([^ ]+)'
77
- in_quotes = '"(.*?)"'
78
-
79
- {
80
- type: not_a_space,
81
- timestamp: not_a_space,
82
- elb: not_a_space,
83
- client_port: not_a_space,
84
- target_port: not_a_space,
85
- request_processing_time: not_a_space,
86
- target_processing_time: not_a_space,
87
- response_processing_time: not_a_space,
88
- elb_status_code: not_a_space,
89
- target_status_code: not_a_space,
90
- received_bytes: not_a_space,
91
- sent_bytes: not_a_space,
92
- request: in_quotes,
93
- user_agent: in_quotes,
94
- ssl_cipher: not_a_space,
95
- ssl_protocol: not_a_space,
96
- target_group_arn: not_a_space,
97
- trace_id: in_quotes,
98
- domain_name: in_quotes,
99
- chosen_cert_arn: in_quotes,
100
- matched_rule_priority: not_a_space,
101
- request_creation_time: not_a_space,
102
- actions_executed: in_quotes,
103
- redirect_url: in_quotes,
104
- error_reason: in_quotes
105
- }
106
- end
107
- end
108
-
109
- def alb_log_fields_regex
110
- @alb_log_fields_regex ||=
111
- begin
112
- Regexp.new alb_log_fields.values.join(' ')
113
- end
114
- end
115
-
116
- def get_alb_log_fields(line)
117
- matches = alb_log_fields_regex.match(line).to_a
118
- matches.shift
119
- matches
120
- end
121
-
122
- def get_alb_log_entry(line)
123
- entry = AlbLogEntry.new(*get_alb_log_fields(line))
124
- entry.line = line
125
- entry
126
- end
7
+ require 'alblogs'
127
8
 
128
9
  def measure
129
10
  start = Time.now
@@ -136,75 +17,10 @@ def display_stats(stats)
136
17
  $stderr.puts stats.inspect
137
18
  end
138
19
 
139
- class S3File
140
- MINUTES_5 = 5 * 60
141
-
142
- attr_reader :file,
143
- :file_size,
144
- :last_modified_at
145
-
146
- def initialize(file, file_size, last_modified_at)
147
- @file = file
148
- @file_size = file_size
149
- @last_modified_at = last_modified_at
150
- end
151
-
152
- def end_time
153
- @end_time ||=
154
- begin
155
- unless @file =~ /_(\d{4})(\d{2})(\d{2})T(\d{2})(\d{2})Z_/
156
- raise("unable to find time stamp in #{@file}")
157
- end
158
- Time.new($1, $2, $3, $4, $5, 0, 0)
159
- end
160
- end
161
-
162
- def start_time
163
- @start_time ||= (end_time - MINUTES_5)
164
- end
165
-
166
- def in_range?(range)
167
- return false if end_time < range.begin
168
- return false if start_time > range.end
169
- true
170
- end
171
- end
172
-
173
- class AlbLogEntry < Struct.new(*alb_log_fields.keys)
174
- attr_accessor :line
175
-
176
- def timestamp
177
- @timestamp ||= Time.iso8601(self[:timestamp])
178
- end
179
-
180
- def target_processing_time
181
- @target_processing_time ||= self[:target_processing_time].to_f
182
- end
183
- end
184
-
185
- class RequestMatcher
186
- attr_reader :range
187
-
188
- def initialize(options)
189
- @range = options[:start_time]..options[:end_time]
190
- @exclude_filter = options[:exclude_filter]
191
- @include_filter = options[:include_filter]
192
- @request_times_over = options[:request_times_over]
193
- end
194
-
195
- def match?(entry)
196
- return false unless @range.cover?(entry.timestamp)
197
- return false if @include_filter && ! @include_filter.match?(entry.line)
198
- return false if @exclude_filter && @exclude_filter.match?(entry.line)
199
- return false if @request_times_over && @request_times_over > entry.target_processing_time
200
- true
201
- end
202
- end
203
-
204
20
  started_at = Time.now.utc
205
21
 
206
22
  options = {
207
- start_time: time_ago(started_at, '30 min'),
23
+ start_time: Alblogs::Utils.time_ago(started_at, '30 min'),
208
24
  end_time: started_at,
209
25
  include_filter: nil,
210
26
  exclude_filter: nil,
@@ -218,11 +34,11 @@ OptionParser.new do |opts|
218
34
  opts.banner = "Usage: alblogs [options]"
219
35
 
220
36
  opts.on("-s", "--start=TIME_EXP", "Start time") do |v|
221
- options[:start_time] = time_ago(started_at, v)
37
+ options[:start_time] = Alblogs::Utils.time_ago(started_at, v)
222
38
  end
223
39
 
224
40
  opts.on("-e", "--end=TIME_EXP", "End time") do |v|
225
- options[:end_time] = time_ago(started_at, v)
41
+ options[:end_time] = Alblogs::Utils.time_ago(started_at, v)
226
42
  end
227
43
 
228
44
  opts.on("--include=REGEX", "Include filter") do |v|
@@ -264,7 +80,7 @@ if options[:end_time] && options[:end_time] < options[:start_time]
264
80
  options[:start_time], options[:end_time] = options[:end_time], options[:start_time]
265
81
  end
266
82
 
267
- request_matcher = RequestMatcher.new options
83
+ request_matcher = Alblogs::RequestMatcher.new options
268
84
 
269
85
  stats = Hash.new(0)
270
86
  stats[:started_at] = started_at
@@ -282,11 +98,12 @@ File.unlink("#{tmp_file}.gz") if File.exists?("#{tmp_file}.gz")
282
98
  $stop = false
283
99
  trap("INT") { $stop = true }
284
100
 
285
- get_s3_files_in_range(request_matcher.range, options[:alb_s3_bucket], options[:aws_profile]).values.each do |s3_file|
101
+ s3_bucket = Alblogs::S3Bucket.new(options[:alb_s3_bucket], options[:aws_profile])
102
+ s3_bucket.get_s3_files_in_range(request_matcher.range).values.each do |s3_file|
286
103
  stats[:files] += 1
287
104
 
288
105
  stats[:total_download_time] += measure do
289
- download_s3_file(s3_file, tmp_file, options[:aws_profile])
106
+ s3_bucket.download_s3_file(s3_file, tmp_file)
290
107
  end
291
108
 
292
109
  stats[:total_file_processing_time] += measure do
@@ -294,7 +111,7 @@ get_s3_files_in_range(request_matcher.range, options[:alb_s3_bucket], options[:a
294
111
  while(! f.eof? && ! $stop)
295
112
  stats[:lines] += 1
296
113
  line = f.readline
297
- entry = get_alb_log_entry(line)
114
+ entry = Alblogs::Entry.from_line(line)
298
115
  stats[:min_log_time] = ! stats[:min_log_time] || stats[:min_log_time] > entry.timestamp ? entry.timestamp : stats[:min_log_time]
299
116
  stats[:max_log_time] = ! stats[:max_log_time] || stats[:max_log_time] < entry.timestamp ? entry.timestamp : stats[:max_log_time]
300
117
  next unless request_matcher.match?(entry)
@@ -0,0 +1,41 @@
1
+ module Alblogs
2
+ autoload :Entry, 'alblogs/entry'
3
+ autoload :RequestMatcher, 'alblogs/request_matcher'
4
+ autoload :S3Bucket, 'alblogs/s3_bucket'
5
+ autoload :S3File, 'alblogs/s3_file'
6
+ autoload :Utils, 'alblogs/utils'
7
+
8
+ FIELDS =
9
+ begin
10
+ not_a_space = '([^ ]+)'
11
+ in_quotes = '"(.*?)"'
12
+
13
+ {
14
+ type: not_a_space,
15
+ timestamp: not_a_space,
16
+ elb: not_a_space,
17
+ client_port: not_a_space,
18
+ target_port: not_a_space,
19
+ request_processing_time: not_a_space,
20
+ target_processing_time: not_a_space,
21
+ response_processing_time: not_a_space,
22
+ elb_status_code: not_a_space,
23
+ target_status_code: not_a_space,
24
+ received_bytes: not_a_space,
25
+ sent_bytes: not_a_space,
26
+ request: in_quotes,
27
+ user_agent: in_quotes,
28
+ ssl_cipher: not_a_space,
29
+ ssl_protocol: not_a_space,
30
+ target_group_arn: not_a_space,
31
+ trace_id: in_quotes,
32
+ domain_name: in_quotes,
33
+ chosen_cert_arn: in_quotes,
34
+ matched_rule_priority: not_a_space,
35
+ request_creation_time: not_a_space,
36
+ actions_executed: in_quotes,
37
+ redirect_url: in_quotes,
38
+ error_reason: in_quotes
39
+ }
40
+ end
41
+ end
@@ -0,0 +1,21 @@
1
+ module Alblogs
2
+ class Entry < Struct.new(:line, *::Alblogs::FIELDS.keys)
3
+ REGEXP = Regexp.new(::Alblogs::FIELDS.values.join(' '))
4
+
5
+ def timestamp
6
+ @timestamp ||= Time.iso8601(self[:timestamp])
7
+ end
8
+
9
+ def target_processing_time
10
+ self[:target_processing_time].to_f
11
+ end
12
+
13
+ def self.from_line(line)
14
+ new(*get_fields(line))
15
+ end
16
+
17
+ def self.get_fields(line)
18
+ REGEXP.match(line).to_a
19
+ end
20
+ end
21
+ end
@@ -0,0 +1,20 @@
1
+ module Alblogs
2
+ class RequestMatcher
3
+ attr_reader :range
4
+
5
+ def initialize(options)
6
+ @range = options[:start_time]..options[:end_time]
7
+ @exclude_filter = options[:exclude_filter]
8
+ @include_filter = options[:include_filter]
9
+ @request_times_over = options[:request_times_over]
10
+ end
11
+
12
+ def match?(entry)
13
+ return false unless @range.cover?(entry.timestamp)
14
+ return false if @include_filter && ! @include_filter.match?(entry.line)
15
+ return false if @exclude_filter && @exclude_filter.match?(entry.line)
16
+ return false if @request_times_over && @request_times_over > entry.target_processing_time
17
+ true
18
+ end
19
+ end
20
+ end
@@ -0,0 +1,49 @@
1
+ module Alblogs
2
+ class S3Bucket
3
+ attr_reader :bucket,
4
+ :aws_profile
5
+
6
+ def initialize(bucket, aws_profile=nil)
7
+ @bucket = bucket
8
+ @aws_profile = aws_profile
9
+ end
10
+
11
+ def get_s3_files(date_path)
12
+ s3_url = "#{bucket}/#{date_path}/"
13
+ cmd = "aws"
14
+ cmd << " --profile #{Shellwords.escape(aws_profile)}" if aws_profile
15
+ cmd << " s3 ls #{Shellwords.escape(s3_url)}"
16
+ output = ::Alblogs::Utils.run_or_die(cmd)
17
+ output.split("\n").map do |line|
18
+ line =~ /(\d{4}\-\d{2}\-\d{2} \d{2}:\d{2}:\d{2}) +(\d+) +(.+)/
19
+ last_modified_at = Time.parse($1).utc
20
+ file_size = $2.to_i
21
+ file = $3
22
+ ::Alblogs::S3File.new("#{s3_url}#{file}", file_size, last_modified_at)
23
+ end
24
+ end
25
+
26
+ def get_s3_files_in_range(range)
27
+ s3_files = {}
28
+ time = range.begin
29
+ while time < range.end
30
+ date_path = time.strftime('%Y/%m/%d')
31
+ get_s3_files(date_path).each do |s3_file|
32
+ next unless s3_file.in_range?(range)
33
+ s3_files[s3_file.file] ||= s3_file
34
+ end
35
+ time += 86_400
36
+ end
37
+ s3_files
38
+ end
39
+
40
+ def download_s3_file(s3_file, dest)
41
+ cmd = "aws"
42
+ cmd << " --profile #{Shellwords.escape(aws_profile)}" if aws_profile
43
+ cmd << " s3 cp #{Shellwords.escape(s3_file.file)} #{Shellwords.escape(dest)}.gz"
44
+ ::Alblogs::Utils.run_or_die(cmd)
45
+ cmd = "gzip -f -d #{Shellwords.escape(dest)}.gz"
46
+ ::Alblogs::Utils.run_or_die(cmd)
47
+ end
48
+ end
49
+ end
@@ -0,0 +1,35 @@
1
+ module Alblogs
2
+ class S3File
3
+ MINUTES_5 = 5 * 60
4
+
5
+ attr_reader :file,
6
+ :file_size,
7
+ :last_modified_at
8
+
9
+ def initialize(file, file_size, last_modified_at)
10
+ @file = file
11
+ @file_size = file_size
12
+ @last_modified_at = last_modified_at
13
+ end
14
+
15
+ def end_time
16
+ @end_time ||=
17
+ begin
18
+ unless @file =~ /_(\d{4})(\d{2})(\d{2})T(\d{2})(\d{2})Z_/
19
+ raise("unable to find time stamp in #{@file}")
20
+ end
21
+ Time.new($1, $2, $3, $4, $5, 0, 0)
22
+ end
23
+ end
24
+
25
+ def start_time
26
+ @start_time ||= (end_time - MINUTES_5)
27
+ end
28
+
29
+ def in_range?(range)
30
+ return false if end_time < range.begin
31
+ return false if start_time > range.end
32
+ true
33
+ end
34
+ end
35
+ end
@@ -0,0 +1,32 @@
1
+ module Alblogs
2
+ module Utils
3
+ module_function
4
+
5
+ def parse_time_offset(str)
6
+ if str =~ /min/
7
+ str.sub(/ *min.*/, '').to_i * 60
8
+ elsif str =~ /hour/
9
+ str.sub(/ *hour.*/, '').to_i * 3600
10
+ elsif str =~ /day/
11
+ str.sub(/ *day.*/, '').to_i * 86400
12
+ else
13
+ nil
14
+ end
15
+ end
16
+
17
+ def time_ago(now, str)
18
+ if offset = parse_time_offset(str)
19
+ time = now - offset
20
+ time - (time.to_i % 60) # round to the start of the minute
21
+ else
22
+ Time.parse(str).utc
23
+ end
24
+ end
25
+
26
+ def run_or_die(cmd)
27
+ res = `#{cmd}`
28
+ raise("command failed with #{$?}, #{cmd}") unless $?.success?
29
+ res
30
+ end
31
+ end
32
+ end
@@ -0,0 +1,7 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ $LOAD_PATH << File.expand_path('../lib', __dir__)
5
+ require 'alblogs'
6
+ require 'irb'
7
+ IRB.start(__FILE__)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: alblogs
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Doug Youch
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-06-06 00:00:00.000000000 Z
11
+ date: 2019-10-08 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Utility script for processing ALB access logs over a given time range
14
14
  email: dougyouch@gmail.com
@@ -18,9 +18,18 @@ extensions: []
18
18
  extra_rdoc_files: []
19
19
  files:
20
20
  - ".gitignore"
21
+ - ".ruby-gemset"
22
+ - ".ruby-version"
21
23
  - README.md
22
24
  - alblogs.gemspec
23
25
  - bin/alblogs
26
+ - lib/alblogs.rb
27
+ - lib/alblogs/entry.rb
28
+ - lib/alblogs/request_matcher.rb
29
+ - lib/alblogs/s3_bucket.rb
30
+ - lib/alblogs/s3_file.rb
31
+ - lib/alblogs/utils.rb
32
+ - script/console
24
33
  homepage: https://github.com/dougyouch/alblogs
25
34
  licenses: []
26
35
  metadata: {}