alblogs 0.0.1 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 40a4878aed41640c3d06e5734de49c18c96121fecb4bfb981b360aae7c19e70a
4
- data.tar.gz: 9150dbb3f1d1fb9085c75431c8756d2d318a36b49b8005179fab72baccc7f71f
3
+ metadata.gz: eefbd8b8cac015fc5eee191b6e8a73ff9fde3c465f8d1410b76774e686aa9623
4
+ data.tar.gz: 3dfce14461c67824dd0ad6c5bd1ecee0081767de256191bda00cad1a99c18d1f
5
5
  SHA512:
6
- metadata.gz: a35fc4c33ceeb1be95779ec968f4d3fb74378a4f7978b9f19fe9bf9969ceac662a49feb8038823a09661214ad15b00459053b21ce6230ea0808dc41d68202ba1
7
- data.tar.gz: ab6d34862e7a22a125fe323530ebfc058ec33d69e5ff65d7ca874a98e781853880749c1e4b56c248b82e71c88d3c874baed2b2425223b8330c737711fd84d1d5
6
+ metadata.gz: 9b17f53979c6c56d8909d62e1fcacf468edcd0294fc41928cd62a706d5d206fd8495e16453c339ea5dfd236aef9158c8ad9cfcb00522cc961e18ce82e98f7a4e
7
+ data.tar.gz: d5d792b11d17f82e30a58901f18ac7a1cdf1e3cea5d07201645475a63a255a06769f64772c947653f6ab2533657c23c84bdd2db4517382d484389e506f07079f
data/.gitignore CHANGED
@@ -50,3 +50,4 @@ build-iPhoneSimulator/
50
50
  .rvmrc
51
51
 
52
52
  *~
53
+ *.log
@@ -0,0 +1 @@
1
+ alblogs
@@ -0,0 +1 @@
1
+ 2.6.3
data/README.md CHANGED
@@ -2,6 +2,16 @@
2
2
 
3
3
  Utility script for processing ALB access logs over a given time range
4
4
 
5
+ ### Requirements
6
+
7
+ The AWS CLI must be installed. It can be found here: https://aws.amazon.com/cli/
8
+
9
+ ### Install
10
+
11
+ ```
12
+ gem install alblogs
13
+ ```
14
+
5
15
  ### Usage
6
16
 
7
17
  ```
@@ -25,3 +35,8 @@ Find all requests that took over 500ms to process in the last 12 hours.
25
35
  alblogs -b 's3://<my-aws-alb-bucket-name>/access_logs/AWSLogs/<aws-account-id>/elasticloadbalancing/<aws-region>' -s '12 hours' -o slow-requests.log --request-times-over 0.5
26
36
  ```
27
37
 
38
+ ### References
39
+
40
+ AWS Documentation: Access Logs for Your Application Load Balancer
41
+
42
+ https://docs.aws.amazon.com/elasticloadbalancing/latest/application/load-balancer-access-logs.html
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = 'alblogs'
5
- s.version = '0.0.1'
5
+ s.version = '0.1.0'
6
6
  s.summary = 'ALB access log processing'
7
7
  s.description = 'Utility script for processing ALB access logs over a given time range'
8
8
  s.authors = ['Doug Youch']
@@ -4,126 +4,7 @@ require 'optparse'
4
4
  require 'time'
5
5
  require 'shellwords'
6
6
  require 'json'
7
-
8
- def run_or_die(cmd)
9
- res = `#{cmd}`
10
- raise("command failed with #{$?}, #{cmd}") unless $?.success?
11
- res
12
- end
13
-
14
- def parse_time_offset(str)
15
- if str =~ /min/
16
- str.sub(/ *min.*/, '').to_i * 60
17
- elsif str =~ /hour/
18
- str.sub(/ *hour.*/, '').to_i * 3600
19
- elsif str =~ /day/
20
- str.sub(/ *day.*/, '').to_i * 86400
21
- else
22
- nil
23
- end
24
- end
25
-
26
- def time_ago(now, str)
27
- if offset = parse_time_offset(str)
28
- time = now - offset
29
- time - (time.to_i % 60) # round to the start of the minute
30
- else
31
- Time.parse(str).utc
32
- end
33
- end
34
-
35
- def get_s3_files(bucket, date_path, profile)
36
- s3_url = "#{bucket}/#{date_path}/"
37
- cmd = "aws"
38
- cmd << " --profile #{Shellwords.escape(profile)}" if profile
39
- cmd << " s3 ls #{Shellwords.escape(s3_url)}"
40
- output = run_or_die(cmd)
41
- output.split("\n").map do |line|
42
- line =~ /(\d{4}\-\d{2}\-\d{2} \d{2}:\d{2}:\d{2}) +(\d+) +(.+)/
43
- last_modified_at = Time.parse($1).utc
44
- file_size = $2.to_i
45
- file = $3
46
- S3File.new("#{s3_url}#{file}", file_size, last_modified_at)
47
- end
48
- end
49
-
50
- def get_s3_files_in_range(range, bucket, profile)
51
- s3_files = {}
52
- time = range.begin
53
- while time < range.end
54
- date_path = time.strftime('%Y/%m/%d')
55
- get_s3_files(bucket, date_path, profile).each do |s3_file|
56
- next unless s3_file.in_range?(range)
57
- s3_files[s3_file.file] ||= s3_file
58
- end
59
- time += 86_400
60
- end
61
- s3_files
62
- end
63
-
64
- def download_s3_file(s3_file, dest, profile)
65
- cmd = "aws"
66
- cmd << " --profile #{Shellwords.escape(profile)}" if profile
67
- cmd << " s3 cp #{Shellwords.escape(s3_file.file)} #{Shellwords.escape(dest)}.gz"
68
- run_or_die(cmd)
69
- cmd = "gzip -f -d #{Shellwords.escape(dest)}.gz"
70
- run_or_die(cmd)
71
- end
72
-
73
- def alb_log_fields
74
- @alb_log_fields ||=
75
- begin
76
- not_a_space = '([^ ]+)'
77
- in_quotes = '"(.*?)"'
78
-
79
- {
80
- type: not_a_space,
81
- timestamp: not_a_space,
82
- elb: not_a_space,
83
- client_port: not_a_space,
84
- target_port: not_a_space,
85
- request_processing_time: not_a_space,
86
- target_processing_time: not_a_space,
87
- response_processing_time: not_a_space,
88
- elb_status_code: not_a_space,
89
- target_status_code: not_a_space,
90
- received_bytes: not_a_space,
91
- sent_bytes: not_a_space,
92
- request: in_quotes,
93
- user_agent: in_quotes,
94
- ssl_cipher: not_a_space,
95
- ssl_protocol: not_a_space,
96
- target_group_arn: not_a_space,
97
- trace_id: in_quotes,
98
- domain_name: in_quotes,
99
- chosen_cert_arn: in_quotes,
100
- matched_rule_priority: not_a_space,
101
- request_creation_time: not_a_space,
102
- actions_executed: in_quotes,
103
- redirect_url: in_quotes,
104
- error_reason: in_quotes
105
- }
106
- end
107
- end
108
-
109
- def alb_log_fields_regex
110
- @alb_log_fields_regex ||=
111
- begin
112
- Regexp.new alb_log_fields.values.join(' ')
113
- end
114
- end
115
-
116
- def get_alb_log_fields(line)
117
- matches = alb_log_fields_regex.match(line).to_a
118
- matches.shift
119
- matches
120
- end
121
-
122
- def get_alb_log_entry(line)
123
- entry = AlbLogEntry.new(*get_alb_log_fields(line))
124
- entry.line = line
125
- entry
126
- end
7
+ require 'alblogs'
127
8
 
128
9
  def measure
129
10
  start = Time.now
@@ -136,75 +17,10 @@ def display_stats(stats)
136
17
  $stderr.puts stats.inspect
137
18
  end
138
19
 
139
- class S3File
140
- MINUTES_5 = 5 * 60
141
-
142
- attr_reader :file,
143
- :file_size,
144
- :last_modified_at
145
-
146
- def initialize(file, file_size, last_modified_at)
147
- @file = file
148
- @file_size = file_size
149
- @last_modified_at = last_modified_at
150
- end
151
-
152
- def end_time
153
- @end_time ||=
154
- begin
155
- unless @file =~ /_(\d{4})(\d{2})(\d{2})T(\d{2})(\d{2})Z_/
156
- raise("unable to find time stamp in #{@file}")
157
- end
158
- Time.new($1, $2, $3, $4, $5, 0, 0)
159
- end
160
- end
161
-
162
- def start_time
163
- @start_time ||= (end_time - MINUTES_5)
164
- end
165
-
166
- def in_range?(range)
167
- return false if end_time < range.begin
168
- return false if start_time > range.end
169
- true
170
- end
171
- end
172
-
173
- class AlbLogEntry < Struct.new(*alb_log_fields.keys)
174
- attr_accessor :line
175
-
176
- def timestamp
177
- @timestamp ||= Time.iso8601(self[:timestamp])
178
- end
179
-
180
- def target_processing_time
181
- @target_processing_time ||= self[:target_processing_time].to_f
182
- end
183
- end
184
-
185
- class RequestMatcher
186
- attr_reader :range
187
-
188
- def initialize(options)
189
- @range = options[:start_time]..options[:end_time]
190
- @exclude_filter = options[:exclude_filter]
191
- @include_filter = options[:include_filter]
192
- @request_times_over = options[:request_times_over]
193
- end
194
-
195
- def match?(entry)
196
- return false unless @range.cover?(entry.timestamp)
197
- return false if @include_filter && ! @include_filter.match?(entry.line)
198
- return false if @exclude_filter && @exclude_filter.match?(entry.line)
199
- return false if @request_times_over && @request_times_over > entry.target_processing_time
200
- true
201
- end
202
- end
203
-
204
20
  started_at = Time.now.utc
205
21
 
206
22
  options = {
207
- start_time: time_ago(started_at, '30 min'),
23
+ start_time: Alblogs::Utils.time_ago(started_at, '30 min'),
208
24
  end_time: started_at,
209
25
  include_filter: nil,
210
26
  exclude_filter: nil,
@@ -218,11 +34,11 @@ OptionParser.new do |opts|
218
34
  opts.banner = "Usage: alblogs [options]"
219
35
 
220
36
  opts.on("-s", "--start=TIME_EXP", "Start time") do |v|
221
- options[:start_time] = time_ago(started_at, v)
37
+ options[:start_time] = Alblogs::Utils.time_ago(started_at, v)
222
38
  end
223
39
 
224
40
  opts.on("-e", "--end=TIME_EXP", "End time") do |v|
225
- options[:end_time] = time_ago(started_at, v)
41
+ options[:end_time] = Alblogs::Utils.time_ago(started_at, v)
226
42
  end
227
43
 
228
44
  opts.on("--include=REGEX", "Include filter") do |v|
@@ -264,7 +80,7 @@ if options[:end_time] && options[:end_time] < options[:start_time]
264
80
  options[:start_time], options[:end_time] = options[:end_time], options[:start_time]
265
81
  end
266
82
 
267
- request_matcher = RequestMatcher.new options
83
+ request_matcher = Alblogs::RequestMatcher.new options
268
84
 
269
85
  stats = Hash.new(0)
270
86
  stats[:started_at] = started_at
@@ -282,11 +98,12 @@ File.unlink("#{tmp_file}.gz") if File.exists?("#{tmp_file}.gz")
282
98
  $stop = false
283
99
  trap("INT") { $stop = true }
284
100
 
285
- get_s3_files_in_range(request_matcher.range, options[:alb_s3_bucket], options[:aws_profile]).values.each do |s3_file|
101
+ s3_bucket = Alblogs::S3Bucket.new(options[:alb_s3_bucket], options[:aws_profile])
102
+ s3_bucket.get_s3_files_in_range(request_matcher.range).values.each do |s3_file|
286
103
  stats[:files] += 1
287
104
 
288
105
  stats[:total_download_time] += measure do
289
- download_s3_file(s3_file, tmp_file, options[:aws_profile])
106
+ s3_bucket.download_s3_file(s3_file, tmp_file)
290
107
  end
291
108
 
292
109
  stats[:total_file_processing_time] += measure do
@@ -294,7 +111,7 @@ get_s3_files_in_range(request_matcher.range, options[:alb_s3_bucket], options[:a
294
111
  while(! f.eof? && ! $stop)
295
112
  stats[:lines] += 1
296
113
  line = f.readline
297
- entry = get_alb_log_entry(line)
114
+ entry = Alblogs::Entry.from_line(line)
298
115
  stats[:min_log_time] = ! stats[:min_log_time] || stats[:min_log_time] > entry.timestamp ? entry.timestamp : stats[:min_log_time]
299
116
  stats[:max_log_time] = ! stats[:max_log_time] || stats[:max_log_time] < entry.timestamp ? entry.timestamp : stats[:max_log_time]
300
117
  next unless request_matcher.match?(entry)
@@ -0,0 +1,41 @@
1
+ module Alblogs
2
+ autoload :Entry, 'alblogs/entry'
3
+ autoload :RequestMatcher, 'alblogs/request_matcher'
4
+ autoload :S3Bucket, 'alblogs/s3_bucket'
5
+ autoload :S3File, 'alblogs/s3_file'
6
+ autoload :Utils, 'alblogs/utils'
7
+
8
+ FIELDS =
9
+ begin
10
+ not_a_space = '([^ ]+)'
11
+ in_quotes = '"(.*?)"'
12
+
13
+ {
14
+ type: not_a_space,
15
+ timestamp: not_a_space,
16
+ elb: not_a_space,
17
+ client_port: not_a_space,
18
+ target_port: not_a_space,
19
+ request_processing_time: not_a_space,
20
+ target_processing_time: not_a_space,
21
+ response_processing_time: not_a_space,
22
+ elb_status_code: not_a_space,
23
+ target_status_code: not_a_space,
24
+ received_bytes: not_a_space,
25
+ sent_bytes: not_a_space,
26
+ request: in_quotes,
27
+ user_agent: in_quotes,
28
+ ssl_cipher: not_a_space,
29
+ ssl_protocol: not_a_space,
30
+ target_group_arn: not_a_space,
31
+ trace_id: in_quotes,
32
+ domain_name: in_quotes,
33
+ chosen_cert_arn: in_quotes,
34
+ matched_rule_priority: not_a_space,
35
+ request_creation_time: not_a_space,
36
+ actions_executed: in_quotes,
37
+ redirect_url: in_quotes,
38
+ error_reason: in_quotes
39
+ }
40
+ end
41
+ end
@@ -0,0 +1,21 @@
1
+ module Alblogs
2
+ class Entry < Struct.new(:line, *::Alblogs::FIELDS.keys)
3
+ REGEXP = Regexp.new(::Alblogs::FIELDS.values.join(' '))
4
+
5
+ def timestamp
6
+ @timestamp ||= Time.iso8601(self[:timestamp])
7
+ end
8
+
9
+ def target_processing_time
10
+ self[:target_processing_time].to_f
11
+ end
12
+
13
+ def self.from_line(line)
14
+ new(*get_fields(line))
15
+ end
16
+
17
+ def self.get_fields(line)
18
+ REGEXP.match(line).to_a
19
+ end
20
+ end
21
+ end
@@ -0,0 +1,20 @@
1
+ module Alblogs
2
+ class RequestMatcher
3
+ attr_reader :range
4
+
5
+ def initialize(options)
6
+ @range = options[:start_time]..options[:end_time]
7
+ @exclude_filter = options[:exclude_filter]
8
+ @include_filter = options[:include_filter]
9
+ @request_times_over = options[:request_times_over]
10
+ end
11
+
12
+ def match?(entry)
13
+ return false unless @range.cover?(entry.timestamp)
14
+ return false if @include_filter && ! @include_filter.match?(entry.line)
15
+ return false if @exclude_filter && @exclude_filter.match?(entry.line)
16
+ return false if @request_times_over && @request_times_over > entry.target_processing_time
17
+ true
18
+ end
19
+ end
20
+ end
@@ -0,0 +1,49 @@
1
+ module Alblogs
2
+ class S3Bucket
3
+ attr_reader :bucket,
4
+ :aws_profile
5
+
6
+ def initialize(bucket, aws_profile=nil)
7
+ @bucket = bucket
8
+ @aws_profile = aws_profile
9
+ end
10
+
11
+ def get_s3_files(date_path)
12
+ s3_url = "#{bucket}/#{date_path}/"
13
+ cmd = "aws"
14
+ cmd << " --profile #{Shellwords.escape(aws_profile)}" if aws_profile
15
+ cmd << " s3 ls #{Shellwords.escape(s3_url)}"
16
+ output = ::Alblogs::Utils.run_or_die(cmd)
17
+ output.split("\n").map do |line|
18
+ line =~ /(\d{4}\-\d{2}\-\d{2} \d{2}:\d{2}:\d{2}) +(\d+) +(.+)/
19
+ last_modified_at = Time.parse($1).utc
20
+ file_size = $2.to_i
21
+ file = $3
22
+ ::Alblogs::S3File.new("#{s3_url}#{file}", file_size, last_modified_at)
23
+ end
24
+ end
25
+
26
+ def get_s3_files_in_range(range)
27
+ s3_files = {}
28
+ time = range.begin
29
+ while time < range.end
30
+ date_path = time.strftime('%Y/%m/%d')
31
+ get_s3_files(date_path).each do |s3_file|
32
+ next unless s3_file.in_range?(range)
33
+ s3_files[s3_file.file] ||= s3_file
34
+ end
35
+ time += 86_400
36
+ end
37
+ s3_files
38
+ end
39
+
40
+ def download_s3_file(s3_file, dest)
41
+ cmd = "aws"
42
+ cmd << " --profile #{Shellwords.escape(aws_profile)}" if aws_profile
43
+ cmd << " s3 cp #{Shellwords.escape(s3_file.file)} #{Shellwords.escape(dest)}.gz"
44
+ ::Alblogs::Utils.run_or_die(cmd)
45
+ cmd = "gzip -f -d #{Shellwords.escape(dest)}.gz"
46
+ ::Alblogs::Utils.run_or_die(cmd)
47
+ end
48
+ end
49
+ end
@@ -0,0 +1,35 @@
1
+ module Alblogs
2
+ class S3File
3
+ MINUTES_5 = 5 * 60
4
+
5
+ attr_reader :file,
6
+ :file_size,
7
+ :last_modified_at
8
+
9
+ def initialize(file, file_size, last_modified_at)
10
+ @file = file
11
+ @file_size = file_size
12
+ @last_modified_at = last_modified_at
13
+ end
14
+
15
+ def end_time
16
+ @end_time ||=
17
+ begin
18
+ unless @file =~ /_(\d{4})(\d{2})(\d{2})T(\d{2})(\d{2})Z_/
19
+ raise("unable to find time stamp in #{@file}")
20
+ end
21
+ Time.new($1, $2, $3, $4, $5, 0, 0)
22
+ end
23
+ end
24
+
25
+ def start_time
26
+ @start_time ||= (end_time - MINUTES_5)
27
+ end
28
+
29
+ def in_range?(range)
30
+ return false if end_time < range.begin
31
+ return false if start_time > range.end
32
+ true
33
+ end
34
+ end
35
+ end
@@ -0,0 +1,32 @@
1
+ module Alblogs
2
+ module Utils
3
+ module_function
4
+
5
+ def parse_time_offset(str)
6
+ if str =~ /min/
7
+ str.sub(/ *min.*/, '').to_i * 60
8
+ elsif str =~ /hour/
9
+ str.sub(/ *hour.*/, '').to_i * 3600
10
+ elsif str =~ /day/
11
+ str.sub(/ *day.*/, '').to_i * 86400
12
+ else
13
+ nil
14
+ end
15
+ end
16
+
17
+ def time_ago(now, str)
18
+ if offset = parse_time_offset(str)
19
+ time = now - offset
20
+ time - (time.to_i % 60) # round to the start of the minute
21
+ else
22
+ Time.parse(str).utc
23
+ end
24
+ end
25
+
26
+ def run_or_die(cmd)
27
+ res = `#{cmd}`
28
+ raise("command failed with #{$?}, #{cmd}") unless $?.success?
29
+ res
30
+ end
31
+ end
32
+ end
@@ -0,0 +1,7 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ $LOAD_PATH << File.expand_path('../lib', __dir__)
5
+ require 'alblogs'
6
+ require 'irb'
7
+ IRB.start(__FILE__)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: alblogs
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Doug Youch
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-06-06 00:00:00.000000000 Z
11
+ date: 2019-10-08 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Utility script for processing ALB access logs over a given time range
14
14
  email: dougyouch@gmail.com
@@ -18,9 +18,18 @@ extensions: []
18
18
  extra_rdoc_files: []
19
19
  files:
20
20
  - ".gitignore"
21
+ - ".ruby-gemset"
22
+ - ".ruby-version"
21
23
  - README.md
22
24
  - alblogs.gemspec
23
25
  - bin/alblogs
26
+ - lib/alblogs.rb
27
+ - lib/alblogs/entry.rb
28
+ - lib/alblogs/request_matcher.rb
29
+ - lib/alblogs/s3_bucket.rb
30
+ - lib/alblogs/s3_file.rb
31
+ - lib/alblogs/utils.rb
32
+ - script/console
24
33
  homepage: https://github.com/dougyouch/alblogs
25
34
  licenses: []
26
35
  metadata: {}