alblogs 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: eefbd8b8cac015fc5eee191b6e8a73ff9fde3c465f8d1410b76774e686aa9623
4
- data.tar.gz: 3dfce14461c67824dd0ad6c5bd1ecee0081767de256191bda00cad1a99c18d1f
3
+ metadata.gz: 7d2aea6572a238675e0499c04e26341ea3cd3b8860b94c51095fe9c00cb4d806
4
+ data.tar.gz: 45a86b3b527c47bee978d0139c473c3aa491a1bddea41d2d10135093cc0cde4c
5
5
  SHA512:
6
- metadata.gz: 9b17f53979c6c56d8909d62e1fcacf468edcd0294fc41928cd62a706d5d206fd8495e16453c339ea5dfd236aef9158c8ad9cfcb00522cc961e18ce82e98f7a4e
7
- data.tar.gz: d5d792b11d17f82e30a58901f18ac7a1cdf1e3cea5d07201645475a63a255a06769f64772c947653f6ab2533657c23c84bdd2db4517382d484389e506f07079f
6
+ metadata.gz: 141b1919b75fc436dcb627fb93c20fd1312e57aa4d2b1525d1c7180f8e4921dba8c5af93614392f93162f9006756d4eba04a896f1ec18511033d4db7bdc7ae40
7
+ data.tar.gz: cc2c69a05b388d603c7e40cdf29759f57090b6894a4f9a001d1157a57a03692b419963e1f086efa6a87a35175ba4797b1a3121ea779c71a6f48bb92a46798f84
data/alblogs.gemspec CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = 'alblogs'
5
- s.version = '0.1.0'
5
+ s.version = '0.1.2'
6
6
  s.summary = 'ALB access log processing'
7
7
  s.description = 'Utility script for processing ALB access logs over a given time range'
8
8
  s.authors = ['Doug Youch']
data/bin/alblogs CHANGED
@@ -6,17 +6,6 @@ require 'shellwords'
6
6
  require 'json'
7
7
  require 'alblogs'
8
8
 
9
- def measure
10
- start = Time.now
11
- yield
12
- Time.now - start
13
- end
14
-
15
- def display_stats(stats)
16
- stats[:elapsed_time] = Time.now.utc - stats[:started_at]
17
- $stderr.puts stats.inspect
18
- end
19
-
20
9
  started_at = Time.now.utc
21
10
 
22
11
  options = {
@@ -81,52 +70,18 @@ if options[:end_time] && options[:end_time] < options[:start_time]
81
70
  end
82
71
 
83
72
  request_matcher = Alblogs::RequestMatcher.new options
84
-
85
- stats = Hash.new(0)
86
- stats[:started_at] = started_at
87
- stats[:range_starts_at] = request_matcher.range.begin
88
- stats[:range_ends_at] = request_matcher.range.end
89
- stats[:min_log_time] = nil
90
- stats[:max_log_time] = nil
91
- stats[:min_matched_log_time] = nil
92
- stats[:max_matched_log_time] = nil
93
-
94
- tmp_file = '.download.alblogs.log'
95
- File.unlink(tmp_file) if File.exists?(tmp_file)
96
- File.unlink("#{tmp_file}.gz") if File.exists?("#{tmp_file}.gz")
97
-
98
- $stop = false
99
- trap("INT") { $stop = true }
100
-
101
73
  s3_bucket = Alblogs::S3Bucket.new(options[:alb_s3_bucket], options[:aws_profile])
102
- s3_bucket.get_s3_files_in_range(request_matcher.range).values.each do |s3_file|
103
- stats[:files] += 1
104
-
105
- stats[:total_download_time] += measure do
106
- s3_bucket.download_s3_file(s3_file, tmp_file)
107
- end
108
74
 
109
- stats[:total_file_processing_time] += measure do
110
- File.open(tmp_file, 'rb') do |f|
111
- while(! f.eof? && ! $stop)
112
- stats[:lines] += 1
113
- line = f.readline
114
- entry = Alblogs::Entry.from_line(line)
115
- stats[:min_log_time] = ! stats[:min_log_time] || stats[:min_log_time] > entry.timestamp ? entry.timestamp : stats[:min_log_time]
116
- stats[:max_log_time] = ! stats[:max_log_time] || stats[:max_log_time] < entry.timestamp ? entry.timestamp : stats[:max_log_time]
117
- next unless request_matcher.match?(entry)
118
- stats[:matching_lines] += 1
119
- stats[:min_matched_log_time] = ! stats[:min_matched_log_time] || stats[:min_matched_log_time] > entry.timestamp ? entry.timestamp : stats[:min_matched_log_time]
120
- stats[:max_matched_log_time] = ! stats[:max_matched_log_time] || stats[:max_matched_log_time] < entry.timestamp ? entry.timestamp : stats[:max_matched_log_time]
121
- options[:log_file].puts line
122
- end
123
- end
124
- end
75
+ iterator = Alblogs::Iterator.new(s3_bucket, request_matcher.range, request_matcher)
76
+ iterator.display_stats_proc = Proc.new do |stats|
77
+ stats[:elapsed_time] = Time.now.utc - stats[:started_at]
78
+ $stderr.puts stats.inspect
79
+ end
125
80
 
126
- File.unlink(tmp_file)
81
+ trap("INT") { iterator.stop! }
127
82
 
128
- display_stats(stats) if options[:display_stats]
129
- break if $stop
83
+ iterator.each do |entry|
84
+ options[:log_file].puts entry.line
130
85
  end
131
86
 
132
87
  options[:log_file].close
data/lib/alblogs.rb CHANGED
@@ -1,5 +1,6 @@
1
1
  module Alblogs
2
2
  autoload :Entry, 'alblogs/entry'
3
+ autoload :Iterator, 'alblogs/iterator'
3
4
  autoload :RequestMatcher, 'alblogs/request_matcher'
4
5
  autoload :S3Bucket, 'alblogs/s3_bucket'
5
6
  autoload :S3File, 'alblogs/s3_file'
@@ -0,0 +1,88 @@
1
module Alblogs
  # Enumerates the ALB access-log entries stored in an S3 bucket for a given
  # range.
  #
  # Each S3 file returned by the bucket is downloaded to {#tmp_file}, read line
  # by line, parsed with Alblogs::Entry.from_line and (when a request matcher
  # is configured) filtered through +request_matcher.match?+. Running counters
  # and observed timestamp bounds are collected in {#stats}.
  #
  # Collaborators are duck-typed:
  # * +s3_bucket+ must respond to +get_s3_files_in_range(range)+ (the result
  #   must respond to +values+) and +download_s3_file(s3_file, path)+.
  # * +date_range+ must respond to +begin+ and +end+.
  # * +request_matcher+, when given, must respond to +match?(entry)+.
  class Iterator
    include Enumerable

    attr_accessor :tmp_file,
                  :display_stats_proc

    attr_reader :s3_bucket,
                :date_range,
                :request_matcher,
                :stats

    # @param s3_bucket [#get_s3_files_in_range, #download_s3_file] log source
    # @param date_range [#begin, #end] range handed to the bucket lookup
    # @param request_matcher [#match?, nil] optional filter; when nil every
    #   parsed entry is yielded
    def initialize(s3_bucket, date_range, request_matcher = nil)
      @s3_bucket = s3_bucket
      @date_range = date_range
      @request_matcher = request_matcher
      @tmp_file = '.download.alblogs.log'
    end

    # Yields every matching entry, one S3 file at a time.
    #
    # Stats are reset at the start of each call. After each file has been
    # processed {#display_stats} is invoked, and iteration ends early once
    # {#stop!} has been called.
    #
    # @yieldparam entry the object returned by Alblogs::Entry.from_line
    # @return [Enumerator] when called without a block
    def each
      return enum_for(:each) unless block_given?

      @stop = false
      delete_tmp_file
      init_stats

      s3_bucket.get_s3_files_in_range(date_range).values.each do |s3_file|
        process_s3_file(s3_file) { |entry| yield entry }

        display_stats
        break if @stop
      end
    end

    # Requests that the current iteration stop; the flag is checked before
    # each line is processed and after each file.
    def stop!
      @stop = true
    end

    # Calls +display_stats_proc+ with the current stats hash, if one is set.
    def display_stats
      display_stats_proc && display_stats_proc.call(stats)
    end

    private

    # Downloads one S3 file, processes it, and always removes the temporary
    # download afterwards - even when the consumer block raises or breaks.
    def process_s3_file(s3_file, &block)
      stats[:files] += 1

      stats[:total_download_time] += measure do
        s3_bucket.download_s3_file(s3_file, tmp_file)
      end

      stats[:total_file_processing_time] += measure do
        process_tmp_file(&block)
      end
    ensure
      delete_tmp_file
    end

    # Parses the downloaded file line by line, updates the stats and yields
    # the entries accepted by the request matcher.
    def process_tmp_file
      File.open(tmp_file, 'rb') do |f|
        f.each_line do |line|
          # Checked before counting so a stopped run does not count the line.
          break if @stop

          stats[:lines] += 1
          entry = ::Alblogs::Entry.from_line(line)
          track_time_bounds(:min_log_time, :max_log_time, entry.timestamp)
          next if request_matcher && !request_matcher.match?(entry)

          stats[:matching_lines] += 1
          track_time_bounds(:min_matched_log_time, :max_matched_log_time, entry.timestamp)
          yield entry
        end
      end
    end

    # Widens the [min_key, max_key] bounds in stats to include +timestamp+.
    def track_time_bounds(min_key, max_key, timestamp)
      stats[min_key] = timestamp if !stats[min_key] || stats[min_key] > timestamp
      stats[max_key] = timestamp if !stats[max_key] || stats[max_key] < timestamp
    end

    # Returns the number of seconds (Float) the given block took to run.
    # Uses the monotonic clock so wall-clock adjustments cannot skew the result.
    def measure
      start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
      yield
      Process.clock_gettime(Process::CLOCK_MONOTONIC) - start
    end

    # Removes the temporary download and its ".gz" sibling when present.
    # File.exist? is used because File.exists? was removed in Ruby 3.2.
    def delete_tmp_file
      [tmp_file, "#{tmp_file}.gz"].each do |path|
        File.unlink(path) if File.exist?(path)
      end
    end

    # Resets the stats hash; unknown counters default to 0, while the
    # timestamp bounds start out as nil until the first entry is seen.
    def init_stats
      @stats = Hash.new(0)
      @stats[:started_at] = Time.now.utc
      @stats[:range_starts_at] = date_range.begin
      @stats[:range_ends_at] = date_range.end
      @stats[:min_log_time] = nil
      @stats[:max_log_time] = nil
      @stats[:min_matched_log_time] = nil
      @stats[:max_matched_log_time] = nil
    end
  end
end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: alblogs
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Doug Youch
@@ -25,6 +25,7 @@ files:
25
25
  - bin/alblogs
26
26
  - lib/alblogs.rb
27
27
  - lib/alblogs/entry.rb
28
+ - lib/alblogs/iterator.rb
28
29
  - lib/alblogs/request_matcher.rb
29
30
  - lib/alblogs/s3_bucket.rb
30
31
  - lib/alblogs/s3_file.rb