alblogs 0.1.0 → 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: eefbd8b8cac015fc5eee191b6e8a73ff9fde3c465f8d1410b76774e686aa9623
4
- data.tar.gz: 3dfce14461c67824dd0ad6c5bd1ecee0081767de256191bda00cad1a99c18d1f
3
+ metadata.gz: 7d2aea6572a238675e0499c04e26341ea3cd3b8860b94c51095fe9c00cb4d806
4
+ data.tar.gz: 45a86b3b527c47bee978d0139c473c3aa491a1bddea41d2d10135093cc0cde4c
5
5
  SHA512:
6
- metadata.gz: 9b17f53979c6c56d8909d62e1fcacf468edcd0294fc41928cd62a706d5d206fd8495e16453c339ea5dfd236aef9158c8ad9cfcb00522cc961e18ce82e98f7a4e
7
- data.tar.gz: d5d792b11d17f82e30a58901f18ac7a1cdf1e3cea5d07201645475a63a255a06769f64772c947653f6ab2533657c23c84bdd2db4517382d484389e506f07079f
6
+ metadata.gz: 141b1919b75fc436dcb627fb93c20fd1312e57aa4d2b1525d1c7180f8e4921dba8c5af93614392f93162f9006756d4eba04a896f1ec18511033d4db7bdc7ae40
7
+ data.tar.gz: cc2c69a05b388d603c7e40cdf29759f57090b6894a4f9a001d1157a57a03692b419963e1f086efa6a87a35175ba4797b1a3121ea779c71a6f48bb92a46798f84
data/alblogs.gemspec CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = 'alblogs'
5
- s.version = '0.1.0'
5
+ s.version = '0.1.2'
6
6
  s.summary = 'ALB access log processing'
7
7
  s.description = 'Utility script for processing ALB access logs over a given time range'
8
8
  s.authors = ['Doug Youch']
data/bin/alblogs CHANGED
@@ -6,17 +6,6 @@ require 'shellwords'
6
6
  require 'json'
7
7
  require 'alblogs'
8
8
 
9
- def measure
10
- start = Time.now
11
- yield
12
- Time.now - start
13
- end
14
-
15
- def display_stats(stats)
16
- stats[:elapsed_time] = Time.now.utc - stats[:started_at]
17
- $stderr.puts stats.inspect
18
- end
19
-
20
9
  started_at = Time.now.utc
21
10
 
22
11
  options = {
@@ -81,52 +70,18 @@ if options[:end_time] && options[:end_time] < options[:start_time]
81
70
  end
82
71
 
83
72
  request_matcher = Alblogs::RequestMatcher.new options
84
-
85
- stats = Hash.new(0)
86
- stats[:started_at] = started_at
87
- stats[:range_starts_at] = request_matcher.range.begin
88
- stats[:range_ends_at] = request_matcher.range.end
89
- stats[:min_log_time] = nil
90
- stats[:max_log_time] = nil
91
- stats[:min_matched_log_time] = nil
92
- stats[:max_matched_log_time] = nil
93
-
94
- tmp_file = '.download.alblogs.log'
95
- File.unlink(tmp_file) if File.exists?(tmp_file)
96
- File.unlink("#{tmp_file}.gz") if File.exists?("#{tmp_file}.gz")
97
-
98
- $stop = false
99
- trap("INT") { $stop = true }
100
-
101
73
  s3_bucket = Alblogs::S3Bucket.new(options[:alb_s3_bucket], options[:aws_profile])
102
- s3_bucket.get_s3_files_in_range(request_matcher.range).values.each do |s3_file|
103
- stats[:files] += 1
104
-
105
- stats[:total_download_time] += measure do
106
- s3_bucket.download_s3_file(s3_file, tmp_file)
107
- end
108
74
 
109
- stats[:total_file_processing_time] += measure do
110
- File.open(tmp_file, 'rb') do |f|
111
- while(! f.eof? && ! $stop)
112
- stats[:lines] += 1
113
- line = f.readline
114
- entry = Alblogs::Entry.from_line(line)
115
- stats[:min_log_time] = ! stats[:min_log_time] || stats[:min_log_time] > entry.timestamp ? entry.timestamp : stats[:min_log_time]
116
- stats[:max_log_time] = ! stats[:max_log_time] || stats[:max_log_time] < entry.timestamp ? entry.timestamp : stats[:max_log_time]
117
- next unless request_matcher.match?(entry)
118
- stats[:matching_lines] += 1
119
- stats[:min_matched_log_time] = ! stats[:min_matched_log_time] || stats[:min_matched_log_time] > entry.timestamp ? entry.timestamp : stats[:min_matched_log_time]
120
- stats[:max_matched_log_time] = ! stats[:max_matched_log_time] || stats[:max_matched_log_time] < entry.timestamp ? entry.timestamp : stats[:max_matched_log_time]
121
- options[:log_file].puts line
122
- end
123
- end
124
- end
75
+ iterator = Alblogs::Iterator.new(s3_bucket, request_matcher.range, request_matcher)
76
+ iterator.display_stats_proc = Proc.new do |stats|
77
+ stats[:elapsed_time] = Time.now.utc - stats[:started_at]
78
+ $stderr.puts stats.inspect
79
+ end
125
80
 
126
- File.unlink(tmp_file)
81
+ trap("INT") { iterator.stop! }
127
82
 
128
- display_stats(stats) if options[:display_stats]
129
- break if $stop
83
+ iterator.each do |entry|
84
+ options[:log_file].puts entry.line
130
85
  end
131
86
 
132
87
  options[:log_file].close
data/lib/alblogs.rb CHANGED
@@ -1,5 +1,6 @@
1
1
  module Alblogs
2
2
  autoload :Entry, 'alblogs/entry'
3
+ autoload :Iterator, 'alblogs/iterator'
3
4
  autoload :RequestMatcher, 'alblogs/request_matcher'
4
5
  autoload :S3Bucket, 'alblogs/s3_bucket'
5
6
  autoload :S3File, 'alblogs/s3_file'
@@ -0,0 +1,88 @@
1
module Alblogs
  # Streams ALB access-log entries out of the S3 log files that cover a range.
  #
  # Each file reported by the bucket is downloaded to +tmp_file+, read line by
  # line, parsed with +Alblogs::Entry.from_line+, optionally filtered through
  # +request_matcher+, and every surviving entry is yielded. Running counters
  # and timings are kept in +stats+ and handed to +display_stats_proc+ (when
  # set) after each file.
  #
  # Collaborators are duck-typed; the code only relies on:
  # * +s3_bucket+ responding to +get_s3_files_in_range(range)+ (result must
  #   respond to +values+) and +download_s3_file(s3_file, path)+
  # * +request_matcher+ responding to +match?(entry)+
  # * entries responding to +timestamp+ with mutually comparable values
  class Iterator
    include Enumerable

    attr_accessor :tmp_file,
                  :display_stats_proc

    attr_reader :s3_bucket,
                :date_range,
                :request_matcher,
                :stats

    # @param s3_bucket [#get_s3_files_in_range, #download_s3_file] log source
    # @param date_range [Range] forwarded to the bucket and recorded in stats
    # @param request_matcher [#match?, nil] optional filter; nil yields every entry
    def initialize(s3_bucket, date_range, request_matcher = nil)
      @s3_bucket = s3_bucket
      @date_range = date_range
      @request_matcher = request_matcher
      @tmp_file = '.download.alblogs.log'
    end

    # Yields every matching entry, file by file. Resets +stats+ and the stop
    # flag on each call, so the iterator can be run more than once.
    #
    # @yieldparam entry the object returned by +Alblogs::Entry.from_line+
    # @return [Enumerator] when called without a block
    def each(&block)
      # Without this guard a block-less call would blow up with LocalJumpError
      # on the first matching line, after already downloading a file.
      return enum_for(:each) unless block_given?

      @stop = false
      delete_tmp_file
      init_stats

      s3_bucket.get_s3_files_in_range(date_range).values.each do |s3_file|
        process_file(s3_file, &block)
        display_stats
        break if @stop
      end
    end

    # Requests that iteration end before the next line is processed. Only sets
    # a flag, so it is cheap enough to call from a signal handler.
    def stop!
      @stop = true
    end

    # Passes the current stats hash to +display_stats_proc+, if one is set.
    # NOTE: +stats+ is nil until +each+ has been started at least once.
    def display_stats
      display_stats_proc && display_stats_proc.call(stats)
    end

    private

    # Downloads one log file, feeds its lines through the matcher and yields
    # the matching entries. The temp file is removed even when the consumer
    # block raises or breaks out early, so no download is left behind.
    def process_file(s3_file)
      stats[:files] += 1

      stats[:total_download_time] += measure { s3_bucket.download_s3_file(s3_file, tmp_file) }

      processing_time = measure do
        File.open(tmp_file, 'rb') do |f|
          f.each_line do |line|
            break if @stop

            stats[:lines] += 1
            entry = ::Alblogs::Entry.from_line(line)
            track_times(:min_log_time, :max_log_time, entry.timestamp)
            next if request_matcher && !request_matcher.match?(entry)

            stats[:matching_lines] += 1
            track_times(:min_matched_log_time, :max_matched_log_time, entry.timestamp)
            yield entry
          end
        end
      end
      stats[:total_file_processing_time] += processing_time
    ensure
      delete_tmp_file
    end

    # Widens the [min_key, max_key] window in stats so it includes timestamp.
    def track_times(min_key, max_key, timestamp)
      stats[min_key] = timestamp if !stats[min_key] || stats[min_key] > timestamp
      stats[max_key] = timestamp if !stats[max_key] || stats[max_key] < timestamp
    end

    # Runs the block and returns the elapsed seconds as a Float. Uses the
    # monotonic clock so wall-clock adjustments cannot skew (or negate) timings.
    def measure
      started = Process.clock_gettime(Process::CLOCK_MONOTONIC)
      yield
      Process.clock_gettime(Process::CLOCK_MONOTONIC) - started
    end

    # Removes the temp file and its ".gz" sibling when present.
    # File.exist? is used because File.exists? was removed in Ruby 3.2.
    def delete_tmp_file
      [tmp_file, "#{tmp_file}.gz"].each do |path|
        File.unlink(path) if File.exist?(path)
      end
    end

    # Starts a fresh stats hash: counters default to 0, time bounds start nil.
    def init_stats
      @stats = Hash.new(0)
      @stats[:started_at] = Time.now.utc
      @stats[:range_starts_at] = date_range.begin
      @stats[:range_ends_at] = date_range.end
      @stats[:min_log_time] = nil
      @stats[:max_log_time] = nil
      @stats[:min_matched_log_time] = nil
      @stats[:max_matched_log_time] = nil
    end
  end
end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: alblogs
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Doug Youch
@@ -25,6 +25,7 @@ files:
25
25
  - bin/alblogs
26
26
  - lib/alblogs.rb
27
27
  - lib/alblogs/entry.rb
28
+ - lib/alblogs/iterator.rb
28
29
  - lib/alblogs/request_matcher.rb
29
30
  - lib/alblogs/s3_bucket.rb
30
31
  - lib/alblogs/s3_file.rb