alblogs 0.1.0 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/alblogs.gemspec +1 -1
- data/bin/alblogs +8 -53
- data/lib/alblogs.rb +1 -0
- data/lib/alblogs/iterator.rb +88 -0
- metadata +2 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7d2aea6572a238675e0499c04e26341ea3cd3b8860b94c51095fe9c00cb4d806
|
4
|
+
data.tar.gz: 45a86b3b527c47bee978d0139c473c3aa491a1bddea41d2d10135093cc0cde4c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 141b1919b75fc436dcb627fb93c20fd1312e57aa4d2b1525d1c7180f8e4921dba8c5af93614392f93162f9006756d4eba04a896f1ec18511033d4db7bdc7ae40
|
7
|
+
data.tar.gz: cc2c69a05b388d603c7e40cdf29759f57090b6894a4f9a001d1157a57a03692b419963e1f086efa6a87a35175ba4797b1a3121ea779c71a6f48bb92a46798f84
|
data/alblogs.gemspec
CHANGED
data/bin/alblogs
CHANGED
@@ -6,17 +6,6 @@ require 'shellwords'
|
|
6
6
|
require 'json'
|
7
7
|
require 'alblogs'
|
8
8
|
|
9
|
-
def measure
|
10
|
-
start = Time.now
|
11
|
-
yield
|
12
|
-
Time.now - start
|
13
|
-
end
|
14
|
-
|
15
|
-
def display_stats(stats)
|
16
|
-
stats[:elapsed_time] = Time.now.utc - stats[:started_at]
|
17
|
-
$stderr.puts stats.inspect
|
18
|
-
end
|
19
|
-
|
20
9
|
started_at = Time.now.utc
|
21
10
|
|
22
11
|
options = {
|
@@ -81,52 +70,18 @@ if options[:end_time] && options[:end_time] < options[:start_time]
|
|
81
70
|
end
|
82
71
|
|
83
72
|
request_matcher = Alblogs::RequestMatcher.new options
|
84
|
-
|
85
|
-
stats = Hash.new(0)
|
86
|
-
stats[:started_at] = started_at
|
87
|
-
stats[:range_starts_at] = request_matcher.range.begin
|
88
|
-
stats[:range_ends_at] = request_matcher.range.end
|
89
|
-
stats[:min_log_time] = nil
|
90
|
-
stats[:max_log_time] = nil
|
91
|
-
stats[:min_matched_log_time] = nil
|
92
|
-
stats[:max_matched_log_time] = nil
|
93
|
-
|
94
|
-
tmp_file = '.download.alblogs.log'
|
95
|
-
File.unlink(tmp_file) if File.exists?(tmp_file)
|
96
|
-
File.unlink("#{tmp_file}.gz") if File.exists?("#{tmp_file}.gz")
|
97
|
-
|
98
|
-
$stop = false
|
99
|
-
trap("INT") { $stop = true }
|
100
|
-
|
101
73
|
s3_bucket = Alblogs::S3Bucket.new(options[:alb_s3_bucket], options[:aws_profile])
|
102
|
-
s3_bucket.get_s3_files_in_range(request_matcher.range).values.each do |s3_file|
|
103
|
-
stats[:files] += 1
|
104
|
-
|
105
|
-
stats[:total_download_time] += measure do
|
106
|
-
s3_bucket.download_s3_file(s3_file, tmp_file)
|
107
|
-
end
|
108
74
|
|
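109
|
-
stats[:total_file_processing_time] += measure do
|
110
|
-
File.open(tmp_file, 'rb') do |f|
|
111
|
-
while(! f.eof? && ! $stop)
|
112
|
-
stats[:lines] += 1
|
113
|
-
line = f.readline
|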
114
|
-
entry = Alblogs::Entry.from_line(line)
|
115
|
-
stats[:min_log_time] = ! stats[:min_log_time] || stats[:min_log_time] > entry.timestamp ? entry.timestamp : stats[:min_log_time]
|
116
|
-
stats[:max_log_time] = ! stats[:max_log_time] || stats[:max_log_time] < entry.timestamp ? entry.timestamp : stats[:max_log_time]
|
117
|
-
next unless request_matcher.match?(entry)
|
118
|
-
stats[:matching_lines] += 1
|
119
|
-
stats[:min_matched_log_time] = ! stats[:min_matched_log_time] || stats[:min_matched_log_time] > entry.timestamp ? entry.timestamp : stats[:min_matched_log_time]
|
120
|
-
stats[:max_matched_log_time] = ! stats[:max_matched_log_time] || stats[:max_matched_log_time] < entry.timestamp ? entry.timestamp : stats[:max_matched_log_time]
|
121
|
-
options[:log_file].puts line
|
122
|
-
end
|
123
|
-
end
|
124
|
-
end
|
75
|
+
iterator = Alblogs::Iterator.new(s3_bucket, request_matcher.range, request_matcher)
|
76
|
+
iterator.display_stats_proc = Proc.new do |stats|
|
77
|
+
stats[:elapsed_time] = Time.now.utc - stats[:started_at]
|
78
|
+
$stderr.puts stats.inspect
|
79
|
+
end
|
125
80
|
|
126
|
-
File.unlink(tmp_file)
|
81
|
+
trap("INT") { iterator.stop! }
|
127
82
|
|
128
|
-
display_stats(stats)
|
129
|
-
break if $stop
|
83
|
+
iterator.each do |entry|
|
84
|
+
options[:log_file].puts entry.line
|
130
85
|
end
|
131
86
|
|
132
87
|
options[:log_file].close
|
data/lib/alblogs.rb
CHANGED
data/lib/alblogs/iterator.rb
ADDED
@@ -0,0 +1,88 @@
|
|
1
|
+
module Alblogs
|
2
|
+
class Iterator
|
3
|
+
include Enumerable
|
4
|
+
|
5
|
+
attr_accessor :tmp_file,
|
6
|
+
:display_stats_proc
|
7
|
+
|
8
|
+
attr_reader :s3_bucket,
|
9
|
+
:date_range,
|
10
|
+
:request_matcher,
|
11
|
+
:stats
|
12
|
+
|
13
|
+
def initialize(s3_bucket, date_range, request_matcher = nil)
|
14
|
+
@s3_bucket = s3_bucket
|
15
|
+
@date_range = date_range
|
16
|
+
@request_matcher = request_matcher
|
17
|
+
@tmp_file = '.download.alblogs.log'
|
18
|
+
end
|
19
|
+
|
20
|
+
def each
|
21
|
+
@stop = false
|
22
|
+
delete_tmp_file
|
23
|
+
init_stats
|
24
|
+
|
25
|
+
s3_bucket.get_s3_files_in_range(date_range).values.each do |s3_file|
|
26
|
+
stats[:files] += 1
|
27
|
+
|
28
|
+
stats[:total_download_time] += measure do
|
29
|
+
s3_bucket.download_s3_file(s3_file, tmp_file)
|
30
|
+
end
|
31
|
+
|
32
|
+
stats[:total_file_processing_time] += measure do
|
33
|
+
File.open(tmp_file, 'rb') do |f|
|
34
|
+
while(! f.eof? && ! @stop)
|
35
|
+
stats[:lines] += 1
|
36
|
+
line = f.readline
|
37
|
+
entry = ::Alblogs::Entry.from_line(line)
|
38
|
+
stats[:min_log_time] = ! stats[:min_log_time] || stats[:min_log_time] > entry.timestamp ? entry.timestamp : stats[:min_log_time]
|
39
|
+
stats[:max_log_time] = ! stats[:max_log_time] || stats[:max_log_time] < entry.timestamp ? entry.timestamp : stats[:max_log_time]
|
40
|
+
next if request_matcher && !request_matcher.match?(entry)
|
41
|
+
stats[:matching_lines] += 1
|
42
|
+
stats[:min_matched_log_time] = ! stats[:min_matched_log_time] || stats[:min_matched_log_time] > entry.timestamp ? entry.timestamp : stats[:min_matched_log_time]
|
43
|
+
stats[:max_matched_log_time] = ! stats[:max_matched_log_time] || stats[:max_matched_log_time] < entry.timestamp ? entry.timestamp : stats[:max_matched_log_time]
|
44
|
+
yield entry
|
45
|
+
end
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
File.unlink(tmp_file)
|
50
|
+
|
51
|
+
display_stats
|
52
|
+
break if @stop
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
def stop!
|
57
|
+
@stop = true
|
58
|
+
end
|
59
|
+
|
60
|
+
def display_stats
|
61
|
+
display_stats_proc && display_stats_proc.call(stats)
|
62
|
+
end
|
63
|
+
|
64
|
+
private
|
65
|
+
|
66
|
+
def measure
|
67
|
+
start = Time.now
|
68
|
+
yield
|
69
|
+
Time.now - start
|
70
|
+
end
|
71
|
+
|
72
|
+
def delete_tmp_file
|
73
|
+
File.unlink(tmp_file) if File.exists?(tmp_file)
|
74
|
+
File.unlink("#{tmp_file}.gz") if File.exists?("#{tmp_file}.gz")
|
75
|
+
end
|
76
|
+
|
77
|
+
def init_stats
|
78
|
+
@stats = Hash.new(0)
|
79
|
+
@stats[:started_at] = Time.now.utc
|
80
|
+
@stats[:range_starts_at] = date_range.begin
|
81
|
+
@stats[:range_ends_at] = date_range.end
|
82
|
+
@stats[:min_log_time] = nil
|
83
|
+
@stats[:max_log_time] = nil
|
84
|
+
@stats[:min_matched_log_time] = nil
|
85
|
+
@stats[:max_matched_log_time] = nil
|
86
|
+
end
|
87
|
+
end
|
88
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: alblogs
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Doug Youch
|
@@ -25,6 +25,7 @@ files:
|
|
25
25
|
- bin/alblogs
|
26
26
|
- lib/alblogs.rb
|
27
27
|
- lib/alblogs/entry.rb
|
28
|
+
- lib/alblogs/iterator.rb
|
28
29
|
- lib/alblogs/request_matcher.rb
|
29
30
|
- lib/alblogs/s3_bucket.rb
|
30
31
|
- lib/alblogs/s3_file.rb
|