alblogs 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/alblogs.gemspec +1 -1
- data/bin/alblogs +8 -53
- data/lib/alblogs.rb +1 -0
- data/lib/alblogs/iterator.rb +88 -0
- metadata +2 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7d2aea6572a238675e0499c04e26341ea3cd3b8860b94c51095fe9c00cb4d806
|
4
|
+
data.tar.gz: 45a86b3b527c47bee978d0139c473c3aa491a1bddea41d2d10135093cc0cde4c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 141b1919b75fc436dcb627fb93c20fd1312e57aa4d2b1525d1c7180f8e4921dba8c5af93614392f93162f9006756d4eba04a896f1ec18511033d4db7bdc7ae40
|
7
|
+
data.tar.gz: cc2c69a05b388d603c7e40cdf29759f57090b6894a4f9a001d1157a57a03692b419963e1f086efa6a87a35175ba4797b1a3121ea779c71a6f48bb92a46798f84
|
data/alblogs.gemspec
CHANGED
data/bin/alblogs
CHANGED
@@ -6,17 +6,6 @@ require 'shellwords'
|
|
6
6
|
require 'json'
|
7
7
|
require 'alblogs'
|
8
8
|
|
9
|
-
def measure
|
10
|
-
start = Time.now
|
11
|
-
yield
|
12
|
-
Time.now - start
|
13
|
-
end
|
14
|
-
|
15
|
-
def display_stats(stats)
|
16
|
-
stats[:elapsed_time] = Time.now.utc - stats[:started_at]
|
17
|
-
$stderr.puts stats.inspect
|
18
|
-
end
|
19
|
-
|
20
9
|
started_at = Time.now.utc
|
21
10
|
|
22
11
|
options = {
|
@@ -81,52 +70,18 @@ if options[:end_time] && options[:end_time] < options[:start_time]
|
|
81
70
|
end
|
82
71
|
|
83
72
|
request_matcher = Alblogs::RequestMatcher.new options
|
84
|
-
|
85
|
-
stats = Hash.new(0)
|
86
|
-
stats[:started_at] = started_at
|
87
|
-
stats[:range_starts_at] = request_matcher.range.begin
|
88
|
-
stats[:range_ends_at] = request_matcher.range.end
|
89
|
-
stats[:min_log_time] = nil
|
90
|
-
stats[:max_log_time] = nil
|
91
|
-
stats[:min_matched_log_time] = nil
|
92
|
-
stats[:max_matched_log_time] = nil
|
93
|
-
|
94
|
-
tmp_file = '.download.alblogs.log'
|
95
|
-
File.unlink(tmp_file) if File.exists?(tmp_file)
|
96
|
-
File.unlink("#{tmp_file}.gz") if File.exists?("#{tmp_file}.gz")
|
97
|
-
|
98
|
-
$stop = false
|
99
|
-
trap("INT") { $stop = true }
|
100
|
-
|
101
73
|
s3_bucket = Alblogs::S3Bucket.new(options[:alb_s3_bucket], options[:aws_profile])
|
102
|
-
s3_bucket.get_s3_files_in_range(request_matcher.range).values.each do |s3_file|
|
103
|
-
stats[:files] += 1
|
104
|
-
|
105
|
-
stats[:total_download_time] += measure do
|
106
|
-
s3_bucket.download_s3_file(s3_file, tmp_file)
|
107
|
-
end
|
108
74
|
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
entry = Alblogs::Entry.from_line(line)
|
115
|
-
stats[:min_log_time] = ! stats[:min_log_time] || stats[:min_log_time] > entry.timestamp ? entry.timestamp : stats[:min_log_time]
|
116
|
-
stats[:max_log_time] = ! stats[:max_log_time] || stats[:max_log_time] < entry.timestamp ? entry.timestamp : stats[:max_log_time]
|
117
|
-
next unless request_matcher.match?(entry)
|
118
|
-
stats[:matching_lines] += 1
|
119
|
-
stats[:min_matched_log_time] = ! stats[:min_matched_log_time] || stats[:min_matched_log_time] > entry.timestamp ? entry.timestamp : stats[:min_matched_log_time]
|
120
|
-
stats[:max_matched_log_time] = ! stats[:max_matched_log_time] || stats[:max_matched_log_time] < entry.timestamp ? entry.timestamp : stats[:max_matched_log_time]
|
121
|
-
options[:log_file].puts line
|
122
|
-
end
|
123
|
-
end
|
124
|
-
end
|
75
|
+
iterator = Alblogs::Iterator.new(s3_bucket, request_matcher.range, request_matcher)
|
76
|
+
iterator.display_stats_proc = Proc.new do |stats|
|
77
|
+
stats[:elapsed_time] = Time.now.utc - stats[:started_at]
|
78
|
+
$stderr.puts stats.inspect
|
79
|
+
end
|
125
80
|
|
126
|
-
|
81
|
+
trap("INT") { iterator.stop! }
|
127
82
|
|
128
|
-
|
129
|
-
|
83
|
+
iterator.each do |entry|
|
84
|
+
options[:log_file].puts entry.line
|
130
85
|
end
|
131
86
|
|
132
87
|
options[:log_file].close
|
data/lib/alblogs.rb
CHANGED
data/lib/alblogs/iterator.rb
ADDED
@@ -0,0 +1,88 @@
|
|
1
|
+
module Alblogs
|
2
|
+
class Iterator
|
3
|
+
include Enumerable
|
4
|
+
|
5
|
+
attr_accessor :tmp_file,
|
6
|
+
:display_stats_proc
|
7
|
+
|
8
|
+
attr_reader :s3_bucket,
|
9
|
+
:date_range,
|
10
|
+
:request_matcher,
|
11
|
+
:stats
|
12
|
+
|
13
|
+
def initialize(s3_bucket, date_range, request_matcher = nil)
|
14
|
+
@s3_bucket = s3_bucket
|
15
|
+
@date_range = date_range
|
16
|
+
@request_matcher = request_matcher
|
17
|
+
@tmp_file = '.download.alblogs.log'
|
18
|
+
end
|
19
|
+
|
20
|
+
def each
|
21
|
+
@stop = false
|
22
|
+
delete_tmp_file
|
23
|
+
init_stats
|
24
|
+
|
25
|
+
s3_bucket.get_s3_files_in_range(date_range).values.each do |s3_file|
|
26
|
+
stats[:files] += 1
|
27
|
+
|
28
|
+
stats[:total_download_time] += measure do
|
29
|
+
s3_bucket.download_s3_file(s3_file, tmp_file)
|
30
|
+
end
|
31
|
+
|
32
|
+
stats[:total_file_processing_time] += measure do
|
33
|
+
File.open(tmp_file, 'rb') do |f|
|
34
|
+
while(! f.eof? && ! @stop)
|
35
|
+
stats[:lines] += 1
|
36
|
+
line = f.readline
|
37
|
+
entry = ::Alblogs::Entry.from_line(line)
|
38
|
+
stats[:min_log_time] = ! stats[:min_log_time] || stats[:min_log_time] > entry.timestamp ? entry.timestamp : stats[:min_log_time]
|
39
|
+
stats[:max_log_time] = ! stats[:max_log_time] || stats[:max_log_time] < entry.timestamp ? entry.timestamp : stats[:max_log_time]
|
40
|
+
next if request_matcher && !request_matcher.match?(entry)
|
41
|
+
stats[:matching_lines] += 1
|
42
|
+
stats[:min_matched_log_time] = ! stats[:min_matched_log_time] || stats[:min_matched_log_time] > entry.timestamp ? entry.timestamp : stats[:min_matched_log_time]
|
43
|
+
stats[:max_matched_log_time] = ! stats[:max_matched_log_time] || stats[:max_matched_log_time] < entry.timestamp ? entry.timestamp : stats[:max_matched_log_time]
|
44
|
+
yield entry
|
45
|
+
end
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
File.unlink(tmp_file)
|
50
|
+
|
51
|
+
display_stats
|
52
|
+
break if @stop
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
def stop!
|
57
|
+
@stop = true
|
58
|
+
end
|
59
|
+
|
60
|
+
def display_stats
|
61
|
+
display_stats_proc && display_stats_proc.call(stats)
|
62
|
+
end
|
63
|
+
|
64
|
+
private
|
65
|
+
|
66
|
+
def measure
|
67
|
+
start = Time.now
|
68
|
+
yield
|
69
|
+
Time.now - start
|
70
|
+
end
|
71
|
+
|
72
|
+
def delete_tmp_file
|
73
|
+
File.unlink(tmp_file) if File.exists?(tmp_file)
|
74
|
+
File.unlink("#{tmp_file}.gz") if File.exists?("#{tmp_file}.gz")
|
75
|
+
end
|
76
|
+
|
77
|
+
def init_stats
|
78
|
+
@stats = Hash.new(0)
|
79
|
+
@stats[:started_at] = Time.now.utc
|
80
|
+
@stats[:range_starts_at] = date_range.begin
|
81
|
+
@stats[:range_ends_at] = date_range.end
|
82
|
+
@stats[:min_log_time] = nil
|
83
|
+
@stats[:max_log_time] = nil
|
84
|
+
@stats[:min_matched_log_time] = nil
|
85
|
+
@stats[:max_matched_log_time] = nil
|
86
|
+
end
|
87
|
+
end
|
88
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: alblogs
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Doug Youch
|
@@ -25,6 +25,7 @@ files:
|
|
25
25
|
- bin/alblogs
|
26
26
|
- lib/alblogs.rb
|
27
27
|
- lib/alblogs/entry.rb
|
28
|
+
- lib/alblogs/iterator.rb
|
28
29
|
- lib/alblogs/request_matcher.rb
|
29
30
|
- lib/alblogs/s3_bucket.rb
|
30
31
|
- lib/alblogs/s3_file.rb
|