fluent-plugin-cloudfront-log 0.0.3 → 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +4 -2
- data/fluent-plugin-cloudfront-log.gemspec +1 -1
- data/lib/fluent/plugin/in_cloudfront_log.rb +65 -24
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4d3261d6109af600ee45feae079ad583fb586435
|
4
|
+
data.tar.gz: 2c237fc30d3ac111a3d06fb15271fc9d44268903
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 24cb6dbbb51e4f6a736eaf6b30adf12a3ffa48654828581c219bc5fabad49f7340c5029b578a389db471b5b079087d26e31e2f7aa982e76a5e4a2b9df69403f5
|
7
|
+
data.tar.gz: bfb0c00d90770537cc8b9510f9d85bc0474fb7fc6f481d8982989b4767080e7d6252585ac005c7c2212702621529858aba050032065779b61403a1b428c278e6
|
data/README.md
CHANGED
@@ -35,8 +35,7 @@ This specifies what the log files will be named once they're processed. This de
|
|
35
35
|
The region where your cloudfront logs are stored.
|
36
36
|
|
37
37
|
#### interval
|
38
|
-
This is the rate in seconds at which we check the bucket for updated logs.
|
39
|
-
|
38
|
+
This is the rate in seconds at which we check the bucket for updated logs. This defaults to 300.
|
40
39
|
#### aws_sec_id
|
41
40
|
The ID of your AWS keypair. Note: Since this plugin uses aws-sdk under the hood, you can leave these two AWS fields blank if you have an IAM role applied to your FluentD instance.
|
42
41
|
|
@@ -46,6 +45,9 @@ The secret key portion of your AWS keypair
|
|
46
45
|
#### tag
|
47
46
|
This is a FluentD builtin.
|
48
47
|
|
48
|
+
#### thread_num
|
49
|
+
The number of threads to create to concurrently process the S3 objects. Defaults to 4.
|
50
|
+
|
49
51
|
#### delimiter
|
50
52
|
You shouldn't have to specify delimiter at all but this option is provided and passed to the S3 client in the event that you have a weird delimiter in your log file names. Defaults to `nil`.
|
51
53
|
|
@@ -12,6 +12,7 @@ class Fluent::Cloudfront_LogInput < Fluent::Input
|
|
12
12
|
config_param :interval, :integer, :default => 300
|
13
13
|
config_param :delimiter, :string, :default => nil
|
14
14
|
config_param :verbose, :string, :default => false
|
15
|
+
config_param :thread_num, :integer, :default => 4
|
15
16
|
|
16
17
|
def initialize
|
17
18
|
super
|
@@ -37,6 +38,7 @@ class Fluent::Cloudfront_LogInput < Fluent::Input
|
|
37
38
|
log.info("@moved_log_bucket: #{@moved_log_bucket}")
|
38
39
|
log.info("@log_prefix: #{@log_prefix}")
|
39
40
|
log.info("@moved_log_prefix: #{@moved_log_prefix}")
|
41
|
+
log.info("@thread_num: #{@thread_num}")
|
40
42
|
end
|
41
43
|
end
|
42
44
|
|
@@ -46,7 +48,7 @@ class Fluent::Cloudfront_LogInput < Fluent::Input
|
|
46
48
|
client
|
47
49
|
|
48
50
|
@loop = Coolio::Loop.new
|
49
|
-
timer = TimerWatcher.new(@interval, true, log, &method(:input))
|
51
|
+
timer = TimerWatcher.new(@interval, true, log, &method(:input))
|
50
52
|
|
51
53
|
@loop.attach(timer)
|
52
54
|
@thread = Thread.new(&method(:run))
|
@@ -80,7 +82,7 @@ class Fluent::Cloudfront_LogInput < Fluent::Input
|
|
80
82
|
@version = line.sub(/^#Version:/i, '').strip
|
81
83
|
when /^#Fields:.+/i then
|
82
84
|
@fields = line.sub(/^#Fields:/i, '').strip.split("\s")
|
83
|
-
end
|
85
|
+
end
|
84
86
|
end
|
85
87
|
|
86
88
|
def purge(filename)
|
@@ -94,37 +96,76 @@ class Fluent::Cloudfront_LogInput < Fluent::Input
|
|
94
96
|
dest_object_full_path = [@moved_log_bucket, dest_object_key].join('/')
|
95
97
|
|
96
98
|
log.info("Copying object: #{source_object_full_path} to #{dest_object_full_path}") if @verbose
|
97
|
-
|
99
|
+
|
100
|
+
begin
|
101
|
+
client.copy_object(:bucket => @moved_log_bucket, :copy_source => source_object_full_path, :key => dest_object_key)
|
102
|
+
rescue => e
|
103
|
+
log.warn("S3 Copy client error. #{e.message}")
|
104
|
+
return
|
105
|
+
end
|
106
|
+
|
98
107
|
|
99
108
|
log.info("Deleting object: #{source_object_key} from #{@log_bucket}") if @verbose
|
100
|
-
|
109
|
+
begin
|
110
|
+
client.delete_object(:bucket => @log_bucket, :key => source_object_key)
|
111
|
+
rescue => e
|
112
|
+
log.warn("S3 Delete client error. #{e.message}")
|
113
|
+
return
|
114
|
+
end
|
115
|
+
end
|
116
|
+
|
117
|
+
|
118
|
+
def process_content(content)
|
119
|
+
filename = content.key.sub(/^#{@log_prefix}\//, "")
|
120
|
+
log.info("CloudFront Currently processing: #{filename}") if @verbose
|
121
|
+
return if filename[-1] == '/' #skip directory/
|
122
|
+
return unless filename[-2, 2] == 'gz' #skip without gz file
|
123
|
+
|
124
|
+
begin
|
125
|
+
access_log_gz = client.get_object(:bucket => @log_bucket, :key => content.key).body
|
126
|
+
access_log = Zlib::GzipReader.new(access_log_gz).read
|
127
|
+
rescue => e
|
128
|
+
log.warn("S3 GET client error. #{e.message}")
|
129
|
+
return
|
130
|
+
end
|
131
|
+
|
132
|
+
access_log.split("\n").each do |line|
|
133
|
+
if line[0.1] == '#'
|
134
|
+
parse_header(line)
|
135
|
+
next
|
136
|
+
end
|
137
|
+
line = URI.unescape(line) #hoge%2520fuga -> hoge%20fuga
|
138
|
+
line = URI.unescape(line) #hoge%20fuga -> hoge fuga
|
139
|
+
line = line.split("\t")
|
140
|
+
record = Hash[@fields.collect.zip(line)]
|
141
|
+
timestamp = Time.parse("#{record['date']}T#{record['time']}+00:00").to_i
|
142
|
+
router.emit(@tag, timestamp, record)
|
143
|
+
end
|
144
|
+
purge(filename)
|
101
145
|
end
|
102
146
|
|
103
147
|
def input
|
148
|
+
log.info("CloudFront Begining input going to list S3")
|
104
149
|
client.list_objects(:bucket => @log_bucket, :prefix => @log_prefix , :delimiter => @delimiter).each do |list|
|
150
|
+
queue = Queue.new
|
151
|
+
threads = []
|
105
152
|
list.contents.each do |content|
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
next
|
153
|
+
queue << content
|
154
|
+
end
|
155
|
+
# BEGINS THREADS
|
156
|
+
@thread_num.times do
|
157
|
+
threads << Thread.new do
|
158
|
+
until queue.empty?
|
159
|
+
work_unit = queue.pop(true) rescue nil
|
160
|
+
if work_unit
|
161
|
+
process_content(work_unit)
|
162
|
+
end
|
163
|
+
end
|
118
164
|
end
|
119
|
-
line = URI.unescape(line) #hoge%2520fuga -> hoge%20fuga
|
120
|
-
line = URI.unescape(line) #hoge%20fuga -> hoge fuga
|
121
|
-
line = line.split("\t")
|
122
|
-
record = Hash[@fields.collect.zip(line)]
|
123
|
-
timestamp = Time.parse("#{record['date']}T#{record['time']}+00:00").to_i
|
124
|
-
router.emit(@tag, timestamp, record)
|
125
165
|
end
|
126
|
-
|
127
|
-
|
166
|
+
log.debug("CloudFront Waiting for Threads to finish...")
|
167
|
+
threads.each { |t| t.join }
|
168
|
+
log.debug("CloudFront Finished")
|
128
169
|
end
|
129
170
|
end
|
130
171
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-cloudfront-log
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.3
|
4
|
+
version: 0.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- kubihie
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-09-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: fluentd
|
@@ -114,7 +114,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
114
114
|
version: '0'
|
115
115
|
requirements: []
|
116
116
|
rubyforge_project:
|
117
|
-
rubygems_version: 2.
|
117
|
+
rubygems_version: 2.5.1
|
118
118
|
signing_key:
|
119
119
|
specification_version: 4
|
120
120
|
summary: AWS CloudFront log input plugin.
|