fluent-plugin-cloudfront-log 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +4 -2
- data/fluent-plugin-cloudfront-log.gemspec +1 -1
- data/lib/fluent/plugin/in_cloudfront_log.rb +65 -24
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4d3261d6109af600ee45feae079ad583fb586435
|
4
|
+
data.tar.gz: 2c237fc30d3ac111a3d06fb15271fc9d44268903
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 24cb6dbbb51e4f6a736eaf6b30adf12a3ffa48654828581c219bc5fabad49f7340c5029b578a389db471b5b079087d26e31e2f7aa982e76a5e4a2b9df69403f5
|
7
|
+
data.tar.gz: bfb0c00d90770537cc8b9510f9d85bc0474fb7fc6f481d8982989b4767080e7d6252585ac005c7c2212702621529858aba050032065779b61403a1b428c278e6
|
data/README.md
CHANGED
@@ -35,8 +35,7 @@ This specifices what the log files will be named once they're processed. This de
|
|
35
35
|
The region where your cloudfront logs are stored.
|
36
36
|
|
37
37
|
#### interval
|
38
|
-
This is the rate in seconds at which we check the bucket for updated logs.
|
39
|
-
|
38
|
+
This is the rate in seconds at which we check the bucket for updated logs. This defaults to 300.
|
40
39
|
#### aws_sec_id
|
41
40
|
The ID of your AWS keypair. Note: Since this plugin uses aws-sdk under the hood you can leave these two aws fields blank if you have an IAM role applied to your FluentD instance.
|
42
41
|
|
@@ -46,6 +45,9 @@ The secret key portion of your AWS keypair
|
|
46
45
|
#### tag
|
47
46
|
This is a FluentD builtin.
|
48
47
|
|
48
|
+
#### thread_num
|
49
|
+
The number of threads to create to concurrently process the S3 objects. Defaults to 4.
|
50
|
+
|
49
51
|
#### delimiter
|
50
52
|
You shouldn't have to specify delimiter at all but this option is provided and passed to the S3 client in the event that you have a weird delimiter in your log file names. Defaults to `nil`.
|
51
53
|
|
@@ -12,6 +12,7 @@ class Fluent::Cloudfront_LogInput < Fluent::Input
|
|
12
12
|
config_param :interval, :integer, :default => 300
|
13
13
|
config_param :delimiter, :string, :default => nil
|
14
14
|
config_param :verbose, :string, :default => false
|
15
|
+
config_param :thread_num, :integer, :default => 4
|
15
16
|
|
16
17
|
def initialize
|
17
18
|
super
|
@@ -37,6 +38,7 @@ class Fluent::Cloudfront_LogInput < Fluent::Input
|
|
37
38
|
log.info("@moved_log_bucket: #{@moved_log_bucket}")
|
38
39
|
log.info("@log_prefix: #{@log_prefix}")
|
39
40
|
log.info("@moved_log_prefix: #{@moved_log_prefix}")
|
41
|
+
log.info("@thread_num: #{@thread_num}")
|
40
42
|
end
|
41
43
|
end
|
42
44
|
|
@@ -46,7 +48,7 @@ class Fluent::Cloudfront_LogInput < Fluent::Input
|
|
46
48
|
client
|
47
49
|
|
48
50
|
@loop = Coolio::Loop.new
|
49
|
-
timer = TimerWatcher.new(@interval, true, log, &method(:input))
|
51
|
+
timer = TimerWatcher.new(@interval, true, log, &method(:input))
|
50
52
|
|
51
53
|
@loop.attach(timer)
|
52
54
|
@thread = Thread.new(&method(:run))
|
@@ -80,7 +82,7 @@ class Fluent::Cloudfront_LogInput < Fluent::Input
|
|
80
82
|
@version = line.sub(/^#Version:/i, '').strip
|
81
83
|
when /^#Fields:.+/i then
|
82
84
|
@fields = line.sub(/^#Fields:/i, '').strip.split("\s")
|
83
|
-
end
|
85
|
+
end
|
84
86
|
end
|
85
87
|
|
86
88
|
def purge(filename)
|
@@ -94,37 +96,76 @@ class Fluent::Cloudfront_LogInput < Fluent::Input
|
|
94
96
|
dest_object_full_path = [@moved_log_bucket, dest_object_key].join('/')
|
95
97
|
|
96
98
|
log.info("Copying object: #{source_object_full_path} to #{dest_object_full_path}") if @verbose
|
97
|
-
|
99
|
+
|
100
|
+
begin
|
101
|
+
client.copy_object(:bucket => @moved_log_bucket, :copy_source => source_object_full_path, :key => dest_object_key)
|
102
|
+
rescue => e
|
103
|
+
log.warn("S3 Copy client error. #{e.message}")
|
104
|
+
return
|
105
|
+
end
|
106
|
+
|
98
107
|
|
99
108
|
log.info("Deleting object: #{source_object_key} from #{@log_bucket}") if @verbose
|
100
|
-
|
109
|
+
begin
|
110
|
+
client.delete_object(:bucket => @log_bucket, :key => source_object_key)
|
111
|
+
rescue => e
|
112
|
+
log.warn("S3 Delete client error. #{e.message}")
|
113
|
+
return
|
114
|
+
end
|
115
|
+
end
|
116
|
+
|
117
|
+
|
118
|
+
def process_content(content)
|
119
|
+
filename = content.key.sub(/^#{@log_prefix}\//, "")
|
120
|
+
log.info("CloudFront Currently processing: #{filename}") if @verbose
|
121
|
+
return if filename[-1] == '/' #skip directory/
|
122
|
+
return unless filename[-2, 2] == 'gz' #skip without gz file
|
123
|
+
|
124
|
+
begin
|
125
|
+
access_log_gz = client.get_object(:bucket => @log_bucket, :key => content.key).body
|
126
|
+
access_log = Zlib::GzipReader.new(access_log_gz).read
|
127
|
+
rescue => e
|
128
|
+
log.warn("S3 GET client error. #{e.message}")
|
129
|
+
return
|
130
|
+
end
|
131
|
+
|
132
|
+
access_log.split("\n").each do |line|
|
133
|
+
if line[0.1] == '#'
|
134
|
+
parse_header(line)
|
135
|
+
next
|
136
|
+
end
|
137
|
+
line = URI.unescape(line) #hoge%2520fuga -> hoge%20fuga
|
138
|
+
line = URI.unescape(line) #hoge%20fuga -> hoge fuga
|
139
|
+
line = line.split("\t")
|
140
|
+
record = Hash[@fields.collect.zip(line)]
|
141
|
+
timestamp = Time.parse("#{record['date']}T#{record['time']}+00:00").to_i
|
142
|
+
router.emit(@tag, timestamp, record)
|
143
|
+
end
|
144
|
+
purge(filename)
|
101
145
|
end
|
102
146
|
|
103
147
|
def input
|
148
|
+
log.info("CloudFront Begining input going to list S3")
|
104
149
|
client.list_objects(:bucket => @log_bucket, :prefix => @log_prefix , :delimiter => @delimiter).each do |list|
|
150
|
+
queue = Queue.new
|
151
|
+
threads = []
|
105
152
|
list.contents.each do |content|
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
next
|
153
|
+
queue << content
|
154
|
+
end
|
155
|
+
# BEGINS THREADS
|
156
|
+
@thread_num.times do
|
157
|
+
threads << Thread.new do
|
158
|
+
until queue.empty?
|
159
|
+
work_unit = queue.pop(true) rescue nil
|
160
|
+
if work_unit
|
161
|
+
process_content(work_unit)
|
162
|
+
end
|
163
|
+
end
|
118
164
|
end
|
119
|
-
line = URI.unescape(line) #hoge%2520fuga -> hoge%20fuga
|
120
|
-
line = URI.unescape(line) #hoge%20fuga -> hoge fuga
|
121
|
-
line = line.split("\t")
|
122
|
-
record = Hash[@fields.collect.zip(line)]
|
123
|
-
timestamp = Time.parse("#{record['date']}T#{record['time']}+00:00").to_i
|
124
|
-
router.emit(@tag, timestamp, record)
|
125
165
|
end
|
126
|
-
|
127
|
-
|
166
|
+
log.debug("CloudFront Waiting for Threads to finish...")
|
167
|
+
threads.each { |t| t.join }
|
168
|
+
log.debug("CloudFront Finished")
|
128
169
|
end
|
129
170
|
end
|
130
171
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-cloudfront-log
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.3
|
4
|
+
version: 0.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- kubihie
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-09-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: fluentd
|
@@ -114,7 +114,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
114
114
|
version: '0'
|
115
115
|
requirements: []
|
116
116
|
rubyforge_project:
|
117
|
-
rubygems_version: 2.
|
117
|
+
rubygems_version: 2.5.1
|
118
118
|
signing_key:
|
119
119
|
specification_version: 4
|
120
120
|
summary: AWS CloudFront log input plugin.
|