fluent-plugin-cloudfront-log 0.0.3 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 9d0e94873fb17f7ba12461603e56f157a1b826a5
4
- data.tar.gz: b53f30d6a03a6afc2d3f2e51635007bdc5457e24
3
+ metadata.gz: 4d3261d6109af600ee45feae079ad583fb586435
4
+ data.tar.gz: 2c237fc30d3ac111a3d06fb15271fc9d44268903
5
5
  SHA512:
6
- metadata.gz: ca690918de97d284715582b8a04a0661c13d00859bfe0b84458a2537c90f59d53306bc401e964a8017bb65f6d9266e43164cc99eae606a0a646f0eb2e7f8ebd3
7
- data.tar.gz: 6259a40d205cd1a359159e806e98d2e53d1b903b4f3ce5cd1bd158c3eea62b22239dc0c4bf09cbaa42b7f90c66fdcf9407f1202f11b4661c61105b2b1690c8d6
6
+ metadata.gz: 24cb6dbbb51e4f6a736eaf6b30adf12a3ffa48654828581c219bc5fabad49f7340c5029b578a389db471b5b079087d26e31e2f7aa982e76a5e4a2b9df69403f5
7
+ data.tar.gz: bfb0c00d90770537cc8b9510f9d85bc0474fb7fc6f481d8982989b4767080e7d6252585ac005c7c2212702621529858aba050032065779b61403a1b428c278e6
data/README.md CHANGED
@@ -35,8 +35,7 @@ This specifies what the log files will be named once they're processed. This de
35
35
  The region where your cloudfront logs are stored.
36
36
 
37
37
  #### interval
38
- This is the rate in seconds at which we check the bucket for updated logs. It's recommended not to put this lower than 300(The default), cloudfront delivers logs every 20~ minutes to s3, so shortening this interval won't deliver your logs faster.
39
-
38
+ This is the rate in seconds at which we check the bucket for updated logs. This defaults to 300.
40
39
  #### aws_sec_id
41
40
  The ID of your AWS keypair. Note: Since this plugin uses aws-sdk under the hood, you can leave these two AWS fields blank if you have an IAM role applied to your FluentD instance.
42
41
 
@@ -46,6 +45,9 @@ The secret key portion of your AWS keypair
46
45
  #### tag
47
46
  This is a FluentD builtin.
48
47
 
48
+ #### thread_num
49
+ The number of threads to create to concurrently process the S3 objects. Defaults to 4.
50
+
49
51
  #### delimiter
50
52
  You shouldn't have to specify a delimiter at all, but this option is provided and passed to the S3 client in case your log file names use an unusual delimiter. Defaults to `nil`.
51
53
 
@@ -4,7 +4,7 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
4
 
5
5
  Gem::Specification.new do |spec|
6
6
  spec.name = "fluent-plugin-cloudfront-log"
7
- spec.version = "0.0.3"
7
+ spec.version = "0.0.4"
8
8
  spec.authors = ["kubihie"]
9
9
  spec.email = ["kubihie@gmail.com"]
10
10
 
@@ -12,6 +12,7 @@ class Fluent::Cloudfront_LogInput < Fluent::Input
12
12
  config_param :interval, :integer, :default => 300
13
13
  config_param :delimiter, :string, :default => nil
14
14
  config_param :verbose, :string, :default => false
15
+ config_param :thread_num, :integer, :default => 4
15
16
 
16
17
  def initialize
17
18
  super
@@ -37,6 +38,7 @@ class Fluent::Cloudfront_LogInput < Fluent::Input
37
38
  log.info("@moved_log_bucket: #{@moved_log_bucket}")
38
39
  log.info("@log_prefix: #{@log_prefix}")
39
40
  log.info("@moved_log_prefix: #{@moved_log_prefix}")
41
+ log.info("@thread_num: #{@thread_num}")
40
42
  end
41
43
  end
42
44
 
@@ -46,7 +48,7 @@ class Fluent::Cloudfront_LogInput < Fluent::Input
46
48
  client
47
49
 
48
50
  @loop = Coolio::Loop.new
49
- timer = TimerWatcher.new(@interval, true, log, &method(:input))
51
+ timer = TimerWatcher.new(@interval, true, log, &method(:input))
50
52
 
51
53
  @loop.attach(timer)
52
54
  @thread = Thread.new(&method(:run))
@@ -80,7 +82,7 @@ class Fluent::Cloudfront_LogInput < Fluent::Input
80
82
  @version = line.sub(/^#Version:/i, '').strip
81
83
  when /^#Fields:.+/i then
82
84
  @fields = line.sub(/^#Fields:/i, '').strip.split("\s")
83
- end
85
+ end
84
86
  end
85
87
 
86
88
  def purge(filename)
@@ -94,37 +96,76 @@ class Fluent::Cloudfront_LogInput < Fluent::Input
94
96
  dest_object_full_path = [@moved_log_bucket, dest_object_key].join('/')
95
97
 
96
98
  log.info("Copying object: #{source_object_full_path} to #{dest_object_full_path}") if @verbose
97
- client.copy_object(:bucket => @moved_log_bucket, :copy_source => source_object_full_path, :key => dest_object_key)
99
+
100
+ begin
101
+ client.copy_object(:bucket => @moved_log_bucket, :copy_source => source_object_full_path, :key => dest_object_key)
102
+ rescue => e
103
+ log.warn("S3 Copy client error. #{e.message}")
104
+ return
105
+ end
106
+
98
107
 
99
108
  log.info("Deleting object: #{source_object_key} from #{@log_bucket}") if @verbose
100
- client.delete_object(:bucket => @log_bucket, :key => source_object_key)
109
+ begin
110
+ client.delete_object(:bucket => @log_bucket, :key => source_object_key)
111
+ rescue => e
112
+ log.warn("S3 Delete client error. #{e.message}")
113
+ return
114
+ end
115
+ end
116
+
117
+
118
+ def process_content(content)
119
+ filename = content.key.sub(/^#{@log_prefix}\//, "")
120
+ log.info("CloudFront Currently processing: #{filename}") if @verbose
121
+ return if filename[-1] == '/' #skip directory/
122
+ return unless filename[-2, 2] == 'gz' #skip without gz file
123
+
124
+ begin
125
+ access_log_gz = client.get_object(:bucket => @log_bucket, :key => content.key).body
126
+ access_log = Zlib::GzipReader.new(access_log_gz).read
127
+ rescue => e
128
+ log.warn("S3 GET client error. #{e.message}")
129
+ return
130
+ end
131
+
132
+ access_log.split("\n").each do |line|
133
+ if line[0.1] == '#'
134
+ parse_header(line)
135
+ next
136
+ end
137
+ line = URI.unescape(line) #hoge%2520fuga -> hoge%20fuga
138
+ line = URI.unescape(line) #hoge%20fuga -> hoge fuga
139
+ line = line.split("\t")
140
+ record = Hash[@fields.collect.zip(line)]
141
+ timestamp = Time.parse("#{record['date']}T#{record['time']}+00:00").to_i
142
+ router.emit(@tag, timestamp, record)
143
+ end
144
+ purge(filename)
101
145
  end
102
146
 
103
147
  def input
148
+ log.info("CloudFront Begining input going to list S3")
104
149
  client.list_objects(:bucket => @log_bucket, :prefix => @log_prefix , :delimiter => @delimiter).each do |list|
150
+ queue = Queue.new
151
+ threads = []
105
152
  list.contents.each do |content|
106
- filename = content.key.sub(/^#{@log_prefix}\//, "")
107
- log.info("Currently processing: #{filename}") if @verbose
108
- next if filename[-1] == '/' #skip directory/
109
- next unless filename[-2, 2] == 'gz' #skip without gz file
110
-
111
- access_log_gz = client.get_object(:bucket => @log_bucket, :key => content.key).body
112
- access_log = Zlib::GzipReader.new(access_log_gz).read
113
-
114
- access_log.split("\n").each do |line|
115
- if line[0.1] == '#'
116
- parse_header(line)
117
- next
153
+ queue << content
154
+ end
155
+ # BEGINS THREADS
156
+ @thread_num.times do
157
+ threads << Thread.new do
158
+ until queue.empty?
159
+ work_unit = queue.pop(true) rescue nil
160
+ if work_unit
161
+ process_content(work_unit)
162
+ end
163
+ end
118
164
  end
119
- line = URI.unescape(line) #hoge%2520fuga -> hoge%20fuga
120
- line = URI.unescape(line) #hoge%20fuga -> hoge fuga
121
- line = line.split("\t")
122
- record = Hash[@fields.collect.zip(line)]
123
- timestamp = Time.parse("#{record['date']}T#{record['time']}+00:00").to_i
124
- router.emit(@tag, timestamp, record)
125
165
  end
126
- purge(filename)
127
- end
166
+ log.debug("CloudFront Waiting for Threads to finish...")
167
+ threads.each { |t| t.join }
168
+ log.debug("CloudFront Finished")
128
169
  end
129
170
  end
130
171
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-cloudfront-log
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - kubihie
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-05-16 00:00:00.000000000 Z
11
+ date: 2016-09-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: fluentd
@@ -114,7 +114,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
114
114
  version: '0'
115
115
  requirements: []
116
116
  rubyforge_project:
117
- rubygems_version: 2.4.7
117
+ rubygems_version: 2.5.1
118
118
  signing_key:
119
119
  specification_version: 4
120
120
  summary: AWS CloudFront log input plugin.