fluent-plugin-cloudfront-log 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 9d0e94873fb17f7ba12461603e56f157a1b826a5
4
- data.tar.gz: b53f30d6a03a6afc2d3f2e51635007bdc5457e24
3
+ metadata.gz: 4d3261d6109af600ee45feae079ad583fb586435
4
+ data.tar.gz: 2c237fc30d3ac111a3d06fb15271fc9d44268903
5
5
  SHA512:
6
- metadata.gz: ca690918de97d284715582b8a04a0661c13d00859bfe0b84458a2537c90f59d53306bc401e964a8017bb65f6d9266e43164cc99eae606a0a646f0eb2e7f8ebd3
7
- data.tar.gz: 6259a40d205cd1a359159e806e98d2e53d1b903b4f3ce5cd1bd158c3eea62b22239dc0c4bf09cbaa42b7f90c66fdcf9407f1202f11b4661c61105b2b1690c8d6
6
+ metadata.gz: 24cb6dbbb51e4f6a736eaf6b30adf12a3ffa48654828581c219bc5fabad49f7340c5029b578a389db471b5b079087d26e31e2f7aa982e76a5e4a2b9df69403f5
7
+ data.tar.gz: bfb0c00d90770537cc8b9510f9d85bc0474fb7fc6f481d8982989b4767080e7d6252585ac005c7c2212702621529858aba050032065779b61403a1b428c278e6
data/README.md CHANGED
@@ -35,8 +35,7 @@ This specifies what the log files will be named once they're processed. This de
35
35
  The region where your cloudfront logs are stored.
36
36
 
37
37
  #### interval
38
- This is the rate in seconds at which we check the bucket for updated logs. It's recommended not to put this lower than 300(The default), cloudfront delivers logs every 20~ minutes to s3, so shortening this interval won't deliver your logs faster.
39
-
38
+ This is the rate in seconds at which we check the bucket for updated logs. This defaults to 300.
40
39
  #### aws_sec_id
41
40
  The ID of your AWS keypair. Note: Since this plugin uses aws-sdk under the hood, you can leave these two AWS fields blank if you have an IAM role applied to your FluentD instance.
42
41
 
@@ -46,6 +45,9 @@ The secret key portion of your AWS keypair
46
45
  #### tag
47
46
  This is a FluentD builtin.
48
47
 
48
+ #### thread_num
49
+ The number of threads to create to concurrently process the S3 objects. Defaults to 4.
50
+
49
51
  #### delimiter
50
52
  You shouldn't have to specify a delimiter at all, but this option is provided and passed to the S3 client in case your log file names use an unusual delimiter. Defaults to `nil`.
51
53
 
@@ -4,7 +4,7 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
4
 
5
5
  Gem::Specification.new do |spec|
6
6
  spec.name = "fluent-plugin-cloudfront-log"
7
- spec.version = "0.0.3"
7
+ spec.version = "0.0.4"
8
8
  spec.authors = ["kubihie"]
9
9
  spec.email = ["kubihie@gmail.com"]
10
10
 
@@ -12,6 +12,7 @@ class Fluent::Cloudfront_LogInput < Fluent::Input
12
12
  config_param :interval, :integer, :default => 300
13
13
  config_param :delimiter, :string, :default => nil
14
14
  config_param :verbose, :string, :default => false
15
+ config_param :thread_num, :integer, :default => 4
15
16
 
16
17
  def initialize
17
18
  super
@@ -37,6 +38,7 @@ class Fluent::Cloudfront_LogInput < Fluent::Input
37
38
  log.info("@moved_log_bucket: #{@moved_log_bucket}")
38
39
  log.info("@log_prefix: #{@log_prefix}")
39
40
  log.info("@moved_log_prefix: #{@moved_log_prefix}")
41
+ log.info("@thread_num: #{@thread_num}")
40
42
  end
41
43
  end
42
44
 
@@ -46,7 +48,7 @@ class Fluent::Cloudfront_LogInput < Fluent::Input
46
48
  client
47
49
 
48
50
  @loop = Coolio::Loop.new
49
- timer = TimerWatcher.new(@interval, true, log, &method(:input))
51
+ timer = TimerWatcher.new(@interval, true, log, &method(:input))
50
52
 
51
53
  @loop.attach(timer)
52
54
  @thread = Thread.new(&method(:run))
@@ -80,7 +82,7 @@ class Fluent::Cloudfront_LogInput < Fluent::Input
80
82
  @version = line.sub(/^#Version:/i, '').strip
81
83
  when /^#Fields:.+/i then
82
84
  @fields = line.sub(/^#Fields:/i, '').strip.split("\s")
83
- end
85
+ end
84
86
  end
85
87
 
86
88
  def purge(filename)
@@ -94,37 +96,76 @@ class Fluent::Cloudfront_LogInput < Fluent::Input
94
96
  dest_object_full_path = [@moved_log_bucket, dest_object_key].join('/')
95
97
 
96
98
  log.info("Copying object: #{source_object_full_path} to #{dest_object_full_path}") if @verbose
97
- client.copy_object(:bucket => @moved_log_bucket, :copy_source => source_object_full_path, :key => dest_object_key)
99
+
100
+ begin
101
+ client.copy_object(:bucket => @moved_log_bucket, :copy_source => source_object_full_path, :key => dest_object_key)
102
+ rescue => e
103
+ log.warn("S3 Copy client error. #{e.message}")
104
+ return
105
+ end
106
+
98
107
 
99
108
  log.info("Deleting object: #{source_object_key} from #{@log_bucket}") if @verbose
100
- client.delete_object(:bucket => @log_bucket, :key => source_object_key)
109
+ begin
110
+ client.delete_object(:bucket => @log_bucket, :key => source_object_key)
111
+ rescue => e
112
+ log.warn("S3 Delete client error. #{e.message}")
113
+ return
114
+ end
115
+ end
116
+
117
+
118
+ def process_content(content)
119
+ filename = content.key.sub(/^#{@log_prefix}\//, "")
120
+ log.info("CloudFront Currently processing: #{filename}") if @verbose
121
+ return if filename[-1] == '/' #skip directory/
122
+ return unless filename[-2, 2] == 'gz' #skip without gz file
123
+
124
+ begin
125
+ access_log_gz = client.get_object(:bucket => @log_bucket, :key => content.key).body
126
+ access_log = Zlib::GzipReader.new(access_log_gz).read
127
+ rescue => e
128
+ log.warn("S3 GET client error. #{e.message}")
129
+ return
130
+ end
131
+
132
+ access_log.split("\n").each do |line|
133
+ if line[0.1] == '#'
134
+ parse_header(line)
135
+ next
136
+ end
137
+ line = URI.unescape(line) #hoge%2520fuga -> hoge%20fuga
138
+ line = URI.unescape(line) #hoge%20fuga -> hoge fuga
139
+ line = line.split("\t")
140
+ record = Hash[@fields.collect.zip(line)]
141
+ timestamp = Time.parse("#{record['date']}T#{record['time']}+00:00").to_i
142
+ router.emit(@tag, timestamp, record)
143
+ end
144
+ purge(filename)
101
145
  end
102
146
 
103
147
  def input
148
+ log.info("CloudFront Begining input going to list S3")
104
149
  client.list_objects(:bucket => @log_bucket, :prefix => @log_prefix , :delimiter => @delimiter).each do |list|
150
+ queue = Queue.new
151
+ threads = []
105
152
  list.contents.each do |content|
106
- filename = content.key.sub(/^#{@log_prefix}\//, "")
107
- log.info("Currently processing: #{filename}") if @verbose
108
- next if filename[-1] == '/' #skip directory/
109
- next unless filename[-2, 2] == 'gz' #skip without gz file
110
-
111
- access_log_gz = client.get_object(:bucket => @log_bucket, :key => content.key).body
112
- access_log = Zlib::GzipReader.new(access_log_gz).read
113
-
114
- access_log.split("\n").each do |line|
115
- if line[0.1] == '#'
116
- parse_header(line)
117
- next
153
+ queue << content
154
+ end
155
+ # BEGINS THREADS
156
+ @thread_num.times do
157
+ threads << Thread.new do
158
+ until queue.empty?
159
+ work_unit = queue.pop(true) rescue nil
160
+ if work_unit
161
+ process_content(work_unit)
162
+ end
163
+ end
118
164
  end
119
- line = URI.unescape(line) #hoge%2520fuga -> hoge%20fuga
120
- line = URI.unescape(line) #hoge%20fuga -> hoge fuga
121
- line = line.split("\t")
122
- record = Hash[@fields.collect.zip(line)]
123
- timestamp = Time.parse("#{record['date']}T#{record['time']}+00:00").to_i
124
- router.emit(@tag, timestamp, record)
125
165
  end
126
- purge(filename)
127
- end
166
+ log.debug("CloudFront Waiting for Threads to finish...")
167
+ threads.each { |t| t.join }
168
+ log.debug("CloudFront Finished")
128
169
  end
129
170
  end
130
171
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-cloudfront-log
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - kubihie
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-05-16 00:00:00.000000000 Z
11
+ date: 2016-09-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: fluentd
@@ -114,7 +114,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
114
114
  version: '0'
115
115
  requirements: []
116
116
  rubyforge_project:
117
- rubygems_version: 2.4.7
117
+ rubygems_version: 2.5.1
118
118
  signing_key:
119
119
  specification_version: 4
120
120
  summary: AWS CloudFront log input plugin.