logstash-input-s3 0.1.0

This diff shows the content of a publicly available package version as released to one of the supported registries. It is provided for informational purposes only and reflects the package as it appears in that public registry; since 0.1.0 is the first release of this gem, every file is shown as ADDED.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
+ ---
+ !binary "U0hBMQ==":
+   metadata.gz: !binary |-
+     NzkxNDE2MTI2NWM5YzVjNGNlNzU5ZjI3OTMxM2U2OWVkZjQ1ZDJmOQ==
+   data.tar.gz: !binary |-
+     YzlhZWI4NGMyZTY1N2E0Njk5M2Y3NjI1YTIwNzYyNWJhM2QzZDI2ZQ==
+ SHA512:
+   metadata.gz: !binary |-
+     ZDkzNTVhYmE2YTg4ZWYyZTNiMjQ5M2RhMjRiNmZiNzE0ZDFhZjIxMTIwNDM0
+     NmFhMzMyZmY2MTkwZWM3Y2ZlZDhiYzQ1NThhNzQ2ODgyYThjNzFiNDhkMmE3
+     Zjc5N2U2ODlkYmYwM2RiZmQ4Y2I0OGVmZTc0YTdhN2IzMGI3YmU=
+   data.tar.gz: !binary |-
+     NjI1NmFiOTY1ZWNiNGZiZTQ4YjE4ZjFiNGY3ZjU3MGU1NDgwYTQyODI0YTEx
+     MGNkMjA5MGNmNTdmNTJlNDAyNjlhODc4ZDkzYWFkNWM4MzU3ZTJmZjZlZDdk
+     Y2I5ODIxM2RiYmEwNTA4ZmU5ZGQyN2YxZjMzNjc4NzEyYWM3NmM=
data/.gitignore ADDED
@@ -0,0 +1,4 @@
+ *.gem
+ Gemfile.lock
+ .bundle
+ vendor
data/Gemfile ADDED
@@ -0,0 +1,4 @@
+ source 'http://rubygems.org'
+ gem 'rake'
+ gem 'gem_publisher'
+ gem 'archive-tar-minitar'
data/LICENSE ADDED
@@ -0,0 +1,13 @@
+ Copyright (c) 2012-2014 Elasticsearch <http://www.elasticsearch.org>
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
data/Rakefile ADDED
@@ -0,0 +1,6 @@
+ @files=[]
+
+ task :default do
+   system("rake -T")
+ end
+
data/lib/logstash/inputs/s3.rb ADDED
@@ -0,0 +1,278 @@
+ # encoding: utf-8
+ require "logstash/inputs/base"
+ require "logstash/namespace"
+
+ require "time"
+ require "tmpdir"
+
+ # Stream events from files from a S3 bucket.
+ #
+ # Each line from each file generates an event.
+ # Files ending in '.gz' are handled as gzip'ed files.
+ class LogStash::Inputs::S3 < LogStash::Inputs::Base
+   config_name "s3"
+   milestone 1
+
+   # TODO(sissel): refactor to use 'line' codec (requires removing both gzip
+   # support and readline usage). Support gzip through a gzip codec! ;)
+   default :codec, "plain"
+
+   # The credentials of the AWS account used to access the bucket.
+   # Credentials can be specified:
+   # - As an ["id","secret"] array
+   # - As a path to a file containing AWS_ACCESS_KEY_ID=... and AWS_SECRET_ACCESS_KEY=...
+   # - In the environment, if not set (using variables AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY)
+   config :credentials, :validate => :array, :default => []
+
+   # The name of the S3 bucket.
+   config :bucket, :validate => :string, :required => true
+
+   # The AWS region for your bucket.
+   config :region, :validate => ["us-east-1", "us-west-1", "us-west-2",
+                                 "eu-west-1", "ap-southeast-1", "ap-southeast-2",
+                                 "ap-northeast-1", "sa-east-1", "us-gov-west-1"],
+                   :deprecated => "'region' has been deprecated in favor of 'region_endpoint'"
+
+   # The AWS region for your bucket.
+   config :region_endpoint, :validate => ["us-east-1", "us-west-1", "us-west-2",
+                                 "eu-west-1", "ap-southeast-1", "ap-southeast-2",
+                                 "ap-northeast-1", "sa-east-1", "us-gov-west-1"], :default => "us-east-1"
+
+   # If specified, the prefix the filenames in the bucket must match (not a regexp)
+   config :prefix, :validate => :string, :default => nil
+
+   # Where to write the since database (keeps track of the date
+   # the last handled file was added to S3). The default will write
+   # sincedb files to some path matching "$HOME/.sincedb*"
+   config :sincedb_path, :validate => :string, :default => nil
+
+   # Name of a S3 bucket to backup processed files to.
+   config :backup_to_bucket, :validate => :string, :default => nil
+
+   # Path of a local directory to backup processed files to.
+   config :backup_to_dir, :validate => :string, :default => nil
+
+   # Whether to delete processed files from the original bucket.
+   config :delete, :validate => :boolean, :default => false
+
+   # Interval to wait before checking the file list again after a run is finished.
+   # Value is in seconds.
+   config :interval, :validate => :number, :default => 60
+
+   public
+   def register
+     require "digest/md5"
+     require "aws-sdk"
+
+     @region_endpoint = @region if @region && !@region.empty?
+
+     @logger.info("Registering s3 input", :bucket => @bucket, :region_endpoint => @region_endpoint)
+
+     if @credentials.length == 0
+       @access_key_id = ENV['AWS_ACCESS_KEY_ID']
+       @secret_access_key = ENV['AWS_SECRET_ACCESS_KEY']
+     elsif @credentials.length == 1
+       File.open(@credentials[0]) { |f| f.each do |line|
+         unless (/^\#/.match(line))
+           if(/\s*=\s*/.match(line))
+             param, value = line.split('=', 2)
+             param = param.chomp().strip()
+             value = value.chomp().strip()
+             if param.eql?('AWS_ACCESS_KEY_ID')
+               @access_key_id = value
+             elsif param.eql?('AWS_SECRET_ACCESS_KEY')
+               @secret_access_key = value
+             end
+           end
+         end
+       end
+       }
+     elsif @credentials.length == 2
+       @access_key_id = @credentials[0]
+       @secret_access_key = @credentials[1]
+     else
+       raise ArgumentError.new('Credentials must be of the form "/path/to/file" or ["id", "secret"]')
+     end
+
+     if @access_key_id.nil? or @secret_access_key.nil?
+       raise ArgumentError.new('Missing AWS credentials')
+     end
+
+     if @bucket.nil?
+       raise ArgumentError.new('Missing AWS bucket')
+     end
+
+     if @sincedb_path.nil?
+       if ENV['HOME'].nil?
+         raise ArgumentError.new('No HOME or sincedb_path set')
+       end
+       @sincedb_path = File.join(ENV["HOME"], ".sincedb_" + Digest::MD5.hexdigest("#{@bucket}+#{@prefix}"))
+     end
+
+     s3 = AWS::S3.new(
+       :access_key_id => @access_key_id,
+       :secret_access_key => @secret_access_key,
+       :region => @region_endpoint
+     )
+
+     @s3bucket = s3.buckets[@bucket]
+
+     unless @backup_to_bucket.nil?
+       @backup_bucket = s3.buckets[@backup_to_bucket]
+       unless @backup_bucket.exists?
+         s3.buckets.create(@backup_to_bucket)
+       end
+     end
+
+     unless @backup_to_dir.nil?
+       Dir.mkdir(@backup_to_dir, 0700) unless File.exists?(@backup_to_dir)
+     end
+
+   end # def register
+
+   public
+   def run(queue)
+     loop do
+       process_new(queue)
+       sleep(@interval)
+     end
+     finished
+   end # def run
+
+   private
+   def process_new(queue, since=nil)
+
+     if since.nil?
+       since = sincedb_read()
+     end
+
+     objects = list_new(since)
+     objects.each do |k|
+       @logger.debug("S3 input processing", :bucket => @bucket, :key => k)
+       lastmod = @s3bucket.objects[k].last_modified
+       process_log(queue, k)
+       sincedb_write(lastmod)
+     end
+
+   end # def process_new
+
+   private
+   def list_new(since=nil)
+
+     if since.nil?
+       since = Time.new(0)
+     end
+
+     objects = {}
+     @s3bucket.objects.with_prefix(@prefix).each do |log|
+       if log.last_modified > since
+         objects[log.key] = log.last_modified
+       end
+     end
+
+     return sorted_objects = objects.keys.sort {|a,b| objects[a] <=> objects[b]}
+
+   end # def list_new
+
+   private
+   def process_log(queue, key)
+
+     object = @s3bucket.objects[key]
+     tmp = Dir.mktmpdir("logstash-")
+     begin
+       filename = File.join(tmp, File.basename(key))
+       File.open(filename, 'wb') do |s3file|
+         object.read do |chunk|
+           s3file.write(chunk)
+         end
+       end
+       process_local_log(queue, filename)
+       unless @backup_to_bucket.nil?
+         backup_object = @backup_bucket.objects[key]
+         backup_object.write(Pathname.new(filename))
+       end
+       unless @backup_to_dir.nil?
+         FileUtils.cp(filename, @backup_to_dir)
+       end
+       if @delete
+         object.delete()
+       end
+     end
+     FileUtils.remove_entry_secure(tmp, force=true)
+
+   end # def process_log
+
+   private
+   def process_local_log(queue, filename)
+
+     metadata = {
+       :version => nil,
+       :format => nil,
+     }
+     File.open(filename) do |file|
+       if filename.end_with?('.gz')
+         gz = Zlib::GzipReader.new(file)
+         gz.each_line do |line|
+           metadata = process_line(queue, metadata, line)
+         end
+       else
+         file.each do |line|
+           metadata = process_line(queue, metadata, line)
+         end
+       end
+     end
+
+   end # def process_local_log
+
+   private
+   def process_line(queue, metadata, line)
+
+     if /#Version: .+/.match(line)
+       junk, version = line.strip().split(/#Version: (.+)/)
+       unless version.nil?
+         metadata[:version] = version
+       end
+     elsif /#Fields: .+/.match(line)
+       junk, format = line.strip().split(/#Fields: (.+)/)
+       unless format.nil?
+         metadata[:format] = format
+       end
+     else
+       @codec.decode(line) do |event|
+         decorate(event)
+         unless metadata[:version].nil?
+           event["cloudfront_version"] = metadata[:version]
+         end
+         unless metadata[:format].nil?
+           event["cloudfront_fields"] = metadata[:format]
+         end
+         queue << event
+       end
+     end
+     return metadata
+
+   end # def process_line
+
+   private
+   def sincedb_read()
+
+     if File.exists?(@sincedb_path)
+       since = Time.parse(File.read(@sincedb_path).chomp.strip)
+     else
+       since = Time.new(0)
+     end
+     return since
+
+   end # def sincedb_read
+
+   private
+   def sincedb_write(since=nil)
+
+     if since.nil?
+       since = Time.now()
+     end
+     File.open(@sincedb_path, 'w') { |file| file.write(since.to_s) }
+
+   end # def sincedb_write
+
+ end # class LogStash::Inputs::S3
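
Note (editorial, not part of the packaged files): the file above declares the plugin's options but ships no usage example. A minimal sketch of exercising it from plain Ruby follows; every value is a placeholder, and constructing the plugin from a params hash and then calling register/run assumes the standard Logstash 1.4.x plugin lifecycle rather than anything specific to this package.

    # Hypothetical usage sketch -- all names and values below are placeholders.
    require "thread"
    require "logstash/inputs/s3"

    input = LogStash::Inputs::S3.new(
      "bucket"          => "my-log-bucket",                    # required
      "credentials"     => ["/etc/logstash/aws.credentials"],  # a file of KEY=value pairs, or ["id", "secret"]
      "region_endpoint" => "us-east-1",
      "prefix"          => "cloudfront/",                      # plain string prefix, not a regexp
      "backup_to_dir"   => "/tmp/s3-processed",                # keep a local copy of each processed file
      "interval"        => 300                                 # seconds between bucket listings
    )

    input.register            # resolves credentials, builds the AWS::S3 client, derives the sincedb path
    queue = Queue.new         # stand-in for the pipeline queue that run() pushes events onto
    # input.run(queue)        # would loop forever: list new objects, download, decode each line into an event
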
data/logstash-input-s3.gemspec ADDED
@@ -0,0 +1,29 @@
+ Gem::Specification.new do |s|
+
+   s.name = 'logstash-input-s3'
+   s.version = '0.1.0'
+   s.licenses = ['Apache License (2.0)']
+   s.summary = "Stream events from files from a S3 bucket."
+   s.description = "Stream events from files from a S3 bucket."
+   s.authors = ["Elasticsearch"]
+   s.email = 'richard.pijnenburg@elasticsearch.com'
+   s.homepage = "http://logstash.net/"
+   s.require_paths = ["lib"]
+
+   # Files
+   s.files = `git ls-files`.split($\)+::Dir.glob('vendor/*')
+
+   # Tests
+   s.test_files = s.files.grep(%r{^(test|spec|features)/})
+
+   # Special flag to let us know this is actually a logstash plugin
+   s.metadata = { "logstash_plugin" => "true", "group" => "input" }
+
+   # Gem dependencies
+   s.add_runtime_dependency 'logstash', '>= 1.4.0', '< 2.0.0'
+
+   s.add_runtime_dependency 'logstash-codec-plain'
+   s.add_runtime_dependency 'aws-sdk'
+
+ end
+
data/rakelib/publish.rake ADDED
@@ -0,0 +1,9 @@
+ require "gem_publisher"
+
+ desc "Publish gem to RubyGems.org"
+ task :publish_gem do |t|
+   gem_file = Dir.glob(File.expand_path('../*.gemspec',File.dirname(__FILE__))).first
+   gem = GemPublisher.publish_if_updated(gem_file, :rubygems)
+   puts "Published #{gem}" if gem
+ end
+
data/rakelib/vendor.rake ADDED
@@ -0,0 +1,169 @@
+ require "net/http"
+ require "uri"
+ require "digest/sha1"
+
+ def vendor(*args)
+   return File.join("vendor", *args)
+ end
+
+ directory "vendor/" => ["vendor"] do |task, args|
+   mkdir task.name
+ end
+
+ def fetch(url, sha1, output)
+
+   puts "Downloading #{url}"
+   actual_sha1 = download(url, output)
+
+   if actual_sha1 != sha1
+     fail "SHA1 does not match (expected '#{sha1}' but got '#{actual_sha1}')"
+   end
+ end # def fetch
+
+ def file_fetch(url, sha1)
+   filename = File.basename( URI(url).path )
+   output = "vendor/#{filename}"
+   task output => [ "vendor/" ] do
+     begin
+       actual_sha1 = file_sha1(output)
+       if actual_sha1 != sha1
+         fetch(url, sha1, output)
+       end
+     rescue Errno::ENOENT
+       fetch(url, sha1, output)
+     end
+   end.invoke
+
+   return output
+ end
+
+ def file_sha1(path)
+   digest = Digest::SHA1.new
+   fd = File.new(path, "r")
+   while true
+     begin
+       digest << fd.sysread(16384)
+     rescue EOFError
+       break
+     end
+   end
+   return digest.hexdigest
+ ensure
+   fd.close if fd
+ end
+
+ def download(url, output)
+   uri = URI(url)
+   digest = Digest::SHA1.new
+   tmp = "#{output}.tmp"
+   Net::HTTP.start(uri.host, uri.port, :use_ssl => (uri.scheme == "https")) do |http|
+     request = Net::HTTP::Get.new(uri.path)
+     http.request(request) do |response|
+       fail "HTTP fetch failed for #{url}. #{response}" if [200, 301].include?(response.code)
+       size = (response["content-length"].to_i || -1).to_f
+       count = 0
+       File.open(tmp, "w") do |fd|
+         response.read_body do |chunk|
+           fd.write(chunk)
+           digest << chunk
+           if size > 0 && $stdout.tty?
+             count += chunk.bytesize
+             $stdout.write(sprintf("\r%0.2f%%", count/size * 100))
+           end
+         end
+       end
+       $stdout.write("\r \r") if $stdout.tty?
+     end
+   end
+
+   File.rename(tmp, output)
+
+   return digest.hexdigest
+ rescue SocketError => e
+   puts "Failure while downloading #{url}: #{e}"
+   raise
+ ensure
+   File.unlink(tmp) if File.exist?(tmp)
+ end # def download
+
+ def untar(tarball, &block)
+   require "archive/tar/minitar"
+   tgz = Zlib::GzipReader.new(File.open(tarball))
+   # Pull out typesdb
+   tar = Archive::Tar::Minitar::Input.open(tgz)
+   tar.each do |entry|
+     path = block.call(entry)
+     next if path.nil?
+     parent = File.dirname(path)
+
+     mkdir_p parent unless File.directory?(parent)
+
+     # Skip this file if the output file is the same size
+     if entry.directory?
+       mkdir path unless File.directory?(path)
+     else
+       entry_mode = entry.instance_eval { @mode } & 0777
+       if File.exists?(path)
+         stat = File.stat(path)
+         # TODO(sissel): Submit a patch to archive-tar-minitar upstream to
+         # expose headers in the entry.
+         entry_size = entry.instance_eval { @size }
+         # If file sizes are same, skip writing.
+         next if stat.size == entry_size && (stat.mode & 0777) == entry_mode
+       end
+       puts "Extracting #{entry.full_name} from #{tarball} #{entry_mode.to_s(8)}"
+       File.open(path, "w") do |fd|
+         # eof? check lets us skip empty files. Necessary because the API provided by
+         # Archive::Tar::Minitar::Reader::EntryStream only mostly acts like an
+         # IO object. Something about empty files in this EntryStream causes
+         # IO.copy_stream to throw "can't convert nil into String" on JRuby
+         # TODO(sissel): File a bug about this.
+         while !entry.eof?
+           chunk = entry.read(16384)
+           fd.write(chunk)
+         end
+         #IO.copy_stream(entry, fd)
+       end
+       File.chmod(entry_mode, path)
+     end
+   end
+   tar.close
+   File.unlink(tarball) if File.file?(tarball)
+ end # def untar
+
+ def ungz(file)
+
+   outpath = file.gsub('.gz', '')
+   tgz = Zlib::GzipReader.new(File.open(file))
+   begin
+     File.open(outpath, "w") do |out|
+       IO::copy_stream(tgz, out)
+     end
+     File.unlink(file)
+   rescue
+     File.unlink(outpath) if File.file?(outpath)
+     raise
+   end
+   tgz.close
+ end
+
+ desc "Process any vendor files required for this plugin"
+ task "vendor" do |task, args|
+
+   @files.each do |file|
+     download = file_fetch(file['url'], file['sha1'])
+     if download =~ /.tar.gz/
+       prefix = download.gsub('.tar.gz', '').gsub('vendor/', '')
+       untar(download) do |entry|
+         if !file['files'].nil?
+           next unless file['files'].include?(entry.full_name.gsub(prefix, ''))
+           out = entry.full_name.split("/").last
+         end
+         File.join('vendor', out)
+       end
+     elsif download =~ /.gz/
+       ungz(download)
+     end
+   end
+
+ end
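
Note (editorial, not part of the packaged files): the "vendor" task above iterates over @files, which this package's Rakefile leaves empty. For illustration only, an entry would be a hash shaped roughly like the following; the URL, SHA1, and file list are placeholders.

    # Hypothetical @files entry for the "vendor" task; nothing like this ships in 0.1.0.
    @files = [
      {
        'url'   => 'https://example.com/downloads/sample-1.0.tar.gz',  # archive fetched into vendor/
        'sha1'  => 'da39a3ee5e6b4b0d3255bfef95601890afd80709',          # expected SHA1 of the download
        'files' => ['/data/sample.db']                                  # entries to keep, relative to the archive's top-level directory
      }
    ]
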
data/spec/inputs/s3_spec.rb ADDED
@@ -0,0 +1,5 @@
+ require 'spec_helper'
+ require 'logstash/inputs/s3'
+
+ describe LogStash::Inputs::S3 do
+ end
metadata ADDED
@@ -0,0 +1,103 @@
+ --- !ruby/object:Gem::Specification
+ name: logstash-input-s3
+ version: !ruby/object:Gem::Version
+   version: 0.1.0
+ platform: ruby
+ authors:
+ - Elasticsearch
+ autorequire:
+ bindir: bin
+ cert_chain: []
+ date: 2014-11-05 00:00:00.000000000 Z
+ dependencies:
+ - !ruby/object:Gem::Dependency
+   name: logstash
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: 1.4.0
+     - - <
+       - !ruby/object:Gem::Version
+         version: 2.0.0
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: 1.4.0
+     - - <
+       - !ruby/object:Gem::Version
+         version: 2.0.0
+ - !ruby/object:Gem::Dependency
+   name: logstash-codec-plain
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: '0'
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: '0'
+ - !ruby/object:Gem::Dependency
+   name: aws-sdk
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: '0'
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: '0'
+ description: Stream events from files from a S3 bucket.
+ email: richard.pijnenburg@elasticsearch.com
+ executables: []
+ extensions: []
+ extra_rdoc_files: []
+ files:
+ - .gitignore
+ - Gemfile
+ - LICENSE
+ - Rakefile
+ - lib/logstash/inputs/s3.rb
+ - logstash-input-s3.gemspec
+ - rakelib/publish.rake
+ - rakelib/vendor.rake
+ - spec/inputs/s3_spec.rb
+ homepage: http://logstash.net/
+ licenses:
+ - Apache License (2.0)
+ metadata:
+   logstash_plugin: 'true'
+   group: input
+ post_install_message:
+ rdoc_options: []
+ require_paths:
+ - lib
+ required_ruby_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ! '>='
+     - !ruby/object:Gem::Version
+       version: '0'
+ required_rubygems_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ! '>='
+     - !ruby/object:Gem::Version
+       version: '0'
+ requirements: []
+ rubyforge_project:
+ rubygems_version: 2.4.1
+ signing_key:
+ specification_version: 4
+ summary: Stream events from files from a S3 bucket.
+ test_files:
+ - spec/inputs/s3_spec.rb