logstash-input-twitter 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ M2M0NzBkNDE3MmM3ZDY1ZmRiMzExNDczN2ZmMmMyM2QxMDVmZDU2OA==
5
+ data.tar.gz: !binary |-
6
+ MDEwNmEyNTdkMTc3MGQxNjBjNzhkNjRiZjkxNTczMGU4ODdmZjlhMA==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ OTUxMzk5NTUzOThkZDQyNGJmMDhhMDQyYmRhZjk0MTIwZGJiYzc3ZmY5Y2Vh
10
+ NWFhODVmOGEyNTI2MDczZTFjZTFmZWM2ZTBiODg3ZTYzMjFlMjIzOGRiNzE4
11
+ NTY0MWIzYzY5M2IwZjk2OTM5YTIzZmFmZjgyMTg3YzZlOTc3OGY=
12
+ data.tar.gz: !binary |-
13
+ NGQwNjIxOGRlMjg5YWYzODg4ZjcxNjBmY2NmMWRmNDVjZTdjYzY0NTk4ZmMx
14
+ M2M4MzZkMzJiMTUyZjJkMmE3MWMwZTQ4MmNjODVhNzUyMzYyNzc3OGRlMWU1
15
+ MTQ2YzdhNjFmZjA3NGFlMWYxOThiYzA0ZGM4YTkxYmVkOWVmZTQ=
data/.gitignore ADDED
@@ -0,0 +1,4 @@
1
+ *.gem
2
+ Gemfile.lock
3
+ .bundle
4
+ vendor
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'http://rubygems.org'
2
+ gem 'rake'
3
+ gem 'gem_publisher'
4
+ gem 'archive-tar-minitar'
data/LICENSE ADDED
@@ -0,0 +1,13 @@
1
+ Copyright (c) 2012-2014 Elasticsearch <http://www.elasticsearch.org>
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License.
data/Rakefile ADDED
@@ -0,0 +1,6 @@
1
+ @files=[]
2
+
3
+ task :default do
4
+ system("rake -T")
5
+ end
6
+
@@ -0,0 +1,124 @@
1
+ # encoding: utf-8
2
+ require "logstash/inputs/base"
3
+ require "logstash/namespace"
4
+ require "logstash/timestamp"
5
+ require "logstash/util"
6
+ require "logstash/json"
7
+
8
+ # Read events from the twitter streaming api.
9
+ class LogStash::Inputs::Twitter < LogStash::Inputs::Base
10
+
11
+ config_name "twitter"
12
+ milestone 1
13
+
14
+ # Your twitter app's consumer key
15
+ #
16
+ # Don't know what this is? You need to create an "application"
17
+ # on twitter, see this url: <https://dev.twitter.com/apps/new>
18
+ config :consumer_key, :validate => :string, :required => true
19
+
20
+ # Your twitter app's consumer secret
21
+ #
22
+ # If you don't have one of these, you can create one by
23
+ # registering a new application with twitter:
24
+ # <https://dev.twitter.com/apps/new>
25
+ config :consumer_secret, :validate => :password, :required => true
26
+
27
+ # Your oauth token.
28
+ #
29
+ # To get this, login to twitter with whatever account you want,
30
+ # then visit <https://dev.twitter.com/apps>
31
+ #
32
+ # Click on your app (used with the consumer_key and consumer_secret settings)
33
+ # Then at the bottom of the page, click 'Create my access token' which
34
+ # will create an oauth token and secret bound to your account and that
35
+ # application.
36
+ config :oauth_token, :validate => :string, :required => true
37
+
38
+ # Your oauth token secret.
39
+ #
40
+ # To get this, login to twitter with whatever account you want,
41
+ # then visit <https://dev.twitter.com/apps>
42
+ #
43
+ # Click on your app (used with the consumer_key and consumer_secret settings)
44
+ # Then at the bottom of the page, click 'Create my access token' which
45
+ # will create an oauth token and secret bound to your account and that
46
+ # application.
47
+ config :oauth_token_secret, :validate => :password, :required => true
48
+
49
+ # Any keywords to track in the twitter stream
50
+ config :keywords, :validate => :array, :required => true
51
+
52
+ # Record full tweet object as given to us by the Twitter stream api.
53
+ config :full_tweet, :validate => :boolean, :default => false
54
+
55
+ public
56
+ def register
57
+ require "twitter"
58
+
59
+ # monkey patch twitter gem to ignore json parsing error.
60
+ # at the same time, use our own json parser
61
+ # this has been tested with a specific gem version, raise if not the same
62
+ raise("Invalid Twitter gem") unless Twitter::Version.to_s == "5.0.0.rc.1"
63
+ Twitter::Streaming::Response.module_eval do
64
+ def on_body(data)
65
+ @tokenizer.extract(data).each do |line|
66
+ next if line.empty?
67
+ begin
68
+ @block.call(LogStash::Json.load(line, :symbolize_keys => true))
69
+ rescue LogStash::Json::ParserError
70
+ # silently ignore json parsing errors
71
+ end
72
+ end
73
+ end
74
+ end
75
+
76
+ @client = Twitter::Streaming::Client.new do |c|
77
+ c.consumer_key = @consumer_key
78
+ c.consumer_secret = @consumer_secret.value
79
+ c.access_token = @oauth_token
80
+ c.access_token_secret = @oauth_token_secret.value
81
+ end
82
+ end
83
+
84
+ public
85
+ def run(queue)
86
+ @logger.info("Starting twitter tracking", :keywords => @keywords)
87
+ begin
88
+ @client.filter(:track => @keywords.join(",")) do |tweet|
89
+ if tweet.is_a?(Twitter::Tweet)
90
+ @logger.debug? && @logger.debug("Got tweet", :user => tweet.user.screen_name, :text => tweet.text)
91
+ if @full_tweet
92
+ event = LogStash::Event.new(LogStash::Util.stringify_symbols(tweet.to_hash))
93
+ event.timestamp = LogStash::Timestamp.new(tweet.created_at)
94
+ else
95
+ event = LogStash::Event.new(
96
+ LogStash::Event::TIMESTAMP => LogStash::Timestamp.new(tweet.created_at),
97
+ "message" => tweet.full_text,
98
+ "user" => tweet.user.screen_name,
99
+ "client" => tweet.source,
100
+ "retweeted" => tweet.retweeted?,
101
+ "source" => "http://twitter.com/#{tweet.user.screen_name}/status/#{tweet.id}"
102
+ )
103
+ event["in-reply-to"] = tweet.in_reply_to_status_id if tweet.reply?
104
+ unless tweet.urls.empty?
105
+ event["urls"] = tweet.urls.map(&:expanded_url).map(&:to_s)
106
+ end
107
+ end
108
+
109
+ decorate(event)
110
+ queue << event
111
+ end
112
+ end # client.filter
113
+ rescue LogStash::ShutdownSignal
114
+ return
115
+ rescue Twitter::Error::TooManyRequests => e
116
+ @logger.warn("Twitter too many requests error, sleeping for #{e.rate_limit.reset_in}s")
117
+ sleep(e.rate_limit.reset_in)
118
+ retry
119
+ rescue => e
120
+ @logger.warn("Twitter client error", :message => e.message, :exception => e, :backtrace => e.backtrace)
121
+ retry
122
+ end
123
+ end # def run
124
+ end # class LogStash::Inputs::Twitter
@@ -0,0 +1,28 @@
1
+ Gem::Specification.new do |s|
2
+
3
+ s.name = 'logstash-input-twitter'
4
+ s.version = '0.1.0'
5
+ s.licenses = ['Apache License (2.0)']
6
+ s.summary = "Read events from the twitter streaming api."
7
+ s.description = "Read events from the twitter streaming api."
8
+ s.authors = ["Elasticsearch"]
9
+ s.email = 'richard.pijnenburg@elasticsearch.com'
10
+ s.homepage = "http://logstash.net/"
11
+ s.require_paths = ["lib"]
12
+
13
+ # Files
14
+ s.files = `git ls-files`.split($\)+::Dir.glob('vendor/*')
15
+
16
+ # Tests
17
+ s.test_files = s.files.grep(%r{^(test|spec|features)/})
18
+
19
+ # Special flag to let us know this is actually a logstash plugin
20
+ s.metadata = { "logstash_plugin" => "true", "group" => "input" }
21
+
22
+ # Gem dependencies
23
+ s.add_runtime_dependency 'logstash', '>= 1.4.0', '< 2.0.0'
24
+
25
+ s.add_runtime_dependency 'twitter', ['5.12.0']
26
+
27
+ end
28
+
@@ -0,0 +1,9 @@
1
+ require "gem_publisher"
2
+
3
+ desc "Publish gem to RubyGems.org"
4
+ task :publish_gem do |t|
5
+ gem_file = Dir.glob(File.expand_path('../*.gemspec',File.dirname(__FILE__))).first
6
+ gem = GemPublisher.publish_if_updated(gem_file, :rubygems)
7
+ puts "Published #{gem}" if gem
8
+ end
9
+
@@ -0,0 +1,169 @@
1
+ require "net/http"
2
+ require "uri"
3
+ require "digest/sha1"
4
+
5
+ def vendor(*args)
6
+ return File.join("vendor", *args)
7
+ end
8
+
9
+ directory "vendor/" => ["vendor"] do |task, args|
10
+ mkdir task.name
11
+ end
12
+
13
+ def fetch(url, sha1, output)
14
+
15
+ puts "Downloading #{url}"
16
+ actual_sha1 = download(url, output)
17
+
18
+ if actual_sha1 != sha1
19
+ fail "SHA1 does not match (expected '#{sha1}' but got '#{actual_sha1}')"
20
+ end
21
+ end # def fetch
22
+
23
+ def file_fetch(url, sha1)
24
+ filename = File.basename( URI(url).path )
25
+ output = "vendor/#{filename}"
26
+ task output => [ "vendor/" ] do
27
+ begin
28
+ actual_sha1 = file_sha1(output)
29
+ if actual_sha1 != sha1
30
+ fetch(url, sha1, output)
31
+ end
32
+ rescue Errno::ENOENT
33
+ fetch(url, sha1, output)
34
+ end
35
+ end.invoke
36
+
37
+ return output
38
+ end
39
+
40
+ def file_sha1(path)
41
+ digest = Digest::SHA1.new
42
+ fd = File.new(path, "r")
43
+ while true
44
+ begin
45
+ digest << fd.sysread(16384)
46
+ rescue EOFError
47
+ break
48
+ end
49
+ end
50
+ return digest.hexdigest
51
+ ensure
52
+ fd.close if fd
53
+ end
54
+
55
+ def download(url, output)
56
+ uri = URI(url)
57
+ digest = Digest::SHA1.new
58
+ tmp = "#{output}.tmp"
59
+ Net::HTTP.start(uri.host, uri.port, :use_ssl => (uri.scheme == "https")) do |http|
60
+ request = Net::HTTP::Get.new(uri.path)
61
+ http.request(request) do |response|
62
+ fail "HTTP fetch failed for #{url}. #{response}" if [200, 301].include?(response.code)
63
+ size = (response["content-length"].to_i || -1).to_f
64
+ count = 0
65
+ File.open(tmp, "w") do |fd|
66
+ response.read_body do |chunk|
67
+ fd.write(chunk)
68
+ digest << chunk
69
+ if size > 0 && $stdout.tty?
70
+ count += chunk.bytesize
71
+ $stdout.write(sprintf("\r%0.2f%%", count/size * 100))
72
+ end
73
+ end
74
+ end
75
+ $stdout.write("\r \r") if $stdout.tty?
76
+ end
77
+ end
78
+
79
+ File.rename(tmp, output)
80
+
81
+ return digest.hexdigest
82
+ rescue SocketError => e
83
+ puts "Failure while downloading #{url}: #{e}"
84
+ raise
85
+ ensure
86
+ File.unlink(tmp) if File.exist?(tmp)
87
+ end # def download
88
+
89
+ def untar(tarball, &block)
90
+ require "archive/tar/minitar"
91
+ tgz = Zlib::GzipReader.new(File.open(tarball))
92
+ # Pull out typesdb
93
+ tar = Archive::Tar::Minitar::Input.open(tgz)
94
+ tar.each do |entry|
95
+ path = block.call(entry)
96
+ next if path.nil?
97
+ parent = File.dirname(path)
98
+
99
+ mkdir_p parent unless File.directory?(parent)
100
+
101
+ # Skip this file if the output file is the same size
102
+ if entry.directory?
103
+ mkdir path unless File.directory?(path)
104
+ else
105
+ entry_mode = entry.instance_eval { @mode } & 0777
106
+ if File.exists?(path)
107
+ stat = File.stat(path)
108
+ # TODO(sissel): Submit a patch to archive-tar-minitar upstream to
109
+ # expose headers in the entry.
110
+ entry_size = entry.instance_eval { @size }
111
+ # If file sizes are same, skip writing.
112
+ next if stat.size == entry_size && (stat.mode & 0777) == entry_mode
113
+ end
114
+ puts "Extracting #{entry.full_name} from #{tarball} #{entry_mode.to_s(8)}"
115
+ File.open(path, "w") do |fd|
116
+ # eof? check lets us skip empty files. Necessary because the API provided by
117
+ # Archive::Tar::Minitar::Reader::EntryStream only mostly acts like an
118
+ # IO object. Something about empty files in this EntryStream causes
119
+ # IO.copy_stream to throw "can't convert nil into String" on JRuby
120
+ # TODO(sissel): File a bug about this.
121
+ while !entry.eof?
122
+ chunk = entry.read(16384)
123
+ fd.write(chunk)
124
+ end
125
+ #IO.copy_stream(entry, fd)
126
+ end
127
+ File.chmod(entry_mode, path)
128
+ end
129
+ end
130
+ tar.close
131
+ File.unlink(tarball) if File.file?(tarball)
132
+ end # def untar
133
+
134
+ def ungz(file)
135
+
136
+ outpath = file.gsub('.gz', '')
137
+ tgz = Zlib::GzipReader.new(File.open(file))
138
+ begin
139
+ File.open(outpath, "w") do |out|
140
+ IO::copy_stream(tgz, out)
141
+ end
142
+ File.unlink(file)
143
+ rescue
144
+ File.unlink(outpath) if File.file?(outpath)
145
+ raise
146
+ end
147
+ tgz.close
148
+ end
149
+
150
+ desc "Process any vendor files required for this plugin"
151
+ task "vendor" do |task, args|
152
+
153
+ @files.each do |file|
154
+ download = file_fetch(file['url'], file['sha1'])
155
+ if download =~ /.tar.gz/
156
+ prefix = download.gsub('.tar.gz', '').gsub('vendor/', '')
157
+ untar(download) do |entry|
158
+ if !file['files'].nil?
159
+ next unless file['files'].include?(entry.full_name.gsub(prefix, ''))
160
+ out = entry.full_name.split("/").last
161
+ end
162
+ File.join('vendor', out)
163
+ end
164
+ elsif download =~ /.gz/
165
+ ungz(download)
166
+ end
167
+ end
168
+
169
+ end
@@ -0,0 +1,5 @@
1
+ require 'spec_helper'
2
+ require 'logstash/inputs/twitter'
3
+
4
+ describe LogStash::Inputs::Twitter do # placeholder suite: no examples are defined yet
5
+ end
metadata ADDED
@@ -0,0 +1,89 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: logstash-input-twitter
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Elasticsearch
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-11-07 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: logstash
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ! '>='
18
+ - !ruby/object:Gem::Version
19
+ version: 1.4.0
20
+ - - <
21
+ - !ruby/object:Gem::Version
22
+ version: 2.0.0
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: 1.4.0
30
+ - - <
31
+ - !ruby/object:Gem::Version
32
+ version: 2.0.0
33
+ - !ruby/object:Gem::Dependency
34
+ name: twitter
35
+ requirement: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - '='
38
+ - !ruby/object:Gem::Version
39
+ version: 5.12.0
40
+ type: :runtime
41
+ prerelease: false
42
+ version_requirements: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - '='
45
+ - !ruby/object:Gem::Version
46
+ version: 5.12.0
47
+ description: Read events from the twitter streaming api.
48
+ email: richard.pijnenburg@elasticsearch.com
49
+ executables: []
50
+ extensions: []
51
+ extra_rdoc_files: []
52
+ files:
53
+ - .gitignore
54
+ - Gemfile
55
+ - LICENSE
56
+ - Rakefile
57
+ - lib/logstash/inputs/twitter.rb
58
+ - logstash-input-twitter.gemspec
59
+ - rakelib/publish.rake
60
+ - rakelib/vendor.rake
61
+ - spec/inputs/twitter_spec.rb
62
+ homepage: http://logstash.net/
63
+ licenses:
64
+ - Apache License (2.0)
65
+ metadata:
66
+ logstash_plugin: 'true'
67
+ group: input
68
+ post_install_message:
69
+ rdoc_options: []
70
+ require_paths:
71
+ - lib
72
+ required_ruby_version: !ruby/object:Gem::Requirement
73
+ requirements:
74
+ - - ! '>='
75
+ - !ruby/object:Gem::Version
76
+ version: '0'
77
+ required_rubygems_version: !ruby/object:Gem::Requirement
78
+ requirements:
79
+ - - ! '>='
80
+ - !ruby/object:Gem::Version
81
+ version: '0'
82
+ requirements: []
83
+ rubyforge_project:
84
+ rubygems_version: 2.4.1
85
+ signing_key:
86
+ specification_version: 4
87
+ summary: Read events from the twitter streaming api.
88
+ test_files:
89
+ - spec/inputs/twitter_spec.rb