logstash-input-twitter 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/.gitignore +4 -0
- data/Gemfile +4 -0
- data/LICENSE +13 -0
- data/Rakefile +6 -0
- data/lib/logstash/inputs/twitter.rb +124 -0
- data/logstash-input-twitter.gemspec +28 -0
- data/rakelib/publish.rake +9 -0
- data/rakelib/vendor.rake +169 -0
- data/spec/inputs/twitter_spec.rb +5 -0
- metadata +89 -0
checksums.yaml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
---
|
2
|
+
!binary "U0hBMQ==":
|
3
|
+
metadata.gz: !binary |-
|
4
|
+
M2M0NzBkNDE3MmM3ZDY1ZmRiMzExNDczN2ZmMmMyM2QxMDVmZDU2OA==
|
5
|
+
data.tar.gz: !binary |-
|
6
|
+
MDEwNmEyNTdkMTc3MGQxNjBjNzhkNjRiZjkxNTczMGU4ODdmZjlhMA==
|
7
|
+
SHA512:
|
8
|
+
metadata.gz: !binary |-
|
9
|
+
OTUxMzk5NTUzOThkZDQyNGJmMDhhMDQyYmRhZjk0MTIwZGJiYzc3ZmY5Y2Vh
|
10
|
+
NWFhODVmOGEyNTI2MDczZTFjZTFmZWM2ZTBiODg3ZTYzMjFlMjIzOGRiNzE4
|
11
|
+
NTY0MWIzYzY5M2IwZjk2OTM5YTIzZmFmZjgyMTg3YzZlOTc3OGY=
|
12
|
+
data.tar.gz: !binary |-
|
13
|
+
NGQwNjIxOGRlMjg5YWYzODg4ZjcxNjBmY2NmMWRmNDVjZTdjYzY0NTk4ZmMx
|
14
|
+
M2M4MzZkMzJiMTUyZjJkMmE3MWMwZTQ4MmNjODVhNzUyMzYyNzc3OGRlMWU1
|
15
|
+
MTQ2YzdhNjFmZjA3NGFlMWYxOThiYzA0ZGM4YTkxYmVkOWVmZTQ=
|
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
Copyright (c) 2012-2014 Elasticsearch <http://www.elasticsearch.org>
|
2
|
+
|
3
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
you may not use this file except in compliance with the License.
|
5
|
+
You may obtain a copy of the License at
|
6
|
+
|
7
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
|
9
|
+
Unless required by applicable law or agreed to in writing, software
|
10
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
See the License for the specific language governing permissions and
|
13
|
+
limitations under the License.
|
data/Rakefile
ADDED
@@ -0,0 +1,124 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require "logstash/inputs/base"
|
3
|
+
require "logstash/namespace"
|
4
|
+
require "logstash/timestamp"
|
5
|
+
require "logstash/util"
|
6
|
+
require "logstash/json"
|
7
|
+
|
8
|
+
# Read events from the twitter streaming api.
#
# Tweets that match any of the configured keywords are converted into
# LogStash events and pushed onto the queue handed to #run.
class LogStash::Inputs::Twitter < LogStash::Inputs::Base

  config_name "twitter"
  milestone 1

  # Your twitter app's consumer key
  #
  # Don't know what this is? You need to create an "application"
  # on twitter, see this url: <https://dev.twitter.com/apps/new>
  config :consumer_key, :validate => :string, :required => true

  # Your twitter app's consumer secret
  #
  # If you don't have one of these, you can create one by
  # registering a new application with twitter:
  # <https://dev.twitter.com/apps/new>
  config :consumer_secret, :validate => :password, :required => true

  # Your oauth token.
  #
  # To get this, login to twitter with whatever account you want,
  # then visit <https://dev.twitter.com/apps>
  #
  # Click on your app (used with the consumer_key and consumer_secret settings)
  # Then at the bottom of the page, click 'Create my access token' which
  # will create an oauth token and secret bound to your account and that
  # application.
  config :oauth_token, :validate => :string, :required => true

  # Your oauth token secret.
  #
  # To get this, login to twitter with whatever account you want,
  # then visit <https://dev.twitter.com/apps>
  #
  # Click on your app (used with the consumer_key and consumer_secret settings)
  # Then at the bottom of the page, click 'Create my access token' which
  # will create an oauth token and secret bound to your account and that
  # application.
  config :oauth_token_secret, :validate => :password, :required => true

  # Any keywords to track in the twitter stream
  config :keywords, :validate => :array, :required => true

  # Record full tweet object as given to us by the Twitter stream api.
  config :full_tweet, :validate => :boolean, :default => false

  # Load the twitter gem, patch its streaming response parser and build the
  # streaming client from the configured credentials.
  #
  # Raises RuntimeError ("Invalid Twitter gem") when the loaded twitter gem is
  # not the exact version the monkey patch below is written for.
  public
  def register
    require "twitter"

    # Monkey patch the twitter gem to ignore json parsing errors and, at the
    # same time, to use our own json parser.
    #
    # The patch replaces Twitter::Streaming::Response#on_body wholesale, so it
    # is only applied to one exact gem version. That version has to be the one
    # the gemspec pins as a runtime dependency (5.12.0); with any other value
    # here the plugin could never register.
    # NOTE(review): confirm this override still mirrors on_body as shipped in
    # twitter 5.12.0 (it relies on @tokenizer and @block).
    raise("Invalid Twitter gem") unless Twitter::Version.to_s == "5.12.0"
    Twitter::Streaming::Response.module_eval do
      def on_body(data)
        @tokenizer.extract(data).each do |line|
          next if line.empty?
          begin
            @block.call(LogStash::Json.load(line, :symbolize_keys => true))
          rescue LogStash::Json::ParserError
            # silently ignore json parsing errors
          end
        end
      end
    end

    @client = Twitter::Streaming::Client.new do |c|
      c.consumer_key = @consumer_key
      c.consumer_secret = @consumer_secret.value
      c.access_token = @oauth_token
      c.access_token_secret = @oauth_token_secret.value
    end
  end

  # Track the configured keywords on the streaming api and push one event per
  # received tweet onto +queue+.
  #
  # Returns when a LogStash::ShutdownSignal is raised. A rate-limit error
  # sleeps for the period reported by twitter and reconnects; any other error
  # is logged and the stream is reconnected after a short pause.
  public
  def run(queue)
    @logger.info("Starting twitter tracking", :keywords => @keywords)
    begin
      @client.filter(:track => @keywords.join(",")) do |tweet|
        # The stream also yields non-tweet objects; only tweets become events.
        if tweet.is_a?(Twitter::Tweet)
          @logger.debug? && @logger.debug("Got tweet", :user => tweet.user.screen_name, :text => tweet.text)
          if @full_tweet
            event = LogStash::Event.new(LogStash::Util.stringify_symbols(tweet.to_hash))
            event.timestamp = LogStash::Timestamp.new(tweet.created_at)
          else
            event = LogStash::Event.new(
              LogStash::Event::TIMESTAMP => LogStash::Timestamp.new(tweet.created_at),
              "message" => tweet.full_text,
              "user" => tweet.user.screen_name,
              "client" => tweet.source,
              "retweeted" => tweet.retweeted?,
              "source" => "http://twitter.com/#{tweet.user.screen_name}/status/#{tweet.id}"
            )
            event["in-reply-to"] = tweet.in_reply_to_status_id if tweet.reply?
            unless tweet.urls.empty?
              event["urls"] = tweet.urls.map(&:expanded_url).map(&:to_s)
            end
          end

          decorate(event)
          queue << event
        end
      end # client.filter
    rescue LogStash::ShutdownSignal
      return
    rescue Twitter::Error::TooManyRequests => e
      @logger.warn("Twitter too many requests error, sleeping for #{e.rate_limit.reset_in}s")
      sleep(e.rate_limit.reset_in)
      retry
    rescue => e
      @logger.warn("Twitter client error", :message => e.message, :exception => e, :backtrace => e.backtrace)
      # Pause briefly before reconnecting so a persistent failure (bad
      # credentials, network down) does not turn into a tight retry loop.
      sleep(1)
      retry
    end
  end # def run
end # class LogStash::Inputs::Twitter
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# Gem specification for the logstash-input-twitter plugin.
Gem::Specification.new do |spec|
  spec.name          = "logstash-input-twitter"
  spec.version       = "0.1.0"
  spec.licenses      = ["Apache License (2.0)"]
  spec.summary       = "Read events from the twitter streaming api."
  spec.description   = "Read events from the twitter streaming api."
  spec.authors       = ["Elasticsearch"]
  spec.email         = "richard.pijnenburg@elasticsearch.com"
  spec.homepage      = "http://logstash.net/"
  spec.require_paths = ["lib"]

  # Packaged files: whatever git tracks, plus anything directly under vendor/.
  tracked_files  = `git ls-files`.split($\)
  vendored_files = ::Dir.glob("vendor/*")
  spec.files = tracked_files + vendored_files

  # Test files are the packaged files living under test/, spec/ or features/.
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})

  # Special flag to let us know this is actually a logstash plugin
  spec.metadata = { "logstash_plugin" => "true", "group" => "input" }

  # Runtime dependencies
  spec.add_runtime_dependency "logstash", ">= 1.4.0", "< 2.0.0"
  spec.add_runtime_dependency "twitter", ["5.12.0"]
end
|
28
|
+
|
@@ -0,0 +1,9 @@
|
|
1
|
+
require "gem_publisher"
|
2
|
+
|
3
|
+
desc "Publish gem to RubyGems.org"
task :publish_gem do
  # The gemspec is looked up one directory above the directory holding this
  # rake file; the first match is used.
  rakelib_dir = File.dirname(__FILE__)
  gemspec_path = Dir.glob(File.expand_path('../*.gemspec', rakelib_dir)).first

  published = GemPublisher.publish_if_updated(gemspec_path, :rubygems)
  puts "Published #{published}" if published
end
|
9
|
+
|
data/rakelib/vendor.rake
ADDED
@@ -0,0 +1,169 @@
|
|
1
|
+
require "net/http"
|
2
|
+
require "uri"
|
3
|
+
require "digest/sha1"
|
4
|
+
|
5
|
+
# Join the given path components beneath the "vendor" directory.
def vendor(*args)
  File.join("vendor", *args)
end
|
8
|
+
|
9
|
+
# Directory task for "vendor/", declared with the "vendor" task as its
# prerequisite; its action creates the directory named by the task.
directory "vendor/" => ["vendor"] do |dir_task, _args|
  mkdir dir_task.name
end
|
12
|
+
|
13
|
+
# Download +url+ to +output+ and verify the payload against +sha1+.
#
# Raises RuntimeError when the SHA1 reported by download differs from the
# expected +sha1+; returns nil otherwise.
def fetch(url, sha1, output)
  puts "Downloading #{url}"
  downloaded_sha1 = download(url, output)
  return if downloaded_sha1 == sha1

  fail "SHA1 does not match (expected '#{sha1}' but got '#{downloaded_sha1}')"
end # def fetch
|
22
|
+
|
23
|
+
# Make sure the file behind +url+ is present under vendor/ with the expected
# +sha1+, and return its path ("vendor/<basename of the url path>").
#
# A rake task named after the output path (depending on "vendor/") is defined
# and invoked right away. It downloads the file when it is missing or when the
# copy on disk has a different SHA1.
def file_fetch(url, sha1)
  output = "vendor/#{File.basename(URI(url).path)}"

  fetch_task = task output => ["vendor/"] do
    begin
      fetch(url, sha1, output) unless file_sha1(output) == sha1
    rescue Errno::ENOENT
      # No such file yet: fetch it.
      fetch(url, sha1, output)
    end
  end
  fetch_task.invoke

  output
end
|
39
|
+
|
40
|
+
# Return the hex-encoded SHA1 digest of the file at +path+.
#
# The file is read in 16384-byte chunks. Opening a missing file raises
# Errno::ENOENT, which file_fetch relies on.
def file_sha1(path)
  sha = Digest::SHA1.new
  File.open(path, "r") do |io|
    loop do
      begin
        sha << io.sysread(16384)
      rescue EOFError
        # sysread signals end of file by raising.
        break
      end
    end
  end
  sha.hexdigest
end
|
54
|
+
|
55
|
+
# Stream +url+ to the file +output+ and return the hex SHA1 of the received
# bytes.
#
# The body is written to "<output>.tmp" and renamed onto +output+ only once
# the transfer has finished, so a failed download leaves nothing at +output+.
# When stdout is a TTY and the server sent a Content-Length, a percentage
# progress indicator is printed.
#
# Raises RuntimeError when the server does not answer with a 2xx status
# (redirects are not followed). A SocketError is reported on stdout and
# re-raised.
def download(url, output)
  # Set before anything can raise: the ensure clause below reads it.
  tmp = "#{output}.tmp"
  uri = URI(url)
  digest = Digest::SHA1.new
  Net::HTTP.start(uri.host, uri.port, :use_ssl => (uri.scheme == "https")) do |http|
    # request_uri keeps the query string, which uri.path would drop.
    request = Net::HTTP::Get.new(uri.request_uri)
    http.request(request) do |response|
      # response.code is a String, so comparing it with integer literals never
      # matches; check the response class instead.
      fail "HTTP fetch failed for #{url}. #{response}" unless response.is_a?(Net::HTTPSuccess)
      # 0.0 when the header is absent, which disables the progress output.
      size = response["content-length"].to_i.to_f
      count = 0
      # Binary mode: the payload is an arbitrary byte stream.
      File.open(tmp, "wb") do |fd|
        response.read_body do |chunk|
          fd.write(chunk)
          digest << chunk
          if size > 0 && $stdout.tty?
            count += chunk.bytesize
            $stdout.write(sprintf("\r%0.2f%%", count/size * 100))
          end
        end
      end
      $stdout.write("\r      \r") if $stdout.tty?
    end
  end

  File.rename(tmp, output)

  return digest.hexdigest
rescue SocketError => e
  puts "Failure while downloading #{url}: #{e}"
  raise
ensure
  # Only present when the transfer was interrupted before the rename.
  File.unlink(tmp) if File.exist?(tmp)
end # def download
|
88
|
+
|
89
|
+
# Extract entries from the gzipped tarball +tarball+.
#
# The block is called with every archive entry and returns the destination
# path for that entry, or nil to skip it. A file already on disk with the same
# size and permission bits as the entry is not rewritten. After a complete
# pass the tarball itself is deleted.
def untar(tarball, &block)
  # Zlib is used directly below, so load it here rather than relying on
  # another library having pulled it in.
  require "zlib"
  require "archive/tar/minitar"
  tgz = Zlib::GzipReader.new(File.open(tarball))
  tar = Archive::Tar::Minitar::Input.open(tgz)
  begin
    tar.each do |entry|
      path = block.call(entry)
      next if path.nil?
      parent = File.dirname(path)

      mkdir_p parent unless File.directory?(parent)

      if entry.directory?
        mkdir path unless File.directory?(path)
      else
        entry_mode = entry.instance_eval { @mode } & 0777
        # File.exist? rather than the File.exists? alias, which newer Rubies
        # no longer provide.
        if File.exist?(path)
          stat = File.stat(path)
          # TODO(sissel): Submit a patch to archive-tar-minitar upstream to
          # expose headers in the entry.
          entry_size = entry.instance_eval { @size }
          # If file size and mode are the same, skip writing.
          next if stat.size == entry_size && (stat.mode & 0777) == entry_mode
        end
        puts "Extracting #{entry.full_name} from #{tarball} #{entry_mode.to_s(8)}"
        # Binary mode: archive members are arbitrary byte streams.
        File.open(path, "wb") do |fd|
          # eof? check lets us skip empty files. Necessary because the API provided by
          # Archive::Tar::Minitar::Reader::EntryStream only mostly acts like an
          # IO object. Something about empty files in this EntryStream causes
          # IO.copy_stream to throw "can't convert nil into String" on JRuby
          # TODO(sissel): File a bug about this.
          while !entry.eof?
            chunk = entry.read(16384)
            fd.write(chunk)
          end
          #IO.copy_stream(entry, fd)
        end
        File.chmod(entry_mode, path)
      end
    end
  ensure
    # Close the archive reader even when extraction fails part-way.
    tar.close
  end
  File.unlink(tarball) if File.file?(tarball)
end # def untar
|
133
|
+
|
134
|
+
# Decompress the gzip file +file+ next to itself and delete the original.
#
# The output path is +file+ without its trailing ".gz". If decompression
# fails, the partially written output is removed and the error is re-raised.
def ungz(file)
  require "zlib"

  # Strip only the trailing extension. Removing every ".gz" occurrence would
  # also rewrite directory components such as "pkg.gz.d/". Names that do not
  # end in ".gz" keep the previous replace-everywhere behaviour.
  outpath = file.end_with?('.gz') ? file[0...-3] : file.gsub('.gz', '')
  tgz = Zlib::GzipReader.new(File.open(file))
  begin
    # Binary mode: the decompressed payload is an arbitrary byte stream.
    File.open(outpath, "wb") do |out|
      IO::copy_stream(tgz, out)
    end
    File.unlink(file)
  rescue
    # Do not leak the reader when decompression fails.
    tgz.close
    File.unlink(outpath) if File.file?(outpath)
    raise
  end
  tgz.close
end
|
149
|
+
|
150
|
+
desc "Process any vendor files required for this plugin"
task "vendor" do
  # @files is expected to be set before this task runs: a list of hashes with
  # 'url', 'sha1' and, optionally, 'files' (a whitelist of archive members).
  @files.each do |file|
    archive = file_fetch(file['url'], file['sha1'])
    # Dots are escaped so only a literal ".tar.gz" / ".gz" matches.
    if archive =~ /\.tar\.gz/
      prefix = archive.gsub('.tar.gz', '').gsub('vendor/', '')
      untar(archive) do |entry|
        if !file['files'].nil?
          # Whitelisted members are flattened into vendor/ by base name;
          # everything else is skipped (a nil block result means "skip").
          next unless file['files'].include?(entry.full_name.gsub(prefix, ''))
          out = entry.full_name.split("/").last
        else
          # No whitelist: keep every entry under its own archive path. The
          # previous code left the name unset here, which raised a TypeError.
          # NOTE(review): confirm this is the intended layout for
          # whitelist-less archives.
          out = entry.full_name
        end
        File.join('vendor', out)
      end
    elsif archive =~ /\.gz/
      ungz(archive)
    end
  end

end
|
metadata
ADDED
@@ -0,0 +1,89 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: logstash-input-twitter
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Elasticsearch
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-11-07 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: logstash
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ! '>='
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 1.4.0
|
20
|
+
- - <
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: 2.0.0
|
23
|
+
type: :runtime
|
24
|
+
prerelease: false
|
25
|
+
version_requirements: !ruby/object:Gem::Requirement
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: 1.4.0
|
30
|
+
- - <
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: 2.0.0
|
33
|
+
- !ruby/object:Gem::Dependency
|
34
|
+
name: twitter
|
35
|
+
requirement: !ruby/object:Gem::Requirement
|
36
|
+
requirements:
|
37
|
+
- - '='
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
version: 5.12.0
|
40
|
+
type: :runtime
|
41
|
+
prerelease: false
|
42
|
+
version_requirements: !ruby/object:Gem::Requirement
|
43
|
+
requirements:
|
44
|
+
- - '='
|
45
|
+
- !ruby/object:Gem::Version
|
46
|
+
version: 5.12.0
|
47
|
+
description: Read events from the twitter streaming api.
|
48
|
+
email: richard.pijnenburg@elasticsearch.com
|
49
|
+
executables: []
|
50
|
+
extensions: []
|
51
|
+
extra_rdoc_files: []
|
52
|
+
files:
|
53
|
+
- .gitignore
|
54
|
+
- Gemfile
|
55
|
+
- LICENSE
|
56
|
+
- Rakefile
|
57
|
+
- lib/logstash/inputs/twitter.rb
|
58
|
+
- logstash-input-twitter.gemspec
|
59
|
+
- rakelib/publish.rake
|
60
|
+
- rakelib/vendor.rake
|
61
|
+
- spec/inputs/twitter_spec.rb
|
62
|
+
homepage: http://logstash.net/
|
63
|
+
licenses:
|
64
|
+
- Apache License (2.0)
|
65
|
+
metadata:
|
66
|
+
logstash_plugin: 'true'
|
67
|
+
group: input
|
68
|
+
post_install_message:
|
69
|
+
rdoc_options: []
|
70
|
+
require_paths:
|
71
|
+
- lib
|
72
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
73
|
+
requirements:
|
74
|
+
- - ! '>='
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: '0'
|
77
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
78
|
+
requirements:
|
79
|
+
- - ! '>='
|
80
|
+
- !ruby/object:Gem::Version
|
81
|
+
version: '0'
|
82
|
+
requirements: []
|
83
|
+
rubyforge_project:
|
84
|
+
rubygems_version: 2.4.1
|
85
|
+
signing_key:
|
86
|
+
specification_version: 4
|
87
|
+
summary: Read events from the twitter streaming api.
|
88
|
+
test_files:
|
89
|
+
- spec/inputs/twitter_spec.rb
|