jls-tweetstream 2.5.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,11 @@
1
# Gem helper tasks installed by Bundler.
require 'bundler'
Bundler::GemHelper.install_tasks

# Defines the :spec task through RSpec's rake integration.
require 'rspec/core/rake_task'
RSpec::Core::RakeTask.new(:spec)

# Both `rake test` and the default task depend on :spec.
[:test, :default].each { |name| task name => :spec }

# Defines the YARD documentation task.
require 'yard'
YARD::Rake::YardocTask.new
@@ -0,0 +1,36 @@
1
+ require 'tweetstream/configuration'
2
+ require 'tweetstream/client'
3
+ require 'tweetstream/daemon'
4
+
5
+ module TweetStream
6
+ extend Configuration
7
+
8
# Error carrying the timeout and retry count of a failed reconnection.
# The message is derived from the retry count.
class ReconnectError < StandardError
  attr_accessor :timeout, :retries

  # @param timeout the timeout value to expose through #timeout
  # @param retries the retry count to expose through #retries and to
  #   interpolate into the error message
  def initialize(timeout, retries)
    super("Failed to reconnect after #{retries} tries.")
    self.timeout = timeout
    self.retries = retries
  end
end
16
+
17
class << self
  # Alias for TweetStream::Client.new
  #
  # @param options [Hash] passed straight through to TweetStream::Client.new
  # @return [TweetStream::Client]
  def new(options = {})
    TweetStream::Client.new(options)
  end

  # Delegate unknown calls to a freshly built TweetStream::Client.
  #
  # The client is instantiated once and reused for both the capability
  # check and the dispatch, instead of building a second throwaway client.
  # Calls the client cannot handle fall through to the default behaviour.
  def method_missing(method, *args, &block)
    client = new
    return super unless client.respond_to?(method)
    client.send(method, *args, &block)
  end

  # True when either a TweetStream::Client or this object itself responds
  # to the given method.
  #
  # @param method [Symbol, String] method name to look up
  # @param include_private [Boolean] also consider private methods
  # @return [Boolean]
  def respond_to?(method, include_private = false)
    new.respond_to?(method, include_private) || super(method, include_private)
  end
end
36
+ end
@@ -0,0 +1,582 @@
1
+ require 'em-twitter'
2
+ require 'eventmachine'
3
+ require 'twitter'
4
# Prefer the yajl gem. When it cannot be loaded, define a small shim on top
# of the stdlib JSON library that provides the two names this file uses:
# Yajl::Parser.parse and Yajl::ParseError.
begin
  require 'yajl'
rescue LoadError
  require 'json'
  module Yajl
    # Alias the JSON parser error so `rescue Yajl::ParseError` resolves and
    # catches malformed input.
    ParseError = ::JSON::ParserError unless const_defined?(:ParseError, false)

    module Parser
      # Make #parse callable as Yajl::Parser.parse while keeping it available
      # as an instance method for anything that includes this module.
      extend self

      # Parse a JSON document with the stdlib parser.
      #
      # @param text [String] JSON source
      # @param opts [Hash, nil] Yajl-style options; only :symbolize_keys is
      #   honoured
      # @return [Object] the decoded value
      # @raise [Yajl::ParseError] when text is not valid JSON
      def parse(text, opts = nil)
        json_opts = {}
        # Yajl calls this option :symbolize_keys; JSON calls it :symbolize_names.
        json_opts[:symbolize_names] = true if opts && opts[:symbolize_keys]
        ::JSON.parse(text, json_opts)
      end
    end
  end
end
16
+
17
+ module TweetStream
18
+ # Provides simple access to the Twitter Streaming API (https://dev.twitter.com/docs/streaming-api)
19
+ # for Ruby scripts that need to create a long connection to
20
+ # Twitter for tracking and other purposes.
21
+ #
22
+ # Basic usage of the library is to call one of the provided
23
+ # methods and provide a block that will perform actions on
24
+ # a yielded Twitter::Tweet. For example:
25
+ #
26
+ # TweetStream::Client.new.track('fail') do |status|
27
+ # puts "[#{status.user.screen_name}] #{status.text}"
28
+ # end
29
+ #
30
+ # For information about a daemonized TweetStream client,
31
+ # view the TweetStream::Daemon class.
32
+ class Client
33
+
34
# Callback names that can still be supplied through an options hash
# (a deprecated path, see warn_if_callbacks). The guard keeps the constant
# from being reassigned if this file is loaded again.
OPTION_CALLBACKS = %w[
  delete
  scrub_geo
  limit
  error
  enhance_your_calm
  unauthorized
  reconnect
  inited
  direct_message
  timeline_status
  anything
  no_data_received
  status_withheld
  user_withheld
].map { |name| name.to_sym }.freeze unless defined?(OPTION_CALLBACKS)
48
+
49
+ # @private
50
+ attr_accessor *Configuration::VALID_OPTIONS_KEYS
51
+ attr_accessor :options
52
+ attr_reader :control_uri, :control, :stream
53
+
54
# Creates a new client.
#
# The given options are stored as-is, then layered over the global
# TweetStream.options; every key in Configuration::VALID_OPTIONS_KEYS is
# assigned through its writer. The callback registry starts out empty.
#
# @param options [Hash] per-client overrides of the global configuration
def initialize(options={})
  self.options = options
  settings = TweetStream.options.merge(options)
  Configuration::VALID_OPTIONS_KEYS.each { |key| send("#{key}=", settings[key]) }
  @callbacks = {}
end
63
+
64
# Streams all public statuses. The Firehose is not a generally available
# resource; few applications require this level of access, and a
# combination of other resources and access levels covers nearly every
# use case.
#
# @param query_parameters [Hash] parameters forwarded to #start
# @return whatever #start returns
def firehose(query_parameters = {}, &block)
  endpoint = '/1.1/statuses/firehose.json'
  start(endpoint, query_parameters, &block)
end
71
+
72
# Streams all statuses containing http: and https:. The links stream is
# not a generally available resource; few applications require this level
# of access, and a combination of other resources and access levels covers
# nearly every use case.
#
# @param query_parameters [Hash] parameters forwarded to #start
# @return whatever #start returns
def links(query_parameters = {}, &block)
  endpoint = '/1.1/statuses/links.json'
  start(endpoint, query_parameters, &block)
end
79
+
80
# Streams all retweets. The retweet stream is not a generally available
# resource; few applications require this level of access, and a
# combination of other resources and access levels covers nearly every
# use case. (Historical note from the original documentation: as of
# 9/11/2009 the site-wide retweet feature had not yet launched, so the
# stream carried few, if any, retweets.)
#
# @param query_parameters [Hash] parameters forwarded to #start
# @return whatever #start returns
def retweet(query_parameters = {}, &block)
  endpoint = '/1.1/statuses/retweet.json'
  start(endpoint, query_parameters, &block)
end
89
+
90
# Streams a random sample of all public statuses. The default access level
# provides a small proportion of the Firehose; the "Gardenhose" access
# level provides a larger proportion, better suited to data mining and
# research that needs a statistically significant sample.
#
# @param query_parameters [Hash] parameters forwarded to #start
# @return whatever #start returns
def sample(query_parameters = {}, &block)
  endpoint = '/1.1/statuses/sample.json'
  start(endpoint, query_parameters, &block)
end
98
+
99
# Track a set of keywords. Queries are subject to Track Limitations,
# described in Track Limiting, and to access roles, described in the
# statuses/filter method. Track keywords are case-insensitive logical
# ORs. Terms are exact-matched, and also exact-matched ignoring
# punctuation. Phrases (keywords with spaces) are not supported, and
# keywords containing punctuation will only exact match tokens.
#
# A trailing Hash argument is treated as query parameters; everything
# else is sent to #filter under the :track key.
def track(*keywords, &block)
  query_params = keywords.last.is_a?(::Hash) ? keywords.pop : {}
  filter(query_params.merge(:track => keywords), &block)
end
111
+
112
# Streams public statuses from or in reply to a set of users. Mentions
# ("Hello @user!") and implicit replies ("@user Hello!" created without
# pressing the reply "swoosh") are not matched. Requires integer user
# IDs, not screen names.
#
# A trailing Hash argument is treated as query parameters; everything
# else is sent to #filter under the :follow key.
def follow(*user_ids, &block)
  query_params = user_ids.last.is_a?(::Hash) ? user_ids.pop : {}
  filter(query_params.merge(:follow => user_ids), &block)
end
121
+
122
# Track a set of bounding boxes. Only tweets that are both created using
# the Geotagging API and placed from within a tracked bounding box are
# included in the stream; the user's location field is not used to filter
# tweets (e.g. if a user has their location set to "San Francisco", but
# the tweet was not created using the Geotagging API and has no geo
# element, it will not be included in the stream).
#
# Bounding boxes are specified as a comma-separated list of
# longitude/latitude pairs; the first pair of each box denotes its
# southwest corner.
#
# A trailing Hash argument is treated as query parameters; everything
# else is sent to #filter under the :locations key.
def locations(*locations_map, &block)
  query_params = locations_map.last.is_a?(::Hash) ? locations_map.pop : {}
  filter(query_params.merge(:locations => locations_map), &block)
end
135
+
136
# Calls the statuses/filter method of the Streaming API. You may provide
# <tt>:follow</tt>, <tt>:track</tt> or both as options to follow the
# tweets of specified users or track keywords. This method is provided
# separately for cases where combining track and follow conserves HTTP
# connections.
#
# The request is always issued with :method => :post; the given hash is
# not modified.
def filter(query_params = {}, &block)
  post_params = query_params.merge(:method => :post)
  start('/1.1/statuses/filter.json', post_params, &block)
end
144
+
145
# Make a call to the userstream api for the currently authenticated user.
#
# The request is directed at the userstream host through
# :extra_stream_parameters. The caller's hash is left untouched (a merged
# copy is sent), so frozen or shared hashes can be passed safely.
#
# @param query_params [Hash] parameters forwarded to #start
# @return whatever #start returns
def userstream(query_params = {}, &block)
  stream_params = { :host => "userstream.twitter.com" }
  params = query_params.merge(:extra_stream_parameters => stream_params)
  start('/1.1/user.json', params, &block)
end
151
+
152
# Make a call to the sitestream api.
#
# The request is a POST directed at the sitestream host, following the
# given user ids. A truthy :followings entry in query_params is translated
# into :with => 'followings'. The caller's hash is left untouched (a merged
# copy is sent), so frozen or shared hashes can be passed safely.
#
# @param user_ids [Array] ids sent under the :follow key
# @param query_params [Hash] additional parameters forwarded to #start
# @return whatever #start returns
def sitestream(user_ids = [], query_params = {}, &block)
  stream_params = { :host => "sitestream.twitter.com" }
  params = query_params.merge(
    :method => :post,
    :follow => user_ids,
    :extra_stream_parameters => stream_params
  )
  params[:with] = 'followings' if params.delete(:followings)
  start('/1.1/site.json', params, &block)
end
163
+
164
# Register the handler for deletion notices received from the stream.
#
#   @client = TweetStream::Client.new
#   @client.on_delete do |status_id, user_id|
#     Tweet.delete(status_id)
#   end
#
# The handler is called with two arguments: the status id and the user id.
#
# @return [TweetStream::Client, Proc, nil] the client itself when a block
#   is given (to allow chaining); otherwise the currently registered
#   deletion handler, if any
def on_delete(&block)
  on('delete', &block)
end
179
+
180
# Register the handler for scrub_geo notices received from the stream.
#
#   @client = TweetStream::Client.new
#   @client.on_scrub_geo do |up_to_status_id, user_id|
#     # remove geo data from this user's statuses up to up_to_status_id
#   end
#
# The handler is called with two arguments: the upper status id and the
# user id.
#
# @return [TweetStream::Client, Proc, nil] the client itself when a block
#   is given (to allow chaining); otherwise the currently registered
#   scrub_geo handler, if any
def on_scrub_geo(&block)
  on('scrub_geo', &block)
end
195
+
196
# Register the handler for rate limit notices received from the stream.
#
#   @client = TweetStream::Client.new
#   @client.on_limit do |discarded_count|
#     # Make note of discarded count
#   end
#
# The handler is called with one argument: the number of discarded tweets.
#
# @return [TweetStream::Client, Proc, nil] the client itself when a block
#   is given (to allow chaining); otherwise the currently registered
#   limit handler, if any
def on_limit(&block)
  on('limit', &block)
end
211
+
212
# Register the handler for errors encountered while processing the
# stream. Note that TweetStream will automatically try to reconnect;
# this is for reference only. Don't panic!
#
#   @client = TweetStream::Client.new
#   @client.on_error do |message|
#     # Make note of error message
#   end
#
# The handler is called with one argument: the error message.
#
# @return [TweetStream::Client, Proc, nil] the client itself when a block
#   is given (to allow chaining); otherwise the currently registered
#   error handler, if any
def on_error(&block)
  on('error', &block)
end
228
+
229
# Register the handler run when an HTTP status 401 is encountered while
# connecting to Twitter. This could happen when system clock drift has
# occurred. The handler is called without arguments.
#
# @return [TweetStream::Client, Proc, nil] the client itself when a block
#   is given (to allow chaining); otherwise the currently registered
#   unauthorized handler, if any
def on_unauthorized(&block)
  on('unauthorized', &block)
end
239
+
240
# Register the handler for direct messages encountered in the stream.
#
#   @client = TweetStream::Client.new
#   @client.on_direct_message do |direct_message|
#     # do something with the direct message
#   end
#
# The handler is called with the direct message.
#
# @return [TweetStream::Client, Proc, nil] the client itself when a block
#   is given (to allow chaining); otherwise the currently registered
#   direct message handler, if any
def on_direct_message(&block)
  on('direct_message', &block)
end
255
+
256
# Register the handler run for every message encountered in the stream.
#
#   @client = TweetStream::Client.new
#   @client.on_anything do |status|
#     # do something with the status
#   end
#
# The block can take one or two arguments: |status (, client)|.
#
# @return [TweetStream::Client, Proc, nil] the client itself when a block
#   is given (to allow chaining); otherwise the currently registered
#   "anything" handler, if any
def on_anything(&block)
  on('anything', &block)
end
271
+
272
# Register the handler for regular timeline messages encountered in the
# stream.
#
#   @client = TweetStream::Client.new
#   @client.on_timeline_status do |status|
#     # do something with the status
#   end
#
# The block can take one or two arguments: |status (, client)|.
#
# @return [TweetStream::Client, Proc, nil] the client itself when a block
#   is given (to allow chaining); otherwise the currently registered
#   timeline status handler, if any
def on_timeline_status(&block)
  on('timeline_status', &block)
end
287
+
288
# Register the handler run on reconnect. It is called with the timeout
# and the number of retries.
#
#   @client = TweetStream::Client.new
#   @client.on_reconnect do |timeout, retries|
#     # Make note of the reconnection
#   end
#
# @return [TweetStream::Client, Proc, nil] the client itself when a block
#   is given; otherwise the currently registered reconnect handler, if any
def on_reconnect(&block)
  on('reconnect', &block)
end
298
+
299
# Register the handler run when the connection is established.
# Called in EventMachine::Connection#post_init.
#
#   @client = TweetStream::Client.new
#   @client.on_inited do
#     puts 'Connected...'
#   end
#
# @return [TweetStream::Client, Proc, nil] the client itself when a block
#   is given; otherwise the currently registered inited handler, if any
def on_inited(&block)
  on('inited', &block)
end
310
+
311
# Register the handler run when no data is received from the server and
# a stall occurs. Twitter defines this to be 90 seconds. The handler is
# called without arguments.
#
#   @client = TweetStream::Client.new
#   @client.on_no_data_received do
#     # Make note of the stall
#   end
#
# @return [TweetStream::Client, Proc, nil] the client itself when a block
#   is given; otherwise the currently registered handler, if any
def on_no_data_received(&block)
  on('no_data_received', &block)
end
321
+
322
# Register the handler run when an enhance_your_calm signal is received.
# The handler is called without arguments.
#
#   @client = TweetStream::Client.new
#   @client.on_enhance_your_calm do
#     # do something, your account has been blocked
#   end
#
# @return [TweetStream::Client, Proc, nil] the client itself when a block
#   is given; otherwise the currently registered handler, if any
def on_enhance_your_calm(&block)
  on('enhance_your_calm', &block)
end
331
+
332
# Register the handler run when a status_withheld message is received.
# It is called with the content of the message's :status_withheld entry.
#
#   @client = TweetStream::Client.new
#   @client.on_status_withheld do |status|
#     # do something with the status
#   end
#
# @return [TweetStream::Client, Proc, nil] the client itself when a block
#   is given; otherwise the currently registered handler, if any
def on_status_withheld(&block)
  on('status_withheld', &block)
end
341
+
342
# Register the handler run when a user_withheld message is received.
# It is called with the content of the message's :user_withheld entry.
#
#   @client = TweetStream::Client.new
#   @client.on_user_withheld do |user|
#     # do something with the withheld user notice
#   end
#
# @return [TweetStream::Client, Proc, nil] the client itself when a block
#   is given; otherwise the currently registered handler, if any
def on_user_withheld(&block)
  on('user_withheld', &block)
end
351
+
352
# Register the handler run when a Site Stream friends list is received.
# It is called with the content of the message's :friends entry.
#
#   @client = TweetStream::Client.new
#   @client.on_friends do |friends|
#     # do something with the friends list
#   end
#
# @return [TweetStream::Client, Proc, nil] the client itself when a block
#   is given; otherwise the currently registered handler, if any
def on_friends(&block)
  on('friends', &block)
end
361
+
362
# Register the handler run when a stall warning is received.
# It is called with the content of the message's :warning entry.
#
#   @client = TweetStream::Client.new
#   @client.on_stall_warning do |warning|
#     # do something with the warning
#   end
#
# @return [TweetStream::Client, Proc, nil] the client itself when a block
#   is given; otherwise the currently registered handler, if any
def on_stall_warning(&block)
  on('stall_warning', &block)
end
371
+
372
# Register the handler for a named userstream event. The handler is
# called with the whole message whose :event entry matches the name.
#
#   @client = TweetStream::Client.new
#   @client.on_event(:favorite) do |event|
#     # do something with the event
#   end
#
# @param event [Symbol, String] event name
# @return [TweetStream::Client, Proc, nil] the client itself when a block
#   is given; otherwise the currently registered handler, if any
def on_event(event, &block)
  on(event, &block)
end
381
+
382
# Register or look up the callback stored under an event name.
#
# Event names are normalised with #to_s, so symbols and strings address
# the same slot.
#
# @param event [#to_s] event name
# @return [TweetStream::Client, Proc, nil] self when a block is given (the
#   block replaces any previous callback); otherwise the stored callback,
#   or nil when none is registered
def on(event, &block)
  key = event.to_s
  return @callbacks[key] unless block_given?

  @callbacks[key] = block
  self
end
390
+
391
# Connect to Twitter, starting an EventMachine run loop when none is
# running yet.
#
# With a running reactor this is just #connect. Otherwise
# EventMachine.epoll and EventMachine.kqueue are called, then #connect is
# invoked inside EventMachine.run.
#
# @param path [String] streaming endpoint path
# @param query_parameters [Hash] parameters forwarded to #connect
# @return the result of #connect when the reactor is already running,
#   otherwise the result of EventMachine.run
def start(path, query_parameters = {}, &block)
  return connect(path, query_parameters, &block) if EventMachine.reactor_running?

  EventMachine.epoll
  EventMachine.kqueue

  EventMachine.run { connect(path, query_parameters, &block) }
end
404
+
405
# Connect to Twitter without starting a new EventMachine run loop.
#
# Builds the stream parameters and the effective callback table, opens
# the stream, and wires the stream's hooks to the registered callbacks.
#
# @param path [String] streaming endpoint path
# @param options [Hash] query parameters and connection options
# @return the stream object returned by EM::Twitter::Client.connect
# @raise [TweetStream::ReconnectError] from the max-reconnects hook
def connect(path, options = {}, &block)
  stream_parameters, callbacks = connection_options(path, options)

  @stream = EM::Twitter::Client.connect(stream_parameters)

  @stream.each do |item|
    begin
      payload = Yajl::Parser.parse(item, :symbolize_keys => true)
    rescue Yajl::ParseError
      # Unparseable item: report it and move on to the next one.
      invoke_callback(callbacks['error'], "Yajl::ParseError occured in stream: #{item}")
      next
    end

    # Only JSON objects are dispatched; anything else is reported as an error.
    unless payload.is_a?(::Hash)
      invoke_callback(callbacks['error'], "Unexpected JSON object in stream: #{item}")
      next
    end

    Twitter.identity_map = false

    # Type-specific callbacks first, then the catch-all 'anything' callback.
    respond_to(payload, callbacks, &block)
    yield_message_to(callbacks['anything'], payload)
  end

  @stream.on_error { |message| invoke_callback(callbacks['error'], message) }
  @stream.on_unauthorized { invoke_callback(callbacks['unauthorized']) }
  @stream.on_enhance_your_calm { invoke_callback(callbacks['enhance_your_calm']) }
  @stream.on_reconnect { |timeout, retries| invoke_callback(callbacks['reconnect'], timeout, retries) }
  @stream.on_max_reconnects { |timeout, retries| raise TweetStream::ReconnectError.new(timeout, retries) }
  @stream.on_no_data_received { invoke_callback(callbacks['no_data_received']) }

  @stream
end
456
+
457
# Dispatch one parsed stream message to the matching callback.
#
# The first matching rule wins, in this order: control message, stall
# warning, delete, scrub_geo, limit, direct message, status_withheld,
# user_withheld, named event, friends list, timeline status, and finally
# a site stream envelope (:for_user). Messages matching none are ignored.
#
# @param hash [Hash] parsed message with symbol keys
# @param callbacks [Hash] callback procs keyed by event name (String)
# @param block [Proc] optional block given to the streaming method; it
#   receives timeline statuses and :for_user envelopes
def respond_to(hash, callbacks, &block)
  control  = hash[:control]
  deletion = hash[:delete]
  scrub    = hash[:scrub_geo]
  limit    = hash[:limit]

  if control && control[:control_uri]
    # Control messages set up the site stream control client.
    @control_uri = control[:control_uri]
    require 'tweetstream/site_stream_client'
    @control = TweetStream::SiteStreamClient.new(@control_uri, options)
    @control.on_error(&callbacks['error'])
  elsif hash[:warning]
    invoke_callback(callbacks['stall_warning'], hash[:warning])
  elsif deletion && deletion[:status]
    status = deletion[:status]
    invoke_callback(callbacks['delete'], status[:id], status[:user_id])
  elsif scrub && scrub[:up_to_status_id]
    invoke_callback(callbacks['scrub_geo'], scrub[:up_to_status_id], scrub[:user_id])
  elsif limit && limit[:track]
    invoke_callback(callbacks['limit'], limit[:track])
  elsif hash[:direct_message]
    yield_message_to(callbacks['direct_message'], Twitter::DirectMessage.new(hash[:direct_message]))
  elsif hash[:status_withheld]
    invoke_callback(callbacks['status_withheld'], hash[:status_withheld])
  elsif hash[:user_withheld]
    invoke_callback(callbacks['user_withheld'], hash[:user_withheld])
  elsif hash[:event]
    # Named events are looked up under the event's own name.
    invoke_callback(callbacks[hash[:event].to_s], hash)
  elsif hash[:friends]
    invoke_callback(callbacks['friends'], hash[:friends])
  elsif hash[:text] && hash[:user]
    # Remember the most recent status; #stop returns it.
    @last_status = Twitter::Tweet.new(hash)
    yield_message_to(callbacks['timeline_status'], @last_status)

    yield_message_to(block, @last_status) if block_given?
  elsif hash[:for_user]
    yield_message_to(block, hash) if block_given?
  end
end
490
+
491
# Terminate the currently running TweetStream by stopping the
# EventMachine event loop.
#
# @return the value of @last_status, i.e. the most recent timeline status
#   recorded by #respond_to, or nil when none was recorded
def stop
  EventMachine.stop_event_loop
  @last_status
end
496
+
497
# Close the connection to Twitter without stopping the EventMachine loop.
# Does nothing (and returns nil) when no stream has been opened.
#
# @return the result of the stream's #close_connection, or nil
def close_connection
  return unless @stream

  @stream.close_connection
end
501
+
502
# Ask the current stream to stop. Does nothing (and returns nil) when no
# stream has been opened.
#
# @return the result of the stream's #stop, or nil
def stop_stream
  return unless @stream

  @stream.stop
end
505
+
506
+ protected
507
+
508
# Convert the :follow, :track and :locations entries to strings, in place.
#
# Array values are flattened and joined with commas; any other truthy
# value is converted with #to_s. Missing, nil or false entries and all
# other keys are left alone.
#
# @param query_parameters [Hash] parameters to normalise (modified)
# @return [Hash] the same hash that was passed in
def normalize_filter_parameters(query_parameters = {})
  [:follow, :track, :locations].each do |param|
    value = query_parameters[param]
    next unless value

    query_parameters[param] =
      if value.kind_of?(Array)
        value.flatten.map { |q| q.to_s }.join(',')
      else
        value.to_s
      end
  end

  query_parameters
end
518
+
519
# Build the authentication part of the stream parameters.
#
# @return [Hash] { :basic => {...} } with username and password when
#   auth_method is 'basic' (compared as a string); otherwise
#   { :oauth => {...} } with the consumer and token credentials
def auth_params
  if auth_method.to_s == 'basic'
    return { :basic => { :username => username, :password => password } }
  end

  oauth_credentials = {
    :consumer_key    => consumer_key,
    :consumer_secret => consumer_secret,
    :token           => oauth_token,
    :token_secret    => oauth_token_secret
  }
  { :oauth => oauth_credentials }
end
536
+
537
# Call a callback with the given arguments, tolerating a missing one.
#
# @param callback [#call, nil] callback to run; nothing happens when
#   it is nil or false
# @param args arguments handed to the callback
# @return the callback's result, or nil when there is no callback
def invoke_callback(callback, *args)
  return unless callback

  callback.call(*args)
end
541
+
542
# Hand a message to a proc, letting the proc choose by its arity whether
# it also receives the client.
#
# A proc of arity 1 is called with the message; a proc of arity 2 is
# called with the message and self. Anything that is not a Proc, and any
# proc with another arity (for example a zero-argument or splat block),
# is not called.
#
# @param procedure [Proc, nil] the callback
# @param message the object to deliver
# @return the callback's result, or nil when it was not called
def yield_message_to(procedure, message)
  return unless procedure.is_a?(Proc)

  arity = procedure.arity
  if arity == 1
    invoke_callback(procedure, message)
  elsif arity == 2
    invoke_callback(procedure, message, self)
  end
end
552
+
553
# Split the options given to a streaming call into the parameters for the
# underlying stream and the effective callback table.
#
# Callbacks supplied through the options hash (deprecated, see
# warn_if_callbacks) override the ones registered on the client. The
# caller's hash is not modified; all work happens on a copy.
#
# @param path [String] streaming endpoint path
# @param options [Hash] query parameters plus the optional :method,
#   :extra_stream_parameters and callback entries
# @return [Array(Hash, Hash)] the stream parameters and the callbacks
#   keyed by event name (String)
def connection_options(path, options)
  warn_if_callbacks(options)

  # Work on a copy so keys are not stripped from the caller's hash.
  options = options.dup

  callbacks = @callbacks.dup
  OPTION_CALLBACKS.each do |callback|
    # Assign in place: Hash#merge returns a new hash, so merging without
    # keeping the result would silently drop the callback.
    callbacks[callback.to_s] = options.delete(callback) if options[callback]
  end

  # :inited is one of OPTION_CALLBACKS, so an option-supplied proc has
  # already been moved into the callback table above.
  inited_proc = callbacks['inited']
  extra_stream_parameters = options.delete(:extra_stream_parameters) || {}

  stream_params = {
    :path       => path,
    :method     => (options.delete(:method) || 'get').to_s.upcase,
    :user_agent => user_agent,
    :on_inited  => inited_proc,
    :params     => normalize_filter_parameters(options),
    :proxy      => proxy
  }.merge(extra_stream_parameters).merge(auth_params)

  [stream_params, callbacks]
end
575
+
576
# Emit a deprecation warning when any OPTION_CALLBACKS key carries a
# truthy value in the given options hash.
#
# @param options [Hash] options passed to a streaming call
# @return [nil]
def warn_if_callbacks(options={})
  return unless OPTION_CALLBACKS.any? { |callback| options[callback] }

  Kernel.warn("Passing callbacks via the options hash is deprecated and will be removed in TweetStream 3.0")
end
581
+ end
582
+ end