jls-tweetstream 2.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,11 @@
1
# Rake entry point: wires up the gem helper, spec and documentation tasks.

# Register the tasks supplied by Bundler's gem helper.
require 'bundler'
Bundler::GemHelper.install_tasks

# Define the :spec task through RSpec's rake integration.
require 'rspec/core/rake_task'
RSpec::Core::RakeTask.new(:spec)

# Both `rake test` and a bare `rake` simply depend on :spec.
[:test, :default].each { |alias_name| task alias_name => :spec }

# Register the YARD documentation task with its default settings.
require 'yard'
YARD::Rake::YardocTask.new
@@ -0,0 +1,36 @@
1
+ require 'tweetstream/configuration'
2
+ require 'tweetstream/client'
3
+ require 'tweetstream/daemon'
4
+
5
module TweetStream
  extend Configuration

  # Error carrying the timeout and retry count of a failed reconnection.
  # The message reports the number of retries that were attempted.
  class ReconnectError < StandardError
    attr_accessor :timeout, :retries

    # @param timeout the timeout in effect when reconnecting was abandoned
    #   (units are not visible here -- presumably seconds; confirm with the caller)
    # @param retries the number of reconnect attempts that were made
    def initialize(timeout, retries)
      self.timeout = timeout
      self.retries = retries
      super("Failed to reconnect after #{retries} tries.")
    end
  end

  class << self
    # Alias for TweetStream::Client.new
    #
    # @param options [Hash] options forwarded to TweetStream::Client.new
    # @return [TweetStream::Client]
    def new(options={})
      TweetStream::Client.new(options)
    end

    # Delegate unknown module-level calls to a freshly built TweetStream::Client.
    #
    # A single client is built per call and used both for the capability
    # check and for the dispatch.
    def method_missing(method, *args, &block)
      client = new
      return super unless client.respond_to?(method)
      client.send(method, *args, &block)
    end

    # Report the delegated TweetStream::Client methods as supported, so that
    # respond_to? and Object#method agree with what method_missing handles.
    #
    # @param method [Symbol, String] the method name being queried
    # @param include_private [Boolean] whether private methods count
    # @return [Boolean]
    def respond_to_missing?(method, include_private = false)
      new.respond_to?(method, include_private) || super
    end

    # Ruby 1.8 never consults respond_to_missing?, so keep the direct
    # respond_to? override on those interpreters only.
    if RUBY_VERSION < '1.9'
      # Delegate to TweetStream::Client
      def respond_to?(method, include_private = false)
        new.respond_to?(method, include_private) || super(method, include_private)
      end
    end
  end
end
@@ -0,0 +1,582 @@
1
+ require 'em-twitter'
2
+ require 'eventmachine'
3
+ require 'twitter'
4
# Prefer the yajl-ruby parser; when it is not installed, fall back to the
# standard library JSON parser behind the small part of the Yajl interface
# this library uses (Yajl::Parser.parse and Yajl::ParseError).
begin
  require 'yajl'
rescue LoadError
  require 'json'

  module Yajl
    # Lets `rescue Yajl::ParseError` keep working when JSON is the backend.
    ParseError = ::JSON::ParserError unless const_defined?(:ParseError)

    module Parser
      # Make #parse callable as Yajl::Parser.parse while keeping it available
      # as a public instance method for anything that includes this module.
      extend self

      # Parse a JSON document.
      #
      # @param text [String] the JSON source
      # @param opts [Hash, nil] parser options; Yajl's :symbolize_keys is
      #   translated to JSON's :symbolize_names, other keys are passed through
      # @return [Object] the parsed structure
      # @raise [Yajl::ParseError] when text is not valid JSON
      def parse(text, opts=nil)
        json_opts = (opts || {}).dup
        if json_opts.key?(:symbolize_keys)
          json_opts[:symbolize_names] = json_opts.delete(:symbolize_keys)
        end
        ::JSON.parse(text, json_opts)
      end
    end
  end
end
16
+
17
module TweetStream
  # Provides simple access to the Twitter Streaming API (https://dev.twitter.com/docs/streaming-api)
  # for Ruby scripts that need to create a long connection to
  # Twitter for tracking and other purposes.
  #
  # Basic usage of the library is to call one of the provided
  # methods and provide a block that will perform actions on
  # a yielded Twitter::Tweet. For example:
  #
  #     TweetStream::Client.new.track('fail') do |status|
  #       puts "[#{status.user.screen_name}] #{status.text}"
  #     end
  #
  # For information about a daemonized TweetStream client,
  # view the TweetStream::Daemon class.
  class Client

    # Callback names that may still be passed through the options hash of a
    # streaming call (deprecated, see #warn_if_callbacks).
    OPTION_CALLBACKS = [:delete,
                        :scrub_geo,
                        :limit,
                        :error,
                        :enhance_your_calm,
                        :unauthorized,
                        :reconnect,
                        :inited,
                        :direct_message,
                        :timeline_status,
                        :anything,
                        :no_data_received,
                        :status_withheld,
                        :user_withheld].freeze unless defined?(OPTION_CALLBACKS)

    # @private
    attr_accessor(*Configuration::VALID_OPTIONS_KEYS)
    attr_accessor :options
    attr_reader :control_uri, :control, :stream

    # Creates a new client.
    #
    # The given options are layered over TweetStream.options and every key
    # in Configuration::VALID_OPTIONS_KEYS is assigned through its writer.
    #
    # @param options [Hash] per-client overrides of the module configuration
    def initialize(options={})
      self.options = options
      merged_options = TweetStream.options.merge(options)
      Configuration::VALID_OPTIONS_KEYS.each do |key|
        send("#{key}=", merged_options[key])
      end
      @callbacks = {}
    end

    # Returns all public statuses. The Firehose is not a generally
    # available resource. Few applications require this level of access.
    # Creative use of a combination of other resources and various access
    # levels can satisfy nearly every application use case.
    def firehose(query_parameters = {}, &block)
      start('/1.1/statuses/firehose.json', query_parameters, &block)
    end

    # Returns all statuses containing http: and https:. The links stream is
    # not a generally available resource. Few applications require this level
    # of access. Creative use of a combination of other resources and various
    # access levels can satisfy nearly every application use case.
    def links(query_parameters = {}, &block)
      start('/1.1/statuses/links.json', query_parameters, &block)
    end

    # Returns all retweets. The retweet stream is not a generally available
    # resource. Few applications require this level of access. Creative
    # use of a combination of other resources and various access levels
    # can satisfy nearly every application use case. As of 9/11/2009,
    # the site-wide retweet feature has not yet launched,
    # so there are currently few, if any, retweets on this stream.
    def retweet(query_parameters = {}, &block)
      start('/1.1/statuses/retweet.json', query_parameters, &block)
    end

    # Returns a random sample of all public statuses. The default access level
    # provides a small proportion of the Firehose. The "Gardenhose" access
    # level provides a proportion more suitable for data mining and
    # research applications that desire a larger proportion to be a
    # statistically significant sample.
    def sample(query_parameters = {}, &block)
      start('/1.1/statuses/sample.json', query_parameters, &block)
    end

    # Specify keywords to track. Queries are subject to Track Limitations,
    # described in Track Limiting and subject to access roles, described in
    # the statuses/filter method. Track keywords are case-insensitive logical
    # ORs. Terms are exact-matched, and also exact-matched ignoring
    # punctuation. Phrases, keywords with spaces, are not supported.
    # Keywords containing punctuation will only exact match tokens.
    # Query parameters may be passed as the last argument.
    def track(*keywords, &block)
      filter_by(:track, keywords, &block)
    end

    # Returns public statuses from or in reply to a set of users. Mentions
    # ("Hello @user!") and implicit replies ("@user Hello!" created without
    # pressing the reply "swoosh") are not matched. Requires integer user
    # IDs, not screen names. Query parameters may be passed as the last argument.
    def follow(*user_ids, &block)
      filter_by(:follow, user_ids, &block)
    end

    # Specifies a set of bounding boxes to track. Only tweets that are both created
    # using the Geotagging API and are placed from within a tracked bounding box will
    # be included in the stream; the user's location field is not used to filter tweets
    # (e.g. if a user has their location set to "San Francisco", but the tweet was not created
    # using the Geotagging API and has no geo element, it will not be included in the stream).
    # Bounding boxes are specified as a comma separated list of longitude/latitude pairs,
    # with the first pair denoting the southwest corner of the box.
    # Query parameters may be passed as the last argument.
    def locations(*locations_map, &block)
      filter_by(:locations, locations_map, &block)
    end

    # Make a call to the statuses/filter method of the Streaming API,
    # you may provide <tt>:follow</tt>, <tt>:track</tt> or both as options
    # to follow the tweets of specified users or track keywords. This
    # method is provided separately for cases when it would conserve the
    # number of HTTP connections to combine track and follow.
    def filter(query_params = {}, &block)
      start('/1.1/statuses/filter.json', query_params.merge(:method => :post), &block)
    end

    # Make a call to the userstream api for currently authenticated user
    #
    # The caller's query_params hash is left untouched.
    def userstream(query_params = {}, &block)
      stream_params = { :host => "userstream.twitter.com" }
      params = query_params.merge(:extra_stream_parameters => stream_params)
      start('/1.1/user.json', params, &block)
    end

    # Make a call to the sitestream api
    #
    # A truthy <tt>:followings</tt> entry in query_params is translated to
    # <tt>:with => 'followings'</tt>. The caller's query_params hash is left
    # untouched.
    def sitestream(user_ids = [], query_params = {}, &block)
      stream_params = { :host => "sitestream.twitter.com" }
      params = query_params.merge({
        :method => :post,
        :follow => user_ids,
        :extra_stream_parameters => stream_params
      })
      params[:with] = 'followings' if params.delete(:followings)
      start('/1.1/site.json', params, &block)
    end

    # Set a Proc to be run when a deletion notice is received
    # from the Twitter stream. For example:
    #
    #     @client = TweetStream::Client.new
    #     @client.on_delete do |status_id, user_id|
    #       Tweet.delete(status_id)
    #     end
    #
    # Block must take two arguments: the status id and the user id.
    # If no block is given, it will return the currently set
    # deletion proc. When a block is given, the TweetStream::Client
    # object is returned to allow for chaining.
    def on_delete(&block)
      on('delete', &block)
    end

    # Set a Proc to be run when a scrub_geo notice is received
    # from the Twitter stream. For example:
    #
    #     @client = TweetStream::Client.new
    #     @client.on_scrub_geo do |up_to_status_id, user_id|
    #       Tweet.where(:status_id <= up_to_status_id)
    #     end
    #
    # Block must take two arguments: the upper status id and the user id.
    # If no block is given, it will return the currently set
    # scrub_geo proc. When a block is given, the TweetStream::Client
    # object is returned to allow for chaining.
    def on_scrub_geo(&block)
      on('scrub_geo', &block)
    end

    # Set a Proc to be run when a rate limit notice is received
    # from the Twitter stream. For example:
    #
    #     @client = TweetStream::Client.new
    #     @client.on_limit do |discarded_count|
    #       # Make note of discarded count
    #     end
    #
    # Block must take one argument: the number of discarded tweets.
    # If no block is given, it will return the currently set
    # limit proc. When a block is given, the TweetStream::Client
    # object is returned to allow for chaining.
    def on_limit(&block)
      on('limit', &block)
    end

    # Set a Proc to be run when an HTTP error is encountered in the
    # processing of the stream. Note that TweetStream will automatically
    # try to reconnect, this is for reference only. Don't panic!
    #
    #     @client = TweetStream::Client.new
    #     @client.on_error do |message|
    #       # Make note of error message
    #     end
    #
    # Block must take one argument: the error message.
    # If no block is given, it will return the currently set
    # error proc. When a block is given, the TweetStream::Client
    # object is returned to allow for chaining.
    def on_error(&block)
      on('error', &block)
    end

    # Set a Proc to be run when an HTTP status 401 is encountered while
    # connecting to Twitter. This could happen when system clock drift
    # has occurred.
    #
    # If no block is given, it will return the currently set
    # unauthorized proc. When a block is given, the TweetStream::Client
    # object is returned to allow for chaining.
    def on_unauthorized(&block)
      on('unauthorized', &block)
    end

    # Set a Proc to be run when a direct message is encountered in the
    # processing of the stream.
    #
    #     @client = TweetStream::Client.new
    #     @client.on_direct_message do |direct_message|
    #       # do something with the direct message
    #     end
    #
    # Block can take one or two arguments. |direct_message (, client)|
    # If no block is given, it will return the currently set
    # direct message proc. When a block is given, the TweetStream::Client
    # object is returned to allow for chaining.
    def on_direct_message(&block)
      on('direct_message', &block)
    end

    # Set a Proc to be run whenever anything is encountered in the
    # processing of the stream. The block receives the parsed message hash.
    #
    #     @client = TweetStream::Client.new
    #     @client.on_anything do |message|
    #       # do something with the message
    #     end
    #
    # Block can take one or two arguments. |message (, client)|
    # If no block is given, it will return the currently set
    # anything proc. When a block is given, the TweetStream::Client
    # object is returned to allow for chaining.
    def on_anything(&block)
      on('anything', &block)
    end

    # Set a Proc to be run when a regular timeline message is encountered in the
    # processing of the stream.
    #
    #     @client = TweetStream::Client.new
    #     @client.on_timeline_status do |status|
    #       # do something with the status
    #     end
    #
    # Block can take one or two arguments. |status (, client)|
    # If no block is given, it will return the currently set
    # timeline status proc. When a block is given, the TweetStream::Client
    # object is returned to allow for chaining.
    def on_timeline_status(&block)
      on('timeline_status', &block)
    end

    # Set a Proc to be run on reconnect.
    #
    #     @client = TweetStream::Client.new
    #     @client.on_reconnect do |timeout, retries|
    #       # Make note of the reconnection
    #     end
    #
    def on_reconnect(&block)
      on('reconnect', &block)
    end

    # Set a Proc to be run when connection established.
    # Called in EventMachine::Connection#post_init
    #
    #     @client = TweetStream::Client.new
    #     @client.on_inited do
    #       puts 'Connected...'
    #     end
    #
    def on_inited(&block)
      on('inited', &block)
    end

    # Set a Proc to be run when no data is received from the server
    # and a stall occurs. Twitter defines this to be 90 seconds.
    #
    #     @client = TweetStream::Client.new
    #     @client.on_no_data_received do
    #       # Make note of no data, possibly reconnect
    #     end
    def on_no_data_received(&block)
      on('no_data_received', &block)
    end

    # Set a Proc to be run when enhance_your_calm signal is received.
    #
    #     @client = TweetStream::Client.new
    #     @client.on_enhance_your_calm do
    #       # do something, your account has been blocked
    #     end
    def on_enhance_your_calm(&block)
      on('enhance_your_calm', &block)
    end

    # Set a Proc to be run when a status_withheld message is received.
    #
    #     @client = TweetStream::Client.new
    #     @client.on_status_withheld do |status|
    #       # do something with the status
    #     end
    def on_status_withheld(&block)
      on('status_withheld', &block)
    end

    # Set a Proc to be run when a user_withheld message is received.
    #
    #     @client = TweetStream::Client.new
    #     @client.on_user_withheld do |user|
    #       # do something with the withheld user notice
    #     end
    def on_user_withheld(&block)
      on('user_withheld', &block)
    end

    # Set a Proc to be run when a Site Stream friends list is received.
    #
    #     @client = TweetStream::Client.new
    #     @client.on_friends do |friends|
    #       # do something with the friends list
    #     end
    def on_friends(&block)
      on('friends', &block)
    end

    # Set a Proc to be run when a stall warning is received.
    #
    #     @client = TweetStream::Client.new
    #     @client.on_stall_warning do |warning|
    #       # do something with the stall warning
    #     end
    def on_stall_warning(&block)
      on('stall_warning', &block)
    end

    # Set a Proc to be run on userstream events. The block receives the
    # whole parsed event message.
    #
    #     @client = TweetStream::Client.new
    #     @client.on_event(:favorite) do |event|
    #       # do something with the event
    #     end
    def on_event(event, &block)
      on(event, &block)
    end

    # Register or look up the callback for an event name.
    #
    # @param event [#to_s] event name; stored under its string form
    # @return [TweetStream::Client] self when a block is given (for chaining)
    # @return [Proc, nil] the currently registered callback when no block is given
    def on(event, &block)
      if block_given?
        @callbacks[event.to_s] = block
        self
      else
        @callbacks[event.to_s]
      end
    end

    # Connect to twitter, starting a new EventMachine run loop unless a
    # reactor is already running.
    def start(path, query_parameters = {}, &block)
      if EventMachine.reactor_running?
        connect(path, query_parameters, &block)
      else
        EventMachine.epoll
        EventMachine.kqueue

        EventMachine::run do
          connect(path, query_parameters, &block)
        end
      end
    end

    # Connect to twitter without starting a new EventMachine run loop.
    #
    # Every stream item is parsed as JSON and dispatched through #respond_to,
    # then handed to the 'anything' callback. Items that fail to parse, or
    # that do not parse to a Hash, are reported to the 'error' callback and
    # skipped.
    #
    # @return the EM::Twitter stream object, also available through #stream
    # @raise [TweetStream::ReconnectError] from the stream's max-reconnects hook
    def connect(path, options = {}, &block)
      stream_parameters, callbacks = connection_options(path, options)

      @stream = EM::Twitter::Client.connect(stream_parameters)
      @stream.each do |item|
        begin
          hash = Yajl::Parser.parse(item, :symbolize_keys => true)
        rescue Yajl::ParseError
          invoke_callback(callbacks['error'], "Yajl::ParseError occured in stream: #{item}")
          next
        end

        unless hash.is_a?(::Hash)
          invoke_callback(callbacks['error'], "Unexpected JSON object in stream: #{item}")
          next
        end

        # NOTE(review): presumably turns off the twitter gem's identity map so
        # objects built from stream data are not cached -- confirm against the gem.
        Twitter.identity_map = false

        respond_to(hash, callbacks, &block)

        yield_message_to(callbacks['anything'], hash)
      end

      @stream.on_error do |message|
        invoke_callback(callbacks['error'], message)
      end

      @stream.on_unauthorized do
        invoke_callback(callbacks['unauthorized'])
      end

      @stream.on_enhance_your_calm do
        invoke_callback(callbacks['enhance_your_calm'])
      end

      @stream.on_reconnect do |timeout, retries|
        invoke_callback(callbacks['reconnect'], timeout, retries)
      end

      @stream.on_max_reconnects do |timeout, retries|
        raise TweetStream::ReconnectError.new(timeout, retries)
      end

      @stream.on_no_data_received do
        invoke_callback(callbacks['no_data_received'])
      end

      @stream
    end

    # Dispatch one parsed stream message to the matching callback.
    #
    # The branches are tried in order and only the first match fires. Regular
    # statuses are remembered as the last status and also yielded to the block
    # given to the streaming call; messages carrying :for_user are yielded to
    # that block as the raw hash.
    #
    # @param hash [Hash] parsed message with symbol keys
    # @param callbacks [Hash] callbacks keyed by event name (String)
    def respond_to(hash, callbacks, &block)
      if hash[:control] && hash[:control][:control_uri]
        @control_uri = hash[:control][:control_uri]
        # Loaded lazily: only needed once a control URI has been received.
        require 'tweetstream/site_stream_client'
        @control = TweetStream::SiteStreamClient.new(@control_uri, options)
        @control.on_error(&callbacks['error'])
      elsif hash[:warning]
        invoke_callback(callbacks['stall_warning'], hash[:warning])
      elsif hash[:delete] && hash[:delete][:status]
        invoke_callback(callbacks['delete'], hash[:delete][:status][:id], hash[:delete][:status][:user_id])
      elsif hash[:scrub_geo] && hash[:scrub_geo][:up_to_status_id]
        invoke_callback(callbacks['scrub_geo'], hash[:scrub_geo][:up_to_status_id], hash[:scrub_geo][:user_id])
      elsif hash[:limit] && hash[:limit][:track]
        invoke_callback(callbacks['limit'], hash[:limit][:track])
      elsif hash[:direct_message]
        yield_message_to(callbacks['direct_message'], Twitter::DirectMessage.new(hash[:direct_message]))
      elsif hash[:status_withheld]
        invoke_callback(callbacks['status_withheld'], hash[:status_withheld])
      elsif hash[:user_withheld]
        invoke_callback(callbacks['user_withheld'], hash[:user_withheld])
      elsif hash[:event]
        invoke_callback(callbacks[hash[:event].to_s], hash)
      elsif hash[:friends]
        invoke_callback(callbacks['friends'], hash[:friends])
      elsif hash[:text] && hash[:user]
        @last_status = Twitter::Tweet.new(hash)
        yield_message_to(callbacks['timeline_status'], @last_status)

        yield_message_to(block, @last_status) if block_given?
      elsif hash[:for_user]
        yield_message_to(block, hash) if block_given?
      end
    end

    # Terminate the currently running TweetStream and close EventMachine loop
    #
    # @return [Twitter::Tweet, nil] the last status received, if any
    def stop
      EventMachine.stop_event_loop
      @last_status
    end

    # Close the connection to twitter without closing the eventmachine loop
    def close_connection
      @stream.close_connection if @stream
    end

    # Call #stop on the current stream, if one has been opened.
    def stop_stream
      @stream.stop if @stream
    end

    protected

    # Shared implementation of #track, #follow and #locations: split an
    # optional trailing Hash of query parameters off the argument list and
    # call #filter with the remaining values stored under +param+.
    def filter_by(param, values, &block)
      query_params = values.last.is_a?(::Hash) ? values.pop : {}
      filter(query_params.merge(param => values), &block)
    end

    # Turn the :follow, :track and :locations entries into strings: arrays are
    # flattened and comma-joined, any other truthy value is converted with to_s.
    #
    # @param query_parameters [Hash] modified in place
    # @return [Hash] the same hash
    def normalize_filter_parameters(query_parameters = {})
      [:follow, :track, :locations].each do |param|
        value = query_parameters[param]
        if value.kind_of?(Array)
          query_parameters[param] = value.flatten.map { |q| q.to_s }.join(',')
        elsif value
          query_parameters[param] = value.to_s
        end
      end
      query_parameters
    end

    # Build the authentication part of the stream parameters: basic
    # credentials when auth_method is 'basic', OAuth credentials otherwise.
    def auth_params
      if auth_method.to_s == 'basic'
        { :basic => {
            :username => username,
            :password => password
          }
        }
      else
        { :oauth => {
            :consumer_key => consumer_key,
            :consumer_secret => consumer_secret,
            :token => oauth_token,
            :token_secret => oauth_token_secret
          }
        }
      end
    end

    # A utility method used to invoke callback methods against the Client.
    # Does nothing when no callback is set.
    def invoke_callback(callback, *args)
      callback.call(*args) if callback
    end

    # Give the block the option to receive either one
    # or two arguments, depending on its arity: the message alone, or the
    # message and this client. Anything that is not a Proc, and Procs whose
    # arity is neither 1 nor 2, are not invoked.
    def yield_message_to(procedure, message)
      if procedure.is_a?(Proc)
        case procedure.arity
        when 1 then invoke_callback(procedure, message)
        when 2 then invoke_callback(procedure, message, self)
        end
      end
    end

    # Split the options of a streaming call into the parameters handed to
    # EM::Twitter::Client.connect and the effective callbacks.
    #
    # Callbacks passed through the options hash (deprecated) take precedence
    # over callbacks registered with the on_* methods.
    #
    # @param path [String] request path of the streaming endpoint
    # @param options [Hash] query parameters plus optional callbacks, :method
    #   and :extra_stream_parameters; not modified
    # @return [Array(Hash, Hash)] stream parameters and callbacks keyed by name
    def connection_options(path, options)
      warn_if_callbacks(options)

      # Work on a copy so the caller's hash keeps its entries.
      options = options.dup

      callbacks = @callbacks.dup
      OPTION_CALLBACKS.each do |callback|
        # Hash#merge returns a new hash; assign directly so the callback is kept.
        callbacks[callback.to_s] = options.delete(callback) if options[callback]
      end

      # A truthy :inited option has already been moved into callbacks above;
      # the delete only strips a leftover falsy entry from the query parameters.
      inited_proc = options.delete(:inited) || callbacks['inited']
      extra_stream_parameters = options.delete(:extra_stream_parameters) || {}

      stream_params = {
        :path       => path,
        :method     => (options.delete(:method) || 'get').to_s.upcase,
        :user_agent => user_agent,
        :on_inited  => inited_proc,
        :params     => normalize_filter_parameters(options),
        :proxy      => proxy
      }.merge(extra_stream_parameters).merge(auth_params)

      [stream_params, callbacks]
    end

    # Emit a deprecation warning when any callback is supplied through the
    # options hash.
    def warn_if_callbacks(options={})
      if OPTION_CALLBACKS.any? { |callback| options[callback] }
        Kernel.warn("Passing callbacks via the options hash is deprecated and will be removed in TweetStream 3.0")
      end
    end
  end
end