twitter-search-watcher 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/twitter-search-watcher.rb +259 -0
- metadata +55 -0
@@ -0,0 +1,259 @@
|
|
1
|
+
%w( rubygems cgi json open-uri ostruct ).each {|lib| require lib }
|
2
|
+
|
3
|
+
#
|
4
|
+
# = Usage
|
5
|
+
#
|
6
|
+
# coming soon
|
7
|
+
#
|
8
|
+
# Watches a Twitter search and hands every tweet it finds to a block.
#
# A watcher holds the search parameters (see QUERY_STRING_ATTRIBUTES), builds
# Twitter Search API URLs from them, fetches and parses the JSON responses and
# can follow paginated results.
#
#   TwitterSearchWatcher.new('foo').watch! {|tweet| puts tweet.text }
#
class TwitterSearchWatcher

  TWITTER_SEARCH_URL      = 'http://search.twitter.com/search.json'
  DEFAULT_USER_AGENT      = 'TwitterSearchWatcher RubyGem http://github.com/devfu/twitter-search-watcher'
  QUERY_STRING_ATTRIBUTES = [ :q, :to, :from, :since_id, :page, :max_id, :rpp ]

  # The User-Agent header value to send along with all Twitter Search API requests
  # (the reader, defined below, falls back to DEFAULT_USER_AGENT)
  attr_writer :user_agent

  # A string you want to search twitter for
  attr_accessor :q

  # The username of someone you want to search replies to
  attr_accessor :to

  # The username of someone you want to search replies from
  attr_accessor :from

  # Get a particular page of Twitter search results (pagination).
  # Typically used in conjunction with :max_id
  attr_accessor :page

  # Used for pagination, so you can get page=3 where the max_id of the first page was 1234
  attr_accessor :max_id

  # Only get tweets with ID's greater than this ID (useful for only getting new tweets)
  attr_accessor :since_id

  # Number of results per page (max 100). The writer, defined below, enforces the maximum.
  attr_reader :rpp

  # The number of seconds to wait between Twitter calls.  Default: 60 (seconds)
  # (the reader, defined below, supplies the default)
  attr_writer :check_every

  # The maximum number of pages to check for tweets (the first page counts as one)
  #
  # If nil, we'll check until there are no more pages (when :next_page isn't present)
  attr_accessor :max_pages

  # Sets the number of results per page.
  #
  # Raises a RuntimeError if the value is greater than 100.
  # Passing nil clears the setting, so no rpp parameter is sent.
  def rpp=(value)
    raise "The maximum rpp (Results per Page) value is 100" if value && value > 100
    @rpp = value
  end

  # Seconds to wait between polls; 60 unless explicitly set.
  def check_every
    @check_every || 60
  end

  # Create a new TwitterSearchWatcher
  #
  #   TwitterSearchWatcher.new 'string to search'
  #   TwitterSearchWatcher.new 'string to search', :check_every => 60
  #   TwitterSearchWatcher.new :to => 'barackobama', :from => 'SenJohnMcCain'
  #
  # Every key of the options Hash is assigned through the matching
  # <tt>key=</tt> writer, so an unknown key raises NoMethodError.
  def initialize(search_string = nil, options = nil)
    if search_string.is_a? Hash
      options = search_string
    else
      self.q = search_string
    end

    options.each {|key, value| send "#{key}=", value } if options
  end

  # Returns the URL we'll use to call the Twitter Search API.
  #
  # Without parameters, it'll generate a URL just from this TwitterSearchWatcher instance.
  #
  # With parameters, it'll override the TwitterSearchWatcher instance's options with
  # whatever you pass, eg.
  #
  #   >> TwitterSearchWatcher.new( 'foo', :rpp => 15 ).search_url
  #   => "http://search.twitter.com/search.json?q=foo&rpp=15"
  #
  #   >> TwitterSearchWatcher.new( 'foo', :rpp => 15 ).search_url( :rpp => 99 )
  #   => "http://search.twitter.com/search.json?q=foo&rpp=99"
  #
  def search_url(additional_parameters = nil)
    TWITTER_SEARCH_URL + build_query_string(additional_parameters)
  end

  # The User-Agent sent with requests; DEFAULT_USER_AGENT unless explicitly set.
  def user_agent
    @user_agent || DEFAULT_USER_AGENT
  end

  # Performs a search and returns the parsed JSON response (a Hash).
  # Accepts the same parameters as #search_url
  def search!(additional_parameters = nil)
    json search_url(additional_parameters)
  end

  # Performs a search, given the response from another search.
  #
  # If a response is given, the search will only return tweets newer than the given response's tweets.
  # If a response is not given, this performs a normal search.
  #
  # Accepts additional parameters (same as #search_url)
  def search_newer!(response = nil, additional_parameters = nil)
    return search!(additional_parameters) unless response

    search!( (additional_parameters || {}).merge( :since_id => response['max_id'] ) )
  end

  # Performs a search, given the response from another search.
  #
  # If the response given is paginated (ie. there are additional tweets available on additional pages),
  # this will return the next page.  Else, this will return nil.
  #
  # Accepts additional parameters (same as #search_url)
  def search_more!(response, additional_parameters = nil)
    return nil unless response['next_page']

    # a response without a 'page' key is treated as the first page
    next_page = (response['page'] || 1) + 1
    search!( (additional_parameters || {}).merge( :page => next_page, :max_id => response['max_id'] ) )
  end

  # Performs a search! and search_more! as needed to return a response with *all* pages of tweets.
  #
  # This respects max_pages (or a :max_pages key in the parameters passed, which takes precedence)
  # and will make at most that many requests in total, the first page included.
  #
  # The response object returned is similar to the responses returned by all other methods, but we only
  # currently have a 'results' key on the Hash returned.  If you're used to getting some of the other keys
  # returned by the other methods (which Twitter returns), be warned!
  #
  # To get the tweets off of the response:
  #
  #   tweets = watcher.search_with_pagination!['results']
  def search_with_pagination!(additional_parameters = nil)
    response     = search! additional_parameters
    max_requests = (additional_parameters && additional_parameters[:max_pages]) || max_pages

    tweets = { 'results' => (response['results'] || []) }
    pages_requested_so_far = 1 # the search above was the first request

    while max_requests.nil? || pages_requested_so_far < max_requests
      response = search_more!(response, additional_parameters)
      break unless response # no further pages available

      tweets['results'] += (response['results'] || [])
      pages_requested_so_far += 1
    end

    tweets
  end

  # Helper to do an HTTP GET request (sending this watcher's User-Agent) and return the response body
  def get(url)
    TwitterSearchWatcher.get url, 'User-Agent' => user_agent
  end

  # Helper to do an HTTP GET request and return the response body
  #
  # The options Hash is handed to open-uri (header fields such as 'User-Agent').
  # The URL is opened through URI#open rather than Kernel#open: open-uri no longer
  # hooks Kernel#open as of Ruby 3.0, and Kernel#open would also run a command
  # for a string starting with "|".
  def self.get(url, options = {})
    headers = { 'User-Agent' => DEFAULT_USER_AGENT }.merge(options)
    URI.parse(url).open(headers).read
  end

  # Helper to #get a url and return the response body parsed as JSON
  def json(url)
    JSON.parse get(url)
  end

  # Helper to #get a url and return the response body parsed as JSON
  def self.json(url)
    JSON.parse get(url)
  end

  # Instantiates a new TwitterSearchWatcher given the search_string and options and then
  # calls search_with_pagination! on the instance, returning the response.
  #
  #   tweets_json = TwitterSearchWatcher.search_with_pagination!('foo')['results']
  #
  def self.search_with_pagination!(search_string, options = nil)
    TwitterSearchWatcher.new(search_string, options).search_with_pagination!
  end

  # Instantiates a new TwitterSearchWatcher given the search_string and options and then
  # calls #watch! on the instance using the block given.
  def self.watch!(search_string, options = nil, &block)
    TwitterSearchWatcher.new(search_string, options).watch!(&block)
  end

  # Starts watching this search in a loop.
  # It will wait #check_every seconds between new requests (except requests to get additional pages).
  # Every time a new tweet is found, that tweet is passed to the block given.
  #
  #   TwitterSearchWatcher.new('foo').watch! {|tweet| puts "got tweet: #{ tweet.text }" }
  #
  # Raises ArgumentError if no block is given. Installs an INT signal handler
  # that exits the process.
  def watch!(additional_parameters = nil, &block)
    raise ArgumentError, 'watch! requires a block to pass tweets to' unless block

    @max_id_found_so_far = 0

    trap('INT'){ puts "\nexiting ..."; exit }
    puts "Watching for tweets: #{ search_url(additional_parameters) }"

    loop do
      check_for_new_tweets additional_parameters, block
      sleep check_every
    end
  end

  private

  # Does one polling round for #watch!: searches for tweets newer than the last
  # response, then follows the additional pages (up to max_pages in total),
  # passing every tweet found to the block.
  def check_for_new_tweets(additional_parameters, block)
    newer_than     = @last_response && @last_response['max_id']
    @last_response = search_newer!(@last_response, additional_parameters)
    call_tweet_callbacks(@last_response, block)
    update_max_id @last_response

    # keep the since_id restriction while paging, otherwise the additional pages
    # would contain tweets that were already delivered by an earlier round
    page_parameters = additional_parameters
    page_parameters = (additional_parameters || {}).merge( :since_id => newer_than ) if newer_than

    response       = @last_response
    pages_searched = 1
    # check the limit *before* requesting, so no request is made just to be thrown away
    while max_pages.nil? || pages_searched < max_pages
      response = search_more!(response, page_parameters)
      break unless response

      pages_searched += 1
      call_tweet_callbacks(response, block)
      update_max_id response
    end
  end

  # Remembers the highest max_id seen in any response so far.
  def update_max_id(response)
    max_id = response['max_id']
    @max_id_found_so_far = max_id if max_id && max_id > (@max_id_found_so_far || 0)
  end

  # Passes each tweet of the response to the block as an OpenStruct.
  # The tweet's 'id' is exposed as tweet_id.
  def call_tweet_callbacks(response, block)
    (response['results'] || []).each do |tweet|
      attributes = tweet.dup # leave the parsed response untouched
      attributes['tweet_id'] = attributes.delete 'id'
      block.call OpenStruct.new(attributes)
    end
  end

  # URL-escapes a value for the query string, keeping double quotes literal.
  # (CGI.escape already turns spaces into '+'.)
  def escape(string)
    CGI.escape(string.to_s).gsub('%22','"')
  end

  # Builds the query string ('?' included) from this watcher's attributes.
  # Keys of additional_parameters listed in QUERY_STRING_ATTRIBUTES override
  # the watcher's own values; a nil override removes the parameter.
  def build_query_string(additional_parameters = nil)
    parameter_values = {}

    QUERY_STRING_ATTRIBUTES.each do |attr|
      value = send(attr)
      parameter_values[attr] = value if value
    end

    # if additional parameters are passed, we override the watcher's parameters with these
    if additional_parameters
      QUERY_STRING_ATTRIBUTES.each do |attr|
        parameter_values[attr] = additional_parameters[attr] if additional_parameters.key?(attr)
      end
    end

    '?' + parameter_values.map {|k,v| "#{ k }=#{ escape(v) }" if v }.compact.join('&')
  end

end
|
metadata
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: twitter-search-watcher
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.4
|
5
|
+
platform: ruby
|
6
|
+
authors: []
|
7
|
+
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2010-01-05 00:00:00 -07:00
|
13
|
+
default_executable:
|
14
|
+
dependencies: []
|
15
|
+
|
16
|
+
description: for watching a particular Twitter search and calling code whenever there are new tweets
|
17
|
+
email:
|
18
|
+
executables: []
|
19
|
+
|
20
|
+
extensions: []
|
21
|
+
|
22
|
+
extra_rdoc_files: []
|
23
|
+
|
24
|
+
files:
|
25
|
+
- lib/twitter-search-watcher.rb
|
26
|
+
has_rdoc: true
|
27
|
+
homepage:
|
28
|
+
licenses: []
|
29
|
+
|
30
|
+
post_install_message:
|
31
|
+
rdoc_options: []
|
32
|
+
|
33
|
+
require_paths:
|
34
|
+
- lib
|
35
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
36
|
+
requirements:
|
37
|
+
- - ">="
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
version: "0"
|
40
|
+
version:
|
41
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
42
|
+
requirements:
|
43
|
+
- - ">="
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: "0"
|
46
|
+
version:
|
47
|
+
requirements: []
|
48
|
+
|
49
|
+
rubyforge_project:
|
50
|
+
rubygems_version: 1.3.5
|
51
|
+
signing_key:
|
52
|
+
specification_version: 3
|
53
|
+
summary: for watching a Twitter search
|
54
|
+
test_files: []
|
55
|
+
|