twitter-search-watcher 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/twitter-search-watcher.rb +259 -0
- metadata +55 -0
@@ -0,0 +1,259 @@
|
|
1
|
+
%w( rubygems cgi json open-uri ostruct ).each {|lib| require lib }
|
2
|
+
|
3
|
+
#
|
4
|
+
# = Usage
|
5
|
+
#
|
6
|
+
# coming soon
|
7
|
+
#
|
8
|
+
class TwitterSearchWatcher

  TWITTER_SEARCH_URL      = 'http://search.twitter.com/search.json'.freeze
  DEFAULT_USER_AGENT      = 'TwitterSearchWatcher RubyGem http://github.com/devfu/twitter-search-watcher'.freeze

  # The attributes that are serialized into the query string, in the order
  # in which they appear in the generated URL.
  QUERY_STRING_ATTRIBUTES = [ :q, :to, :from, :since_id, :page, :max_id, :rpp ].freeze

  # The User-Agent header value to send along with all Twitter Search API requests.
  # The reader (#user_agent, below) falls back to DEFAULT_USER_AGENT.
  attr_writer :user_agent

  # A string you want to search twitter for
  attr_accessor :q

  # The username of someone you want to search replies to
  attr_accessor :to

  # The username of someone you want to search replies from
  attr_accessor :from

  # Get a particular page of Twitter search results (pagination).
  # Typically used in conjunction with :max_id
  attr_accessor :page

  # Used for pagination, so you can get page=3 where the max_id of the first page was 1234
  attr_accessor :max_id

  # Only get tweets with ID's greater than this ID (useful for only getting new tweets)
  attr_accessor :since_id

  # Number of results per page (max 100). The writer (#rpp=, below) validates the value.
  attr_reader :rpp

  # The number of seconds to wait between Twitter calls. Default: 60 (seconds).
  # The reader (#check_every, below) supplies the default.
  attr_writer :check_every

  # The maximum number of pages to check for tweets (counting the first page).
  #
  # If nil, we'll check until there are no more pages (when :next_page isn't present)
  attr_accessor :max_pages

  # Sets the number of results per page.
  #
  # Pass nil to clear the value (the parameter is then left out of the URL).
  # Raises a RuntimeError if the value is greater than 100.
  def rpp=(value)
    raise "The maximum rpp (Results per Page) value is 100" if value && value > 100
    @rpp = value
  end

  # Seconds to wait between polls in #watch!. Defaults to 60 when unset.
  def check_every
    @check_every || 60
  end

  # Create a new TwitterSearchWatcher
  #
  #   TwitterSearchWatcher.new 'string to search'
  #   TwitterSearchWatcher.new 'string to search', :check_every => 60
  #   TwitterSearchWatcher.new :to => 'barackobama', :from => 'SenJohnMcCain'
  #
  # If the first argument is a Hash it is treated as the options and no
  # search string is set. Every option key is assigned through the matching
  # "key=" writer, so an unknown key raises NoMethodError.
  def initialize(search_string = nil, options = nil)
    if search_string.is_a? Hash
      options = search_string
    else
      self.q = search_string
    end

    options.each { |key, value| send "#{ key }=", value } if options
  end

  # Returns the URL we'll use to call the Twitter Search API.
  #
  # Without parameters, it'll generate a URL just from this TwitterSearchWatcher instance.
  #
  # With parameters, it'll override the TwitterSearchWatcher instance's options with
  # whatever you pass, eg.
  #
  #   >> TwitterSearchWatcher.new( 'foo', :rpp => 15 ).search_url
  #   => "http://search.twitter.com/search.json?q=foo&rpp=15"
  #
  #   >> TwitterSearchWatcher.new( 'foo', :rpp => 15 ).search_url( :rpp => 99 )
  #   => "http://search.twitter.com/search.json?q=foo&rpp=99"
  #
  def search_url(additional_parameters = nil)
    TWITTER_SEARCH_URL + build_query_string(additional_parameters)
  end

  # The User-Agent sent with requests. Defaults to DEFAULT_USER_AGENT when unset.
  def user_agent
    @user_agent || DEFAULT_USER_AGENT
  end

  # Performs a search and returns the parsed JSON response (a Hash).
  # Accepts the same parameters as #search_url
  def search!(additional_parameters = nil)
    json search_url(additional_parameters)
  end

  # Performs a search, given the response from another search.
  #
  # If a response is given, the search will only return tweets newer than the given
  # response's tweets (the response's 'max_id' is sent as :since_id).
  # If a response is not given, this performs a normal search.
  #
  # Accepts additional parameters (same as #search_url)
  def search_newer!(response = nil, additional_parameters = nil)
    return search!(additional_parameters) unless response

    search!( (additional_parameters || {}).merge( :since_id => response['max_id'] ) )
  end

  # Performs a search, given the response from another search.
  #
  # If the response given is paginated (ie. there are additional tweets available on additional pages),
  # this will return the next page.  Else, this will return nil.
  #
  # Accepts additional parameters (same as #search_url)
  def search_more!(response, additional_parameters = nil)
    return nil unless response['next_page']

    next_page_parameters = { :page => (response['page'] + 1), :max_id => response['max_id'] }
    search!( (additional_parameters || {}).merge(next_page_parameters) )
  end

  # Performs a search! and search_more! as needed to return a response with *all* pages of tweets.
  #
  # This respects max_pages (or a :max_pages entry in additional_parameters, which wins):
  # at most that many requests are made in total, counting the first page.
  #
  # The response object returned is similar to the responses returned by all other methods, but we only
  # currently have a 'results' key on the Hash returned.  If you're used to getting some of the other keys
  # returned by the other methods (which Twitter returns), be warned!
  #
  # To get the tweets off of the response:
  #
  #   tweets = watcher.search_with_pagination!['results']
  def search_with_pagination!(additional_parameters = nil)
    response = search! additional_parameters
    limit    = page_limit(additional_parameters)

    tweets          = { 'results' => (response['results'] || []) }
    pages_requested = 1 # we've already done 1 request

    # The limit is checked *before* search_more! so we never issue a request
    # whose result would be thrown away.
    while (limit.nil? || pages_requested < limit) && (response = search_more!(response, additional_parameters))
      tweets['results'] += (response['results'] || [])
      pages_requested   += 1
    end

    tweets
  end

  # Helper to do an HTTP GET request (sending this instance's User-Agent) and return the response body
  def get(url)
    TwitterSearchWatcher.get url, 'User-Agent' => user_agent
  end

  # Helper to do an HTTP GET request and return the response body.
  #
  # +options+ are passed to open-uri (typically request headers) and are
  # merged over the default User-Agent header.
  #
  # The URL is parsed with URI.parse and opened through open-uri's URI#open
  # rather than Kernel#open: Kernel#open only understands URLs on Rubies where
  # open-uri patches it, and it would treat a string starting with "|" as a
  # command to run. The block form makes sure the stream is closed.
  def self.get(url, options = {})
    headers = { 'User-Agent' => DEFAULT_USER_AGENT }.merge(options)
    URI.parse(url).open(headers) { |io| io.read }
  end

  # Helper to #get a url and return the response body parsed as JSON
  def json(url)
    JSON.parse get(url)
  end

  # Helper to .get a url and return the response body parsed as JSON
  def self.json(url)
    JSON.parse get(url)
  end

  # Instantiates a new TwitterSearchWatcher given the search_string and options and then
  # calls search_with_pagination! on the instance, returning the response.
  #
  #   tweets_json = TwitterSearchWatcher.search_with_pagination!('foo')['results']
  #
  def self.search_with_pagination!(search_string, options = nil)
    TwitterSearchWatcher.new(search_string, options).search_with_pagination!
  end

  # Instantiates a new TwitterSearchWatcher given the search_string and options and then
  # calls #watch! on the instance using the block given.
  def self.watch!(search_string, options = nil, &block)
    TwitterSearchWatcher.new(search_string, options).watch!(&block)
  end

  # Starts watching this search in a loop.
  # It will wait #check_every seconds between new requests (except requests to get additional pages).
  # Every time a new tweet is found, that tweet is passed to the block given
  # (as an OpenStruct; the tweet's 'id' is exposed as #tweet_id).
  #
  #   TwitterSearchWatcher.new('foo').watch! {|tweet| puts "got tweet: #{ tweet.text }" }
  #
  # Each poll reads at most max_pages pages (counting the first one); with
  # max_pages nil every available page is read.
  #
  # Installs an INT signal handler that prints a message and exits the process.
  # Raises ArgumentError when no block is given.
  def watch!(additional_parameters = nil, &block)
    raise ArgumentError, 'watch! requires a block to pass tweets to' unless block

    @max_id_found_so_far = 0

    trap('INT'){ puts "\nexiting ..."; exit }
    puts "Watching for tweets: #{ search_url(additional_parameters) }"

    loop do
      # Remember which since_id this poll is going to use, so that requests for
      # additional pages are restricted to new tweets as well.
      since_id = @last_response && @last_response['max_id']

      @last_response = search_newer!(@last_response, additional_parameters)
      call_tweet_callbacks(@last_response, block)
      update_max_id @last_response

      page_parameters = additional_parameters
      page_parameters = (additional_parameters || {}).merge( :since_id => since_id ) if since_id
      watch_additional_pages(@last_response, page_parameters, block)

      sleep check_every
    end
  end

  private

  # The page limit to apply: a :max_pages entry in additional_parameters wins
  # over the instance's max_pages. nil means "no limit".
  def page_limit(additional_parameters)
    (additional_parameters && additional_parameters[:max_pages]) || max_pages
  end

  # Fetches the pages following first_response (while there is a 'next_page'
  # and the page limit allows it) and passes their tweets to block.
  def watch_additional_pages(first_response, additional_parameters, block)
    limit         = page_limit(additional_parameters)
    response      = first_response
    pages_fetched = 1 # first_response is page number one

    while limit.nil? || pages_fetched < limit
      response = search_more!(response, additional_parameters)
      break unless response # no more pages

      pages_fetched += 1
      call_tweet_callbacks(response, block)
      update_max_id response
    end
  end

  # Remembers the highest 'max_id' seen so far; responses without one are ignored.
  def update_max_id(response)
    max_id = response['max_id']
    @max_id_found_so_far = max_id if max_id && max_id > @max_id_found_so_far
  end

  # Calls block once per tweet in response['results'], wrapping each tweet in an OpenStruct.
  def call_tweet_callbacks(response, block)
    (response['results'] || []).each do |tweet|
      # Work on a copy so the parsed response is left untouched.
      attributes = tweet.dup

      # 'id' is exposed as 'tweet_id' (presumably so that it does not clash
      # with Object#id on older Rubies -- not verifiable from this file).
      attributes['tweet_id'] = attributes.delete('id') if attributes.key?('id')

      block.call OpenStruct.new(attributes)
    end
  end

  # URL-escapes a value, except that double quotes are left as-is.
  # (CGI.escape already turns spaces into '+'.)
  def escape(string)
    CGI.escape(string.to_s).gsub('%22', '"')
  end

  # Builds the query string ("?a=b&c=d") from this instance's attributes,
  # overridden by whatever is present in additional_parameters.
  #
  # A key passed with a nil value removes that parameter from the query string.
  def build_query_string(additional_parameters = nil)
    parameter_values = {}

    QUERY_STRING_ATTRIBUTES.each do |attr|
      value = send(attr)
      parameter_values[attr] = value if value
    end

    # if additional parameters are passed, we override the watcher's parameters with these
    if additional_parameters
      QUERY_STRING_ATTRIBUTES.each do |attr|
        parameter_values[attr] = additional_parameters[attr] if additional_parameters.key?(attr)
      end
    end

    '?' + parameter_values.map { |name, value| "#{ name }=#{ escape(value) }" if value }.compact.join('&')
  end

end
|
metadata
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: twitter-search-watcher
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.4
|
5
|
+
platform: ruby
|
6
|
+
authors: []
|
7
|
+
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2010-01-05 00:00:00 -07:00
|
13
|
+
default_executable:
|
14
|
+
dependencies: []
|
15
|
+
|
16
|
+
description: for watching a particular Twitter search and calling code whenever there are new tweets
|
17
|
+
email:
|
18
|
+
executables: []
|
19
|
+
|
20
|
+
extensions: []
|
21
|
+
|
22
|
+
extra_rdoc_files: []
|
23
|
+
|
24
|
+
files:
|
25
|
+
- lib/twitter-search-watcher.rb
|
26
|
+
has_rdoc: true
|
27
|
+
homepage:
|
28
|
+
licenses: []
|
29
|
+
|
30
|
+
post_install_message:
|
31
|
+
rdoc_options: []
|
32
|
+
|
33
|
+
require_paths:
|
34
|
+
- lib
|
35
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
36
|
+
requirements:
|
37
|
+
- - ">="
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
version: "0"
|
40
|
+
version:
|
41
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
42
|
+
requirements:
|
43
|
+
- - ">="
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: "0"
|
46
|
+
version:
|
47
|
+
requirements: []
|
48
|
+
|
49
|
+
rubyforge_project:
|
50
|
+
rubygems_version: 1.3.5
|
51
|
+
signing_key:
|
52
|
+
specification_version: 3
|
53
|
+
summary: for watching a Twitter search
|
54
|
+
test_files: []
|
55
|
+
|