twitter-search-watcher 0.1.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (2) hide show
  1. data/lib/twitter-search-watcher.rb +259 -0
  2. metadata +55 -0
@@ -0,0 +1,259 @@
1
+ %w( rubygems cgi json open-uri ostruct ).each {|lib| require lib }
2
+
3
+ #
4
+ # = Usage
5
+ #
6
+ # coming soon
7
+ #
8
# Polls the Twitter Search API for a query and hands every tweet found to a block.
#
# = Usage
#
#   # one-off search (first page only); returns the parsed JSON response Hash
#   TwitterSearchWatcher.new('foo').search!['results']
#
#   # every page of results, concatenated under the 'results' key
#   TwitterSearchWatcher.search_with_pagination!('foo')['results']
#
#   # poll forever, calling the block once per tweet
#   TwitterSearchWatcher.watch!('foo', :check_every => 30) {|tweet| puts tweet.text }
#
class TwitterSearchWatcher

  TWITTER_SEARCH_URL      = 'http://search.twitter.com/search.json'
  DEFAULT_USER_AGENT      = 'TwitterSearchWatcher RubyGem http://github.com/devfu/twitter-search-watcher'
  QUERY_STRING_ATTRIBUTES = [ :q, :to, :from, :since_id, :page, :max_id, :rpp ]

  # The User-Agent header value to send along with all Twitter Search API requests
  # (the reader below falls back to DEFAULT_USER_AGENT)
  attr_writer :user_agent

  # A string you want to search twitter for
  attr_accessor :q

  # The username of someone you want to search replies to
  attr_accessor :to

  # The username of someone you want to search replies from
  attr_accessor :from

  # Get a particular page of Twitter search results (pagination).
  # Typically used in conjunction with :max_id
  attr_accessor :page

  # Used for pagination, so you can get page=3 where the max_id of the first page was 1234
  attr_accessor :max_id

  # Only get tweets with ID's greater than this ID (useful for only getting new tweets)
  attr_accessor :since_id

  # Number of results per page (max 100); the writer below validates the value
  attr_reader :rpp

  # The number of seconds to wait between Twitter calls.  Default: 60 (seconds)
  attr_writer :check_every

  # The maximum number of pages to check for tweets (the first page counts as one)
  #
  # If nil, we'll check until there are no more pages (when :next_page isn't present)
  attr_accessor :max_pages

  # Sets the number of results per page.
  #
  # Accepts nil (to clear the setting), an Integer, or a numeric String.
  # Raises a RuntimeError if the value is greater than 100.
  def rpp=(value)
    raise "The maximum rpp (Results per Page) value is 100" if value && value.to_i > 100
    @rpp = value
  end

  # Seconds to sleep between polls in #watch!  (60 unless one was assigned)
  def check_every
    @check_every || 60
  end

  # Create a new TwitterSearchWatcher
  #
  #   TwitterSearchWatcher.new 'string to search'
  #   TwitterSearchWatcher.new 'string to search', :check_every => 60
  #   TwitterSearchWatcher.new :to => 'barackobama', :from => 'SenJohnMcCain'
  #
  # Every key in the options Hash is assigned through the matching writer
  # (eg. :rpp => 15 calls #rpp=), so an unknown key raises NoMethodError.
  #
  # NOTE: when the first argument is a Hash it *is* the options, and a second
  # argument (if any) is ignored.
  def initialize(search_string = nil, options = nil)
    if search_string.is_a? Hash
      options = search_string
    else
      self.q = search_string
    end

    options.each {|k,v| send "#{k}=", v } if options
  end

  # Returns the URL we'll use to call the Twitter Search API.
  #
  # Without parameters, it'll generate a URL just from this TwitterSearchWatcher instance.
  #
  # With parameters, it'll override the TwitterSearchWatcher instance's options with
  # whatever you pass, eg.
  #
  #   >> TwitterSearchWatcher.new( 'foo', :rpp => 15 ).search_url
  #   => "http://search.twitter.com/search.json?q=foo&rpp=15"
  #
  #   >> TwitterSearchWatcher.new( 'foo', :rpp => 15 ).search_url( :rpp => 99 )
  #   => "http://search.twitter.com/search.json?q=foo&rpp=99"
  #
  def search_url(additional_parameters = nil)
    TWITTER_SEARCH_URL + build_query_string(additional_parameters)
  end

  # The User-Agent sent with requests (DEFAULT_USER_AGENT unless one was assigned)
  def user_agent
    @user_agent || DEFAULT_USER_AGENT
  end

  # Performs a search and returns the parsed JSON response (a Hash).
  # Accepts the same parameters as #search_url
  def search!(additional_parameters = nil)
    json search_url(additional_parameters)
  end

  # Performs a search, given the response from another search.
  #
  # If a response is given, the search will only return tweets newer than the given response's tweets
  # (the response's 'max_id' is sent as :since_id).
  # If a response is not given, this performs a normal search.
  #
  # Accepts additional parameters (same as #search_url)
  def search_newer!(response = nil, additional_parameters = nil)
    return search!(additional_parameters) unless response

    search!( (additional_parameters || {}).merge( :since_id => response['max_id'] ) )
  end

  # Performs a search, given the response from another search.
  #
  # If the response given is paginated (ie. it has a 'next_page' key),
  # this will return the next page.  Else, this will return nil.
  #
  # Accepts additional parameters (same as #search_url)
  def search_more!(response, additional_parameters = nil)
    return nil unless response['next_page']

    # a response without a 'page' key is treated as page 1
    next_page = (response['page'] || 1) + 1
    search!( (additional_parameters || {}).merge( :page => next_page, :max_id => response['max_id'] ) )
  end

  # Performs a search! and search_more! as needed to return a response with *all* pages of tweets.
  #
  # This respects max_pages (or a :max_pages key in additional_parameters, which wins):
  # at most that many requests are made in total, counting the first one.
  #
  # The response object returned is similar to the responses returned by all other methods, but we only
  # currently have a 'results' key on the Hash returned.  If you're used to getting some of the other keys
  # returned by the other methods (which Twitter returns), be warned!
  #
  # To get the tweets off of the response:
  #
  #   tweets = watcher.search_with_pagination!['results']
  def search_with_pagination!(additional_parameters = nil)
    response = search! additional_parameters
    tweets   = { 'results' => (response['results'] || []) }

    each_additional_page(response, additional_parameters) do |page|
      tweets['results'] += (page['results'] || [])
    end

    tweets
  end

  # Helper to do an HTTP GET request (with this watcher's User-Agent) and return the response body
  def get(url)
    TwitterSearchWatcher.get url, 'User-Agent' => user_agent
  end

  # Helper to do an HTTP GET request and return the response body
  #
  # options are extra request headers / open-uri options; they override the default User-Agent.
  #
  # The url is parsed into a URI first and opened through open-uri, rather than being
  # handed to Kernel#open: Kernel#open would happily run a "|command" string or open a
  # local file, and no longer opens URLs at all on Ruby 3.0+.
  def self.get(url, options = {})
    headers = { 'User-Agent' => DEFAULT_USER_AGENT }.merge(options)
    URI.parse(url).open(headers) {|io| io.read }
  end

  # Helper to #get a url and return the response body parsed as JSON
  def json(url)
    JSON.parse get(url)
  end

  # Helper to #get a url and return the response body parsed as JSON
  def self.json(url)
    JSON.parse get(url)
  end

  # Instantiates a new TwitterSearchWatcher given the search_string and options and then
  # calls search_with_pagination! on the instance, returning the response.
  #
  #   tweets_json = TwitterSearchWatcher.search_with_pagination!('foo')['results']
  #
  def self.search_with_pagination!(search_string, options = nil)
    TwitterSearchWatcher.new(search_string, options).search_with_pagination!
  end

  # Instantiates a new TwitterSearchWatcher given the search_string and options and then
  # calls #watch! on the instance using the block given.
  def self.watch!(search_string, options = nil, &block)
    TwitterSearchWatcher.new(search_string, options).watch!(&block)
  end

  # Starts watching this search in a loop.
  # It will wait #check_every seconds between new requests (except requests to get additional pages).
  # Every time a new tweet is found, that tweet is passed to the block given
  # (as an OpenStruct; the tweet's 'id' is exposed as #tweet_id).
  #
  #   TwitterSearchWatcher.new('foo').watch! {|tweet| puts "got tweet: #{ tweet.text }" }
  #
  # Each poll follows 'next_page' links exactly like #search_with_pagination!:
  # at most max_pages pages per poll, or every page when max_pages is nil.
  #
  # Installs an INT (Ctrl-C) handler that exits the process, and never returns on its own.
  # Raises ArgumentError if no block is given.
  def watch!(additional_parameters = nil, &block)
    raise ArgumentError, 'watch! needs a block to pass tweets to' unless block

    @max_id_found_so_far = 0

    trap('INT'){ puts "\nexiting ..."; exit }
    puts "Watching for tweets: #{ search_url(additional_parameters) }"

    loop do
      # Only ask for tweets newer than the previous poll's newest tweet.  The same
      # :since_id has to go along with the follow-up page requests too, otherwise
      # pages 2+ would walk back into tweets we have already reported.
      poll_parameters = additional_parameters
      if @last_response
        poll_parameters = (additional_parameters || {}).merge( :since_id => @last_response['max_id'] )
      end

      @last_response = search! poll_parameters
      call_tweet_callbacks(@last_response, block)
      update_max_id @last_response

      each_additional_page(@last_response, poll_parameters) do |page|
        call_tweet_callbacks(page, block)
        update_max_id page
      end

      sleep check_every
    end
  end

  private

  # Yields each page that follows the given (first-page) response, stopping when
  # there is no 'next_page' or the page limit is reached.  The limit is
  # additional_parameters[:max_pages] if present, else #max_pages; nil means no limit.
  def each_additional_page(response, additional_parameters)
    page_limit    = (additional_parameters && additional_parameters[:max_pages]) || max_pages
    pages_fetched = 1 # the response we were handed is page one

    # the limit is checked *before* the request, so we never fetch a page just to discard it
    while (page_limit.nil? || pages_fetched < page_limit) &&
          (response = search_more!(response, additional_parameters))
      pages_fetched += 1
      yield response
    end
  end

  # Remembers the largest 'max_id' seen while watching (responses without one are skipped)
  def update_max_id(response)
    max_id = response['max_id']
    @max_id_found_so_far = max_id if max_id && max_id > (@max_id_found_so_far || 0)
  end

  # Calls the block once per tweet in the response, passing each tweet as an OpenStruct.
  #
  # The tweet's 'id' key is renamed to 'tweet_id' on a copy, so the response itself is untouched.
  def call_tweet_callbacks(response, block)
    (response['results'] || []).each do |tweet|
      attributes = tweet.dup
      attributes['tweet_id'] = attributes.delete 'id'
      block.call OpenStruct.new(attributes)
    end
  end

  # URL-escapes a value for the query string, but leaves double quotes literal
  # (CGI.escape already turns spaces into '+')
  def escape(string)
    CGI.escape(string.to_s).gsub('%22', '"')
  end

  # Builds the "?a=b&c=d" query string from this watcher's attributes, in
  # QUERY_STRING_ATTRIBUTES order.  Keys in additional_parameters override the
  # watcher's own values; keys that aren't query string attributes are ignored
  # and nil / false values are left out.
  def build_query_string(additional_parameters = nil)
    parameter_values = {}

    QUERY_STRING_ATTRIBUTES.each do |attr|
      value = send(attr)
      parameter_values[attr] = value if value
    end

    # if additional parameters are passed, we override the watcher's parameters with these
    if additional_parameters
      QUERY_STRING_ATTRIBUTES.each do |attr|
        parameter_values[attr] = additional_parameters[attr] if additional_parameters.key?(attr)
      end
    end

    '?' + parameter_values.map {|k,v| "#{ k }=#{ escape(v) }" if v }.compact.join('&')
  end

end
metadata ADDED
@@ -0,0 +1,55 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: twitter-search-watcher
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.4
5
+ platform: ruby
6
+ authors: []
7
+
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2010-01-05 00:00:00 -07:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description: for watching a particular Twitter search and calling code whenever there are new tweets
17
+ email:
18
+ executables: []
19
+
20
+ extensions: []
21
+
22
+ extra_rdoc_files: []
23
+
24
+ files:
25
+ - lib/twitter-search-watcher.rb
26
+ has_rdoc: true
27
+ homepage:
28
+ licenses: []
29
+
30
+ post_install_message:
31
+ rdoc_options: []
32
+
33
+ require_paths:
34
+ - lib
35
+ required_ruby_version: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - ">="
38
+ - !ruby/object:Gem::Version
39
+ version: "0"
40
+ version:
41
+ required_rubygems_version: !ruby/object:Gem::Requirement
42
+ requirements:
43
+ - - ">="
44
+ - !ruby/object:Gem::Version
45
+ version: "0"
46
+ version:
47
+ requirements: []
48
+
49
+ rubyforge_project:
50
+ rubygems_version: 1.3.5
51
+ signing_key:
52
+ specification_version: 3
53
+ summary: for watching a Twitter search
54
+ test_files: []
55
+