twitter-search-watcher 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. data/lib/twitter-search-watcher.rb +259 -0
  2. metadata +55 -0
@@ -0,0 +1,259 @@
1
+ %w( rubygems cgi json open-uri ostruct ).each {|lib| require lib }
2
+
3
+ #
4
+ # = Usage
5
+ #
6
+ # coming soon
7
+ #
8
class TwitterSearchWatcher

  TWITTER_SEARCH_URL      = 'http://search.twitter.com/search.json'.freeze
  DEFAULT_USER_AGENT      = 'TwitterSearchWatcher RubyGem http://github.com/devfu/twitter-search-watcher'.freeze
  QUERY_STRING_ATTRIBUTES = [ :q, :to, :from, :since_id, :page, :max_id, :rpp ].freeze

  # The User-Agent header value to send along with all Twitter Search API requests
  # (the reader is defined below and falls back to DEFAULT_USER_AGENT)
  attr_writer :user_agent

  # A string you want to search twitter for
  attr_accessor :q

  # The username of someone you want to search replies to
  attr_accessor :to

  # The username of someone you want to search replies from
  attr_accessor :from

  # Get a particular page of Twitter search results (pagination).
  # Typically used in conjunction with :max_id
  attr_accessor :page

  # Used for pagination, so you can get page=3 where the max_id of the first page was 1234
  attr_accessor :max_id

  # Only get tweets with ID's greater than this ID (useful for only getting new tweets)
  attr_accessor :since_id

  # Number of results per page (max 100); the validating writer is defined below
  attr_reader :rpp

  # The number of seconds to wait between Twitter calls. Default: 60 (seconds)
  # (the reader is defined below and supplies the default)
  attr_writer :check_every

  # The maximum number of pages to check for tweets (the first page counts as one)
  #
  # If nil, we'll check until there are no more pages (when :next_page isn't present)
  attr_accessor :max_pages

  # Sets the number of results per page.
  #
  # nil is accepted and clears the setting (the parameter is then left out of the URL).
  # Raises a RuntimeError if the value is greater than 100.
  def rpp=(value)
    raise "The maximum rpp (Results per Page) value is 100" if value && value > 100
    @rpp = value
  end

  # Seconds to wait between polls in #watch!; 60 unless a value has been assigned.
  def check_every
    @check_every || 60
  end

  # Create a new TwitterSearchWatcher
  #
  #   TwitterSearchWatcher.new 'string to search'
  #   TwitterSearchWatcher.new 'string to search', :check_every => 60
  #   TwitterSearchWatcher.new :to => 'barackobama', :from => 'SenJohnMcCain'
  #
  # When the first argument is a Hash it is used as the options (and any second
  # argument is ignored). Every option key is assigned through its "key=" writer.
  def initialize(search_string = nil, options = nil)
    if search_string.is_a? Hash
      options = search_string
    else
      self.q = search_string
    end

    options.each { |key, value| send "#{key}=", value } if options
  end

  # Returns the URL we'll use to call the Twitter Search API.
  #
  # Without parameters, it'll generate a URL just from this TwitterSearchWatcher instance.
  #
  # With parameters, it'll override the TwitterSearchWatcher instance's options with
  # whatever you pass, eg.
  #
  #   >> TwitterSearchWatcher.new( 'foo', :rpp => 15 ).search_url
  #   => "http://search.twitter.com/search.json?q=foo&rpp=15"
  #
  #   >> TwitterSearchWatcher.new( 'foo', :rpp => 15 ).search_url( :rpp => 99 )
  #   => "http://search.twitter.com/search.json?q=foo&rpp=99"
  #
  def search_url(additional_parameters = nil)
    TWITTER_SEARCH_URL + build_query_string(additional_parameters)
  end

  # The User-Agent sent with requests; DEFAULT_USER_AGENT unless one has been assigned.
  def user_agent
    @user_agent || DEFAULT_USER_AGENT
  end

  # Performs a search. Accepts the same parameters as #search_url
  #
  # Returns the parsed JSON response.
  def search!(additional_parameters = nil)
    json search_url(additional_parameters)
  end

  # Performs a search, given the response from another search.
  #
  # If a response is given, the search will only return tweets newer than the given response's tweets
  # (the response's 'max_id' is sent as :since_id).
  # If a response is not given, this performs a normal search.
  #
  # Accepts additional parameters (same as #search_url)
  def search_newer!(response = nil, additional_parameters = nil)
    return search!(additional_parameters) unless response

    search!( (additional_parameters || {}).merge( :since_id => response['max_id'] ) )
  end

  # Performs a search, given the response from another search.
  #
  # If the response given is paginated (ie. there are additional tweets available on additional pages),
  # this will return the next page.  Else, this will return nil.
  #
  # Accepts additional parameters (same as #search_url)
  def search_more!(response, additional_parameters = nil)
    return nil unless response['next_page']

    search!( (additional_parameters || {}).merge( :page => (response['page'] + 1), :max_id => response['max_id'] ) )
  end

  # Performs a search! and search_more! as needed to return a response with *all* pages of tweets.
  #
  # This respects max_pages (or a :max_pages entry in additional_parameters, which takes
  # precedence) and will never request more than that many pages in total.
  #
  # The response object returned is similar to the responses returned by all other methods, but we only
  # currently have a 'results' key on the Hash returned.  If you're used to getting some of the other keys
  # returned by the other methods (which Twitter returns), be warned!
  #
  # To get the tweets off of the response:
  #
  #   tweets = watcher.search_with_pagination!['results']
  def search_with_pagination!(additional_parameters = nil)
    first_page = search! additional_parameters
    tweets     = { 'results' => first_page['results'] }

    each_additional_page(first_page, additional_parameters) do |page|
      tweets['results'] += page['results']
    end

    tweets
  end

  # Helper to do an HTTP GET request (sending this watcher's #user_agent) and return the response body
  def get(url)
    TwitterSearchWatcher.get url, 'User-Agent' => user_agent
  end

  # Helper to do an HTTP GET request and return the response body
  #
  # +options+ are passed to open-uri (typically request headers); a default
  # User-Agent is supplied unless one is given.
  def self.get(url, options = {})
    headers = { 'User-Agent' => DEFAULT_USER_AGENT }.merge(options)

    # Kernel#open no longer opens URLs as of Ruby 3.0, so go through the URI object.
    # The block form makes sure the underlying IO is closed once the body is read.
    URI.parse(url).open(headers) { |io| io.read }
  end

  # Helper to #get a url and return the response body parsed as JSON
  def json(url)
    JSON.parse get(url)
  end

  # Helper to #get a url and return the response body parsed as JSON
  def self.json(url)
    JSON.parse get(url)
  end

  # Instantiates a new TwitterSearchWatcher given the search_string and options and then
  # calls search_with_pagination! on the instance, returning the response.
  #
  #   tweets_json = TwitterSearchWatcher.search_with_pagination!('foo')['results']
  #
  def self.search_with_pagination!(search_string, options = nil)
    new(search_string, options).search_with_pagination!
  end

  # Instantiates a new TwitterSearchWatcher given the search_string and options and then
  # calls #watch! on the instance using the block given.
  def self.watch!(search_string, options = nil, &block)
    new(search_string, options).watch!(&block)
  end

  # Starts watching this search in a loop.
  # It will wait #check_every seconds between new requests (except requests to get additional pages).
  # Every time a new tweet is found, that tweet is passed to the block given.
  #
  #   TwitterSearchWatcher.new('foo').watch! {|tweet| puts "got tweet: #{ tweet.text }" }
  #
  # Additional pages are fetched until there are no more, or until max_pages pages
  # (counting the first one) have been requested for the current poll.
  #
  # Note: this installs an INT signal handler that prints a message and exits the process.
  def watch!(additional_parameters = nil, &block)
    @max_id_found_so_far = 0

    trap('INT') { puts "\nexiting ..."; exit }
    puts "Watching for tweets: #{ search_url(additional_parameters) }"

    loop do
      previous       = @last_response
      @last_response = search_newer!(previous, additional_parameters)
      call_tweet_callbacks(@last_response, block)
      update_max_id @last_response

      # Follow-up pages must keep the same :since_id lower bound as the first page of
      # this poll, otherwise paging would walk back into tweets we already delivered.
      paging_parameters = additional_parameters
      paging_parameters = (additional_parameters || {}).merge( :since_id => previous['max_id'] ) if previous

      each_additional_page(@last_response, paging_parameters) do |page|
        call_tweet_callbacks(page, block)
        update_max_id page
      end

      sleep check_every
    end
  end

  private

  # The page limit to apply: :max_pages from additional_parameters if given, else #max_pages.
  def page_limit(additional_parameters)
    (additional_parameters && additional_parameters[:max_pages]) || max_pages
  end

  # Yields every page that follows first_response, stopping when there is no next page
  # or when the page limit (which counts the first page) has been reached.
  # The limit is checked *before* each request so no request is wasted.
  def each_additional_page(first_response, additional_parameters)
    limit           = page_limit(additional_parameters)
    pages_requested = 1 # the first page has already been requested by the caller
    response        = first_response

    while limit.nil? || pages_requested < limit
      response = search_more!(response, additional_parameters)
      break unless response

      pages_requested += 1
      yield response
    end
  end

  # Remembers the highest 'max_id' seen so far; responses without a 'max_id' are ignored.
  def update_max_id(response)
    max_id = response['max_id']
    @max_id_found_so_far = max_id if max_id && max_id > @max_id_found_so_far
  end

  # Calls block once per tweet in response['results'], passing an OpenStruct of the tweet
  # in which the 'id' key has been renamed to 'tweet_id'
  # (presumably to avoid clashing with Object#id on older Rubies -- TODO confirm).
  def call_tweet_callbacks(response, block)
    response['results'].each do |tweet|
      attributes = tweet.dup # leave the response's own hashes untouched
      attributes['tweet_id'] = attributes.delete 'id'
      block.call OpenStruct.new(attributes)
    end
  end

  # URL-escapes a value for the query string, but leaves double quotes unescaped.
  # CGI.escape already turns spaces into '+', so no further space handling is needed.
  def escape(string)
    CGI.escape(string.to_s).gsub('%22', '"')
  end

  # Builds the "?a=b&c=d" query string from this watcher's attributes, overridden by
  # any QUERY_STRING_ATTRIBUTES present in additional_parameters.
  # Parameters whose value is nil or false are left out.
  def build_query_string(additional_parameters = nil)
    parameter_values = {}
    QUERY_STRING_ATTRIBUTES.each do |attr|
      value = send(attr)
      parameter_values[attr] = value if value
    end

    # if additional parameters are passed, we override the watcher's parameters with these
    if additional_parameters
      QUERY_STRING_ATTRIBUTES.each do |attr|
        parameter_values[attr] = additional_parameters[attr] if additional_parameters.key?(attr)
      end
    end

    pairs = parameter_values.map { |name, value| "#{ name }=#{ escape(value) }" if value }
    '?' + pairs.compact.join('&')
  end

end
metadata ADDED
@@ -0,0 +1,55 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: twitter-search-watcher
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.4
5
+ platform: ruby
6
+ authors: []
7
+
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2010-01-05 00:00:00 -07:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description: for watching a particular Twitter search and calling code whenever there are new tweets
17
+ email:
18
+ executables: []
19
+
20
+ extensions: []
21
+
22
+ extra_rdoc_files: []
23
+
24
+ files:
25
+ - lib/twitter-search-watcher.rb
26
+ has_rdoc: true
27
+ homepage:
28
+ licenses: []
29
+
30
+ post_install_message:
31
+ rdoc_options: []
32
+
33
+ require_paths:
34
+ - lib
35
+ required_ruby_version: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - ">="
38
+ - !ruby/object:Gem::Version
39
+ version: "0"
40
+ version:
41
+ required_rubygems_version: !ruby/object:Gem::Requirement
42
+ requirements:
43
+ - - ">="
44
+ - !ruby/object:Gem::Version
45
+ version: "0"
46
+ version:
47
+ requirements: []
48
+
49
+ rubyforge_project:
50
+ rubygems_version: 1.3.5
51
+ signing_key:
52
+ specification_version: 3
53
+ summary: for watching a Twitter search
54
+ test_files: []
55
+