tweetlr 0.1.6 → 0.1.7pre

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore CHANGED
@@ -2,8 +2,8 @@
2
2
  *.log
3
3
  pkg
4
4
  *.pid
5
- config
6
5
  *.output
7
6
  .rvmrc
8
7
  Gemfile.lock
9
- tweetlr.tid
8
+ tweetlr.tid
9
+ tweetlr.yml.dev
data/bin/tweetlr CHANGED
@@ -46,22 +46,24 @@ Daemons.run_proc('tweetlr', :dir_mode => :script, :dir => './', :backtrace => tr
46
46
  EventMachine::run {
47
47
  EventMachine::add_periodic_timer( UPDATE_PERIOD ) {
48
48
  @log.info "starting tweetlr crawl..."
49
- response = @tweetlr.lazy_search_twitter
49
+ response = {}
50
+ response = @tweetlr.lazy_search_twitter(@tweetlr.twitter_config["refresh_url"]) #looks awkward, but the refresh url will come from the db soon and make sense then...
50
51
  if response
51
52
  tweets = response['results']
52
53
  if tweets
53
54
  tweets.each do |tweet|
54
- tumblr_post = @tweetlr.generate_tumblr_photo_post tweet
55
- if tumblr_post.nil? || tumblr_post[:source].nil?
56
- @log.warn "could not get image source: tweet: #{tweet} --- tumblr post: #{tumblr_post.inspect}"
57
- else
58
- @log.debug "tumblr post: #{tumblr_post}"
59
- res = @tweetlr.post_to_tumblr tumblr_post
60
- @log.warn "tumblr response: #{res.header_str} #{res.body_str}" unless res.response_code == 201
61
- end
62
- # store the highest tweet id
63
- File.open(tid_file, "w+") { |io| io.write(tweets.first['id']) }
55
+ tumblr_post = @tweetlr.generate_tumblr_photo_post tweet
56
+ if tumblr_post.nil? || tumblr_post[:source].nil?
57
+ @log.warn "could not get image source: tweet: #{tweet} --- tumblr post: #{tumblr_post.inspect}"
58
+ else
59
+ @log.debug "tumblr post: #{tumblr_post}"
60
+ res = @tweetlr.post_to_tumblr tumblr_post
61
+ @log.warn "tumblr response: #{res.header_str} #{res.body_str}" unless res.response_code == 201
64
62
  end
63
+ end
64
+ # store the highest tweet id
65
+ @tweetlr.twitter_config[:refresh_url]=response['refresh_url']
66
+ File.open(tid_file, "w+") { |io| io.write(response['max_id']) }
65
67
  end
66
68
  else
67
69
  @log.error "twitter search returned no response. hail the failwhale!"
@@ -0,0 +1,42 @@
1
+ require 'curb'
2
+ require 'log_aware'
3
+
4
require 'json'

# HTTP helper used by the tweetlr processors: performs a GET request through
# curb and returns the response body parsed as JSON.
module HttpProcessor
  include LogAware

  USER_AGENT = %{Mozilla/5.0 (compatible; tweetlr; +http://tweetlr.5v3n.com)}

  # Number of attempts made before a failing Curl call is given up.
  MAX_TRIES = 3
  # Seconds to wait between two attempts.
  RETRY_DELAY = 3

  # Convenience method for curl http get calls and parsing them to json.
  #
  # request - the URL to fetch.
  # log     - optional logger; when nil nothing is logged.
  #
  # Returns the parsed JSON document, or nil when the body is missing, is not
  # valid JSON, or every attempt failed with a Curl::Err::CurlError.
  def self.http_get(request, log = nil)
    tries = MAX_TRIES
    begin
      curl = Curl::Easy.new request
      curl.useragent = USER_AGENT
      curl.perform
      parse_json curl.body_str, request, log
    rescue Curl::Err::CurlError => err
      log.error "Failure in Curl call: #{err}" if log
      tries -= 1
      if tries > 0
        # only wait when another attempt actually follows
        sleep RETRY_DELAY
        retry
      end
      nil
    end
  end

  # Parses +body+ as JSON; returns nil (after logging, if a logger was given)
  # when the body is nil or cannot be parsed.
  def self.parse_json(body, request, log)
    return nil if body.nil?
    JSON.parse body
  rescue JSON::ParserError => err
    begin
      log.warn "#{err}: Could not parse response for #{request} - this is probably not a json response: #{body}" if log
    rescue Encoding::CompatibilityError
      # interpolating the raw body into the warning can itself fail on mixed encodings
      log.error "Trying to rescue a JSON::ParserError for '#{request}' we got stuck in a Encoding::CompatibilityError." if log
    end
    nil
  end
  private_class_method :parse_json
end
data/lib/log_aware.rb ADDED
@@ -0,0 +1,8 @@
1
# Process-wide holder for the logger shared by the tweetlr modules.
#
# The logger is kept in a module-level instance variable, so reading it before
# one has been assigned returns nil instead of raising a NameError for an
# uninitialized class variable.
# TODO think of a more elegant way of logging than a static attribute
module LogAware
  @log = nil

  class << self
    # Stores +log+ as the shared logger.
    def log=(log)
      @log = log
    end

    # Returns the shared logger, or nil when none has been assigned yet.
    def log
      @log
    end
  end
end
@@ -0,0 +1,122 @@
1
+ require 'log_aware'
2
+
3
# Resolves links to photo sharing services into the url of the linked image.
module PhotoServiceProcessor

  LOCATION_START_INDICATOR = 'Location: '
  LOCATION_STOP_INDICATOR = "\r\n"
  # Not anchored: matches any string containing one of the image extensions.
  PIC_REGEXP = /(.*?)\.(jpg|jpeg|png|gif)/i
  # Number of attempts made by link_url_redirect before giving up.
  REDIRECT_TRIES = 3
  # Seconds to wait between two attempts in link_url_redirect.
  REDIRECT_RETRY_DELAY = 3

  include LogAware

  # Finds the image url behind +link+.
  #
  # Returns +link+ itself when it already looks like a picture, nil when
  # +link+ is nil, otherwise whatever the matching service lookup (or the
  # embed.ly fallback) returned - which may be nil.
  def self.find_image_url(link)
    return nil unless link
    return link if photo? link

    url = nil
    # every matching service is queried in this order; the last match wins
    url = image_url_instagram link if link.index('instagr.am') || link.index('instagram.com')
    url = image_url_picplz link if link.index 'picplz'
    url = image_url_twitpic link if link.index 'twitpic'
    url = image_url_yfrog link if link.index 'yfrog'
    url = image_url_imgly link if link.index 'img.ly'
    url = image_url_tco link if link.index 't.co'
    url = image_url_lockerz link if link.index 'lockerz.com'
    url = image_url_foursquare link if link.index '4sq.com'
    # just try embed.ly for anything else. could do all image url processing
    # w/ embedly, but there's probably some kind of rate limit involved.
    url = image_url_embedly link if url.nil?
    url
  end

  # Truthy (the match position) when +link+ matches PIC_REGEXP, nil otherwise.
  def self.photo?(link)
    link =~ PIC_REGEXP
  end

  # find the image's url via embed.ly; nil unless the response is of type 'photo'
  def self.image_url_embedly(link_url)
    response = HttpProcessor::http_get "http://api.embed.ly/1/oembed?url=#{link_url}"
    response['url'] if response && response['type'] == 'photo'
  end

  # find the image's url for a foursquare link (delegates to embed.ly)
  def self.image_url_foursquare(link_url)
    image_url_embedly link_url
  end

  # find the image's url for a lockerz link
  def self.image_url_lockerz(link_url)
    response = HttpProcessor::http_get "http://api.plixi.com/api/tpapi.svc/json/metadatafromurl?details=false&url=#{link_url}"
    response["BigImageUrl"] if response
  end

  # find the image's url for a twitter shortened link: resolve the redirect,
  # then run the resolved url through find_image_url again
  def self.image_url_tco(link_url)
    service_url = link_url_redirect link_url
    find_image_url service_url
  end

  # find the image's url for an instagram link
  def self.image_url_instagram(link_url)
    # instagram's oembed does not work for .com links; build a new string
    # instead of rewriting the caller's one in place
    link_url = link_url.sub('instagram.com', 'instagr.am')
    response = HttpProcessor::http_get "http://api.instagram.com/oembed?url=#{link_url}"
    response['url'] if response
  end

  # find the image's url for a picplz short/longlink
  def self.image_url_picplz(link_url)
    id = extract_id link_url
    # try short url
    response = HttpProcessor::http_get "http://picplz.com/api/v2/pic.json?shorturl_ids=#{id}"
    # if short url fails, try long url
    # response = HTTParty.get "http://picplz.com/api/v2/pic.json?longurl_ids=#{id}"
    # extract url, giving up with nil as soon as one level of the response is missing
    pics = response && response['value'] && response['value']['pics']
    first_pic = pics && pics.first
    files = first_pic && first_pic['pic_files']
    large = files && files['640r']
    large ? large['img_url'] : nil
  end

  # find the image's url for a twitpic link
  def self.image_url_twitpic(link_url)
    image_url_redirect link_url, "http://twitpic.com/show/full/"
  end

  # find the image's url for a yfrog link
  def self.image_url_yfrog(link_url)
    response = HttpProcessor::http_get("http://www.yfrog.com/api/oembed?url=#{link_url}")
    response['url'] if response
  end

  # find the image's url for a img.ly link
  def self.image_url_imgly(link_url)
    image_url_redirect link_url, "http://img.ly/show/full/", "\r\n"
  end

  # extract image url from services like twitpic & img.ly that do not offer oembed interfaces
  def self.image_url_redirect(link_url, service_endpoint, stop_indicator = LOCATION_STOP_INDICATOR)
    link_url_redirect "#{service_endpoint}#{extract_id link_url}", stop_indicator
  end

  # Requests +short_url+ and returns the value of the first 'Location: '
  # header found in the response headers, i.e. the text between
  # LOCATION_START_INDICATOR and the next +stop_indicator+.
  #
  # Returns nil when every attempt failed with a Curl::Err::CurlError or when
  # no such header is present.
  def self.link_url_redirect(short_url, stop_indicator = LOCATION_STOP_INDICATOR)
    tries = REDIRECT_TRIES
    begin
      resp = Curl::Easy.http_get(short_url) { |res| res.follow_location = true }
    rescue Curl::Err::CurlError => err
      logger = log
      logger.error "Curl::Easy.http_get failed: #{err}" if logger
      tries -= 1
      return nil unless tries > 0
      # only wait when another attempt actually follows
      sleep REDIRECT_RETRY_DELAY
      retry
    end
    headers = resp && resp.header_str
    return nil unless headers
    location = headers.index(LOCATION_START_INDICATOR)
    return nil unless location
    start = location + LOCATION_START_INDICATOR.size
    stop = headers.index(stop_indicator, start)
    stop ? headers[start...stop] : nil
  end

  # extract the pic id from a given <code>link</code>: the part after the last '/'
  def self.extract_id(link)
    link.split('/').last
  end

  # The logger registered with LogAware, or nil when none is available.
  def self.log
    LogAware.log
  rescue NameError
    # LogAware raises a NameError when no logger has been assigned yet
    nil
  end
  private_class_method :log
end
@@ -0,0 +1,3 @@
1
# Namespace for Tumblr-related helpers; it declares no constants or methods yet.
module TumblrProcessor; end
data/lib/tweetlr.rb CHANGED
@@ -3,14 +3,17 @@ require 'logger'
3
3
  require 'yaml'
4
4
  require 'curb'
5
5
  require 'json'
6
+ require 'twitter_processor'
7
+ require 'http_processor'
8
+ require 'photo_service_processor'
9
+ require 'log_aware'
6
10
 
7
11
  class Tweetlr
12
+
13
+ attr_accessor :twitter_config
8
14
 
9
- VERSION = '0.1.6'
15
+ VERSION = '0.1.7pre'
10
16
  GENERATOR = %{tweetlr - http://tweetlr.5v3n.com}
11
- USER_AGENT = %{Mozilla/5.0 (compatible; tweetlr/#{VERSION}; +http://tweetlr.5v3n.com)}
12
- LOCATION_START_INDICATOR = 'Location: '
13
- LOCATION_STOP_INDICATOR = "\r\n"
14
17
 
15
18
  API_ENDPOINT_TWITTER = 'http://search.twitter.com/search.json'
16
19
  API_ENDPOINT_TUMBLR = 'http://www.tumblr.com'
@@ -18,8 +21,6 @@ class Tweetlr
18
21
  TWITTER_RESULTS_TYPE = 'recent'
19
22
  UPDATE_PERIOD = 600 #10 minutes
20
23
 
21
- PIC_REGEXP = /(.*?)\.(jpg|jpeg|png|gif)/i
22
-
23
24
  def initialize(email, password, args={:terms=>nil, :whitelist => nil, :shouts => nil, :since_id=>nil, :results_per_page => nil, :loglevel=>nil, :result_type => nil})
24
25
  @log = Logger.new(STDOUT)
25
26
  if (Logger::DEBUG..Logger::UNKNOWN).to_a.index(args[:loglevel])
@@ -28,21 +29,33 @@ class Tweetlr
28
29
  @log.level = Logger::INFO
29
30
  end
30
31
  @log.debug "log level set to #{@log.level}"
32
+ LogAware.log=@log
33
+ @twitter_config = {
34
+ :since_id => args[:since_id],
35
+ :search_term => args[:terms],
36
+ :results_per_page => args[:results_per_page] || TWITTER_RESULTS_PER_PAGE,
37
+ :result_type => args[:result_type] || TWITTER_RESULTS_TYPE,
38
+ :api_endpoint_twitter => args[:api_endpoint_twitter] || API_ENDPOINT_TWITTER
39
+ }
40
+ @twitter_config[:refresh_url] = "?ors=#{@twitter_config[:search_term]}&since_id=#{@twitter_config[:since_id]}&rpp=#{@twitter_config[:results_per_page]}&result_type=#{@twitter_config[:result_type]}" if (@twitter_config[:since_id] && @twitter_config[:search_term])
41
+ @twitter_config[:logger] = @log
42
+
31
43
  @email = email
32
44
  @password = password
33
- @since_id = args[:since_id]
34
- @search_term = args[:terms]
35
45
  @cookie = args[:cookie]
36
- @results_per_page = args[:results_per_page] || TWITTER_RESULTS_PER_PAGE
37
- @result_type = args[:result_type] || TWITTER_RESULTS_TYPE
38
- @api_endpoint_twitter = args[:api_endpoint_twitter] || API_ENDPOINT_TWITTER
46
+ @api_endpoint_twitter =
39
47
  @api_endpoint_tumblr = args[:api_endpoint_tumblr] || API_ENDPOINT_TUMBLR
40
48
  @whitelist = args[:whitelist]
41
49
  @shouts = args[:shouts]
42
50
  @update_period = args[:update_period] || UPDATE_PERIOD
43
51
  @whitelist.each {|entry| entry.downcase!} if @whitelist
44
- @refresh_url = "#{@api_endpoint_twitter}?ors=#{@search_term}&since_id=#{@since_id}&rpp=#{@results_per_page}&result_type=#{@result_type}" if (@since_id && @search_term)
45
52
  end
53
+
54
# Runs a lazy twitter search with the current twitter configuration.
# A given +refresh_url+ is stored in the configuration first; without one the
# configuration is passed on unchanged.
def lazy_search_twitter(refresh_url=nil)
  config = @twitter_config
  config[:refresh_url] = refresh_url if refresh_url
  TwitterProcessor::lazy_search(config)
end
58
+
46
59
  #post a tumblr photo entry. required arguments are :type, :date, :source, :caption, :state. optional argument: :tags
47
60
  def post_to_tumblr(options={})
48
61
  tries = 3
@@ -60,7 +73,7 @@ class Tweetlr
60
73
  Curl::PostField.content('state', options[:state]),
61
74
  Curl::PostField.content('tags', tags)
62
75
  )
63
- rescue Curl::Err => err
76
+ rescue Curl::Err::CurlError => err
64
77
  @log.error "Failure in Curl call: #{err}"
65
78
  tries -= 1
66
79
  sleep 3
@@ -78,7 +91,7 @@ class Tweetlr
78
91
  def generate_tumblr_photo_post tweet
79
92
  tumblr_post = nil
80
93
  message = tweet['text']
81
- if !retweet? message
94
+ if !TwitterProcessor::retweet? message
82
95
  @log.debug "tweet: #{tweet}"
83
96
  tumblr_post = {}
84
97
  tumblr_post[:type] = 'photo'
@@ -100,183 +113,17 @@ class Tweetlr
100
113
  tumblr_post
101
114
  end
102
115
 
103
- #checks if the message is a retweet
104
- def retweet?(message)
105
- message.index('RT @') || message.index(%{ "@}) || message.index(" \u201c@") #detect retweets
106
- end
107
-
108
- #fire a new search
109
- def search_twitter()
110
- search_call = "#{@api_endpoint_twitter}?ors=#{@search_term}&result_type=#{@result_type}&rpp=#{@results_per_page}"
111
- @response = http_get search_call
112
- end
113
- # lazy update - search for a term or refresh the search if a response is available already
114
- def lazy_search_twitter()
115
- @refresh_url = "#{@api_endpoint_twitter}#{@response['refresh_url']}" unless (@response.nil? || @response['refresh_url'].nil? || @response['refresh_url'].empty?)
116
- if @refresh_url
117
- search_url = "#{@refresh_url}&result_type=#{@result_type}&rpp=#{@results_per_page}"
118
- @log.info "lazy search using '#{search_url}'"
119
- @response = http_get search_url
120
- else
121
- @log.debug "regular search using '#{@search_term}'"
122
- @response = search_twitter()
123
- end
124
- end
125
-
126
116
  #extract a linked image file's url from a tweet. first found image will be used.
127
117
  def extract_image_url(tweet)
128
- links = extract_links tweet
118
+ links = TwitterProcessor::extract_links tweet
129
119
  image_url = nil
130
120
  if links
131
121
  links.each do |link|
132
- image_url = find_image_url(link)
133
- return image_url if image_url =~ PIC_REGEXP
122
+ image_url = PhotoServiceProcessor::find_image_url(link)
123
+ return image_url if PhotoServiceProcessor::photo? image_url
134
124
  end
135
125
  end
136
126
  image_url
137
127
  end
138
128
 
139
- #extract the linked image file's url from a tweet
140
- def find_image_url(link)
141
- url = nil
142
- if !link.nil?
143
- url = image_url_instagram link if (link.index('instagr.am') || link.index('instagram.com'))
144
- url = image_url_picplz link if link.index 'picplz'
145
- url = image_url_twitpic link if link.index 'twitpic'
146
- url = image_url_yfrog link if link.index 'yfrog'
147
- url = image_url_imgly link if link.index 'img.ly'
148
- url = image_url_tco link if link.index 't.co'
149
- url = image_url_lockerz link if link.index 'lockerz.com'
150
- url = image_url_foursquare link if link.index '4sq.com'
151
- url = image_url_embedly link if url.nil? #just try embed.ly for anything else. could do all image url processing w/ embedly, but there's probably some kind of rate limit invovled.
152
- end
153
- url
154
- end
155
-
156
- #find the image's url via embed.ly
157
- def image_url_embedly(link_url)
158
- response = http_get "http://api.embed.ly/1/oembed?url=#{link_url}"
159
- response['url'] if response
160
- end
161
- #find the image's url for a foursquare link
162
- def image_url_foursquare(link_url)
163
- image_url_embedly link_url
164
- end
165
- #find the image's url for a lockerz link
166
- def image_url_lockerz(link_url)
167
- response = http_get "http://api.plixi.com/api/tpapi.svc/json/metadatafromurl?details=false&url=#{link_url}"
168
- response["BigImageUrl"] if response
169
- end
170
- #find the image's url for an twitter shortened link
171
- def image_url_tco(link_url)
172
- service_url = link_url_redirect link_url
173
- find_image_url service_url
174
- end
175
- #find the image's url for an instagram link
176
- def image_url_instagram(link_url)
177
- link_url['instagram.com'] = 'instagr.am' if link_url.index 'instagram.com' #instagram's oembed does not work for .com links
178
- response = http_get "http://api.instagram.com/oembed?url=#{link_url}"
179
- response['url'] if response
180
- end
181
-
182
- #find the image's url for a picplz short/longlink
183
- def image_url_picplz(link_url)
184
- id = extract_id link_url
185
- #try short url
186
- response = http_get "http://picplz.com/api/v2/pic.json?shorturl_ids=#{id}"
187
- #if short url fails, try long url
188
- #response = HTTParty.get "http://picplz.com/api/v2/pic.json?longurl_ids=#{id}"
189
- #extract url
190
- if response && response['value'] && response['value']['pics'] && response['value']['pics'].first && response['value']['pics'].first['pic_files'] && response['value']['pics'].first['pic_files']['640r']
191
- response['value']['pics'].first['pic_files']['640r']['img_url']
192
- else
193
- nil
194
- end
195
- end
196
- #find the image's url for a twitpic link
197
- def image_url_twitpic(link_url)
198
- image_url_redirect link_url, "http://twitpic.com/show/full/"
199
- end
200
- #find the image'S url for a yfrog link
201
- def image_url_yfrog(link_url)
202
- response = http_get("http://www.yfrog.com/api/oembed?url=#{link_url}")
203
- response['url'] if response
204
- end
205
- #find the image's url for a img.ly link
206
- def image_url_imgly(link_url)
207
- image_url_redirect link_url, "http://img.ly/show/full/", "\r\n"
208
- end
209
-
210
- # extract image url from services like twitpic & img.ly that do not offer oembed interfaces
211
- def image_url_redirect(link_url, service_endpoint, stop_indicator = LOCATION_STOP_INDICATOR)
212
- link_url_redirect "#{service_endpoint}#{extract_id link_url}", stop_indicator
213
- end
214
-
215
- def link_url_redirect(short_url, stop_indicator = LOCATION_STOP_INDICATOR)
216
- tries = 3
217
- begin
218
- resp = Curl::Easy.http_get(short_url) { |res| res.follow_location = true }
219
- rescue Curl::Err => err
220
- @log.error "Curl::Easy.http_get failed: #{err}"
221
- tries -= 1
222
- sleep 3
223
- if tries > 0
224
- retry
225
- else
226
- return nil
227
- end
228
- end
229
- if(resp && resp.header_str.index(LOCATION_START_INDICATOR) && resp.header_str.index(stop_indicator))
230
- start = resp.header_str.index(LOCATION_START_INDICATOR) + LOCATION_START_INDICATOR.size
231
- stop = resp.header_str.index(stop_indicator, start)
232
- resp.header_str[start...stop]
233
- else
234
- nil
235
- end
236
- end
237
-
238
- #extract the pic id from a given <code>link</code>
239
- def extract_id(link)
240
- link.split('/').last if link.split('/')
241
- end
242
-
243
- #extract the links from a given tweet
244
- def extract_links(tweet)
245
- if tweet
246
- text = tweet['text']
247
- text.gsub(/https?:\/\/[\S]+/).to_a if text
248
- end
249
- end
250
-
251
- private
252
-
253
- #convenience method for curl http get calls and parsing them to json.
254
- def http_get(request)
255
- tries = 3
256
- begin
257
- curl = Curl::Easy.new request
258
- curl.useragent = USER_AGENT
259
- curl.perform
260
- begin
261
- JSON.parse curl.body_str
262
- rescue JSON::ParserError => err
263
- begin
264
- @log.warn "#{err}: Could not parse response for #{request} - this is probably not a json response: #{curl.body_str}"
265
- return nil
266
- rescue Encoding::CompatibilityError => err
267
- @log.error "Trying to rescue a JSON::ParserError for '#{request}' we got stuck in a Encoding::CompatibilityError."
268
- return nil
269
- end
270
- end
271
- rescue Curl::Err => err
272
- @log.error "Failure in Curl call: #{err}"
273
- tries -= 1
274
- sleep 3
275
- if tries > 0
276
- retry
277
- else
278
- nil
279
- end
280
- end
281
- end
282
129
  end
@@ -0,0 +1,39 @@
1
+ require 'http_processor'
2
+
3
# Helpers for querying the twitter search api and inspecting the tweets it returns.
module TwitterProcessor
  class << self
    # checks if the message is a retweet: returns the position of the first
    # retweet marker found (truthy), or nil when there is none
    def retweet?(message)
      markers = ['RT @', %{"@}, "\u201c@"]
      markers.each do |marker|
        position = message.index(marker)
        return position if position
      end
      nil
    end

    # extract the links from a given tweet; nil when there is no tweet or it has no text
    def extract_links(tweet)
      return nil unless tweet
      text = tweet['text']
      text ? text.scan(/https?:\/\/[\S]+/) : nil
    end

    # fire a new search for the configured search term
    def search(config)
      endpoint = config[:api_endpoint_twitter]
      query = "?ors=#{config[:search_term]}&result_type=#{config[:result_type]}&rpp=#{config[:results_per_page]}"
      HttpProcessor::http_get "#{endpoint}#{query}"
    end

    # lazy update - refresh the search when the config holds a refresh url,
    # otherwise search for the configured term
    def lazy_search(config)
      log = config[:logger]
      refresh_url = config[:refresh_url]
      unless refresh_url
        log.debug "regular search using '#{config[:search_term]}'" if log
        return search(config)
      end
      search_url = "#{config[:api_endpoint_twitter]}#{refresh_url}&result_type=#{config[:result_type]}&rpp=#{config[:results_per_page]}"
      log.info "lazy search using '#{search_url}'" if log
      HttpProcessor::http_get search_url
    end
  end
end
@@ -0,0 +1,38 @@
1
+ require 'spec_helper'
2
+
3
# Specs for PhotoServiceProcessor; all http traffic is replaced by the stub_* helpers.
describe PhotoServiceProcessor do
  before(:each) do
    # one sample link per service; check_pic_url_extraction reads @links as well
    @links = {
      :instagram => "http://instagr.am/p/DzCWn/",
      :twitpic => "http://twitpic.com/449o2x",
      :yfrog => "http://yfrog.com/h4vlfp",
      :picplz => "http://picplz.com/2hWv",
      :imgly => "http://img.ly/3M1o",
      :tco => 'http://t.co/MUGNayA',
      :lockerz => 'http://lockerz.com/s/100269159',
      :embedly => 'http://flic.kr/p/973hTv',
      :twitter_pics => 'http://t.co/FmyBGfyY'
    }
  end

  it "should find a picture's url from the supported services" do
    # services whose extracted url is additionally matched against PIC_REGEXP
    pic_checked = [:instagram, :picplz, :yfrog, :imgly, :not_listed]
    @links.each_pair do |service, link|
      send "stub_#{service}"
      url = PhotoServiceProcessor.find_image_url(link)
      url.should be, "service #{service} not working!"
      check_pic_url_extraction(service) if pic_checked.include?(service)
    end
  end

  it "should not crash if embedly fallback won't find a link" do
    stub_bad_request
    PhotoServiceProcessor.find_image_url("http://mopskopf")
  end

  it "should not crash with an encoding error when response is non-us-ascii" do
    stub_utf8_response
    PhotoServiceProcessor.find_image_url("http://api.instagram.com/oembed?url=http://instagr.am/p/Gx%E2%80%946/")
  end

  it "follows redirects" do
    stub_imgly
    redirect_target = PhotoServiceProcessor.link_url_redirect('im mocked anyways')
    redirect_target.should == 'http://s3.amazonaws.com/imgly_production/899582/full.jpg'
  end
end
data/spec/spec_helper.rb CHANGED
@@ -2,6 +2,11 @@
2
2
  require "bundler"
3
3
  Bundler.require :default, :development, :test
4
4
 
5
# Calls PhotoServiceProcessor's image_url_<service> method with the sample
# link stored in @links for +service+ and expects the result to match
# PhotoServiceProcessor::PIC_REGEXP.
def check_pic_url_extraction(service)
  extractor = "image_url_#{service}".to_sym
  extracted = PhotoServiceProcessor.send(extractor, @links[service])
  extracted.should =~ PhotoServiceProcessor::PIC_REGEXP
end
9
+
5
10
  def stub_twitter
6
11
  Curl::Easy.any_instance.stub(:body_str).and_return %|{"results":[{"from_user_id_str":"220650275","profile_image_url":"http://a2.twimg.com/profile_images/668619338/9729_148876458070_505518070_2628895_7160219_n_normal.jpg","created_at":"Sat, 16 Jul 2011 23:20:01 +0000","from_user":"LoMuma","id_str":"92372947855093760","metadata":{"result_type":"recent"},"to_user_id":null,"text":"Need to stop procrastinating! 5 quizzes and personal responses due tomorrow... #fail","id":92372947855093760,"from_user_id":220650275,"geo":null,"iso_language_code":"en","to_user_id_str":null,"source":"&lt;a href=&quot;http://twitter.com/&quot;&gt;web&lt;/a&gt;"},{"from_user_id_str":"129718556","profile_image_url":"http://a2.twimg.com/profile_images/1428268221/twitter_normal.png","created_at":"Sat, 16 Jul 2011 23:20:01 +0000","from_user":"priiislopes","id_str":"92372947846692865","metadata":{"result_type":"recent"},"to_user_id":null,"text":"Esse jogo do Flu foi uma vergonha. Se ele fez o melhor dele no brasileiro semana passada, hj fez o pior de todos os tempos. 
#Fail","id":92372947846692865,"from_user_id":129718556,"geo":null,"iso_language_code":"pt","to_user_id_str":null,"source":"&lt;a href=&quot;http://twitter.com/&quot;&gt;web&lt;/a&gt;"},{"from_user_id_str":"259930166","profile_image_url":"http://a3.twimg.com/profile_images/1425221519/foto_normal.jpg","created_at":"Sat, 16 Jul 2011 23:20:00 +0000","from_user":"YamiiG4","id_str":"92372943132303360","metadata":{"result_type":"recent"},"to_user_id":null,"text":"vaya que eran 2 minutos..#FAIL!","id":92372943132303360,"from_user_id":259930166,"geo":null,"iso_language_code":"es","to_user_id_str":null,"source":"&lt;a href=&quot;http://www.tweetdeck.com&quot; rel=&quot;nofollow&quot;&gt;TweetDeck&lt;/a&gt;"},{"from_user_id_str":"321557905","profile_image_url":"http://a0.twimg.com/profile_images/1445672626/profile_normal.png","created_at":"Sat, 16 Jul 2011 23:20:00 +0000","from_user":"JasWafer_FFOE","id_str":"92372941379088384","metadata":{"result_type":"recent"},"to_user_id":null,"text":"RT @eye_OFBEHOLDER: RT @JasWafer_FFOE #Oomf said that he'll NEVER eat pussy! O.o --#FAIL","id":92372941379088384,"from_user_id":321557905,"geo":null,"iso_language_code":"en","to_user_id_str":null,"source":"&lt;a href=&quot;http://twidroyd.com&quot; rel=&quot;nofollow&quot;&gt;Twidroyd for Android&lt;/a&gt;"},{"from_user_id_str":"279395613","profile_image_url":"http://a0.twimg.com/profile_images/1334871419/lnnsquare_normal.jpg","created_at":"Sat, 16 Jul 2011 23:19:59 +0000","from_user":"LanguageNewsNet","id_str":"92372940640890881","metadata":{"result_type":"recent"},"to_user_id":null,"text":"Questioning the Inca Paradox: Did the civilization behind Machu Picchu really fail to develop a written la... 
http://tinyurl.com/5sfos23","id":92372940640890881,"from_user_id":279395613,"geo":null,"iso_language_code":"en","to_user_id_str":null,"source":"&lt;a href=&quot;http://twitterfeed.com&quot; rel=&quot;nofollow&quot;&gt;twitterfeed&lt;/a&gt;"}],"max_id":92372947855093760,"since_id":0,"refresh_url":"?since_id=92372947855093760&q=+fail","next_page":"?page=2&max_id=92372947855093760&rpp=5&q=+fail","results_per_page":5,"page":1,"completed_in":0.022152,"since_id_str":"0","max_id_str":"92372947855093760","query":"+fail"}|
7
12
  Curl::Easy.any_instance.stub(:perform).and_return Curl::Easy.new
@@ -17,6 +22,12 @@ def stub_instagram
17
22
  Curl::Easy.any_instance.stub(:perform).and_return Curl::Easy.new
18
23
  end
19
24
 
25
# Stubs every Curl::Easy instance to answer with instagram-style json whose
# "url" does not point to an image.
def stub_no_image_link
  Curl::Easy.any_instance.stub(:body_str).and_return(%|{"url":"http://noimageurl"}|)
  Curl::Easy.any_instance.stub(:perform).and_return(Curl::Easy.new)
end
30
+
20
31
  def stub_bad_request
21
32
  Curl::Easy.any_instance.stub(:body_str).and_return %|<html><title>400: Bad Request - Invalid URL format http://mopskopf</title><body>400: Bad Request - Invalid URL format http://mopskopf</body></html>|
22
33
  Curl::Easy.any_instance.stub(:perform).and_return Curl::Easy.new
data/spec/tweetlr_spec.rb CHANGED
@@ -12,17 +12,6 @@ describe Tweetlr do
12
12
  before :each do
13
13
  @credentials = {:email => USER, :password => PW}
14
14
  @searchterm = 'fail'
15
- @links = {
16
- :instagram => "http://instagr.am/p/DzCWn/",
17
- :twitpic => "http://twitpic.com/449o2x",
18
- :yfrog => "http://yfrog.com/h4vlfp",
19
- :picplz => "http://picplz.com/2hWv",
20
- :imgly => "http://img.ly/3M1o",
21
- :tco => 'http://t.co/MUGNayA',
22
- :lockerz => 'http://lockerz.com/s/100269159',
23
- :embedly => 'http://flic.kr/p/973hTv',
24
- :twitter_pics => 'http://t.co/FmyBGfyY'
25
- }
26
15
  @tweets = {
27
16
  :instagram => {'text' => "jadda jadda http://instagr.am/p/DzCWn/"},
28
17
  :twitpic => {'text' => "jadda jadda http://twitpic.com/449o2x"},
@@ -34,14 +23,25 @@ describe Tweetlr do
34
23
  :embedly => {'text' => "jadda jadda http://flic.kr/p/973hTv"},
35
24
  :twitter_pics => {'text' => "jadda jadda http://t.co/FmyBGfyY"}
36
25
  }
26
+ @links = {
27
+ :instagram => "http://instagr.am/p/DzCWn/",
28
+ :twitpic => "http://twitpic.com/449o2x",
29
+ :yfrog => "http://yfrog.com/h4vlfp",
30
+ :picplz => "http://picplz.com/2hWv",
31
+ :imgly => "http://img.ly/3M1o",
32
+ :tco => 'http://t.co/MUGNayA',
33
+ :lockerz => 'http://lockerz.com/s/100269159',
34
+ :embedly => 'http://flic.kr/p/973hTv',
35
+ :twitter_pics => 'http://t.co/FmyBGfyY'
36
+ }
37
37
  @first_link = "http://url.com"
38
- @second_link = @links[:instagram]
38
+ @second_link = "http://instagr.am/p/DzCWn/"
39
39
  @third_link = "https://imageurl.com"
40
40
  @twitter_response = {"from_user_id_str"=>"1915714", "profile_image_url"=>"http://a0.twimg.com/profile_images/386000279/2_normal.jpg", "created_at"=>"Sun, 17 Apr 2011 16:48:42 +0000", "from_user"=>"whitey_Mc_whIteLIst", "id_str"=>"59659561224765440", "metadata"=>{"result_type"=>"recent"}, "to_user_id"=>nil, "text"=>"Rigaer #wirsounterwegs #{@first_link} @ Augenarzt Dr. Lierow #{@second_link} #{@third_link}", "id"=>59659561224765440, "from_user_id"=>1915714, "geo"=>{"type"=>"Point", "coordinates"=>[52.5182, 13.454]}, "iso_language_code"=>"de", "place"=>{"id"=>"3078869807f9dd36", "type"=>"city", "full_name"=>"Berlin, Berlin"}, "to_user_id_str"=>nil, "source"=>"&lt;a href=&quot;http://instagr.am&quot; rel=&quot;nofollow&quot;&gt;instagram&lt;/a&gt;"}
41
41
  @non_whitelist_tweet = @twitter_response.merge 'from_user' => 'nonwhitelist user'
42
42
  @retweet = @twitter_response.merge "text" => "bla bla RT @fgd: tueddelkram"
43
43
  @new_style_retweet = @twitter_response.merge "text" => "and it scales! \u201c@moeffju: http://t.co/8gUSPKu #hktbl1 #origami success! :)\u201d"
44
- @pic_regexp = /(.*?)\.(jpg|jpeg|png|gif)/i
44
+ @new_style_retweet_no_addition = @twitter_response.merge "text" => "\u201c@moeffju: http://t.co/8gUSPKu #hktbl1 #origami success! :)\u201d"
45
45
  @config_file = File.join( Dir.pwd, 'config', 'tweetlr.yml')
46
46
  @tweetlr = Tweetlr.new(USER, PW, {:whitelist => WHITELIST, :results_per_page => 5, :since_id => TIMESTAMP, :terms => @searchterm, :loglevel => 4})
47
47
  end
@@ -55,7 +55,7 @@ describe Tweetlr do
55
55
  it "should search twitter for a given term" do
56
56
  stub_twitter
57
57
  tweetlr = @tweetlr
58
- response = tweetlr.search_twitter
58
+ response = tweetlr.lazy_search_twitter
59
59
  tweets = response['results']
60
60
  tweets.should be
61
61
  tweets.should_not be_empty
@@ -94,61 +94,34 @@ describe Tweetlr do
94
94
  post = @tweetlr.generate_tumblr_photo_post @retweet
95
95
  post.should_not be
96
96
  end
97
- it "should not use new style retweets which would produce double blog posts" do
98
- post = @tweetlr.generate_tumblr_photo_post @new_style_retweet
99
- post.should_not be
97
+ context "should not use new style retweets which would produce double blog posts" do
98
+ it "for quotes in context" do
99
+ post = @tweetlr.generate_tumblr_photo_post @new_style_retweet
100
+ post.should_not be
101
+ end
102
+ it "for quotes without further text addition" do
103
+ post = @tweetlr.generate_tumblr_photo_post @new_style_retweet_no_addition
104
+ post.should_not be
105
+ end
100
106
  end
101
- context "image url processing" do
102
- it "should find a picture's url from the supported services" do
103
- @links.each do |key,value|
107
+ context "handles pictures in tweets" do
108
+ it "extracting their corresponding links" do
109
+ @tweets.each do |key,value|
104
110
  send "stub_#{key}"
105
- url = @tweetlr.find_image_url value
111
+ url = @tweetlr.extract_image_url value
106
112
  url.should be, "service #{key} not working!"
107
113
  check_pic_url_extraction key if [:instagram,:picplz,:yfrog,:imgly,:not_listed].index key
108
114
  end
109
115
  end
110
- it "should not crash if embedly fallback won't find a link" do
111
- stub_bad_request
112
- url = @tweetlr.find_image_url "http://mopskopf"
113
- end
114
- it "should not crash with an encoding error when response is non-us-ascii" do
115
- stub_utf8_response
116
- url = @tweetlr.find_image_url "http://api.instagram.com/oembed?url=http://instagr.am/p/Gx%E2%80%946/"
117
- end
118
- end
119
- describe "tweet api response processing" do
120
- it "extracts links" do
121
- links = @tweetlr.extract_links ''
122
- links.should be_nil
123
- links = @tweetlr.extract_links @twitter_response
124
- links[0].should == @first_link
125
- links[1].should == @second_link
126
- links[2].should == @third_link
127
- end
128
- it "uses the first image link found in a tweet with multiple links" do
116
+ it "using the first image link found in a tweet with multiple links" do
129
117
  stub_instagram
130
118
  link = @tweetlr.extract_image_url @twitter_response
131
119
  link.should == 'http://distillery.s3.amazonaws.com/media/2011/05/02/d25df62b9cec4a138967a3ad027d055b_7.jpg'
132
120
  end
133
- it "follows redirects" do
134
- stub_imgly
135
- link = @tweetlr.link_url_redirect 'im mocked anyways'
136
- link.should == 'http://s3.amazonaws.com/imgly_production/899582/full.jpg'
137
- end
138
- it "extracts pictures from links" do
139
- @tweets.each do |key,value|
140
- send "stub_#{key}"
141
- url = @tweetlr.extract_image_url value
142
- url.should be, "service #{key} not working!"
143
- check_pic_url_extraction key if [:instagram,:picplz,:yfrog,:imgly,:not_listed].index key
144
- end
121
+ it "not returning links that do not belong to images" do
122
+ stub_no_image_link
123
+ link = @tweetlr.extract_image_url @twitter_response
124
+ link.should_not be
145
125
  end
146
126
  end
147
-
148
- def check_pic_url_extraction(service)
149
- image_url = @tweetlr.send "image_url_#{service}".to_sym, @links[service]
150
- image_url.should =~ @pic_regexp
151
- end
152
-
153
127
  end
154
-
@@ -0,0 +1,18 @@
1
+ require 'spec_helper'
2
+
3
+ describe TwitterProcessor do
4
+ before :each do
5
+ @first_link = "http://url.com"
6
+ @second_link = "http://instagr.am/p/DzCWn/"
7
+ @third_link = "https://imageurl.com"
8
+ @twitter_response = {"from_user_id_str"=>"1915714", "profile_image_url"=>"http://a0.twimg.com/profile_images/386000279/2_normal.jpg", "created_at"=>"Sun, 17 Apr 2011 16:48:42 +0000", "from_user"=>"whitey_Mc_whIteLIst", "id_str"=>"59659561224765440", "metadata"=>{"result_type"=>"recent"}, "to_user_id"=>nil, "text"=>"Rigaer #wirsounterwegs #{@first_link} @ Augenarzt Dr. Lierow #{@second_link} #{@third_link}", "id"=>59659561224765440, "from_user_id"=>1915714, "geo"=>{"type"=>"Point", "coordinates"=>[52.5182, 13.454]}, "iso_language_code"=>"de", "place"=>{"id"=>"3078869807f9dd36", "type"=>"city", "full_name"=>"Berlin, Berlin"}, "to_user_id_str"=>nil, "source"=>"&lt;a href=&quot;http://instagr.am&quot; rel=&quot;nofollow&quot;&gt;instagram&lt;/a&gt;"}
9
+ end
10
+ it "extracts links" do
11
+ links = TwitterProcessor::extract_links ''
12
+ links.should be_nil
13
+ links = TwitterProcessor::extract_links @twitter_response
14
+ links[0].should == @first_link
15
+ links[1].should == @second_link
16
+ links[2].should == @third_link
17
+ end
18
+ end
data/tweetlr.gemspec CHANGED
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = "tweetlr"
3
- s.version = "0.1.6"
3
+ s.version = "0.1.7pre"
4
4
  s.author = "Sven Kraeuter"
5
5
  s.email = "sven.kraeuter@gmail.com"
6
6
  s.homepage = "http://tweetlr.5v3n.com"
metadata CHANGED
@@ -1,19 +1,19 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tweetlr
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.6
5
- prerelease:
4
+ version: 0.1.7pre
5
+ prerelease: 5
6
6
  platform: ruby
7
7
  authors:
8
8
  - Sven Kraeuter
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2011-11-05 00:00:00.000000000Z
12
+ date: 2011-11-12 00:00:00.000000000Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: daemons
16
- requirement: &2156339340 !ruby/object:Gem::Requirement
16
+ requirement: &2153597020 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *2156339340
24
+ version_requirements: *2153597020
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: eventmachine
27
- requirement: &2156338900 !ruby/object:Gem::Requirement
27
+ requirement: &2153596600 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: '0'
33
33
  type: :runtime
34
34
  prerelease: false
35
- version_requirements: *2156338900
35
+ version_requirements: *2153596600
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: curb
38
- requirement: &2156338480 !ruby/object:Gem::Requirement
38
+ requirement: &2153596180 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ! '>='
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: '0'
44
44
  type: :runtime
45
45
  prerelease: false
46
- version_requirements: *2156338480
46
+ version_requirements: *2153596180
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: json
49
- requirement: &2156338060 !ruby/object:Gem::Requirement
49
+ requirement: &2153595760 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ! '>='
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: '0'
55
55
  type: :runtime
56
56
  prerelease: false
57
- version_requirements: *2156338060
57
+ version_requirements: *2153595760
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: rake
60
- requirement: &2156337560 !ruby/object:Gem::Requirement
60
+ requirement: &2153595260 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ~>
@@ -65,10 +65,10 @@ dependencies:
65
65
  version: 0.8.7
66
66
  type: :development
67
67
  prerelease: false
68
- version_requirements: *2156337560
68
+ version_requirements: *2153595260
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: rspec
71
- requirement: &2156337140 !ruby/object:Gem::Requirement
71
+ requirement: &2153621460 !ruby/object:Gem::Requirement
72
72
  none: false
73
73
  requirements:
74
74
  - - ! '>='
@@ -76,10 +76,10 @@ dependencies:
76
76
  version: '0'
77
77
  type: :development
78
78
  prerelease: false
79
- version_requirements: *2156337140
79
+ version_requirements: *2153621460
80
80
  - !ruby/object:Gem::Dependency
81
81
  name: rdoc
82
- requirement: &2156336680 !ruby/object:Gem::Requirement
82
+ requirement: &2153621000 !ruby/object:Gem::Requirement
83
83
  none: false
84
84
  requirements:
85
85
  - - ! '>='
@@ -87,7 +87,7 @@ dependencies:
87
87
  version: '0'
88
88
  type: :development
89
89
  prerelease: false
90
- version_requirements: *2156336680
90
+ version_requirements: *2153621000
91
91
  description: tweetlr crawls twitter for a given term, extracts photos out of the collected
92
92
  tweets' short urls and posts the images to tumblr.
93
93
  email: sven.kraeuter@gmail.com
@@ -108,9 +108,16 @@ files:
108
108
  - Rakefile
109
109
  - bin/tweetlr
110
110
  - config/tweetlr.yml
111
+ - lib/http_processor.rb
112
+ - lib/log_aware.rb
113
+ - lib/photo_service_processor.rb
114
+ - lib/tumblr_processor.rb
111
115
  - lib/tweetlr.rb
116
+ - lib/twitter_processor.rb
117
+ - spec/photo_services_processor_spec.rb
112
118
  - spec/spec_helper.rb
113
119
  - spec/tweetlr_spec.rb
120
+ - spec/twitter_processor_spec.rb
114
121
  - tweetlr.gemspec
115
122
  homepage: http://tweetlr.5v3n.com
116
123
  licenses: []
@@ -127,9 +134,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
127
134
  required_rubygems_version: !ruby/object:Gem::Requirement
128
135
  none: false
129
136
  requirements:
130
- - - ! '>='
137
+ - - ! '>'
131
138
  - !ruby/object:Gem::Version
132
- version: '0'
139
+ version: 1.3.1
133
140
  requirements: []
134
141
  rubyforge_project: tweetlr
135
142
  rubygems_version: 1.8.10
@@ -138,5 +145,7 @@ specification_version: 3
138
145
  summary: tweetlr crawls twitter for a given term, extracts photos out of the collected
139
146
  tweets' short urls and posts the images to tumblr.
140
147
  test_files:
148
+ - spec/photo_services_processor_spec.rb
141
149
  - spec/spec_helper.rb
142
150
  - spec/tweetlr_spec.rb
151
+ - spec/twitter_processor_spec.rb