tweetlr 0.1.6 → 0.1.7pre

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore CHANGED
@@ -2,8 +2,8 @@
2
2
  *.log
3
3
  pkg
4
4
  *.pid
5
- config
6
5
  *.output
7
6
  .rvmrc
8
7
  Gemfile.lock
9
- tweetlr.tid
8
+ tweetlr.tid
9
+ tweetlr.yml.dev
data/bin/tweetlr CHANGED
@@ -46,22 +46,24 @@ Daemons.run_proc('tweetlr', :dir_mode => :script, :dir => './', :backtrace => tr
46
46
  EventMachine::run {
47
47
  EventMachine::add_periodic_timer( UPDATE_PERIOD ) {
48
48
  @log.info "starting tweetlr crawl..."
49
- response = @tweetlr.lazy_search_twitter
49
+ response = {}
50
+ response = @tweetlr.lazy_search_twitter(@tweetlr.twitter_config["refresh_url"]) #looks awkward, but the refresh url will come from the db soon and make sense then...
50
51
  if response
51
52
  tweets = response['results']
52
53
  if tweets
53
54
  tweets.each do |tweet|
54
- tumblr_post = @tweetlr.generate_tumblr_photo_post tweet
55
- if tumblr_post.nil? || tumblr_post[:source].nil?
56
- @log.warn "could not get image source: tweet: #{tweet} --- tumblr post: #{tumblr_post.inspect}"
57
- else
58
- @log.debug "tumblr post: #{tumblr_post}"
59
- res = @tweetlr.post_to_tumblr tumblr_post
60
- @log.warn "tumblr response: #{res.header_str} #{res.body_str}" unless res.response_code == 201
61
- end
62
- # store the highest tweet id
63
- File.open(tid_file, "w+") { |io| io.write(tweets.first['id']) }
55
+ tumblr_post = @tweetlr.generate_tumblr_photo_post tweet
56
+ if tumblr_post.nil? || tumblr_post[:source].nil?
57
+ @log.warn "could not get image source: tweet: #{tweet} --- tumblr post: #{tumblr_post.inspect}"
58
+ else
59
+ @log.debug "tumblr post: #{tumblr_post}"
60
+ res = @tweetlr.post_to_tumblr tumblr_post
61
+ @log.warn "tumblr response: #{res.header_str} #{res.body_str}" unless res.response_code == 201
64
62
  end
63
+ end
64
+ # store the highest tweet id
65
+ @tweetlr.twitter_config[:refresh_url]=response['refresh_url']
66
+ File.open(tid_file, "w+") { |io| io.write(response['max_id']) }
65
67
  end
66
68
  else
67
69
  @log.error "twitter search returned no response. hail the failwhale!"
@@ -0,0 +1,42 @@
1
+ require 'curb'
2
+ require 'log_aware'
3
+
4
+ module HttpProcessor
5
+ include LogAware
6
+
7
+ USER_AGENT = %{Mozilla/5.0 (compatible; tweetlr; +http://tweetlr.5v3n.com)}
8
+
9
+ #convenience method for curl http get calls and parsing them to json.
10
+ def HttpProcessor::http_get(request, log=nil)
11
+ tries = 3
12
+ begin
13
+ curl = Curl::Easy.new request
14
+ curl.useragent = USER_AGENT
15
+ curl.perform
16
+ begin
17
+ JSON.parse curl.body_str
18
+ rescue JSON::ParserError => err
19
+ begin
20
+ if log
21
+ log.warn "#{err}: Could not parse response for #{request} - this is probably not a json response: #{curl.body_str}"
22
+ end
23
+ return nil
24
+ rescue Encoding::CompatibilityError => err
25
+ if log
26
+ log.error "Trying to rescue a JSON::ParserError for '#{request}' we got stuck in a Encoding::CompatibilityError."
27
+ end
28
+ return nil
29
+ end
30
+ end
31
+ rescue Curl::Err::CurlError => err
32
+ log.error "Failure in Curl call: #{err}" if log
33
+ tries -= 1
34
+ sleep 3
35
+ if tries > 0
36
+ retry
37
+ else
38
+ nil
39
+ end
40
+ end
41
+ end
42
+ end
data/lib/log_aware.rb ADDED
@@ -0,0 +1,8 @@
1
+ module LogAware
2
+ def self.log=(log)
3
+ @@log = log #TODO think of a more elegant way of logging than a static attribute
4
+ end
5
+ def self.log()
6
+ @@log
7
+ end
8
+ end
@@ -0,0 +1,122 @@
1
+ require 'log_aware'
2
+
3
+ module PhotoServiceProcessor
4
+
5
+ LOCATION_START_INDICATOR = 'Location: '
6
+ LOCATION_STOP_INDICATOR = "\r\n"
7
+ PIC_REGEXP = /(.*?)\.(jpg|jpeg|png|gif)/i
8
+
9
+ include LogAware
10
+
11
+ def self.find_image_url(link)
12
+ url = nil
13
+ if link && !(photo? link)
14
+ url = image_url_instagram link if (link.index('instagr.am') || link.index('instagram.com'))
15
+ url = image_url_picplz link if link.index 'picplz'
16
+ url = image_url_twitpic link if link.index 'twitpic'
17
+ url = image_url_yfrog link if link.index 'yfrog'
18
+ url = image_url_imgly link if link.index 'img.ly'
19
+ url = image_url_tco link if link.index 't.co'
20
+ url = image_url_lockerz link if link.index 'lockerz.com'
21
+ url = image_url_foursquare link if link.index '4sq.com'
22
+ url = image_url_embedly link if url.nil? #just try embed.ly for anything else. could do all image url processing w/ embedly, but there's probably some kind of rate limit involved.
23
+ elsif photo? link
24
+ url = link
25
+ end
26
+ url
27
+ end
28
+
29
+ def self.photo?(link)
30
+ link =~ PIC_REGEXP
31
+ end
32
+
33
+ #find the image's url via embed.ly
34
+ def self.image_url_embedly(link_url)
35
+ response = HttpProcessor::http_get "http://api.embed.ly/1/oembed?url=#{link_url}"
36
+ if response && response['type'] == 'photo'
37
+ image_url = response['url']
38
+ end
39
+ image_url
40
+ end
41
+ #find the image's url for a foursquare link
42
+ def self.image_url_foursquare(link_url)
43
+ image_url_embedly link_url
44
+ end
45
+ #find the image's url for a lockerz link
46
+ def self.image_url_lockerz(link_url)
47
+ response = HttpProcessor::http_get "http://api.plixi.com/api/tpapi.svc/json/metadatafromurl?details=false&url=#{link_url}"
48
+ response["BigImageUrl"] if response
49
+ end
50
+ #find the image's url for a twitter-shortened link
51
+ def self.image_url_tco(link_url)
52
+ service_url = link_url_redirect link_url
53
+ find_image_url service_url
54
+ end
55
+ #find the image's url for an instagram link
56
+ def self.image_url_instagram(link_url)
57
+ link_url['instagram.com'] = 'instagr.am' if link_url.index 'instagram.com' #instagram's oembed does not work for .com links
58
+ response = HttpProcessor::http_get "http://api.instagram.com/oembed?url=#{link_url}"
59
+ response['url'] if response
60
+ end
61
+
62
+ #find the image's url for a picplz short/longlink
63
+ def self.image_url_picplz(link_url)
64
+ id = extract_id link_url
65
+ #try short url
66
+ response = HttpProcessor::http_get "http://picplz.com/api/v2/pic.json?shorturl_ids=#{id}"
67
+ #if short url fails, try long url
68
+ #response = HTTParty.get "http://picplz.com/api/v2/pic.json?longurl_ids=#{id}"
69
+ #extract url
70
+ if response && response['value'] && response['value']['pics'] && response['value']['pics'].first && response['value']['pics'].first['pic_files'] && response['value']['pics'].first['pic_files']['640r']
71
+ response['value']['pics'].first['pic_files']['640r']['img_url']
72
+ else
73
+ nil
74
+ end
75
+ end
76
+ #find the image's url for a twitpic link
77
+ def self.image_url_twitpic(link_url)
78
+ image_url_redirect link_url, "http://twitpic.com/show/full/"
79
+ end
80
+ #find the image's url for a yfrog link
81
+ def self.image_url_yfrog(link_url)
82
+ response = HttpProcessor::http_get("http://www.yfrog.com/api/oembed?url=#{link_url}")
83
+ response['url'] if response
84
+ end
85
+ #find the image's url for an img.ly link
86
+ def self.image_url_imgly(link_url)
87
+ image_url_redirect link_url, "http://img.ly/show/full/", "\r\n"
88
+ end
89
+
90
+ # extract image url from services like twitpic & img.ly that do not offer oembed interfaces
91
+ def self.image_url_redirect(link_url, service_endpoint, stop_indicator = LOCATION_STOP_INDICATOR)
92
+ link_url_redirect "#{service_endpoint}#{extract_id link_url}", stop_indicator
93
+ end
94
+
95
+ def self.link_url_redirect(short_url, stop_indicator = LOCATION_STOP_INDICATOR)
96
+ tries = 3
97
+ begin
98
+ resp = Curl::Easy.http_get(short_url) { |res| res.follow_location = true }
99
+ rescue Curl::Err::CurlError => err
100
+ log.error "Curl::Easy.http_get failed: #{err}"
101
+ tries -= 1
102
+ sleep 3
103
+ if tries > 0
104
+ retry
105
+ else
106
+ return nil
107
+ end
108
+ end
109
+ if(resp && resp.header_str.index(LOCATION_START_INDICATOR) && resp.header_str.index(stop_indicator))
110
+ start = resp.header_str.index(LOCATION_START_INDICATOR) + LOCATION_START_INDICATOR.size
111
+ stop = resp.header_str.index(stop_indicator, start)
112
+ resp.header_str[start...stop]
113
+ else
114
+ nil
115
+ end
116
+ end
117
+
118
+ #extract the pic id from a given <code>link</code>
119
+ def self.extract_id(link)
120
+ link.split('/').last if link.split('/')
121
+ end
122
+ end
@@ -0,0 +1,3 @@
1
+ module TumblrProcessor
2
+
3
+ end
data/lib/tweetlr.rb CHANGED
@@ -3,14 +3,17 @@ require 'logger'
3
3
  require 'yaml'
4
4
  require 'curb'
5
5
  require 'json'
6
+ require 'twitter_processor'
7
+ require 'http_processor'
8
+ require 'photo_service_processor'
9
+ require 'log_aware'
6
10
 
7
11
  class Tweetlr
12
+
13
+ attr_accessor :twitter_config
8
14
 
9
- VERSION = '0.1.6'
15
+ VERSION = '0.1.7pre'
10
16
  GENERATOR = %{tweetlr - http://tweetlr.5v3n.com}
11
- USER_AGENT = %{Mozilla/5.0 (compatible; tweetlr/#{VERSION}; +http://tweetlr.5v3n.com)}
12
- LOCATION_START_INDICATOR = 'Location: '
13
- LOCATION_STOP_INDICATOR = "\r\n"
14
17
 
15
18
  API_ENDPOINT_TWITTER = 'http://search.twitter.com/search.json'
16
19
  API_ENDPOINT_TUMBLR = 'http://www.tumblr.com'
@@ -18,8 +21,6 @@ class Tweetlr
18
21
  TWITTER_RESULTS_TYPE = 'recent'
19
22
  UPDATE_PERIOD = 600 #10 minutes
20
23
 
21
- PIC_REGEXP = /(.*?)\.(jpg|jpeg|png|gif)/i
22
-
23
24
  def initialize(email, password, args={:terms=>nil, :whitelist => nil, :shouts => nil, :since_id=>nil, :results_per_page => nil, :loglevel=>nil, :result_type => nil})
24
25
  @log = Logger.new(STDOUT)
25
26
  if (Logger::DEBUG..Logger::UNKNOWN).to_a.index(args[:loglevel])
@@ -28,21 +29,33 @@ class Tweetlr
28
29
  @log.level = Logger::INFO
29
30
  end
30
31
  @log.debug "log level set to #{@log.level}"
32
+ LogAware.log=@log
33
+ @twitter_config = {
34
+ :since_id => args[:since_id],
35
+ :search_term => args[:terms],
36
+ :results_per_page => args[:results_per_page] || TWITTER_RESULTS_PER_PAGE,
37
+ :result_type => args[:result_type] || TWITTER_RESULTS_TYPE,
38
+ :api_endpoint_twitter => args[:api_endpoint_twitter] || API_ENDPOINT_TWITTER
39
+ }
40
+ @twitter_config[:refresh_url] = "?ors=#{@twitter_config[:search_term]}&since_id=#{@twitter_config[:since_id]}&rpp=#{@twitter_config[:results_per_page]}&result_type=#{@twitter_config[:result_type]}" if (@twitter_config[:since_id] && @twitter_config[:search_term])
41
+ @twitter_config[:logger] = @log
42
+
31
43
  @email = email
32
44
  @password = password
33
- @since_id = args[:since_id]
34
- @search_term = args[:terms]
35
45
  @cookie = args[:cookie]
36
- @results_per_page = args[:results_per_page] || TWITTER_RESULTS_PER_PAGE
37
- @result_type = args[:result_type] || TWITTER_RESULTS_TYPE
38
- @api_endpoint_twitter = args[:api_endpoint_twitter] || API_ENDPOINT_TWITTER
46
+ @api_endpoint_twitter =
39
47
  @api_endpoint_tumblr = args[:api_endpoint_tumblr] || API_ENDPOINT_TUMBLR
40
48
  @whitelist = args[:whitelist]
41
49
  @shouts = args[:shouts]
42
50
  @update_period = args[:update_period] || UPDATE_PERIOD
43
51
  @whitelist.each {|entry| entry.downcase!} if @whitelist
44
- @refresh_url = "#{@api_endpoint_twitter}?ors=#{@search_term}&since_id=#{@since_id}&rpp=#{@results_per_page}&result_type=#{@result_type}" if (@since_id && @search_term)
45
52
  end
53
+
54
+ def lazy_search_twitter(refresh_url=nil)
55
+ @twitter_config[:refresh_url] = refresh_url if refresh_url
56
+ TwitterProcessor::lazy_search(@twitter_config)
57
+ end
58
+
46
59
  #post a tumblr photo entry. required arguments are :type, :date, :source, :caption, :state. optional argument: :tags
47
60
  def post_to_tumblr(options={})
48
61
  tries = 3
@@ -60,7 +73,7 @@ class Tweetlr
60
73
  Curl::PostField.content('state', options[:state]),
61
74
  Curl::PostField.content('tags', tags)
62
75
  )
63
- rescue Curl::Err => err
76
+ rescue Curl::Err::CurlError => err
64
77
  @log.error "Failure in Curl call: #{err}"
65
78
  tries -= 1
66
79
  sleep 3
@@ -78,7 +91,7 @@ class Tweetlr
78
91
  def generate_tumblr_photo_post tweet
79
92
  tumblr_post = nil
80
93
  message = tweet['text']
81
- if !retweet? message
94
+ if !TwitterProcessor::retweet? message
82
95
  @log.debug "tweet: #{tweet}"
83
96
  tumblr_post = {}
84
97
  tumblr_post[:type] = 'photo'
@@ -100,183 +113,17 @@ class Tweetlr
100
113
  tumblr_post
101
114
  end
102
115
 
103
- #checks if the message is a retweet
104
- def retweet?(message)
105
- message.index('RT @') || message.index(%{ "@}) || message.index(" \u201c@") #detect retweets
106
- end
107
-
108
- #fire a new search
109
- def search_twitter()
110
- search_call = "#{@api_endpoint_twitter}?ors=#{@search_term}&result_type=#{@result_type}&rpp=#{@results_per_page}"
111
- @response = http_get search_call
112
- end
113
- # lazy update - search for a term or refresh the search if a response is available already
114
- def lazy_search_twitter()
115
- @refresh_url = "#{@api_endpoint_twitter}#{@response['refresh_url']}" unless (@response.nil? || @response['refresh_url'].nil? || @response['refresh_url'].empty?)
116
- if @refresh_url
117
- search_url = "#{@refresh_url}&result_type=#{@result_type}&rpp=#{@results_per_page}"
118
- @log.info "lazy search using '#{search_url}'"
119
- @response = http_get search_url
120
- else
121
- @log.debug "regular search using '#{@search_term}'"
122
- @response = search_twitter()
123
- end
124
- end
125
-
126
116
  #extract a linked image file's url from a tweet. first found image will be used.
127
117
  def extract_image_url(tweet)
128
- links = extract_links tweet
118
+ links = TwitterProcessor::extract_links tweet
129
119
  image_url = nil
130
120
  if links
131
121
  links.each do |link|
132
- image_url = find_image_url(link)
133
- return image_url if image_url =~ PIC_REGEXP
122
+ image_url = PhotoServiceProcessor::find_image_url(link)
123
+ return image_url if PhotoServiceProcessor::photo? image_url
134
124
  end
135
125
  end
136
126
  image_url
137
127
  end
138
128
 
139
- #extract the linked image file's url from a tweet
140
- def find_image_url(link)
141
- url = nil
142
- if !link.nil?
143
- url = image_url_instagram link if (link.index('instagr.am') || link.index('instagram.com'))
144
- url = image_url_picplz link if link.index 'picplz'
145
- url = image_url_twitpic link if link.index 'twitpic'
146
- url = image_url_yfrog link if link.index 'yfrog'
147
- url = image_url_imgly link if link.index 'img.ly'
148
- url = image_url_tco link if link.index 't.co'
149
- url = image_url_lockerz link if link.index 'lockerz.com'
150
- url = image_url_foursquare link if link.index '4sq.com'
151
- url = image_url_embedly link if url.nil? #just try embed.ly for anything else. could do all image url processing w/ embedly, but there's probably some kind of rate limit invovled.
152
- end
153
- url
154
- end
155
-
156
- #find the image's url via embed.ly
157
- def image_url_embedly(link_url)
158
- response = http_get "http://api.embed.ly/1/oembed?url=#{link_url}"
159
- response['url'] if response
160
- end
161
- #find the image's url for a foursquare link
162
- def image_url_foursquare(link_url)
163
- image_url_embedly link_url
164
- end
165
- #find the image's url for a lockerz link
166
- def image_url_lockerz(link_url)
167
- response = http_get "http://api.plixi.com/api/tpapi.svc/json/metadatafromurl?details=false&url=#{link_url}"
168
- response["BigImageUrl"] if response
169
- end
170
- #find the image's url for an twitter shortened link
171
- def image_url_tco(link_url)
172
- service_url = link_url_redirect link_url
173
- find_image_url service_url
174
- end
175
- #find the image's url for an instagram link
176
- def image_url_instagram(link_url)
177
- link_url['instagram.com'] = 'instagr.am' if link_url.index 'instagram.com' #instagram's oembed does not work for .com links
178
- response = http_get "http://api.instagram.com/oembed?url=#{link_url}"
179
- response['url'] if response
180
- end
181
-
182
- #find the image's url for a picplz short/longlink
183
- def image_url_picplz(link_url)
184
- id = extract_id link_url
185
- #try short url
186
- response = http_get "http://picplz.com/api/v2/pic.json?shorturl_ids=#{id}"
187
- #if short url fails, try long url
188
- #response = HTTParty.get "http://picplz.com/api/v2/pic.json?longurl_ids=#{id}"
189
- #extract url
190
- if response && response['value'] && response['value']['pics'] && response['value']['pics'].first && response['value']['pics'].first['pic_files'] && response['value']['pics'].first['pic_files']['640r']
191
- response['value']['pics'].first['pic_files']['640r']['img_url']
192
- else
193
- nil
194
- end
195
- end
196
- #find the image's url for a twitpic link
197
- def image_url_twitpic(link_url)
198
- image_url_redirect link_url, "http://twitpic.com/show/full/"
199
- end
200
- #find the image'S url for a yfrog link
201
- def image_url_yfrog(link_url)
202
- response = http_get("http://www.yfrog.com/api/oembed?url=#{link_url}")
203
- response['url'] if response
204
- end
205
- #find the image's url for a img.ly link
206
- def image_url_imgly(link_url)
207
- image_url_redirect link_url, "http://img.ly/show/full/", "\r\n"
208
- end
209
-
210
- # extract image url from services like twitpic & img.ly that do not offer oembed interfaces
211
- def image_url_redirect(link_url, service_endpoint, stop_indicator = LOCATION_STOP_INDICATOR)
212
- link_url_redirect "#{service_endpoint}#{extract_id link_url}", stop_indicator
213
- end
214
-
215
- def link_url_redirect(short_url, stop_indicator = LOCATION_STOP_INDICATOR)
216
- tries = 3
217
- begin
218
- resp = Curl::Easy.http_get(short_url) { |res| res.follow_location = true }
219
- rescue Curl::Err => err
220
- @log.error "Curl::Easy.http_get failed: #{err}"
221
- tries -= 1
222
- sleep 3
223
- if tries > 0
224
- retry
225
- else
226
- return nil
227
- end
228
- end
229
- if(resp && resp.header_str.index(LOCATION_START_INDICATOR) && resp.header_str.index(stop_indicator))
230
- start = resp.header_str.index(LOCATION_START_INDICATOR) + LOCATION_START_INDICATOR.size
231
- stop = resp.header_str.index(stop_indicator, start)
232
- resp.header_str[start...stop]
233
- else
234
- nil
235
- end
236
- end
237
-
238
- #extract the pic id from a given <code>link</code>
239
- def extract_id(link)
240
- link.split('/').last if link.split('/')
241
- end
242
-
243
- #extract the links from a given tweet
244
- def extract_links(tweet)
245
- if tweet
246
- text = tweet['text']
247
- text.gsub(/https?:\/\/[\S]+/).to_a if text
248
- end
249
- end
250
-
251
- private
252
-
253
- #convenience method for curl http get calls and parsing them to json.
254
- def http_get(request)
255
- tries = 3
256
- begin
257
- curl = Curl::Easy.new request
258
- curl.useragent = USER_AGENT
259
- curl.perform
260
- begin
261
- JSON.parse curl.body_str
262
- rescue JSON::ParserError => err
263
- begin
264
- @log.warn "#{err}: Could not parse response for #{request} - this is probably not a json response: #{curl.body_str}"
265
- return nil
266
- rescue Encoding::CompatibilityError => err
267
- @log.error "Trying to rescue a JSON::ParserError for '#{request}' we got stuck in a Encoding::CompatibilityError."
268
- return nil
269
- end
270
- end
271
- rescue Curl::Err => err
272
- @log.error "Failure in Curl call: #{err}"
273
- tries -= 1
274
- sleep 3
275
- if tries > 0
276
- retry
277
- else
278
- nil
279
- end
280
- end
281
- end
282
129
  end
@@ -0,0 +1,39 @@
1
+ require 'http_processor'
2
+
3
+ module TwitterProcessor
4
+
5
+ #checks if the message is a retweet
6
+ def self.retweet?(message)
7
+ message.index('RT @') || message.index(%{"@}) || message.index("\u201c@") #detect retweets
8
+ end
9
+
10
+ #extract the links from a given tweet
11
+ def self.extract_links(tweet)
12
+ if tweet
13
+ text = tweet['text']
14
+ text.gsub(/https?:\/\/[\S]+/).to_a if text
15
+ end
16
+ end
17
+
18
+ #fire a new search
19
+ def self.search(config)
20
+ search_call = "#{config[:api_endpoint_twitter]}?ors=#{config[:search_term]}&result_type=#{config[:result_type]}&rpp=#{config[:results_per_page]}"
21
+ HttpProcessor::http_get search_call
22
+ end
23
+
24
+ # lazy update - search for a term or refresh the search if a response is available already
25
+ def self.lazy_search(config)
26
+ result = nil
27
+ refresh_url = config[:refresh_url]
28
+ log = config[:logger]
29
+ if refresh_url
30
+ search_url = "#{config[:api_endpoint_twitter]}#{refresh_url}&result_type=#{config[:result_type]}&rpp=#{config[:results_per_page]}"
31
+ log.info "lazy search using '#{search_url}'" if log
32
+ result = HttpProcessor::http_get search_url
33
+ else
34
+ log.debug "regular search using '#{config[:search_term]}'" if log
35
+ result = search(config)
36
+ end
37
+ result
38
+ end
39
+ end
@@ -0,0 +1,38 @@
1
+ require 'spec_helper'
2
+
3
+ describe PhotoServiceProcessor do
4
+ before :each do
5
+ @links = {
6
+ :instagram => "http://instagr.am/p/DzCWn/",
7
+ :twitpic => "http://twitpic.com/449o2x",
8
+ :yfrog => "http://yfrog.com/h4vlfp",
9
+ :picplz => "http://picplz.com/2hWv",
10
+ :imgly => "http://img.ly/3M1o",
11
+ :tco => 'http://t.co/MUGNayA',
12
+ :lockerz => 'http://lockerz.com/s/100269159',
13
+ :embedly => 'http://flic.kr/p/973hTv',
14
+ :twitter_pics => 'http://t.co/FmyBGfyY'
15
+ }
16
+ end
17
+ it "should find a picture's url from the supported services" do
18
+ @links.each do |service,link|
19
+ send "stub_#{service}"
20
+ url = PhotoServiceProcessor::find_image_url link
21
+ url.should be, "service #{service} not working!"
22
+ check_pic_url_extraction service if [:instagram,:picplz,:yfrog,:imgly,:not_listed].index service
23
+ end
24
+ end
25
+ it "should not crash if embedly fallback won't find a link" do
26
+ stub_bad_request
27
+ url = PhotoServiceProcessor::find_image_url "http://mopskopf"
28
+ end
29
+ it "should not crash with an encoding error when response is non-us-ascii" do
30
+ stub_utf8_response
31
+ url = PhotoServiceProcessor::find_image_url "http://api.instagram.com/oembed?url=http://instagr.am/p/Gx%E2%80%946/"
32
+ end
33
+ it "follows redirects" do
34
+ stub_imgly
35
+ link = PhotoServiceProcessor::link_url_redirect 'im mocked anyways'
36
+ link.should == 'http://s3.amazonaws.com/imgly_production/899582/full.jpg'
37
+ end
38
+ end
data/spec/spec_helper.rb CHANGED
@@ -2,6 +2,11 @@
2
2
  require "bundler"
3
3
  Bundler.require :default, :development, :test
4
4
 
5
+ def check_pic_url_extraction(service)
6
+ image_url = PhotoServiceProcessor::send "image_url_#{service}".to_sym, @links[service]
7
+ image_url.should =~ PhotoServiceProcessor::PIC_REGEXP
8
+ end
9
+
5
10
  def stub_twitter
6
11
  Curl::Easy.any_instance.stub(:body_str).and_return %|{"results":[{"from_user_id_str":"220650275","profile_image_url":"http://a2.twimg.com/profile_images/668619338/9729_148876458070_505518070_2628895_7160219_n_normal.jpg","created_at":"Sat, 16 Jul 2011 23:20:01 +0000","from_user":"LoMuma","id_str":"92372947855093760","metadata":{"result_type":"recent"},"to_user_id":null,"text":"Need to stop procrastinating! 5 quizzes and personal responses due tomorrow... #fail","id":92372947855093760,"from_user_id":220650275,"geo":null,"iso_language_code":"en","to_user_id_str":null,"source":"&lt;a href=&quot;http://twitter.com/&quot;&gt;web&lt;/a&gt;"},{"from_user_id_str":"129718556","profile_image_url":"http://a2.twimg.com/profile_images/1428268221/twitter_normal.png","created_at":"Sat, 16 Jul 2011 23:20:01 +0000","from_user":"priiislopes","id_str":"92372947846692865","metadata":{"result_type":"recent"},"to_user_id":null,"text":"Esse jogo do Flu foi uma vergonha. Se ele fez o melhor dele no brasileiro semana passada, hj fez o pior de todos os tempos. 
#Fail","id":92372947846692865,"from_user_id":129718556,"geo":null,"iso_language_code":"pt","to_user_id_str":null,"source":"&lt;a href=&quot;http://twitter.com/&quot;&gt;web&lt;/a&gt;"},{"from_user_id_str":"259930166","profile_image_url":"http://a3.twimg.com/profile_images/1425221519/foto_normal.jpg","created_at":"Sat, 16 Jul 2011 23:20:00 +0000","from_user":"YamiiG4","id_str":"92372943132303360","metadata":{"result_type":"recent"},"to_user_id":null,"text":"vaya que eran 2 minutos..#FAIL!","id":92372943132303360,"from_user_id":259930166,"geo":null,"iso_language_code":"es","to_user_id_str":null,"source":"&lt;a href=&quot;http://www.tweetdeck.com&quot; rel=&quot;nofollow&quot;&gt;TweetDeck&lt;/a&gt;"},{"from_user_id_str":"321557905","profile_image_url":"http://a0.twimg.com/profile_images/1445672626/profile_normal.png","created_at":"Sat, 16 Jul 2011 23:20:00 +0000","from_user":"JasWafer_FFOE","id_str":"92372941379088384","metadata":{"result_type":"recent"},"to_user_id":null,"text":"RT @eye_OFBEHOLDER: RT @JasWafer_FFOE #Oomf said that he'll NEVER eat pussy! O.o --#FAIL","id":92372941379088384,"from_user_id":321557905,"geo":null,"iso_language_code":"en","to_user_id_str":null,"source":"&lt;a href=&quot;http://twidroyd.com&quot; rel=&quot;nofollow&quot;&gt;Twidroyd for Android&lt;/a&gt;"},{"from_user_id_str":"279395613","profile_image_url":"http://a0.twimg.com/profile_images/1334871419/lnnsquare_normal.jpg","created_at":"Sat, 16 Jul 2011 23:19:59 +0000","from_user":"LanguageNewsNet","id_str":"92372940640890881","metadata":{"result_type":"recent"},"to_user_id":null,"text":"Questioning the Inca Paradox: Did the civilization behind Machu Picchu really fail to develop a written la... 
http://tinyurl.com/5sfos23","id":92372940640890881,"from_user_id":279395613,"geo":null,"iso_language_code":"en","to_user_id_str":null,"source":"&lt;a href=&quot;http://twitterfeed.com&quot; rel=&quot;nofollow&quot;&gt;twitterfeed&lt;/a&gt;"}],"max_id":92372947855093760,"since_id":0,"refresh_url":"?since_id=92372947855093760&q=+fail","next_page":"?page=2&max_id=92372947855093760&rpp=5&q=+fail","results_per_page":5,"page":1,"completed_in":0.022152,"since_id_str":"0","max_id_str":"92372947855093760","query":"+fail"}|
7
12
  Curl::Easy.any_instance.stub(:perform).and_return Curl::Easy.new
@@ -17,6 +22,12 @@ def stub_instagram
17
22
  Curl::Easy.any_instance.stub(:perform).and_return Curl::Easy.new
18
23
  end
19
24
 
25
+ #instagram syntax but without a valid image link
26
+ def stub_no_image_link
27
+ Curl::Easy.any_instance.stub(:body_str).and_return %|{"url":"http://noimageurl"}|
28
+ Curl::Easy.any_instance.stub(:perform).and_return Curl::Easy.new
29
+ end
30
+
20
31
  def stub_bad_request
21
32
  Curl::Easy.any_instance.stub(:body_str).and_return %|<html><title>400: Bad Request - Invalid URL format http://mopskopf</title><body>400: Bad Request - Invalid URL format http://mopskopf</body></html>|
22
33
  Curl::Easy.any_instance.stub(:perform).and_return Curl::Easy.new
data/spec/tweetlr_spec.rb CHANGED
@@ -12,17 +12,6 @@ describe Tweetlr do
12
12
  before :each do
13
13
  @credentials = {:email => USER, :password => PW}
14
14
  @searchterm = 'fail'
15
- @links = {
16
- :instagram => "http://instagr.am/p/DzCWn/",
17
- :twitpic => "http://twitpic.com/449o2x",
18
- :yfrog => "http://yfrog.com/h4vlfp",
19
- :picplz => "http://picplz.com/2hWv",
20
- :imgly => "http://img.ly/3M1o",
21
- :tco => 'http://t.co/MUGNayA',
22
- :lockerz => 'http://lockerz.com/s/100269159',
23
- :embedly => 'http://flic.kr/p/973hTv',
24
- :twitter_pics => 'http://t.co/FmyBGfyY'
25
- }
26
15
  @tweets = {
27
16
  :instagram => {'text' => "jadda jadda http://instagr.am/p/DzCWn/"},
28
17
  :twitpic => {'text' => "jadda jadda http://twitpic.com/449o2x"},
@@ -34,14 +23,25 @@ describe Tweetlr do
34
23
  :embedly => {'text' => "jadda jadda http://flic.kr/p/973hTv"},
35
24
  :twitter_pics => {'text' => "jadda jadda http://t.co/FmyBGfyY"}
36
25
  }
26
+ @links = {
27
+ :instagram => "http://instagr.am/p/DzCWn/",
28
+ :twitpic => "http://twitpic.com/449o2x",
29
+ :yfrog => "http://yfrog.com/h4vlfp",
30
+ :picplz => "http://picplz.com/2hWv",
31
+ :imgly => "http://img.ly/3M1o",
32
+ :tco => 'http://t.co/MUGNayA',
33
+ :lockerz => 'http://lockerz.com/s/100269159',
34
+ :embedly => 'http://flic.kr/p/973hTv',
35
+ :twitter_pics => 'http://t.co/FmyBGfyY'
36
+ }
37
37
  @first_link = "http://url.com"
38
- @second_link = @links[:instagram]
38
+ @second_link = "http://instagr.am/p/DzCWn/"
39
39
  @third_link = "https://imageurl.com"
40
40
  @twitter_response = {"from_user_id_str"=>"1915714", "profile_image_url"=>"http://a0.twimg.com/profile_images/386000279/2_normal.jpg", "created_at"=>"Sun, 17 Apr 2011 16:48:42 +0000", "from_user"=>"whitey_Mc_whIteLIst", "id_str"=>"59659561224765440", "metadata"=>{"result_type"=>"recent"}, "to_user_id"=>nil, "text"=>"Rigaer #wirsounterwegs #{@first_link} @ Augenarzt Dr. Lierow #{@second_link} #{@third_link}", "id"=>59659561224765440, "from_user_id"=>1915714, "geo"=>{"type"=>"Point", "coordinates"=>[52.5182, 13.454]}, "iso_language_code"=>"de", "place"=>{"id"=>"3078869807f9dd36", "type"=>"city", "full_name"=>"Berlin, Berlin"}, "to_user_id_str"=>nil, "source"=>"&lt;a href=&quot;http://instagr.am&quot; rel=&quot;nofollow&quot;&gt;instagram&lt;/a&gt;"}
41
41
  @non_whitelist_tweet = @twitter_response.merge 'from_user' => 'nonwhitelist user'
42
42
  @retweet = @twitter_response.merge "text" => "bla bla RT @fgd: tueddelkram"
43
43
  @new_style_retweet = @twitter_response.merge "text" => "and it scales! \u201c@moeffju: http://t.co/8gUSPKu #hktbl1 #origami success! :)\u201d"
44
- @pic_regexp = /(.*?)\.(jpg|jpeg|png|gif)/i
44
+ @new_style_retweet_no_addition = @twitter_response.merge "text" => "\u201c@moeffju: http://t.co/8gUSPKu #hktbl1 #origami success! :)\u201d"
45
45
  @config_file = File.join( Dir.pwd, 'config', 'tweetlr.yml')
46
46
  @tweetlr = Tweetlr.new(USER, PW, {:whitelist => WHITELIST, :results_per_page => 5, :since_id => TIMESTAMP, :terms => @searchterm, :loglevel => 4})
47
47
  end
@@ -55,7 +55,7 @@ describe Tweetlr do
55
55
  it "should search twitter for a given term" do
56
56
  stub_twitter
57
57
  tweetlr = @tweetlr
58
- response = tweetlr.search_twitter
58
+ response = tweetlr.lazy_search_twitter
59
59
  tweets = response['results']
60
60
  tweets.should be
61
61
  tweets.should_not be_empty
@@ -94,61 +94,34 @@ describe Tweetlr do
94
94
  post = @tweetlr.generate_tumblr_photo_post @retweet
95
95
  post.should_not be
96
96
  end
97
- it "should not use new style retweets which would produce double blog posts" do
98
- post = @tweetlr.generate_tumblr_photo_post @new_style_retweet
99
- post.should_not be
97
+ context "should not use new style retweets which would produce double blog posts" do
98
+ it "for quotes in context" do
99
+ post = @tweetlr.generate_tumblr_photo_post @new_style_retweet
100
+ post.should_not be
101
+ end
102
+ it "for quotes without further text addition" do
103
+ post = @tweetlr.generate_tumblr_photo_post @new_style_retweet_no_addition
104
+ post.should_not be
105
+ end
100
106
  end
101
- context "image url processing" do
102
- it "should find a picture's url from the supported services" do
103
- @links.each do |key,value|
107
+ context "handles pictures in tweets" do
108
+ it "extracting their corresponding links" do
109
+ @tweets.each do |key,value|
104
110
  send "stub_#{key}"
105
- url = @tweetlr.find_image_url value
111
+ url = @tweetlr.extract_image_url value
106
112
  url.should be, "service #{key} not working!"
107
113
  check_pic_url_extraction key if [:instagram,:picplz,:yfrog,:imgly,:not_listed].index key
108
114
  end
109
115
  end
110
- it "should not crash if embedly fallback won't find a link" do
111
- stub_bad_request
112
- url = @tweetlr.find_image_url "http://mopskopf"
113
- end
114
- it "should not crash with an encoding error when response is non-us-ascii" do
115
- stub_utf8_response
116
- url = @tweetlr.find_image_url "http://api.instagram.com/oembed?url=http://instagr.am/p/Gx%E2%80%946/"
117
- end
118
- end
119
- describe "tweet api response processing" do
120
- it "extracts links" do
121
- links = @tweetlr.extract_links ''
122
- links.should be_nil
123
- links = @tweetlr.extract_links @twitter_response
124
- links[0].should == @first_link
125
- links[1].should == @second_link
126
- links[2].should == @third_link
127
- end
128
- it "uses the first image link found in a tweet with multiple links" do
116
+ it "using the first image link found in a tweet with multiple links" do
129
117
  stub_instagram
130
118
  link = @tweetlr.extract_image_url @twitter_response
131
119
  link.should == 'http://distillery.s3.amazonaws.com/media/2011/05/02/d25df62b9cec4a138967a3ad027d055b_7.jpg'
132
120
  end
133
- it "follows redirects" do
134
- stub_imgly
135
- link = @tweetlr.link_url_redirect 'im mocked anyways'
136
- link.should == 'http://s3.amazonaws.com/imgly_production/899582/full.jpg'
137
- end
138
- it "extracts pictures from links" do
139
- @tweets.each do |key,value|
140
- send "stub_#{key}"
141
- url = @tweetlr.extract_image_url value
142
- url.should be, "service #{key} not working!"
143
- check_pic_url_extraction key if [:instagram,:picplz,:yfrog,:imgly,:not_listed].index key
144
- end
121
+ it "not returning links that do not belong to images" do
122
+ stub_no_image_link
123
+ link = @tweetlr.extract_image_url @twitter_response
124
+ link.should_not be
145
125
  end
146
126
  end
147
-
148
- def check_pic_url_extraction(service)
149
- image_url = @tweetlr.send "image_url_#{service}".to_sym, @links[service]
150
- image_url.should =~ @pic_regexp
151
- end
152
-
153
127
  end
154
-
@@ -0,0 +1,18 @@
1
+ require 'spec_helper'
2
+
3
+ describe TwitterProcessor do
4
+ before :each do
5
+ @first_link = "http://url.com"
6
+ @second_link = "http://instagr.am/p/DzCWn/"
7
+ @third_link = "https://imageurl.com"
8
+ @twitter_response = {"from_user_id_str"=>"1915714", "profile_image_url"=>"http://a0.twimg.com/profile_images/386000279/2_normal.jpg", "created_at"=>"Sun, 17 Apr 2011 16:48:42 +0000", "from_user"=>"whitey_Mc_whIteLIst", "id_str"=>"59659561224765440", "metadata"=>{"result_type"=>"recent"}, "to_user_id"=>nil, "text"=>"Rigaer #wirsounterwegs #{@first_link} @ Augenarzt Dr. Lierow #{@second_link} #{@third_link}", "id"=>59659561224765440, "from_user_id"=>1915714, "geo"=>{"type"=>"Point", "coordinates"=>[52.5182, 13.454]}, "iso_language_code"=>"de", "place"=>{"id"=>"3078869807f9dd36", "type"=>"city", "full_name"=>"Berlin, Berlin"}, "to_user_id_str"=>nil, "source"=>"&lt;a href=&quot;http://instagr.am&quot; rel=&quot;nofollow&quot;&gt;instagram&lt;/a&gt;"}
9
+ end
10
+ it "extracts links" do
11
+ links = TwitterProcessor::extract_links ''
12
+ links.should be_nil
13
+ links = TwitterProcessor::extract_links @twitter_response
14
+ links[0].should == @first_link
15
+ links[1].should == @second_link
16
+ links[2].should == @third_link
17
+ end
18
+ end
data/tweetlr.gemspec CHANGED
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = "tweetlr"
3
- s.version = "0.1.6"
3
+ s.version = "0.1.7pre"
4
4
  s.author = "Sven Kraeuter"
5
5
  s.email = "sven.kraeuter@gmail.com"
6
6
  s.homepage = "http://tweetlr.5v3n.com"
metadata CHANGED
@@ -1,19 +1,19 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tweetlr
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.6
5
- prerelease:
4
+ version: 0.1.7pre
5
+ prerelease: 5
6
6
  platform: ruby
7
7
  authors:
8
8
  - Sven Kraeuter
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2011-11-05 00:00:00.000000000Z
12
+ date: 2011-11-12 00:00:00.000000000Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: daemons
16
- requirement: &2156339340 !ruby/object:Gem::Requirement
16
+ requirement: &2153597020 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *2156339340
24
+ version_requirements: *2153597020
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: eventmachine
27
- requirement: &2156338900 !ruby/object:Gem::Requirement
27
+ requirement: &2153596600 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: '0'
33
33
  type: :runtime
34
34
  prerelease: false
35
- version_requirements: *2156338900
35
+ version_requirements: *2153596600
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: curb
38
- requirement: &2156338480 !ruby/object:Gem::Requirement
38
+ requirement: &2153596180 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ! '>='
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: '0'
44
44
  type: :runtime
45
45
  prerelease: false
46
- version_requirements: *2156338480
46
+ version_requirements: *2153596180
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: json
49
- requirement: &2156338060 !ruby/object:Gem::Requirement
49
+ requirement: &2153595760 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ! '>='
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: '0'
55
55
  type: :runtime
56
56
  prerelease: false
57
- version_requirements: *2156338060
57
+ version_requirements: *2153595760
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: rake
60
- requirement: &2156337560 !ruby/object:Gem::Requirement
60
+ requirement: &2153595260 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ~>
@@ -65,10 +65,10 @@ dependencies:
65
65
  version: 0.8.7
66
66
  type: :development
67
67
  prerelease: false
68
- version_requirements: *2156337560
68
+ version_requirements: *2153595260
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: rspec
71
- requirement: &2156337140 !ruby/object:Gem::Requirement
71
+ requirement: &2153621460 !ruby/object:Gem::Requirement
72
72
  none: false
73
73
  requirements:
74
74
  - - ! '>='
@@ -76,10 +76,10 @@ dependencies:
76
76
  version: '0'
77
77
  type: :development
78
78
  prerelease: false
79
- version_requirements: *2156337140
79
+ version_requirements: *2153621460
80
80
  - !ruby/object:Gem::Dependency
81
81
  name: rdoc
82
- requirement: &2156336680 !ruby/object:Gem::Requirement
82
+ requirement: &2153621000 !ruby/object:Gem::Requirement
83
83
  none: false
84
84
  requirements:
85
85
  - - ! '>='
@@ -87,7 +87,7 @@ dependencies:
87
87
  version: '0'
88
88
  type: :development
89
89
  prerelease: false
90
- version_requirements: *2156336680
90
+ version_requirements: *2153621000
91
91
  description: tweetlr crawls twitter for a given term, extracts photos out of the collected
92
92
  tweets' short urls and posts the images to tumblr.
93
93
  email: sven.kraeuter@gmail.com
@@ -108,9 +108,16 @@ files:
108
108
  - Rakefile
109
109
  - bin/tweetlr
110
110
  - config/tweetlr.yml
111
+ - lib/http_processor.rb
112
+ - lib/log_aware.rb
113
+ - lib/photo_service_processor.rb
114
+ - lib/tumblr_processor.rb
111
115
  - lib/tweetlr.rb
116
+ - lib/twitter_processor.rb
117
+ - spec/photo_services_processor_spec.rb
112
118
  - spec/spec_helper.rb
113
119
  - spec/tweetlr_spec.rb
120
+ - spec/twitter_processor_spec.rb
114
121
  - tweetlr.gemspec
115
122
  homepage: http://tweetlr.5v3n.com
116
123
  licenses: []
@@ -127,9 +134,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
127
134
  required_rubygems_version: !ruby/object:Gem::Requirement
128
135
  none: false
129
136
  requirements:
130
- - - ! '>='
137
+ - - ! '>'
131
138
  - !ruby/object:Gem::Version
132
- version: '0'
139
+ version: 1.3.1
133
140
  requirements: []
134
141
  rubyforge_project: tweetlr
135
142
  rubygems_version: 1.8.10
@@ -138,5 +145,7 @@ specification_version: 3
138
145
  summary: tweetlr crawls twitter for a given term, extracts photos out of the collected
139
146
  tweets' short urls and posts the images to tumblr.
140
147
  test_files:
148
+ - spec/photo_services_processor_spec.rb
141
149
  - spec/spec_helper.rb
142
150
  - spec/tweetlr_spec.rb
151
+ - spec/twitter_processor_spec.rb