RubyGems - tweetlr - Versions diffs - 0.1.6 → 0.1.7pre - Mend

tweetlr 0.1.6 → 0.1.7pre

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14)

data/.gitignore +2 -2
data/bin/tweetlr +13 -11
data/lib/http_processor.rb +42 -0
data/lib/log_aware.rb +8 -0
data/lib/photo_service_processor.rb +122 -0
data/lib/tumblr_processor.rb +3 -0
data/lib/tweetlr.rb +30 -183
data/lib/twitter_processor.rb +39 -0
data/spec/photo_services_processor_spec.rb +38 -0
data/spec/spec_helper.rb +11 -0
data/spec/tweetlr_spec.rb +32 -59
data/spec/twitter_processor_spec.rb +18 -0
data/tweetlr.gemspec +1 -1
metadata +28 -19

data/.gitignore CHANGED Viewed

@@ -2,8 +2,8 @@
 *.log
 pkg
 *.pid
-config
 *.output
 .rvmrc
 Gemfile.lock
-tweetlr.tid
+tweetlr.tid
+tweetlr.yml.dev

data/bin/tweetlr CHANGED Viewed

@@ -46,22 +46,24 @@ Daemons.run_proc('tweetlr', :dir_mode => :script, :dir => './', :backtrace => tr
   EventMachine::run {
     EventMachine::add_periodic_timer( UPDATE_PERIOD ) {
      @log.info "starting tweetlr crawl..."
-     response = @tweetlr.lazy_search_twitter
+     response = {}
+     response = @tweetlr.lazy_search_twitter(@tweetlr.twitter_config["refresh_url"]) #looks awkward, but the refresh url will come from the db soon and make sense then...
      if response
        tweets = response['results']
        if tweets
        tweets.each do |tweet|
-           tumblr_post = @tweetlr.generate_tumblr_photo_post tweet
-           if tumblr_post.nil? ||  tumblr_post[:source].nil?
-              @log.warn "could not get image source: tweet: #{tweet} --- tumblr post: #{tumblr_post.inspect}"
-           else
-             @log.debug "tumblr post: #{tumblr_post}"
-             res = @tweetlr.post_to_tumblr tumblr_post
-             @log.warn "tumblr response: #{res.header_str} #{res.body_str}" unless res.response_code == 201
-           end
-           # store the highest tweet id
-           File.open(tid_file, "w+") { |io| io.write(tweets.first['id']) }
+         tumblr_post = @tweetlr.generate_tumblr_photo_post tweet
+         if tumblr_post.nil? ||  tumblr_post[:source].nil?
+            @log.warn "could not get image source: tweet: #{tweet} --- tumblr post: #{tumblr_post.inspect}"
+         else
+           @log.debug "tumblr post: #{tumblr_post}"
+           res = @tweetlr.post_to_tumblr tumblr_post
+           @log.warn "tumblr response: #{res.header_str} #{res.body_str}" unless res.response_code == 201
          end
+        end
+         # store the highest tweet id
+         @tweetlr.twitter_config[:refresh_url]=response['refresh_url']
+         File.open(tid_file, "w+") { |io| io.write(response['max_id']) }
        end
      else
        @log.error "twitter search returned no response. hail the failwhale!"

data/lib/http_processor.rb ADDED Viewed

@@ -0,0 +1,42 @@
+require 'curb'
+require 'log_aware'
+module HttpProcessor
+  include LogAware
+  USER_AGENT = %{Mozilla/5.0 (compatible; tweetlr; +http://tweetlr.5v3n.com)}
+  #convenience method for curl http get calls and parsing them to json.
+  def HttpProcessor::http_get(request, log=nil)
+    tries = 3
+    begin
+      curl = Curl::Easy.new request
+      curl.useragent = USER_AGENT
+      curl.perform
+      begin
+        JSON.parse curl.body_str
+      rescue JSON::ParserError => err
+        begin
+          if log
+            log.warn "#{err}: Could not parse response for #{request} - this is probably not a json response: #{curl.body_str}"
+          end
+          return nil
+        rescue Encoding::CompatibilityError => err
+          if log
+            log.error "Trying to rescue a JSON::ParserError for '#{request}' we got stuck in a Encoding::CompatibilityError."
+          end
+          return nil
+        end
+      end
+    rescue Curl::Err::CurlError => err
+      log.error "Failure in Curl call: #{err}" if log
+      tries -= 1
+      sleep 3
+      if tries > 0
+          retry
+      else
+          nil
+      end
+    end
+  end
+end

data/lib/log_aware.rb ADDED Viewed

@@ -0,0 +1,8 @@
+module LogAware
+    def self.log=(log)
+      @@log = log #TODO think of a more elegant way of logging than a static attribute
+    end
+    def self.log()
+      @@log
+    end
+end

data/lib/photo_service_processor.rb ADDED Viewed

@@ -0,0 +1,122 @@
+require 'log_aware'
+module PhotoServiceProcessor
+  LOCATION_START_INDICATOR = 'Location: '
+  LOCATION_STOP_INDICATOR  = "\r\n"
+  PIC_REGEXP = /(.*?)\.(jpg|jpeg|png|gif)/i
+  include LogAware
+  def self.find_image_url(link)
+    url = nil
+    if link && !(photo? link)
+      url = image_url_instagram link if (link.index('instagr.am') || link.index('instagram.com'))
+      url = image_url_picplz link if link.index 'picplz'
+      url = image_url_twitpic link if link.index 'twitpic'
+      url = image_url_yfrog link if link.index 'yfrog'
+      url = image_url_imgly link if link.index 'img.ly'
+      url = image_url_tco link if link.index 't.co'
+      url = image_url_lockerz link if link.index 'lockerz.com'
+      url = image_url_foursquare link if link.index '4sq.com'
+      url = image_url_embedly link if url.nil? #just try embed.ly for anything else. could do all image url processing w/ embedly, but there's probably some kind of rate limit invovled.
+    elsif photo? link
+      url = link
+    end
+    url
+  end
+  def self.photo?(link)
+    link =~ PIC_REGEXP
+  end
+  #find the image's url via embed.ly
+  def self.image_url_embedly(link_url)
+    response = HttpProcessor::http_get "http://api.embed.ly/1/oembed?url=#{link_url}"
+    if response && response['type'] == 'photo'
+      image_url = response['url']
+    end
+    image_url
+  end
+  #find the image's url for a foursquare link
+  def self.image_url_foursquare(link_url)
+    image_url_embedly link_url
+  end
+  #find the image's url for a lockerz link
+  def self.image_url_lockerz(link_url)
+    response = HttpProcessor::http_get "http://api.plixi.com/api/tpapi.svc/json/metadatafromurl?details=false&url=#{link_url}"
+    response["BigImageUrl"] if response
+  end
+  #find the image's url for an twitter shortened link
+  def self.image_url_tco(link_url)
+    service_url = link_url_redirect link_url
+    find_image_url service_url
+  end
+  #find the image's url for an instagram link
+  def self.image_url_instagram(link_url)
+    link_url['instagram.com'] = 'instagr.am' if link_url.index 'instagram.com' #instagram's oembed does not work for .com links
+    response = HttpProcessor::http_get "http://api.instagram.com/oembed?url=#{link_url}"
+    response['url'] if response
+  end
+  #find the image's url for a picplz short/longlink
+  def self.image_url_picplz(link_url)
+    id = extract_id link_url
+    #try short url
+    response = HttpProcessor::http_get "http://picplz.com/api/v2/pic.json?shorturl_ids=#{id}"
+    #if short url fails, try long url
+    #response = HTTParty.get "http://picplz.com/api/v2/pic.json?longurl_ids=#{id}"
+    #extract url
+    if response && response['value'] && response['value']['pics'] && response['value']['pics'].first && response['value']['pics'].first['pic_files'] && response['value']['pics'].first['pic_files']['640r']
+      response['value']['pics'].first['pic_files']['640r']['img_url']
+    else
+      nil
+    end
+  end
+  #find the image's url for a twitpic link
+  def self.image_url_twitpic(link_url)
+    image_url_redirect link_url, "http://twitpic.com/show/full/"
+  end
+  #find the image'S url for a yfrog link
+  def self.image_url_yfrog(link_url)
+    response = HttpProcessor::http_get("http://www.yfrog.com/api/oembed?url=#{link_url}")
+    response['url'] if response
+  end
+  #find the image's url for a img.ly link
+  def self.image_url_imgly(link_url)
+    image_url_redirect link_url, "http://img.ly/show/full/", "\r\n"
+  end
+  # extract image url from services like twitpic & img.ly that do not offer oembed interfaces
+  def self.image_url_redirect(link_url, service_endpoint, stop_indicator = LOCATION_STOP_INDICATOR)
+    link_url_redirect "#{service_endpoint}#{extract_id link_url}", stop_indicator
+  end
+  def self.link_url_redirect(short_url, stop_indicator = LOCATION_STOP_INDICATOR)
+    tries = 3
+    begin
+      resp = Curl::Easy.http_get(short_url) { |res| res.follow_location = true }
+    rescue Curl::Err::CurlError => err
+        log.error "Curl::Easy.http_get failed: #{err}"
+        tries -= 1
+        sleep 3
+        if tries > 0
+            retry
+        else
+           return nil
+        end
+    end
+    if(resp && resp.header_str.index(LOCATION_START_INDICATOR) && resp.header_str.index(stop_indicator))
+      start = resp.header_str.index(LOCATION_START_INDICATOR) + LOCATION_START_INDICATOR.size
+      stop  = resp.header_str.index(stop_indicator, start)
+      resp.header_str[start...stop]
+    else
+      nil
+    end
+  end
+  #extract the pic id from a given <code>link</code>
+  def self.extract_id(link)
+    link.split('/').last if link.split('/')
+  end
+end

data/lib/tumblr_processor.rb ADDED Viewed

@@ -0,0 +1,3 @@
+module TumblrProcessor
+end

data/lib/tweetlr.rb CHANGED Viewed

@@ -3,14 +3,17 @@ require 'logger'
 require 'yaml'
 require 'curb'
 require 'json'
+require 'twitter_processor'
+require 'http_processor'
+require 'photo_service_processor'
+require 'log_aware'
 class Tweetlr
+  attr_accessor :twitter_config
-  VERSION = '0.1.6'
+  VERSION = '0.1.7pre'
   GENERATOR = %{tweetlr - http://tweetlr.5v3n.com}
-  USER_AGENT = %{Mozilla/5.0 (compatible; tweetlr/#{VERSION}; +http://tweetlr.5v3n.com)}
-  LOCATION_START_INDICATOR = 'Location: '
-  LOCATION_STOP_INDICATOR  = "\r\n"
   API_ENDPOINT_TWITTER = 'http://search.twitter.com/search.json'
   API_ENDPOINT_TUMBLR = 'http://www.tumblr.com'
@@ -18,8 +21,6 @@ class Tweetlr
   TWITTER_RESULTS_TYPE = 'recent'
   UPDATE_PERIOD = 600 #10 minutes
-  PIC_REGEXP = /(.*?)\.(jpg|jpeg|png|gif)/i
   def initialize(email, password, args={:terms=>nil, :whitelist => nil, :shouts => nil, :since_id=>nil, :results_per_page => nil, :loglevel=>nil, :result_type => nil})
     @log = Logger.new(STDOUT)
     if (Logger::DEBUG..Logger::UNKNOWN).to_a.index(args[:loglevel])
@@ -28,21 +29,33 @@ class Tweetlr
       @log.level = Logger::INFO
     end
     @log.debug "log level set to #{@log.level}"
+    LogAware.log=@log
+    @twitter_config = {
+      :since_id => args[:since_id],
+      :search_term => args[:terms],
+      :results_per_page => args[:results_per_page] || TWITTER_RESULTS_PER_PAGE,
+      :result_type => args[:result_type] || TWITTER_RESULTS_TYPE,
+      :api_endpoint_twitter => args[:api_endpoint_twitter] || API_ENDPOINT_TWITTER
+    }
+    @twitter_config[:refresh_url] = "?ors=#{@twitter_config[:search_term]}&since_id=#{@twitter_config[:since_id]}&rpp=#{@twitter_config[:results_per_page]}&result_type=#{@twitter_config[:result_type]}" if (@twitter_config[:since_id] && @twitter_config[:search_term])
+    @twitter_config[:logger] = @log
     @email = email
     @password = password
-    @since_id = args[:since_id]
-    @search_term = args[:terms]
     @cookie = args[:cookie]
-    @results_per_page = args[:results_per_page] || TWITTER_RESULTS_PER_PAGE
-    @result_type = args[:result_type] || TWITTER_RESULTS_TYPE
-    @api_endpoint_twitter = args[:api_endpoint_twitter] || API_ENDPOINT_TWITTER
+    @api_endpoint_twitter =
     @api_endpoint_tumblr = args[:api_endpoint_tumblr] || API_ENDPOINT_TUMBLR
     @whitelist = args[:whitelist]
     @shouts = args[:shouts]
     @update_period = args[:update_period] || UPDATE_PERIOD
     @whitelist.each {|entry| entry.downcase!} if @whitelist
-    @refresh_url = "#{@api_endpoint_twitter}?ors=#{@search_term}&since_id=#{@since_id}&rpp=#{@results_per_page}&result_type=#{@result_type}" if (@since_id && @search_term)
   end
+  def lazy_search_twitter(refresh_url=nil)
+    @twitter_config[:refresh_url] = refresh_url if refresh_url
+    TwitterProcessor::lazy_search(@twitter_config)
+  end
   #post a tumblr photo entry. required arguments are :type, :date, :source, :caption, :state. optional argument: :tags
   def post_to_tumblr(options={})
     tries = 3
@@ -60,7 +73,7 @@ class Tweetlr
         Curl::PostField.content('state', options[:state]),
         Curl::PostField.content('tags', tags)
         )
-      rescue Curl::Err => err
+      rescue Curl::Err::CurlError => err
         @log.error "Failure in Curl call: #{err}"
         tries -= 1
         sleep 3
@@ -78,7 +91,7 @@ class Tweetlr
   def generate_tumblr_photo_post tweet
     tumblr_post = nil
     message = tweet['text']
-    if !retweet? message
+    if !TwitterProcessor::retweet? message
       @log.debug "tweet: #{tweet}"
       tumblr_post = {}
       tumblr_post[:type] = 'photo'
@@ -100,183 +113,17 @@ class Tweetlr
     tumblr_post
   end
-  #checks if the message is a retweet
-  def retweet?(message)
-    message.index('RT @') || message.index(%{ "@}) || message.index(" \u201c@") #detect retweets
-  end
-  #fire a new search
-  def search_twitter()
-    search_call = "#{@api_endpoint_twitter}?ors=#{@search_term}&result_type=#{@result_type}&rpp=#{@results_per_page}"
-    @response = http_get search_call
-  end
-  # lazy update - search for a term or refresh the search if a response is available already
-  def lazy_search_twitter()
-    @refresh_url = "#{@api_endpoint_twitter}#{@response['refresh_url']}" unless (@response.nil? || @response['refresh_url'].nil? || @response['refresh_url'].empty?)
-    if @refresh_url
-     search_url = "#{@refresh_url}&result_type=#{@result_type}&rpp=#{@results_per_page}"
-     @log.info "lazy search using '#{search_url}'"
-     @response = http_get search_url
-    else
-      @log.debug "regular search using '#{@search_term}'"
-      @response = search_twitter()
-    end
-  end
   #extract a linked image file's url from a tweet. first found image will be used.
   def extract_image_url(tweet)
-    links = extract_links tweet
+    links = TwitterProcessor::extract_links tweet
     image_url = nil
     if links
       links.each do |link|
-        image_url = find_image_url(link)
-        return image_url if image_url =~ PIC_REGEXP
+        image_url = PhotoServiceProcessor::find_image_url(link)
+        return image_url if PhotoServiceProcessor::photo? image_url
       end
     end
     image_url
   end
-  #extract the linked image file's url from a tweet
-  def find_image_url(link)
-    url = nil
-    if !link.nil?
-      url = image_url_instagram link if (link.index('instagr.am') || link.index('instagram.com'))
-      url = image_url_picplz link if link.index 'picplz'
-      url = image_url_twitpic link if link.index 'twitpic'
-      url = image_url_yfrog link if link.index 'yfrog'
-      url = image_url_imgly link if link.index 'img.ly'
-      url = image_url_tco link if link.index 't.co'
-      url = image_url_lockerz link if link.index 'lockerz.com'
-      url = image_url_foursquare link if link.index '4sq.com'
-      url = image_url_embedly link if url.nil? #just try embed.ly for anything else. could do all image url processing w/ embedly, but there's probably some kind of rate limit invovled.
-    end
-    url
-  end
-  #find the image's url via embed.ly
-  def image_url_embedly(link_url)
-    response = http_get "http://api.embed.ly/1/oembed?url=#{link_url}"
-    response['url'] if response
-  end
-  #find the image's url for a foursquare link
-  def image_url_foursquare(link_url)
-    image_url_embedly link_url
-  end
-  #find the image's url for a lockerz link
-  def image_url_lockerz(link_url)
-    response = http_get "http://api.plixi.com/api/tpapi.svc/json/metadatafromurl?details=false&url=#{link_url}"
-    response["BigImageUrl"] if response
-  end
-  #find the image's url for an twitter shortened link
-  def image_url_tco(link_url)
-    service_url = link_url_redirect link_url
-    find_image_url service_url
-  end
-  #find the image's url for an instagram link
-  def image_url_instagram(link_url)
-    link_url['instagram.com'] = 'instagr.am' if link_url.index 'instagram.com' #instagram's oembed does not work for .com links
-    response = http_get "http://api.instagram.com/oembed?url=#{link_url}"
-    response['url'] if response
-  end
-  #find the image's url for a picplz short/longlink
-  def image_url_picplz(link_url)
-    id = extract_id link_url
-    #try short url
-    response = http_get "http://picplz.com/api/v2/pic.json?shorturl_ids=#{id}"
-    #if short url fails, try long url
-    #response = HTTParty.get "http://picplz.com/api/v2/pic.json?longurl_ids=#{id}"
-    #extract url
-    if response && response['value'] && response['value']['pics'] && response['value']['pics'].first && response['value']['pics'].first['pic_files'] && response['value']['pics'].first['pic_files']['640r']
-      response['value']['pics'].first['pic_files']['640r']['img_url']
-    else
-      nil
-    end
-  end
-  #find the image's url for a twitpic link
-  def image_url_twitpic(link_url)
-    image_url_redirect link_url, "http://twitpic.com/show/full/"
-  end
-  #find the image'S url for a yfrog link
-  def image_url_yfrog(link_url)
-    response = http_get("http://www.yfrog.com/api/oembed?url=#{link_url}")
-    response['url'] if response
-  end
-  #find the image's url for a img.ly link
-  def image_url_imgly(link_url)
-    image_url_redirect link_url, "http://img.ly/show/full/", "\r\n"
-  end
-  # extract image url from services like twitpic & img.ly that do not offer oembed interfaces
-  def image_url_redirect(link_url, service_endpoint, stop_indicator = LOCATION_STOP_INDICATOR)
-    link_url_redirect "#{service_endpoint}#{extract_id link_url}", stop_indicator
-  end
-  def link_url_redirect(short_url, stop_indicator = LOCATION_STOP_INDICATOR)
-    tries = 3
-    begin
-      resp = Curl::Easy.http_get(short_url) { |res| res.follow_location = true }
-    rescue Curl::Err => err
-        @log.error "Curl::Easy.http_get failed: #{err}"
-        tries -= 1
-        sleep 3
-        if tries > 0
-            retry
-        else
-           return nil
-        end
-    end
-    if(resp && resp.header_str.index(LOCATION_START_INDICATOR) && resp.header_str.index(stop_indicator))
-      start = resp.header_str.index(LOCATION_START_INDICATOR) + LOCATION_START_INDICATOR.size
-      stop  = resp.header_str.index(stop_indicator, start)
-      resp.header_str[start...stop]
-    else
-      nil
-    end
-  end
-  #extract the pic id from a given <code>link</code>
-  def extract_id(link)
-    link.split('/').last if link.split('/')
-  end
-  #extract the links from a given tweet
-  def extract_links(tweet)
-    if tweet
-      text = tweet['text']
-      text.gsub(/https?:\/\/[\S]+/).to_a if text
-    end
-  end
-  private
-  #convenience method for curl http get calls and parsing them to json.
-  def http_get(request)
-    tries = 3
-    begin
-      curl = Curl::Easy.new request
-      curl.useragent = USER_AGENT
-      curl.perform
-      begin
-        JSON.parse curl.body_str
-      rescue JSON::ParserError => err
-        begin
-          @log.warn "#{err}: Could not parse response for #{request} - this is probably not a json response: #{curl.body_str}"
-          return nil
-        rescue Encoding::CompatibilityError => err
-          @log.error "Trying to rescue a JSON::ParserError for '#{request}' we got stuck in a Encoding::CompatibilityError."
-          return nil
-        end
-      end
-    rescue Curl::Err => err
-      @log.error "Failure in Curl call: #{err}"
-      tries -= 1
-      sleep 3
-      if tries > 0
-          retry
-      else
-          nil
-      end
-    end
-  end
 end

data/lib/twitter_processor.rb ADDED Viewed

@@ -0,0 +1,39 @@
+require 'http_processor'
+module TwitterProcessor
+  #checks if the message is a retweet
+  def self.retweet?(message)
+    message.index('RT @') || message.index(%{"@}) || message.index("\u201c@") #detect retweets
+  end
+  #extract the links from a given tweet
+  def self.extract_links(tweet)
+    if tweet
+      text = tweet['text']
+      text.gsub(/https?:\/\/[\S]+/).to_a if text
+    end
+  end
+  #fire a new search
+  def self.search(config)
+    search_call = "#{config[:api_endpoint_twitter]}?ors=#{config[:search_term]}&result_type=#{config[:result_type]}&rpp=#{config[:results_per_page]}"
+    HttpProcessor::http_get search_call
+  end
+  # lazy update - search for a term or refresh the search if a response is available already
+  def self.lazy_search(config)
+    result = nil
+    refresh_url = config[:refresh_url]
+    log = config[:logger]
+    if refresh_url
+     search_url = "#{config[:api_endpoint_twitter]}#{refresh_url}&result_type=#{config[:result_type]}&rpp=#{config[:results_per_page]}"
+     log.info "lazy search using '#{search_url}'" if log
+     result = HttpProcessor::http_get search_url
+    else
+      log.debug "regular search using '#{config[:search_term]}'" if log
+      result = search(config)
+    end
+    result
+  end
+end

data/spec/photo_services_processor_spec.rb ADDED Viewed

@@ -0,0 +1,38 @@
+require 'spec_helper'
+describe PhotoServiceProcessor do
+  before :each do
+    @links = {
+      :instagram => "http://instagr.am/p/DzCWn/",
+      :twitpic => "http://twitpic.com/449o2x",
+      :yfrog => "http://yfrog.com/h4vlfp",
+      :picplz => "http://picplz.com/2hWv",
+      :imgly => "http://img.ly/3M1o",
+      :tco => 'http://t.co/MUGNayA',
+      :lockerz => 'http://lockerz.com/s/100269159',
+      :embedly => 'http://flic.kr/p/973hTv',
+      :twitter_pics => 'http://t.co/FmyBGfyY'
+      }
+  end
+  it "should find a picture's url from the supported services" do
+    @links.each do |service,link|
+      send "stub_#{service}"
+      url = PhotoServiceProcessor::find_image_url link
+      url.should be, "service #{service} not working!"
+      check_pic_url_extraction service if [:instagram,:picplz,:yfrog,:imgly,:not_listed].index service
+    end
+  end
+  it "should not crash if embedly fallback won't find a link" do
+    stub_bad_request
+    url = PhotoServiceProcessor::find_image_url "http://mopskopf"
+  end
+  it "should not crash with an encoding error when response is non-us-ascii" do
+    stub_utf8_response
+    url = PhotoServiceProcessor::find_image_url "http://api.instagram.com/oembed?url=http://instagr.am/p/Gx%E2%80%946/"
+  end
+  it "follows redirects" do
+    stub_imgly
+    link = PhotoServiceProcessor::link_url_redirect 'im mocked anyways'
+    link.should == 'http://s3.amazonaws.com/imgly_production/899582/full.jpg'
+  end
+end

data/spec/spec_helper.rb CHANGED Viewed

@@ -2,6 +2,11 @@
 require "bundler"
 Bundler.require :default, :development, :test
+def check_pic_url_extraction(service)
+  image_url = PhotoServiceProcessor::send "image_url_#{service}".to_sym, @links[service]
+  image_url.should =~ PhotoServiceProcessor::PIC_REGEXP
+end
 def stub_twitter
   Curl::Easy.any_instance.stub(:body_str).and_return %|{"results":[{"from_user_id_str":"220650275","profile_image_url":"http://a2.twimg.com/profile_images/668619338/9729_148876458070_505518070_2628895_7160219_n_normal.jpg","created_at":"Sat, 16 Jul 2011 23:20:01 +0000","from_user":"LoMuma","id_str":"92372947855093760","metadata":{"result_type":"recent"},"to_user_id":null,"text":"Need to stop procrastinating! 5 quizzes and personal responses due tomorrow... #fail","id":92372947855093760,"from_user_id":220650275,"geo":null,"iso_language_code":"en","to_user_id_str":null,"source":"&lt;a href=&quot;http://twitter.com/&quot;&gt;web&lt;/a&gt;"},{"from_user_id_str":"129718556","profile_image_url":"http://a2.twimg.com/profile_images/1428268221/twitter_normal.png","created_at":"Sat, 16 Jul 2011 23:20:01 +0000","from_user":"priiislopes","id_str":"92372947846692865","metadata":{"result_type":"recent"},"to_user_id":null,"text":"Esse jogo do Flu foi uma vergonha. Se ele fez o melhor dele no brasileiro semana passada, hj fez o pior de todos os tempos. 
#Fail","id":92372947846692865,"from_user_id":129718556,"geo":null,"iso_language_code":"pt","to_user_id_str":null,"source":"&lt;a href=&quot;http://twitter.com/&quot;&gt;web&lt;/a&gt;"},{"from_user_id_str":"259930166","profile_image_url":"http://a3.twimg.com/profile_images/1425221519/foto_normal.jpg","created_at":"Sat, 16 Jul 2011 23:20:00 +0000","from_user":"YamiiG4","id_str":"92372943132303360","metadata":{"result_type":"recent"},"to_user_id":null,"text":"vaya que eran 2 minutos..#FAIL!","id":92372943132303360,"from_user_id":259930166,"geo":null,"iso_language_code":"es","to_user_id_str":null,"source":"&lt;a href=&quot;http://www.tweetdeck.com&quot; rel=&quot;nofollow&quot;&gt;TweetDeck&lt;/a&gt;"},{"from_user_id_str":"321557905","profile_image_url":"http://a0.twimg.com/profile_images/1445672626/profile_normal.png","created_at":"Sat, 16 Jul 2011 23:20:00 +0000","from_user":"JasWafer_FFOE","id_str":"92372941379088384","metadata":{"result_type":"recent"},"to_user_id":null,"text":"RT @eye_OFBEHOLDER: RT @JasWafer_FFOE #Oomf said that he'll NEVER eat pussy! O.o --#FAIL","id":92372941379088384,"from_user_id":321557905,"geo":null,"iso_language_code":"en","to_user_id_str":null,"source":"&lt;a href=&quot;http://twidroyd.com&quot; rel=&quot;nofollow&quot;&gt;Twidroyd for Android&lt;/a&gt;"},{"from_user_id_str":"279395613","profile_image_url":"http://a0.twimg.com/profile_images/1334871419/lnnsquare_normal.jpg","created_at":"Sat, 16 Jul 2011 23:19:59 +0000","from_user":"LanguageNewsNet","id_str":"92372940640890881","metadata":{"result_type":"recent"},"to_user_id":null,"text":"Questioning the Inca Paradox: Did the civilization behind Machu Picchu really fail to develop a written la... 
http://tinyurl.com/5sfos23","id":92372940640890881,"from_user_id":279395613,"geo":null,"iso_language_code":"en","to_user_id_str":null,"source":"&lt;a href=&quot;http://twitterfeed.com&quot; rel=&quot;nofollow&quot;&gt;twitterfeed&lt;/a&gt;"}],"max_id":92372947855093760,"since_id":0,"refresh_url":"?since_id=92372947855093760&q=+fail","next_page":"?page=2&max_id=92372947855093760&rpp=5&q=+fail","results_per_page":5,"page":1,"completed_in":0.022152,"since_id_str":"0","max_id_str":"92372947855093760","query":"+fail"}|
   Curl::Easy.any_instance.stub(:perform).and_return Curl::Easy.new
@@ -17,6 +22,12 @@ def stub_instagram
   Curl::Easy.any_instance.stub(:perform).and_return Curl::Easy.new
 end
+#instagram syntax but without a valid image link
+def stub_no_image_link
+  Curl::Easy.any_instance.stub(:body_str).and_return %|{"url":"http://noimageurl"}|
+  Curl::Easy.any_instance.stub(:perform).and_return Curl::Easy.new
+end
 def stub_bad_request
   Curl::Easy.any_instance.stub(:body_str).and_return %|<html><title>400: Bad Request - Invalid URL format http://mopskopf</title><body>400: Bad Request - Invalid URL format http://mopskopf</body></html>|
   Curl::Easy.any_instance.stub(:perform).and_return Curl::Easy.new

data/spec/tweetlr_spec.rb CHANGED Viewed

@@ -12,17 +12,6 @@ describe Tweetlr do
   before :each do
     @credentials = {:email => USER, :password => PW}
     @searchterm = 'fail'
-    @links = {
-      :instagram => "http://instagr.am/p/DzCWn/",
-      :twitpic => "http://twitpic.com/449o2x",
-      :yfrog => "http://yfrog.com/h4vlfp",
-      :picplz => "http://picplz.com/2hWv",
-      :imgly => "http://img.ly/3M1o",
-      :tco => 'http://t.co/MUGNayA',
-      :lockerz => 'http://lockerz.com/s/100269159',
-      :embedly => 'http://flic.kr/p/973hTv',
-      :twitter_pics => 'http://t.co/FmyBGfyY'
-      }
     @tweets = {
       :instagram => {'text' => "jadda jadda http://instagr.am/p/DzCWn/"},
       :twitpic => {'text' => "jadda jadda http://twitpic.com/449o2x"},
@@ -34,14 +23,25 @@ describe Tweetlr do
       :embedly => {'text' => "jadda jadda http://flic.kr/p/973hTv"},
       :twitter_pics => {'text' => "jadda jadda http://t.co/FmyBGfyY"}
       }
+      @links = {
+        :instagram => "http://instagr.am/p/DzCWn/",
+        :twitpic => "http://twitpic.com/449o2x",
+        :yfrog => "http://yfrog.com/h4vlfp",
+        :picplz => "http://picplz.com/2hWv",
+        :imgly => "http://img.ly/3M1o",
+        :tco => 'http://t.co/MUGNayA',
+        :lockerz => 'http://lockerz.com/s/100269159',
+        :embedly => 'http://flic.kr/p/973hTv',
+        :twitter_pics => 'http://t.co/FmyBGfyY'
+        }
     @first_link = "http://url.com"
-    @second_link = @links[:instagram]
+    @second_link = "http://instagr.am/p/DzCWn/"
     @third_link = "https://imageurl.com"
     @twitter_response = {"from_user_id_str"=>"1915714", "profile_image_url"=>"http://a0.twimg.com/profile_images/386000279/2_normal.jpg", "created_at"=>"Sun, 17 Apr 2011 16:48:42 +0000", "from_user"=>"whitey_Mc_whIteLIst", "id_str"=>"59659561224765440", "metadata"=>{"result_type"=>"recent"}, "to_user_id"=>nil, "text"=>"Rigaer #wirsounterwegs #{@first_link}  @ Augenarzt Dr. Lierow #{@second_link} #{@third_link}", "id"=>59659561224765440, "from_user_id"=>1915714, "geo"=>{"type"=>"Point", "coordinates"=>[52.5182, 13.454]}, "iso_language_code"=>"de", "place"=>{"id"=>"3078869807f9dd36", "type"=>"city", "full_name"=>"Berlin, Berlin"}, "to_user_id_str"=>nil, "source"=>"&lt;a href=&quot;http://instagr.am&quot; rel=&quot;nofollow&quot;&gt;instagram&lt;/a&gt;"}
     @non_whitelist_tweet = @twitter_response.merge 'from_user' => 'nonwhitelist user'
     @retweet = @twitter_response.merge "text" => "bla bla RT @fgd: tueddelkram"
     @new_style_retweet = @twitter_response.merge "text" => "and it scales! \u201c@moeffju: http://t.co/8gUSPKu #hktbl1 #origami success! :)\u201d"
-    @pic_regexp = /(.*?)\.(jpg|jpeg|png|gif)/i
+    @new_style_retweet_no_addition = @twitter_response.merge "text" => "\u201c@moeffju: http://t.co/8gUSPKu #hktbl1 #origami success! :)\u201d"
     @config_file = File.join( Dir.pwd, 'config', 'tweetlr.yml')
     @tweetlr = Tweetlr.new(USER, PW, {:whitelist => WHITELIST, :results_per_page => 5, :since_id => TIMESTAMP, :terms => @searchterm, :loglevel => 4})
   end
@@ -55,7 +55,7 @@ describe Tweetlr do
   it "should search twitter for a given term" do
     stub_twitter
     tweetlr = @tweetlr
-    response = tweetlr.search_twitter
+    response = tweetlr.lazy_search_twitter
     tweets = response['results']
     tweets.should be
     tweets.should_not be_empty
@@ -94,61 +94,34 @@ describe Tweetlr do
     post = @tweetlr.generate_tumblr_photo_post @retweet
     post.should_not be
   end
-  it "should not use new style retweets which would produce double blog posts" do
-    post = @tweetlr.generate_tumblr_photo_post @new_style_retweet
-    post.should_not be
+  context "should not use new style retweets which would produce double blog posts" do
+    it "for quotes in context" do
+      post = @tweetlr.generate_tumblr_photo_post @new_style_retweet
+      post.should_not be
+    end
+    it "for quotes without further text addition" do
+      post = @tweetlr.generate_tumblr_photo_post @new_style_retweet_no_addition
+      post.should_not be
+    end
   end
-  context "image url processing" do
-    it "should find a picture's url from the supported services" do
-      @links.each do |key,value|
+  context "handles pictures in tweets" do
+    it "extracting their corresponding links" do
+      @tweets.each do |key,value|
         send "stub_#{key}"
-        url = @tweetlr.find_image_url value
+        url = @tweetlr.extract_image_url value
         url.should be, "service #{key} not working!"
         check_pic_url_extraction key if [:instagram,:picplz,:yfrog,:imgly,:not_listed].index key
       end
     end
-    it "should not crash if embedly fallback won't find a link" do
-      stub_bad_request
-      url = @tweetlr.find_image_url "http://mopskopf"
-    end
-    it "should not crash with an encoding error when response is non-us-ascii" do
-      stub_utf8_response
-      url = @tweetlr.find_image_url "http://api.instagram.com/oembed?url=http://instagr.am/p/Gx%E2%80%946/"
-    end
-  end
-  describe "tweet api response processing" do
-    it "extracts links" do
-      links = @tweetlr.extract_links ''
-      links.should be_nil
-      links = @tweetlr.extract_links @twitter_response
-      links[0].should == @first_link
-      links[1].should == @second_link
-      links[2].should == @third_link
-    end
-    it "uses the first image link found in a tweet with multiple links" do
+    it "using the first image link found in a tweet with multiple links" do
       stub_instagram
       link = @tweetlr.extract_image_url @twitter_response
       link.should == 'http://distillery.s3.amazonaws.com/media/2011/05/02/d25df62b9cec4a138967a3ad027d055b_7.jpg'
     end
-    it "follows redirects" do
-      stub_imgly
-      link = @tweetlr.link_url_redirect 'im mocked anyways'
-      link.should == 'http://s3.amazonaws.com/imgly_production/899582/full.jpg'
-    end
-    it "extracts pictures from links" do
-      @tweets.each do |key,value|
-        send "stub_#{key}"
-        url = @tweetlr.extract_image_url value
-        url.should be, "service #{key} not working!"
-        check_pic_url_extraction key if [:instagram,:picplz,:yfrog,:imgly,:not_listed].index key
-      end
+    it "not returning links that do not belong to images" do
+      stub_no_image_link
+      link = @tweetlr.extract_image_url @twitter_response
+      link.should_not be
     end
   end
-  def check_pic_url_extraction(service)
-    image_url = @tweetlr.send "image_url_#{service}".to_sym, @links[service]
-    image_url.should =~ @pic_regexp
-  end
 end

data/spec/twitter_processor_spec.rb ADDED Viewed

@@ -0,0 +1,18 @@
+require 'spec_helper'
+describe TwitterProcessor do
+  before :each do
+    @first_link = "http://url.com"
+    @second_link = "http://instagr.am/p/DzCWn/"
+    @third_link = "https://imageurl.com"
+    @twitter_response = {"from_user_id_str"=>"1915714", "profile_image_url"=>"http://a0.twimg.com/profile_images/386000279/2_normal.jpg", "created_at"=>"Sun, 17 Apr 2011 16:48:42 +0000", "from_user"=>"whitey_Mc_whIteLIst", "id_str"=>"59659561224765440", "metadata"=>{"result_type"=>"recent"}, "to_user_id"=>nil, "text"=>"Rigaer #wirsounterwegs #{@first_link}  @ Augenarzt Dr. Lierow #{@second_link} #{@third_link}", "id"=>59659561224765440, "from_user_id"=>1915714, "geo"=>{"type"=>"Point", "coordinates"=>[52.5182, 13.454]}, "iso_language_code"=>"de", "place"=>{"id"=>"3078869807f9dd36", "type"=>"city", "full_name"=>"Berlin, Berlin"}, "to_user_id_str"=>nil, "source"=>"&lt;a href=&quot;http://instagr.am&quot; rel=&quot;nofollow&quot;&gt;instagram&lt;/a&gt;"}
+  end
+  it "extracts links" do
+    links = TwitterProcessor::extract_links ''
+    links.should be_nil
+    links = TwitterProcessor::extract_links @twitter_response
+    links[0].should == @first_link
+    links[1].should == @second_link
+    links[2].should == @third_link
+  end
+end

data/tweetlr.gemspec CHANGED Viewed

@@ -1,6 +1,6 @@
 Gem::Specification.new do |s|
   s.name        = "tweetlr"
-  s.version     = "0.1.6"
+  s.version     = "0.1.7pre"
   s.author      = "Sven Kraeuter"
   s.email       = "sven.kraeuter@gmail.com"
   s.homepage    = "http://tweetlr.5v3n.com"

metadata CHANGED Viewed

@@ -1,19 +1,19 @@
 --- !ruby/object:Gem::Specification
 name: tweetlr
 version: !ruby/object:Gem::Version
-  version: 0.1.6
-  prerelease:
+  version: 0.1.7pre
+  prerelease: 5
 platform: ruby
 authors:
 - Sven Kraeuter
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2011-11-05 00:00:00.000000000Z
+date: 2011-11-12 00:00:00.000000000Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: daemons
-  requirement: &2156339340 !ruby/object:Gem::Requirement
+  requirement: &2153597020 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
         version: '0'
   type: :runtime
   prerelease: false
-  version_requirements: *2156339340
+  version_requirements: *2153597020
 - !ruby/object:Gem::Dependency
   name: eventmachine
-  requirement: &2156338900 !ruby/object:Gem::Requirement
+  requirement: &2153596600 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ! '>='
@@ -32,10 +32,10 @@ dependencies:
         version: '0'
   type: :runtime
   prerelease: false
-  version_requirements: *2156338900
+  version_requirements: *2153596600
 - !ruby/object:Gem::Dependency
   name: curb
-  requirement: &2156338480 !ruby/object:Gem::Requirement
+  requirement: &2153596180 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ! '>='
@@ -43,10 +43,10 @@ dependencies:
         version: '0'
   type: :runtime
   prerelease: false
-  version_requirements: *2156338480
+  version_requirements: *2153596180
 - !ruby/object:Gem::Dependency
   name: json
-  requirement: &2156338060 !ruby/object:Gem::Requirement
+  requirement: &2153595760 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ! '>='
@@ -54,10 +54,10 @@ dependencies:
         version: '0'
   type: :runtime
   prerelease: false
-  version_requirements: *2156338060
+  version_requirements: *2153595760
 - !ruby/object:Gem::Dependency
   name: rake
-  requirement: &2156337560 !ruby/object:Gem::Requirement
+  requirement: &2153595260 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ~>
@@ -65,10 +65,10 @@ dependencies:
         version: 0.8.7
   type: :development
   prerelease: false
-  version_requirements: *2156337560
+  version_requirements: *2153595260
 - !ruby/object:Gem::Dependency
   name: rspec
-  requirement: &2156337140 !ruby/object:Gem::Requirement
+  requirement: &2153621460 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ! '>='
@@ -76,10 +76,10 @@ dependencies:
         version: '0'
   type: :development
   prerelease: false
-  version_requirements: *2156337140
+  version_requirements: *2153621460
 - !ruby/object:Gem::Dependency
   name: rdoc
-  requirement: &2156336680 !ruby/object:Gem::Requirement
+  requirement: &2153621000 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ! '>='
@@ -87,7 +87,7 @@ dependencies:
         version: '0'
   type: :development
   prerelease: false
-  version_requirements: *2156336680
+  version_requirements: *2153621000
 description: tweetlr crawls twitter for a given term, extracts photos out of the collected
   tweets' short urls and posts the images to tumblr.
 email: sven.kraeuter@gmail.com
@@ -108,9 +108,16 @@ files:
 - Rakefile
 - bin/tweetlr
 - config/tweetlr.yml
+- lib/http_processor.rb
+- lib/log_aware.rb
+- lib/photo_service_processor.rb
+- lib/tumblr_processor.rb
 - lib/tweetlr.rb
+- lib/twitter_processor.rb
+- spec/photo_services_processor_spec.rb
 - spec/spec_helper.rb
 - spec/tweetlr_spec.rb
+- spec/twitter_processor_spec.rb
 - tweetlr.gemspec
 homepage: http://tweetlr.5v3n.com
 licenses: []
@@ -127,9 +134,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
 required_rubygems_version: !ruby/object:Gem::Requirement
   none: false
   requirements:
-  - - ! '>='
+  - - ! '>'
     - !ruby/object:Gem::Version
-      version: '0'
+      version: 1.3.1
 requirements: []
 rubyforge_project: tweetlr
 rubygems_version: 1.8.10
@@ -138,5 +145,7 @@ specification_version: 3
 summary: tweetlr crawls twitter for a given term, extracts photos out of the collected
   tweets' short urls and posts the images to tumblr.
 test_files:
+- spec/photo_services_processor_spec.rb
 - spec/spec_helper.rb
 - spec/tweetlr_spec.rb
+- spec/twitter_processor_spec.rb