tweetlr 0.1.7pre → 0.1.7pre4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.travis.yml CHANGED
@@ -4,7 +4,7 @@ bundler_args: --binstubs
4
4
  # Specify which ruby versions you wish to run your tests on, each version will be used
5
5
  rvm:
6
6
  - 1.9.2
7
- - 1.8.7 # (current default)
7
+ #- 1.8.7 # (current default)
8
8
 
9
9
  # Define how to run your tests (defaults to `bundle exec rake` or `rake` depending on whether you have a `Gemfile`)
10
10
  script: "bundle exec rake test"
data/README.md CHANGED
@@ -15,7 +15,7 @@ tweetlr supports
15
15
  - imgly
16
16
  - twitter / photobucket
17
17
  - t.co shortened links to pictures
18
- - every service accessible via embed.ly (see [photo providers](http://embed.ly/providers))
18
+ - every service accessible via embed.ly (see [photo providers](http://embed.ly/providers))
19
19
 
20
20
  ## Installation
21
21
 
@@ -34,10 +34,11 @@ api_endpoint_twitter: 'http://search.twitter.com/search.json'
34
34
  api_endpoint_tumblr: 'http://www.tumblr.com'
35
35
  tumblr_username: YOUR_TUMBLR_EMAIL
36
36
  tumblr_password: YOUR_TUMBLR_PW
37
+ embedly_key: '' #tweetlr uses http://embedly.com for link processing. a free plan containing an api key is available & recommended to use in order to ensure full support
37
38
  update_period: 300 #check for updates every 300 secs = 5 minutes
38
39
  shouts: 'says' # will be concatenated after the username, before the message: @mr_x says: awesome things on a photo!
39
40
  loglevel: 1 # 0: debug, 1: info (default), 2: warn, 3: error, 5: fatal
40
- whitelist: #twitter accounts in that list will have their tweets published immediately. post from others will be saved as drafts. blank list will publish all tweets immediately.
41
+ whitelist: #twitter accounts in that list will have their tweets published immediately. post from others will be saved as drafts. blank list will publish all tweets immediately
41
42
  - whitey_mc_whitelist
42
43
  - sven_kr
43
44
  ```
data/bin/tweetlr CHANGED
@@ -22,7 +22,9 @@ begin
22
22
 
23
23
  UPDATE_PERIOD = CONFIG['update_period']
24
24
 
25
- @tweetlr = Tweetlr.new(CONFIG['tumblr_username'], CONFIG['tumblr_password'], {
25
+ @tweetlr_config = {
26
+ :tumblr_email => CONFIG['tumblr_username'],
27
+ :tumblr_password => CONFIG['tumblr_password'],
26
28
  :whitelist => CONFIG['whitelist'],
27
29
  :shouts => CONFIG['shouts'],
28
30
  :since_id => CONFIG['start_at_tweet_id'] ,
@@ -32,8 +34,9 @@ begin
32
34
  :api_endpoint_tumblr => CONFIG['api_endpoint_tumblr'],
33
35
  :api_endpoint_twitter => CONFIG['api_endpoint_twitter'],
34
36
  :results_per_page => CONFIG['results_per_page'],
35
- :result_type => CONFIG['result_type']
36
- })
37
+ :result_type => CONFIG['result_type'],
38
+ :embedly_key => CONFIG['embedly_key']
39
+ }
37
40
  rescue SystemCallError
38
41
  $stderr.puts "Ooops - looks like there is no ./config/tweetlr.yml found. I'm affraid tweetlr won't work properly until you introduced that configuration file."
39
42
  exit(1)
@@ -43,32 +46,11 @@ Daemons.run_proc('tweetlr', :dir_mode => :script, :dir => './', :backtrace => tr
43
46
  @log = Logger.new(STDOUT)
44
47
  @log.info "starting tweetlr daemon..."
45
48
  @log.info "creating a new tweetlr instance using this config: #{CONFIG.inspect}"
46
- EventMachine::run {
47
- EventMachine::add_periodic_timer( UPDATE_PERIOD ) {
48
- @log.info "starting tweetlr crawl..."
49
- response = {}
50
- response = @tweetlr.lazy_search_twitter(@tweetlr.twitter_config["refresh_url"]) #looks awkward, but the refresh url will come from the db soon and make sense then...
51
- if response
52
- tweets = response['results']
53
- if tweets
54
- tweets.each do |tweet|
55
- tumblr_post = @tweetlr.generate_tumblr_photo_post tweet
56
- if tumblr_post.nil? || tumblr_post[:source].nil?
57
- @log.warn "could not get image source: tweet: #{tweet} --- tumblr post: #{tumblr_post.inspect}"
58
- else
59
- @log.debug "tumblr post: #{tumblr_post}"
60
- res = @tweetlr.post_to_tumblr tumblr_post
61
- @log.warn "tumblr response: #{res.header_str} #{res.body_str}" unless res.response_code == 201
62
- end
63
- end
64
- # store the highest tweet id
65
- @tweetlr.twitter_config[:refresh_url]=response['refresh_url']
66
- File.open(tid_file, "w+") { |io| io.write(response['max_id']) }
67
- end
68
- else
69
- @log.error "twitter search returned no response. hail the failwhale!"
49
+ EventMachine::run do
50
+ EventMachine::add_periodic_timer( UPDATE_PERIOD ) do
51
+ response = Tweetlr.crawl(@tweetlr_config)
52
+ File.open(tid_file, "w+") { |io| io.write(response[:since_id]) }
53
+ @tweetlr_config.merge! response
70
54
  end
71
- @log.info "finished tweetlr crawl."
72
- }
73
- }
55
+ end
74
56
  end
data/config/tweetlr.yml CHANGED
@@ -6,9 +6,10 @@ api_endpoint_twitter: 'http://search.twitter.com/search.json'
6
6
  api_endpoint_tumblr: 'http://www.tumblr.com'
7
7
  tumblr_username: YOUR_TUMBLR_EMAIL
8
8
  tumblr_password: YOUR_TUMBLR_PW
9
+ embedly_key: '' #tweetlr uses http://embedly.com for link processing. a free plan containing an api key is available & recommended to use in order to ensure full support
9
10
  update_period: 300 #check for updates every 300 secs = 5 minutes
10
11
  shouts: 'says' # will be concatenated after the username, before the message: @mr_x says: awesome things on a photo!
11
- loglevel: 0 # 0: debug, 1: info (default), 2: warn, 3: error, 5: fatal
12
+ loglevel: 1 # 0: debug, 1: info (default), 2: warn, 3: error, 5: fatal
12
13
  whitelist: #twitter accounts in that list will have their tweets published immediately. post from others will be saved as drafts. blank list will publish all tweets immediately
13
14
  - whitey_mc_whitelist
14
- - sven_kr
15
+ - sven_kr
@@ -0,0 +1,54 @@
1
+ require 'processors/twitter'
2
+ require 'processors/tumblr'
3
+ require 'processors/photo_service'
4
+
5
+ require 'log_aware'
6
+
7
+ module Combinators
8
+ module TwitterTumblr
9
+ include LogAware
10
+ def self.log
11
+ LogAware.log #TODO why doesn't the include make the log method accessible?
12
+ end
13
+ #extract a linked image file's url from a tweet. first found image will be used.
14
+ def self.extract_image_url(tweet, embedly_key=nil)
15
+ links = Processors::Twitter::extract_links tweet
16
+ image_url = nil
17
+ if links
18
+ links.each do |link|
19
+ image_url = Processors::PhotoService::find_image_url(link, embedly_key)
20
+ return image_url if Processors::PhotoService::photo? image_url
21
+ end
22
+ end
23
+ image_url
24
+ end
25
+ #generate the data for a tumblr photo entry by parsing a tweet
26
+ def self.generate_photo_post_from_tweet(tweet, options = {})
27
+ log.debug "#{self}.generate_photo_post_from_tweet with options: #{options.inspect}"
28
+ tumblr_post = nil
29
+ message = tweet['text']
30
+ whitelist = options[:whitelist]
31
+ whitelist.each {|entry| entry.downcase!} if whitelist
32
+ if !Processors::Twitter::retweet? message
33
+ log.debug "tweet: #{tweet}"
34
+ tumblr_post = {}
35
+ tumblr_post[:type] = 'photo'
36
+ tumblr_post[:date] = tweet['created_at']
37
+ tumblr_post[:source] = extract_image_url tweet, options[:embedly_key]
38
+ user = tweet['from_user']
39
+ tumblr_post[:tags] = user
40
+ tweet_id = tweet['id']
41
+ if !whitelist || whitelist.member?(user.downcase)
42
+ state = 'published'
43
+ else
44
+ state = 'draft'
45
+ end
46
+ tumblr_post[:state] = state
47
+ shouts = " #{@shouts}" if @shouts
48
+ tumblr_post[:caption] = %?<a href="http://twitter.com/#{user}/statuses/#{tweet_id}" alt="tweet">@#{user}</a>#{shouts}: #{tweet['text']}?
49
+ #TODO make the caption a bigger matter of yml/ general configuration
50
+ end
51
+ tumblr_post
52
+ end
53
+ end
54
+ end
data/lib/log_aware.rb CHANGED
@@ -1,8 +1,9 @@
1
+ #use centralized logging
1
2
  module LogAware
2
3
  def self.log=(log)
3
- @@log = log #TODO think of a more elegant way of logging than a static attribute
4
+ @log = log
4
5
  end
5
6
  def self.log()
6
- @@log
7
+ @log || Logger.new(STDOUT)
7
8
  end
8
9
  end
@@ -0,0 +1,45 @@
1
+ require 'curb'
2
+ require 'log_aware'
3
+
4
+ module Processors
5
+ #utilities for handling http
6
+ module Http
7
+ include LogAware
8
+
9
+ USER_AGENT = %{Mozilla/5.0 (compatible; tweetlr; +http://tweetlr.5v3n.com)}
10
+
11
+ def self.log
12
+ LogAware.log #TODO why doesn't the include make the log method accessible?
13
+ end
14
+
15
+ #convenience method for curl http get calls and parsing them to json.
16
+ def self.http_get(request)
17
+ tries = 3
18
+ begin
19
+ curl = Curl::Easy.new request
20
+ curl.useragent = USER_AGENT
21
+ curl.perform
22
+ begin
23
+ JSON.parse curl.body_str
24
+ rescue JSON::ParserError => err
25
+ begin
26
+ log.warn "#{err}: Could not parse response for #{request} - this is probably not a json response: #{curl.body_str}"
27
+ return nil
28
+ rescue Encoding::CompatibilityError => err
29
+ log.error "Trying to rescue a JSON::ParserError for '#{request}' we got stuck in a Encoding::CompatibilityError."
30
+ return nil
31
+ end
32
+ end
33
+ rescue Curl::Err::CurlError => err
34
+ log.error "Failure in Curl call: #{err}" if log
35
+ tries -= 1
36
+ sleep 3
37
+ if tries > 0
38
+ retry
39
+ else
40
+ nil
41
+ end
42
+ end
43
+ end
44
+ end
45
+ end
@@ -0,0 +1,126 @@
1
+ require 'processors/http'
2
+ require 'log_aware'
3
+
4
+ module Processors
5
+ #utilities for dealing with photo services
6
+ module PhotoService
7
+
8
+ LOCATION_START_INDICATOR = 'Location: '
9
+ LOCATION_STOP_INDICATOR = "\r\n"
10
+ PIC_REGEXP = /(.*?)\.(jpg|jpeg|png|gif)/i
11
+
12
+ include LogAware
13
+
14
+ def self.log
15
+ LogAware.log #TODO why doesn't the include make the log method accessible?
16
+ end
17
+
18
+ def self.find_image_url(link, embedly_key=nil)
19
+ url = nil
20
+ if link && !(photo? link)
21
+ url = image_url_instagram link if (link.index('instagr.am') || link.index('instagram.com'))
22
+ url = image_url_picplz link if link.index 'picplz'
23
+ url = image_url_twitpic link if link.index 'twitpic'
24
+ url = image_url_yfrog link if link.index 'yfrog'
25
+ url = image_url_imgly link if link.index 'img.ly'
26
+ url = image_url_tco link, embedly_key if link.index 't.co'
27
+ url = image_url_lockerz link if link.index 'lockerz.com'
28
+ url = image_url_embedly link, embedly_key if url.nil? #just try embed.ly for anything else. could do all image url processing w/ embedly, but there's probably some kind of rate limit invovled.
29
+ elsif photo? link
30
+ url = link
31
+ end
32
+ url
33
+ end
34
+
35
+ def self.photo?(link)
36
+ link =~ PIC_REGEXP
37
+ end
38
+
39
+ #find the image's url via embed.ly
40
+ def self.image_url_embedly(link_url, key)
41
+ response = Processors::Http::http_get "http://api.embed.ly/1/oembed?key=#{key}&url=#{link_url}"
42
+ log.debug "embedly call: http://api.embed.ly/1/oembed?key=#{key}&url=#{link_url}"
43
+ if response && response['type'] == 'photo'
44
+ image_url = response['url']
45
+ end
46
+ image_url
47
+ end
48
+ #find the image's url for a lockerz link
49
+ def self.image_url_lockerz(link_url)
50
+ response = Processors::Http::http_get "http://api.plixi.com/api/tpapi.svc/json/metadatafromurl?details=false&url=#{link_url}"
51
+ response["BigImageUrl"] if response
52
+ end
53
+ #find the image's url for an twitter shortened link
54
+ def self.image_url_tco(link_url, embedly_key = nil)
55
+ service_url = link_url_redirect link_url
56
+ find_image_url service_url, embedly_key
57
+ end
58
+ #find the image's url for an instagram link
59
+ def self.image_url_instagram(link_url)
60
+ link_url['instagram.com'] = 'instagr.am' if link_url.index 'instagram.com' #instagram's oembed does not work for .com links
61
+ response = Processors::Http::http_get "http://api.instagram.com/oembed?url=#{link_url}"
62
+ response['url'] if response
63
+ end
64
+
65
+ #find the image's url for a picplz short/longlink
66
+ def self.image_url_picplz(link_url)
67
+ id = extract_id link_url
68
+ #try short url
69
+ response = Processors::Http::http_get "http://picplz.com/api/v2/pic.json?shorturl_ids=#{id}"
70
+ #if short url fails, try long url
71
+ #response = HTTParty.get "http://picplz.com/api/v2/pic.json?longurl_ids=#{id}"
72
+ #extract url
73
+ if response && response['value'] && response['value']['pics'] && response['value']['pics'].first && response['value']['pics'].first['pic_files'] && response['value']['pics'].first['pic_files']['640r']
74
+ response['value']['pics'].first['pic_files']['640r']['img_url']
75
+ else
76
+ nil
77
+ end
78
+ end
79
+ #find the image's url for a twitpic link
80
+ def self.image_url_twitpic(link_url)
81
+ image_url_redirect link_url, "http://twitpic.com/show/full/"
82
+ end
83
+ #find the image'S url for a yfrog link
84
+ def self.image_url_yfrog(link_url)
85
+ response = Processors::Http::http_get("http://www.yfrog.com/api/oembed?url=#{link_url}")
86
+ response['url'] if response
87
+ end
88
+ #find the image's url for a img.ly link
89
+ def self.image_url_imgly(link_url)
90
+ image_url_redirect link_url, "http://img.ly/show/full/", "\r\n"
91
+ end
92
+
93
+ # extract image url from services like twitpic & img.ly that do not offer oembed interfaces
94
+ def self.image_url_redirect(link_url, service_endpoint, stop_indicator = LOCATION_STOP_INDICATOR)
95
+ link_url_redirect "#{service_endpoint}#{extract_id link_url}", stop_indicator
96
+ end
97
+
98
+ def self.link_url_redirect(short_url, stop_indicator = LOCATION_STOP_INDICATOR)
99
+ tries = 3
100
+ begin
101
+ resp = Curl::Easy.http_get(short_url) { |res| res.follow_location = true }
102
+ rescue Curl::Err::CurlError => err
103
+ log.error "Curl::Easy.http_get failed: #{err}"
104
+ tries -= 1
105
+ sleep 3
106
+ if tries > 0
107
+ retry
108
+ else
109
+ return nil
110
+ end
111
+ end
112
+ if(resp && resp.header_str && resp.header_str.index(LOCATION_START_INDICATOR) && resp.header_str.index(stop_indicator))
113
+ start = resp.header_str.index(LOCATION_START_INDICATOR) + LOCATION_START_INDICATOR.size
114
+ stop = resp.header_str.index(stop_indicator, start)
115
+ resp.header_str[start...stop]
116
+ else
117
+ nil
118
+ end
119
+ end
120
+
121
+ #extract the pic id from a given <code>link</code>
122
+ def self.extract_id(link)
123
+ link.split('/').last if link.split('/')
124
+ end
125
+ end
126
+ end
@@ -0,0 +1,46 @@
1
+ require 'log_aware'
2
+
3
+ module Processors
4
+ #utilities for handling tumblr
5
+ module Tumblr
6
+ GENERATOR = %{tweetlr - http://tweetlr.5v3n.com}
7
+ API_ENDPOINT_TUMBLR = 'http://www.tumblr.com'
8
+ include LogAware
9
+ def self.log
10
+ LogAware.log #TODO why doesn't the include make the log method accessible?
11
+ end
12
+ #post a tumblr photo entry.
13
+ #
14
+ #required arguments are :email, :password, :type, :date, :source, :caption, :state, :source
15
+ #
16
+ #optional arguments: :api_endpoint_tumblr, :tags
17
+ #
18
+ def self.post(options={})
19
+ tries = 3
20
+ tags = options[:tags]
21
+ begin
22
+ response = Curl::Easy.http_post("#{options[:api_endpoint_tumblr] || API_ENDPOINT_TUMBLR}/api/write",
23
+ Curl::PostField.content('generator', GENERATOR),
24
+ Curl::PostField.content('email', options[:email]),
25
+ Curl::PostField.content('password', options[:password]),
26
+ Curl::PostField.content('type', options[:type]),
27
+ Curl::PostField.content('date', options[:date]),
28
+ Curl::PostField.content('source', options[:source]),
29
+ Curl::PostField.content('caption', options[:caption]),
30
+ Curl::PostField.content('state', options[:state]),
31
+ Curl::PostField.content('tags', tags)
32
+ )
33
+ rescue Curl::Err::CurlError => err
34
+ log.error "Failure in Curl call: #{err}"
35
+ tries -= 1
36
+ sleep 3
37
+ if tries > 0
38
+ retry
39
+ else
40
+ response = nil
41
+ end
42
+ end
43
+ response
44
+ end
45
+ end
46
+ end
@@ -0,0 +1,44 @@
1
+ require 'processors/http'
2
+ require 'log_aware'
3
+
4
+ module Processors
5
+ #utilities for dealing with twitter
6
+ module Twitter
7
+ include LogAware
8
+ def self.log
9
+ LogAware.log #TODO why doesn't the include make the log method accessible?
10
+ end
11
+
12
+ #checks if the message is a retweet
13
+ def self.retweet?(message)
14
+ message.index('RT @') || message.index(%{"@}) || message.index("\u201c@") #detect retweets
15
+ end
16
+
17
+ #extract the links from a given tweet
18
+ def self.extract_links(tweet)
19
+ if tweet
20
+ text = tweet['text']
21
+ text.gsub(/https?:\/\/[\S]+/).to_a if text
22
+ end
23
+ end
24
+
25
+ #fire a new search
26
+ def self.search(config)
27
+ search_call = "#{config[:api_endpoint_twitter]}?ors=#{config[:search_term]}&result_type=#{config[:result_type]}&rpp=#{config[:results_per_page]}"
28
+ Processors::Http::http_get search_call
29
+ end
30
+
31
+ # lazy update - search for a term or refresh the search if a response is available already
32
+ def self.lazy_search(config)
33
+ response = nil
34
+ if config
35
+ search_url = "#{config[:api_endpoint_twitter]}?since_id=#{config[:since_id]}&ors=#{config[:search_term]}&result_type=#{config[:result_type]}&rpp=#{config[:results_per_page]}"
36
+ log.info "lazy search using '#{search_url}'"
37
+ response = Processors::Http::http_get search_url
38
+ else
39
+ log.error "#{self}.lazy_search: no config given!"
40
+ end
41
+ response
42
+ end
43
+ end
44
+ end
data/lib/tweetlr.rb CHANGED
@@ -3,17 +3,16 @@ require 'logger'
3
3
  require 'yaml'
4
4
  require 'curb'
5
5
  require 'json'
6
- require 'twitter_processor'
7
- require 'http_processor'
8
- require 'photo_service_processor'
6
+ require 'processors/twitter'
7
+ require 'processors/http'
8
+ require 'processors/photo_service'
9
+ require 'processors/tumblr'
10
+ require 'combinators/twitter_tumblr'
9
11
  require 'log_aware'
10
12
 
11
13
  class Tweetlr
12
-
13
- attr_accessor :twitter_config
14
14
 
15
- VERSION = '0.1.7pre'
16
- GENERATOR = %{tweetlr - http://tweetlr.5v3n.com}
15
+ VERSION = '0.1.7pre4'
17
16
 
18
17
  API_ENDPOINT_TWITTER = 'http://search.twitter.com/search.json'
19
18
  API_ENDPOINT_TUMBLR = 'http://www.tumblr.com'
@@ -21,27 +20,23 @@ class Tweetlr
21
20
  TWITTER_RESULTS_TYPE = 'recent'
22
21
  UPDATE_PERIOD = 600 #10 minutes
23
22
 
24
- def initialize(email, password, args={:terms=>nil, :whitelist => nil, :shouts => nil, :since_id=>nil, :results_per_page => nil, :loglevel=>nil, :result_type => nil})
25
- @log = Logger.new(STDOUT)
23
+ include LogAware
24
+ def self.log
25
+ LogAware.log #TODO why doesn't the include make the log method accessible?
26
+ end
27
+
28
+ def initialize(args)
29
+ log = Logger.new(STDOUT)
26
30
  if (Logger::DEBUG..Logger::UNKNOWN).to_a.index(args[:loglevel])
27
- @log.level = args[:loglevel]
31
+ log.level = args[:loglevel]
28
32
  else
29
- @log.level = Logger::INFO
33
+ log.level = Logger::INFO
30
34
  end
31
- @log.debug "log level set to #{@log.level}"
32
- LogAware.log=@log
33
- @twitter_config = {
34
- :since_id => args[:since_id],
35
- :search_term => args[:terms],
36
- :results_per_page => args[:results_per_page] || TWITTER_RESULTS_PER_PAGE,
37
- :result_type => args[:result_type] || TWITTER_RESULTS_TYPE,
38
- :api_endpoint_twitter => args[:api_endpoint_twitter] || API_ENDPOINT_TWITTER
39
- }
40
- @twitter_config[:refresh_url] = "?ors=#{@twitter_config[:search_term]}&since_id=#{@twitter_config[:since_id]}&rpp=#{@twitter_config[:results_per_page]}&result_type=#{@twitter_config[:result_type]}" if (@twitter_config[:since_id] && @twitter_config[:search_term])
41
- @twitter_config[:logger] = @log
35
+ log.debug "log level set to #{log.level}"
36
+ LogAware.log=log
42
37
 
43
- @email = email
44
- @password = password
38
+ @email = args[:tumblr_email]
39
+ @password = args[:tumblr_password]
45
40
  @cookie = args[:cookie]
46
41
  @api_endpoint_twitter =
47
42
  @api_endpoint_tumblr = args[:api_endpoint_tumblr] || API_ENDPOINT_TUMBLR
@@ -51,79 +46,38 @@ class Tweetlr
51
46
  @whitelist.each {|entry| entry.downcase!} if @whitelist
52
47
  end
53
48
 
54
- def lazy_search_twitter(refresh_url=nil)
55
- @twitter_config[:refresh_url] = refresh_url if refresh_url
56
- TwitterProcessor::lazy_search(@twitter_config)
57
- end
58
-
59
- #post a tumblr photo entry. required arguments are :type, :date, :source, :caption, :state. optional argument: :tags
60
- def post_to_tumblr(options={})
61
- tries = 3
62
- if options[:type] && options[:date] && options[:source] && options[:caption] && options[:state]
63
- tags = options[:tags]
64
- begin
65
- response = Curl::Easy.http_post("#{@api_endpoint_tumblr}/api/write",
66
- Curl::PostField.content('generator', GENERATOR),
67
- Curl::PostField.content('email', @email),
68
- Curl::PostField.content('password', @password),
69
- Curl::PostField.content('type', options[:type]),
70
- Curl::PostField.content('date', options[:date]),
71
- Curl::PostField.content('source', options[:source]),
72
- Curl::PostField.content('caption', options[:caption]),
73
- Curl::PostField.content('state', options[:state]),
74
- Curl::PostField.content('tags', tags)
75
- )
76
- rescue Curl::Err::CurlError => err
77
- @log.error "Failure in Curl call: #{err}"
78
- tries -= 1
79
- sleep 3
80
- if tries > 0
81
- retry
49
+ def self.crawl(config)
50
+ log.debug "#{self}.crawl() using config: #{config.inspect}"
51
+ twitter_config = {
52
+ :since_id => config[:since_id] || config[:start_at_tweet_id],
53
+ :search_term => config[:terms] || config[:search_term] ,
54
+ :results_per_page => config[:results_per_page] || TWITTER_RESULTS_PER_PAGE,
55
+ :result_type => config[:result_type] || TWITTER_RESULTS_TYPE,
56
+ :api_endpoint_twitter => config[:api_endpoint_twitter] || API_ENDPOINT_TWITTER
57
+ }
58
+ log.info "starting tweetlr crawl..."
59
+ response = {}
60
+ response = Processors::Twitter::lazy_search(twitter_config) #looks awkward, but the refresh url will come from the db soon and make sense then...
61
+ if response
62
+ tweets = response['results']
63
+ if tweets
64
+ tweets.each do |tweet|
65
+ tumblr_post = Combinators::TwitterTumblr::generate_photo_post_from_tweet(tweet, {:whitelist => config[:whitelist], :embedly_key => config[:embedly_key]})
66
+ if tumblr_post.nil? || tumblr_post[:source].nil?
67
+ log.warn "could not get image source: tweet: #{tweet} --- tumblr post: #{tumblr_post.inspect}"
82
68
  else
83
- response = nil
69
+ log.debug "tumblr post: #{tumblr_post}"
70
+ res = Processors::Tumblr.post tumblr_post.merge({:password => config[:tumblr_password], :email => config[:tumblr_email]})
71
+ log.warn "tumblr response: #{res.header_str} #{res.body_str}" unless res.response_code == 201
84
72
  end
73
+ end
74
+ # store the highest tweet id
75
+ config[:since_id] = response['max_id']
85
76
  end
77
+ else
78
+ log.error "twitter search returned no response. hail the failwhale!"
86
79
  end
87
- response
88
- end
89
-
90
- #generate the data for a tumblr photo entry by parsing a tweet
91
- def generate_tumblr_photo_post tweet
92
- tumblr_post = nil
93
- message = tweet['text']
94
- if !TwitterProcessor::retweet? message
95
- @log.debug "tweet: #{tweet}"
96
- tumblr_post = {}
97
- tumblr_post[:type] = 'photo'
98
- tumblr_post[:date] = tweet['created_at']
99
- tumblr_post[:source] = extract_image_url tweet
100
- user = tweet['from_user']
101
- tumblr_post[:tags] = user
102
- tweet_id = tweet['id']
103
- if !@whitelist || @whitelist.member?(user.downcase)
104
- state = 'published'
105
- else
106
- state = 'draft'
107
- end
108
- tumblr_post[:state] = state
109
- shouts = " #{@shouts}" if @shouts
110
- tumblr_post[:caption] = %?<a href="http://twitter.com/#{user}/statuses/#{tweet_id}" alt="tweet">@#{user}</a>#{shouts}: #{tweet['text']}?
111
- #TODO make the caption a bigger matter of yml/ general configuration
112
- end
113
- tumblr_post
114
- end
115
-
116
- #extract a linked image file's url from a tweet. first found image will be used.
117
- def extract_image_url(tweet)
118
- links = TwitterProcessor::extract_links tweet
119
- image_url = nil
120
- if links
121
- links.each do |link|
122
- image_url = PhotoServiceProcessor::find_image_url(link)
123
- return image_url if PhotoServiceProcessor::photo? image_url
124
- end
125
- end
126
- image_url
127
- end
128
-
80
+ log.info "finished tweetlr crawl."
81
+ return config
82
+ end
129
83
  end