tweetlr 0.1.7pre → 0.1.7pre4

Sign up to get free protection for your applications and to get access to all the features.
data/.travis.yml CHANGED
@@ -4,7 +4,7 @@ bundler_args: --binstubs
4
4
  # Specify which ruby versions you wish to run your tests on, each version will be used
5
5
  rvm:
6
6
  - 1.9.2
7
- - 1.8.7 # (current default)
7
+ #- 1.8.7 # (current default)
8
8
 
9
9
  # Define how to run your tests (defaults to `bundle exec rake` or `rake` depending on whether you have a `Gemfile`)
10
10
  script: "bundle exec rake test"
data/README.md CHANGED
@@ -15,7 +15,7 @@ tweetlr supports
15
15
  - imgly
16
16
  - twitter / photobucket
17
17
  - t.co shortened links to pictures
18
- - every service accessible via embed.ly (see [photo providers](http://embed.ly/providers))
18
+ - every service accessible via embed.ly (see [photo providers](http://embed.ly/providers))
19
19
 
20
20
  ## Installation
21
21
 
@@ -34,10 +34,11 @@ api_endpoint_twitter: 'http://search.twitter.com/search.json'
34
34
  api_endpoint_tumblr: 'http://www.tumblr.com'
35
35
  tumblr_username: YOUR_TUMBLR_EMAIL
36
36
  tumblr_password: YOUR_TUMBLR_PW
37
+ embedly_key: '' #tweetlr uses http://embedly.com for link processing. a free plan containing an api key is available & recommended to use in order to ensure full support
37
38
  update_period: 300 #check for updates every 300 secs = 5 minutes
38
39
  shouts: 'says' # will be concatenated after the username, before the message: @mr_x says: awesome things on a photo!
39
40
  loglevel: 1 # 0: debug, 1: info (default), 2: warn, 3: error, 5: fatal
40
- whitelist: #twitter accounts in that list will have their tweets published immediately. post from others will be saved as drafts. blank list will publish all tweets immediately.
41
+ whitelist: #twitter accounts in that list will have their tweets published immediately. post from others will be saved as drafts. blank list will publish all tweets immediately
41
42
  - whitey_mc_whitelist
42
43
  - sven_kr
43
44
  ```
data/bin/tweetlr CHANGED
@@ -22,7 +22,9 @@ begin
22
22
 
23
23
  UPDATE_PERIOD = CONFIG['update_period']
24
24
 
25
- @tweetlr = Tweetlr.new(CONFIG['tumblr_username'], CONFIG['tumblr_password'], {
25
+ @tweetlr_config = {
26
+ :tumblr_email => CONFIG['tumblr_username'],
27
+ :tumblr_password => CONFIG['tumblr_password'],
26
28
  :whitelist => CONFIG['whitelist'],
27
29
  :shouts => CONFIG['shouts'],
28
30
  :since_id => CONFIG['start_at_tweet_id'] ,
@@ -32,8 +34,9 @@ begin
32
34
  :api_endpoint_tumblr => CONFIG['api_endpoint_tumblr'],
33
35
  :api_endpoint_twitter => CONFIG['api_endpoint_twitter'],
34
36
  :results_per_page => CONFIG['results_per_page'],
35
- :result_type => CONFIG['result_type']
36
- })
37
+ :result_type => CONFIG['result_type'],
38
+ :embedly_key => CONFIG['embedly_key']
39
+ }
37
40
  rescue SystemCallError
38
41
  $stderr.puts "Ooops - looks like there is no ./config/tweetlr.yml found. I'm affraid tweetlr won't work properly until you introduced that configuration file."
39
42
  exit(1)
@@ -43,32 +46,11 @@ Daemons.run_proc('tweetlr', :dir_mode => :script, :dir => './', :backtrace => tr
43
46
  @log = Logger.new(STDOUT)
44
47
  @log.info "starting tweetlr daemon..."
45
48
  @log.info "creating a new tweetlr instance using this config: #{CONFIG.inspect}"
46
- EventMachine::run {
47
- EventMachine::add_periodic_timer( UPDATE_PERIOD ) {
48
- @log.info "starting tweetlr crawl..."
49
- response = {}
50
- response = @tweetlr.lazy_search_twitter(@tweetlr.twitter_config["refresh_url"]) #looks awkward, but the refresh url will come from the db soon and make sense then...
51
- if response
52
- tweets = response['results']
53
- if tweets
54
- tweets.each do |tweet|
55
- tumblr_post = @tweetlr.generate_tumblr_photo_post tweet
56
- if tumblr_post.nil? || tumblr_post[:source].nil?
57
- @log.warn "could not get image source: tweet: #{tweet} --- tumblr post: #{tumblr_post.inspect}"
58
- else
59
- @log.debug "tumblr post: #{tumblr_post}"
60
- res = @tweetlr.post_to_tumblr tumblr_post
61
- @log.warn "tumblr response: #{res.header_str} #{res.body_str}" unless res.response_code == 201
62
- end
63
- end
64
- # store the highest tweet id
65
- @tweetlr.twitter_config[:refresh_url]=response['refresh_url']
66
- File.open(tid_file, "w+") { |io| io.write(response['max_id']) }
67
- end
68
- else
69
- @log.error "twitter search returned no response. hail the failwhale!"
49
+ EventMachine::run do
50
+ EventMachine::add_periodic_timer( UPDATE_PERIOD ) do
51
+ response = Tweetlr.crawl(@tweetlr_config)
52
+ File.open(tid_file, "w+") { |io| io.write(response[:since_id]) }
53
+ @tweetlr_config.merge! response
70
54
  end
71
- @log.info "finished tweetlr crawl."
72
- }
73
- }
55
+ end
74
56
  end
data/config/tweetlr.yml CHANGED
@@ -6,9 +6,10 @@ api_endpoint_twitter: 'http://search.twitter.com/search.json'
6
6
  api_endpoint_tumblr: 'http://www.tumblr.com'
7
7
  tumblr_username: YOUR_TUMBLR_EMAIL
8
8
  tumblr_password: YOUR_TUMBLR_PW
9
+ embedly_key: '' #tweetlr uses http://embedly.com for link processing. a free plan containing an api key is available & recommended to use in order to ensure full support
9
10
  update_period: 300 #check for updates every 300 secs = 5 minutes
10
11
  shouts: 'says' # will be concatenated after the username, before the message: @mr_x says: awesome things on a photo!
11
- loglevel: 0 # 0: debug, 1: info (default), 2: warn, 3: error, 5: fatal
12
+ loglevel: 1 # 0: debug, 1: info (default), 2: warn, 3: error, 5: fatal
12
13
  whitelist: #twitter accounts in that list will have their tweets published immediately. post from others will be saved as drafts. blank list will publish all tweets immediately
13
14
  - whitey_mc_whitelist
14
- - sven_kr
15
+ - sven_kr
@@ -0,0 +1,54 @@
1
+ require 'processors/twitter'
2
+ require 'processors/tumblr'
3
+ require 'processors/photo_service'
4
+
5
+ require 'log_aware'
6
+
7
+ module Combinators
8
+ module TwitterTumblr
9
+ include LogAware
10
+ def self.log
11
+ LogAware.log #TODO why doesn't the include make the log method accessible?
12
+ end
13
+ #extract a linked image file's url from a tweet. first found image will be used.
14
+ def self.extract_image_url(tweet, embedly_key=nil)
15
+ links = Processors::Twitter::extract_links tweet
16
+ image_url = nil
17
+ if links
18
+ links.each do |link|
19
+ image_url = Processors::PhotoService::find_image_url(link, embedly_key)
20
+ return image_url if Processors::PhotoService::photo? image_url
21
+ end
22
+ end
23
+ image_url
24
+ end
25
+ #generate the data for a tumblr photo entry by parsing a tweet
26
+ def self.generate_photo_post_from_tweet(tweet, options = {})
27
+ log.debug "#{self}.generate_photo_post_from_tweet with options: #{options.inspect}"
28
+ tumblr_post = nil
29
+ message = tweet['text']
30
+ whitelist = options[:whitelist]
31
+ whitelist.each {|entry| entry.downcase!} if whitelist
32
+ if !Processors::Twitter::retweet? message
33
+ log.debug "tweet: #{tweet}"
34
+ tumblr_post = {}
35
+ tumblr_post[:type] = 'photo'
36
+ tumblr_post[:date] = tweet['created_at']
37
+ tumblr_post[:source] = extract_image_url tweet, options[:embedly_key]
38
+ user = tweet['from_user']
39
+ tumblr_post[:tags] = user
40
+ tweet_id = tweet['id']
41
+ if !whitelist || whitelist.member?(user.downcase)
42
+ state = 'published'
43
+ else
44
+ state = 'draft'
45
+ end
46
+ tumblr_post[:state] = state
47
+ shouts = " #{@shouts}" if @shouts
48
+ tumblr_post[:caption] = %?<a href="http://twitter.com/#{user}/statuses/#{tweet_id}" alt="tweet">@#{user}</a>#{shouts}: #{tweet['text']}?
49
+ #TODO make the caption a bigger matter of yml/ general configuration
50
+ end
51
+ tumblr_post
52
+ end
53
+ end
54
+ end
data/lib/log_aware.rb CHANGED
@@ -1,8 +1,9 @@
1
+ #use centralized logging
1
2
  module LogAware
2
3
  def self.log=(log)
3
- @@log = log #TODO think of a more elegant way of logging than a static attribute
4
+ @log = log
4
5
  end
5
6
  def self.log()
6
- @@log
7
+ @log || Logger.new(STDOUT)
7
8
  end
8
9
  end
@@ -0,0 +1,45 @@
1
+ require 'curb'
2
+ require 'log_aware'
3
+
4
+ module Processors
5
+ #utilities for handling http
6
+ module Http
7
+ include LogAware
8
+
9
+ USER_AGENT = %{Mozilla/5.0 (compatible; tweetlr; +http://tweetlr.5v3n.com)}
10
+
11
+ def self.log
12
+ LogAware.log #TODO why doesn't the include make the log method accessible?
13
+ end
14
+
15
+ #convenience method for curl http get calls and parsing them to json.
16
+ def self.http_get(request)
17
+ tries = 3
18
+ begin
19
+ curl = Curl::Easy.new request
20
+ curl.useragent = USER_AGENT
21
+ curl.perform
22
+ begin
23
+ JSON.parse curl.body_str
24
+ rescue JSON::ParserError => err
25
+ begin
26
+ log.warn "#{err}: Could not parse response for #{request} - this is probably not a json response: #{curl.body_str}"
27
+ return nil
28
+ rescue Encoding::CompatibilityError => err
29
+ log.error "Trying to rescue a JSON::ParserError for '#{request}' we got stuck in a Encoding::CompatibilityError."
30
+ return nil
31
+ end
32
+ end
33
+ rescue Curl::Err::CurlError => err
34
+ log.error "Failure in Curl call: #{err}" if log
35
+ tries -= 1
36
+ sleep 3
37
+ if tries > 0
38
+ retry
39
+ else
40
+ nil
41
+ end
42
+ end
43
+ end
44
+ end
45
+ end
@@ -0,0 +1,126 @@
1
+ require 'processors/http'
2
+ require 'log_aware'
3
+
4
+ module Processors
5
+ #utilities for dealing with photo services
6
+ module PhotoService
7
+
8
+ LOCATION_START_INDICATOR = 'Location: '
9
+ LOCATION_STOP_INDICATOR = "\r\n"
10
+ PIC_REGEXP = /(.*?)\.(jpg|jpeg|png|gif)/i
11
+
12
+ include LogAware
13
+
14
+ def self.log
15
+ LogAware.log #TODO why doesn't the include make the log method accessible?
16
+ end
17
+
18
+ def self.find_image_url(link, embedly_key=nil)
19
+ url = nil
20
+ if link && !(photo? link)
21
+ url = image_url_instagram link if (link.index('instagr.am') || link.index('instagram.com'))
22
+ url = image_url_picplz link if link.index 'picplz'
23
+ url = image_url_twitpic link if link.index 'twitpic'
24
+ url = image_url_yfrog link if link.index 'yfrog'
25
+ url = image_url_imgly link if link.index 'img.ly'
26
+ url = image_url_tco link, embedly_key if link.index 't.co'
27
+ url = image_url_lockerz link if link.index 'lockerz.com'
28
+ url = image_url_embedly link, embedly_key if url.nil? #just try embed.ly for anything else. could do all image url processing w/ embedly, but there's probably some kind of rate limit involved.
29
+ elsif photo? link
30
+ url = link
31
+ end
32
+ url
33
+ end
34
+
35
+ def self.photo?(link)
36
+ link =~ PIC_REGEXP
37
+ end
38
+
39
+ #find the image's url via embed.ly
40
+ def self.image_url_embedly(link_url, key)
41
+ response = Processors::Http::http_get "http://api.embed.ly/1/oembed?key=#{key}&url=#{link_url}"
42
+ log.debug "embedly call: http://api.embed.ly/1/oembed?key=#{key}&url=#{link_url}"
43
+ if response && response['type'] == 'photo'
44
+ image_url = response['url']
45
+ end
46
+ image_url
47
+ end
48
+ #find the image's url for a lockerz link
49
+ def self.image_url_lockerz(link_url)
50
+ response = Processors::Http::http_get "http://api.plixi.com/api/tpapi.svc/json/metadatafromurl?details=false&url=#{link_url}"
51
+ response["BigImageUrl"] if response
52
+ end
53
+ #find the image's url for a twitter shortened link
54
+ def self.image_url_tco(link_url, embedly_key = nil)
55
+ service_url = link_url_redirect link_url
56
+ find_image_url service_url, embedly_key
57
+ end
58
+ #find the image's url for an instagram link
59
+ def self.image_url_instagram(link_url)
60
+ link_url['instagram.com'] = 'instagr.am' if link_url.index 'instagram.com' #instagram's oembed does not work for .com links
61
+ response = Processors::Http::http_get "http://api.instagram.com/oembed?url=#{link_url}"
62
+ response['url'] if response
63
+ end
64
+
65
+ #find the image's url for a picplz short/longlink
66
+ def self.image_url_picplz(link_url)
67
+ id = extract_id link_url
68
+ #try short url
69
+ response = Processors::Http::http_get "http://picplz.com/api/v2/pic.json?shorturl_ids=#{id}"
70
+ #if short url fails, try long url
71
+ #response = HTTParty.get "http://picplz.com/api/v2/pic.json?longurl_ids=#{id}"
72
+ #extract url
73
+ if response && response['value'] && response['value']['pics'] && response['value']['pics'].first && response['value']['pics'].first['pic_files'] && response['value']['pics'].first['pic_files']['640r']
74
+ response['value']['pics'].first['pic_files']['640r']['img_url']
75
+ else
76
+ nil
77
+ end
78
+ end
79
+ #find the image's url for a twitpic link
80
+ def self.image_url_twitpic(link_url)
81
+ image_url_redirect link_url, "http://twitpic.com/show/full/"
82
+ end
83
+ #find the image's url for a yfrog link
84
+ def self.image_url_yfrog(link_url)
85
+ response = Processors::Http::http_get("http://www.yfrog.com/api/oembed?url=#{link_url}")
86
+ response['url'] if response
87
+ end
88
+ #find the image's url for an img.ly link
89
+ def self.image_url_imgly(link_url)
90
+ image_url_redirect link_url, "http://img.ly/show/full/", "\r\n"
91
+ end
92
+
93
+ # extract image url from services like twitpic & img.ly that do not offer oembed interfaces
94
+ def self.image_url_redirect(link_url, service_endpoint, stop_indicator = LOCATION_STOP_INDICATOR)
95
+ link_url_redirect "#{service_endpoint}#{extract_id link_url}", stop_indicator
96
+ end
97
+
98
+ def self.link_url_redirect(short_url, stop_indicator = LOCATION_STOP_INDICATOR)
99
+ tries = 3
100
+ begin
101
+ resp = Curl::Easy.http_get(short_url) { |res| res.follow_location = true }
102
+ rescue Curl::Err::CurlError => err
103
+ log.error "Curl::Easy.http_get failed: #{err}"
104
+ tries -= 1
105
+ sleep 3
106
+ if tries > 0
107
+ retry
108
+ else
109
+ return nil
110
+ end
111
+ end
112
+ if(resp && resp.header_str && resp.header_str.index(LOCATION_START_INDICATOR) && resp.header_str.index(stop_indicator))
113
+ start = resp.header_str.index(LOCATION_START_INDICATOR) + LOCATION_START_INDICATOR.size
114
+ stop = resp.header_str.index(stop_indicator, start)
115
+ resp.header_str[start...stop]
116
+ else
117
+ nil
118
+ end
119
+ end
120
+
121
+ #extract the pic id from a given <code>link</code>
122
+ def self.extract_id(link)
123
+ link.split('/').last if link.split('/')
124
+ end
125
+ end
126
+ end
@@ -0,0 +1,46 @@
1
+ require 'log_aware'
2
+
3
+ module Processors
4
+ #utilities for handling tumblr
5
+ module Tumblr
6
+ GENERATOR = %{tweetlr - http://tweetlr.5v3n.com}
7
+ API_ENDPOINT_TUMBLR = 'http://www.tumblr.com'
8
+ include LogAware
9
+ def self.log
10
+ LogAware.log #TODO why doesn't the include make the log method accessible?
11
+ end
12
+ #post a tumblr photo entry.
13
+ #
14
+ #required arguments are :email, :password, :type, :date, :source, :caption, :state
15
+ #
16
+ #optional arguments: :api_endpoint_tumblr, :tags
17
+ #
18
+ def self.post(options={})
19
+ tries = 3
20
+ tags = options[:tags]
21
+ begin
22
+ response = Curl::Easy.http_post("#{options[:api_endpoint_tumblr] || API_ENDPOINT_TUMBLR}/api/write",
23
+ Curl::PostField.content('generator', GENERATOR),
24
+ Curl::PostField.content('email', options[:email]),
25
+ Curl::PostField.content('password', options[:password]),
26
+ Curl::PostField.content('type', options[:type]),
27
+ Curl::PostField.content('date', options[:date]),
28
+ Curl::PostField.content('source', options[:source]),
29
+ Curl::PostField.content('caption', options[:caption]),
30
+ Curl::PostField.content('state', options[:state]),
31
+ Curl::PostField.content('tags', tags)
32
+ )
33
+ rescue Curl::Err::CurlError => err
34
+ log.error "Failure in Curl call: #{err}"
35
+ tries -= 1
36
+ sleep 3
37
+ if tries > 0
38
+ retry
39
+ else
40
+ response = nil
41
+ end
42
+ end
43
+ response
44
+ end
45
+ end
46
+ end
@@ -0,0 +1,44 @@
1
+ require 'processors/http'
2
+ require 'log_aware'
3
+
4
+ module Processors
5
+ #utilities for dealing with twitter
6
+ module Twitter
7
+ include LogAware
8
+ def self.log
9
+ LogAware.log #TODO why doesn't the include make the log method accessible?
10
+ end
11
+
12
+ #checks if the message is a retweet
13
+ def self.retweet?(message)
14
+ message.index('RT @') || message.index(%{"@}) || message.index("\u201c@") #detect retweets
15
+ end
16
+
17
+ #extract the links from a given tweet
18
+ def self.extract_links(tweet)
19
+ if tweet
20
+ text = tweet['text']
21
+ text.gsub(/https?:\/\/[\S]+/).to_a if text
22
+ end
23
+ end
24
+
25
+ #fire a new search
26
+ def self.search(config)
27
+ search_call = "#{config[:api_endpoint_twitter]}?ors=#{config[:search_term]}&result_type=#{config[:result_type]}&rpp=#{config[:results_per_page]}"
28
+ Processors::Http::http_get search_call
29
+ end
30
+
31
+ # lazy update - search for a term or refresh the search if a response is available already
32
+ def self.lazy_search(config)
33
+ response = nil
34
+ if config
35
+ search_url = "#{config[:api_endpoint_twitter]}?since_id=#{config[:since_id]}&ors=#{config[:search_term]}&result_type=#{config[:result_type]}&rpp=#{config[:results_per_page]}"
36
+ log.info "lazy search using '#{search_url}'"
37
+ response = Processors::Http::http_get search_url
38
+ else
39
+ log.error "#{self}.lazy_search: no config given!"
40
+ end
41
+ response
42
+ end
43
+ end
44
+ end
data/lib/tweetlr.rb CHANGED
@@ -3,17 +3,16 @@ require 'logger'
3
3
  require 'yaml'
4
4
  require 'curb'
5
5
  require 'json'
6
- require 'twitter_processor'
7
- require 'http_processor'
8
- require 'photo_service_processor'
6
+ require 'processors/twitter'
7
+ require 'processors/http'
8
+ require 'processors/photo_service'
9
+ require 'processors/tumblr'
10
+ require 'combinators/twitter_tumblr'
9
11
  require 'log_aware'
10
12
 
11
13
  class Tweetlr
12
-
13
- attr_accessor :twitter_config
14
14
 
15
- VERSION = '0.1.7pre'
16
- GENERATOR = %{tweetlr - http://tweetlr.5v3n.com}
15
+ VERSION = '0.1.7pre4'
17
16
 
18
17
  API_ENDPOINT_TWITTER = 'http://search.twitter.com/search.json'
19
18
  API_ENDPOINT_TUMBLR = 'http://www.tumblr.com'
@@ -21,27 +20,23 @@ class Tweetlr
21
20
  TWITTER_RESULTS_TYPE = 'recent'
22
21
  UPDATE_PERIOD = 600 #10 minutes
23
22
 
24
- def initialize(email, password, args={:terms=>nil, :whitelist => nil, :shouts => nil, :since_id=>nil, :results_per_page => nil, :loglevel=>nil, :result_type => nil})
25
- @log = Logger.new(STDOUT)
23
+ include LogAware
24
+ def self.log
25
+ LogAware.log #TODO why doesn't the include make the log method accessible?
26
+ end
27
+
28
+ def initialize(args)
29
+ log = Logger.new(STDOUT)
26
30
  if (Logger::DEBUG..Logger::UNKNOWN).to_a.index(args[:loglevel])
27
- @log.level = args[:loglevel]
31
+ log.level = args[:loglevel]
28
32
  else
29
- @log.level = Logger::INFO
33
+ log.level = Logger::INFO
30
34
  end
31
- @log.debug "log level set to #{@log.level}"
32
- LogAware.log=@log
33
- @twitter_config = {
34
- :since_id => args[:since_id],
35
- :search_term => args[:terms],
36
- :results_per_page => args[:results_per_page] || TWITTER_RESULTS_PER_PAGE,
37
- :result_type => args[:result_type] || TWITTER_RESULTS_TYPE,
38
- :api_endpoint_twitter => args[:api_endpoint_twitter] || API_ENDPOINT_TWITTER
39
- }
40
- @twitter_config[:refresh_url] = "?ors=#{@twitter_config[:search_term]}&since_id=#{@twitter_config[:since_id]}&rpp=#{@twitter_config[:results_per_page]}&result_type=#{@twitter_config[:result_type]}" if (@twitter_config[:since_id] && @twitter_config[:search_term])
41
- @twitter_config[:logger] = @log
35
+ log.debug "log level set to #{log.level}"
36
+ LogAware.log=log
42
37
 
43
- @email = email
44
- @password = password
38
+ @email = args[:tumblr_email]
39
+ @password = args[:tumblr_password]
45
40
  @cookie = args[:cookie]
46
41
  @api_endpoint_twitter =
47
42
  @api_endpoint_tumblr = args[:api_endpoint_tumblr] || API_ENDPOINT_TUMBLR
@@ -51,79 +46,38 @@ class Tweetlr
51
46
  @whitelist.each {|entry| entry.downcase!} if @whitelist
52
47
  end
53
48
 
54
- def lazy_search_twitter(refresh_url=nil)
55
- @twitter_config[:refresh_url] = refresh_url if refresh_url
56
- TwitterProcessor::lazy_search(@twitter_config)
57
- end
58
-
59
- #post a tumblr photo entry. required arguments are :type, :date, :source, :caption, :state. optional argument: :tags
60
- def post_to_tumblr(options={})
61
- tries = 3
62
- if options[:type] && options[:date] && options[:source] && options[:caption] && options[:state]
63
- tags = options[:tags]
64
- begin
65
- response = Curl::Easy.http_post("#{@api_endpoint_tumblr}/api/write",
66
- Curl::PostField.content('generator', GENERATOR),
67
- Curl::PostField.content('email', @email),
68
- Curl::PostField.content('password', @password),
69
- Curl::PostField.content('type', options[:type]),
70
- Curl::PostField.content('date', options[:date]),
71
- Curl::PostField.content('source', options[:source]),
72
- Curl::PostField.content('caption', options[:caption]),
73
- Curl::PostField.content('state', options[:state]),
74
- Curl::PostField.content('tags', tags)
75
- )
76
- rescue Curl::Err::CurlError => err
77
- @log.error "Failure in Curl call: #{err}"
78
- tries -= 1
79
- sleep 3
80
- if tries > 0
81
- retry
49
+ def self.crawl(config)
50
+ log.debug "#{self}.crawl() using config: #{config.inspect}"
51
+ twitter_config = {
52
+ :since_id => config[:since_id] || config[:start_at_tweet_id],
53
+ :search_term => config[:terms] || config[:search_term] ,
54
+ :results_per_page => config[:results_per_page] || TWITTER_RESULTS_PER_PAGE,
55
+ :result_type => config[:result_type] || TWITTER_RESULTS_TYPE,
56
+ :api_endpoint_twitter => config[:api_endpoint_twitter] || API_ENDPOINT_TWITTER
57
+ }
58
+ log.info "starting tweetlr crawl..."
59
+ response = {}
60
+ response = Processors::Twitter::lazy_search(twitter_config) #looks awkward, but the refresh url will come from the db soon and make sense then...
61
+ if response
62
+ tweets = response['results']
63
+ if tweets
64
+ tweets.each do |tweet|
65
+ tumblr_post = Combinators::TwitterTumblr::generate_photo_post_from_tweet(tweet, {:whitelist => config[:whitelist], :embedly_key => config[:embedly_key]})
66
+ if tumblr_post.nil? || tumblr_post[:source].nil?
67
+ log.warn "could not get image source: tweet: #{tweet} --- tumblr post: #{tumblr_post.inspect}"
82
68
  else
83
- response = nil
69
+ log.debug "tumblr post: #{tumblr_post}"
70
+ res = Processors::Tumblr.post tumblr_post.merge({:password => config[:tumblr_password], :email => config[:tumblr_email]})
71
+ log.warn "tumblr response: #{res.header_str} #{res.body_str}" unless res.response_code == 201
84
72
  end
73
+ end
74
+ # store the highest tweet id
75
+ config[:since_id] = response['max_id']
85
76
  end
77
+ else
78
+ log.error "twitter search returned no response. hail the failwhale!"
86
79
  end
87
- response
88
- end
89
-
90
- #generate the data for a tumblr photo entry by parsing a tweet
91
- def generate_tumblr_photo_post tweet
92
- tumblr_post = nil
93
- message = tweet['text']
94
- if !TwitterProcessor::retweet? message
95
- @log.debug "tweet: #{tweet}"
96
- tumblr_post = {}
97
- tumblr_post[:type] = 'photo'
98
- tumblr_post[:date] = tweet['created_at']
99
- tumblr_post[:source] = extract_image_url tweet
100
- user = tweet['from_user']
101
- tumblr_post[:tags] = user
102
- tweet_id = tweet['id']
103
- if !@whitelist || @whitelist.member?(user.downcase)
104
- state = 'published'
105
- else
106
- state = 'draft'
107
- end
108
- tumblr_post[:state] = state
109
- shouts = " #{@shouts}" if @shouts
110
- tumblr_post[:caption] = %?<a href="http://twitter.com/#{user}/statuses/#{tweet_id}" alt="tweet">@#{user}</a>#{shouts}: #{tweet['text']}?
111
- #TODO make the caption a bigger matter of yml/ general configuration
112
- end
113
- tumblr_post
114
- end
115
-
116
- #extract a linked image file's url from a tweet. first found image will be used.
117
- def extract_image_url(tweet)
118
- links = TwitterProcessor::extract_links tweet
119
- image_url = nil
120
- if links
121
- links.each do |link|
122
- image_url = PhotoServiceProcessor::find_image_url(link)
123
- return image_url if PhotoServiceProcessor::photo? image_url
124
- end
125
- end
126
- image_url
127
- end
128
-
80
+ log.info "finished tweetlr crawl."
81
+ return config
82
+ end
129
83
  end