tweetlr 0.1.7pre → 0.1.7pre4
Sign up to get free protection for your applications and to get access to all the features.
- data/.travis.yml +1 -1
- data/README.md +3 -2
- data/bin/tweetlr +12 -30
- data/config/tweetlr.yml +3 -2
- data/lib/combinators/twitter_tumblr.rb +54 -0
- data/lib/log_aware.rb +3 -2
- data/lib/processors/http.rb +45 -0
- data/lib/processors/photo_service.rb +126 -0
- data/lib/processors/tumblr.rb +46 -0
- data/lib/processors/twitter.rb +44 -0
- data/lib/tweetlr.rb +49 -95
- data/spec/combinators/twitter_tumblr_combinator_spec.rb +93 -0
- data/spec/{photo_services_processor_spec.rb → processors/photo_services_processor_spec.rb} +5 -5
- data/spec/{twitter_processor_spec.rb → processors/twitter_processor_spec.rb} +17 -3
- data/spec/spec_helper.rb +24 -2
- data/spec/tweetlr_spec.rb +21 -107
- data/tweetlr.gemspec +2 -2
- metadata +29 -26
- data/lib/http_processor.rb +0 -42
- data/lib/photo_service_processor.rb +0 -122
- data/lib/tumblr_processor.rb +0 -3
- data/lib/twitter_processor.rb +0 -39
data/.travis.yml
CHANGED
@@ -4,7 +4,7 @@ bundler_args: --binstubs
|
|
4
4
|
# Specify which ruby versions you wish to run your tests on, each version will be used
|
5
5
|
rvm:
|
6
6
|
- 1.9.2
|
7
|
-
|
7
|
+
#- 1.8.7 # (current default)
|
8
8
|
|
9
9
|
# Define how to run your tests (defaults to `bundle exec rake` or `rake` depending on whether you have a `Gemfile`)
|
10
10
|
script: "bundle exec rake test"
|
data/README.md
CHANGED
@@ -15,7 +15,7 @@ tweetlr supports
|
|
15
15
|
- imgly
|
16
16
|
- twitter / photobucket
|
17
17
|
- t.co shortened links to pictures
|
18
|
-
- every service accessible via embed.ly (see [photo providers](http://embed.ly/providers))
|
18
|
+
- every service accessible via embed.ly (see [photo providers](http://embed.ly/providers))
|
19
19
|
|
20
20
|
## Installation
|
21
21
|
|
@@ -34,10 +34,11 @@ api_endpoint_twitter: 'http://search.twitter.com/search.json'
|
|
34
34
|
api_endpoint_tumblr: 'http://www.tumblr.com'
|
35
35
|
tumblr_username: YOUR_TUMBLR_EMAIL
|
36
36
|
tumblr_password: YOUR_TUMBLR_PW
|
37
|
+
embedly_key: '' #tweetlr uses http://embedly.com for link processing. a free plan containing an api key is available & recommended to use in order to ensure full support
|
37
38
|
update_period: 300 #check for updates every 300 secs = 5 minutes
|
38
39
|
shouts: 'says' # will be concatenated after the username, before the message: @mr_x says: awesome things on a photo!
|
39
40
|
loglevel: 1 # 0: debug, 1: info (default), 2: warn, 3: error, 4: fatal, 5: unknown
|
40
|
-
whitelist: #twitter accounts in that list will have their tweets published immediately. post from others will be saved as drafts. blank list will publish all tweets immediately
|
41
|
+
whitelist: #twitter accounts in that list will have their tweets published immediately. posts from others will be saved as drafts. a blank list will publish all tweets immediately
|
41
42
|
- whitey_mc_whitelist
|
42
43
|
- sven_kr
|
43
44
|
```
|
data/bin/tweetlr
CHANGED
@@ -22,7 +22,9 @@ begin
|
|
22
22
|
|
23
23
|
UPDATE_PERIOD = CONFIG['update_period']
|
24
24
|
|
25
|
-
@
|
25
|
+
@tweetlr_config = {
|
26
|
+
:tumblr_email => CONFIG['tumblr_username'],
|
27
|
+
:tumblr_password => CONFIG['tumblr_password'],
|
26
28
|
:whitelist => CONFIG['whitelist'],
|
27
29
|
:shouts => CONFIG['shouts'],
|
28
30
|
:since_id => CONFIG['start_at_tweet_id'] ,
|
@@ -32,8 +34,9 @@ begin
|
|
32
34
|
:api_endpoint_tumblr => CONFIG['api_endpoint_tumblr'],
|
33
35
|
:api_endpoint_twitter => CONFIG['api_endpoint_twitter'],
|
34
36
|
:results_per_page => CONFIG['results_per_page'],
|
35
|
-
:result_type => CONFIG['result_type']
|
36
|
-
|
37
|
+
:result_type => CONFIG['result_type'],
|
38
|
+
:embedly_key => CONFIG['embedly_key']
|
39
|
+
}
|
37
40
|
rescue SystemCallError
|
38
41
|
$stderr.puts "Ooops - looks like there is no ./config/tweetlr.yml found. I'm affraid tweetlr won't work properly until you introduced that configuration file."
|
39
42
|
exit(1)
|
@@ -43,32 +46,11 @@ Daemons.run_proc('tweetlr', :dir_mode => :script, :dir => './', :backtrace => tr
|
|
43
46
|
@log = Logger.new(STDOUT)
|
44
47
|
@log.info "starting tweetlr daemon..."
|
45
48
|
@log.info "creating a new tweetlr instance using this config: #{CONFIG.inspect}"
|
46
|
-
EventMachine::run
|
47
|
-
EventMachine::add_periodic_timer( UPDATE_PERIOD )
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
if response
|
52
|
-
tweets = response['results']
|
53
|
-
if tweets
|
54
|
-
tweets.each do |tweet|
|
55
|
-
tumblr_post = @tweetlr.generate_tumblr_photo_post tweet
|
56
|
-
if tumblr_post.nil? || tumblr_post[:source].nil?
|
57
|
-
@log.warn "could not get image source: tweet: #{tweet} --- tumblr post: #{tumblr_post.inspect}"
|
58
|
-
else
|
59
|
-
@log.debug "tumblr post: #{tumblr_post}"
|
60
|
-
res = @tweetlr.post_to_tumblr tumblr_post
|
61
|
-
@log.warn "tumblr response: #{res.header_str} #{res.body_str}" unless res.response_code == 201
|
62
|
-
end
|
63
|
-
end
|
64
|
-
# store the highest tweet id
|
65
|
-
@tweetlr.twitter_config[:refresh_url]=response['refresh_url']
|
66
|
-
File.open(tid_file, "w+") { |io| io.write(response['max_id']) }
|
67
|
-
end
|
68
|
-
else
|
69
|
-
@log.error "twitter search returned no response. hail the failwhale!"
|
49
|
+
EventMachine::run do
|
50
|
+
EventMachine::add_periodic_timer( UPDATE_PERIOD ) do
|
51
|
+
response = Tweetlr.crawl(@tweetlr_config)
|
52
|
+
File.open(tid_file, "w+") { |io| io.write(response[:since_id]) }
|
53
|
+
@tweetlr_config.merge! response
|
70
54
|
end
|
71
|
-
|
72
|
-
}
|
73
|
-
}
|
55
|
+
end
|
74
56
|
end
|
data/config/tweetlr.yml
CHANGED
@@ -6,9 +6,10 @@ api_endpoint_twitter: 'http://search.twitter.com/search.json'
|
|
6
6
|
api_endpoint_tumblr: 'http://www.tumblr.com'
|
7
7
|
tumblr_username: YOUR_TUMBLR_EMAIL
|
8
8
|
tumblr_password: YOUR_TUMBLR_PW
|
9
|
+
embedly_key: '' #tweetlr uses http://embedly.com for link processing. a free plan containing an api key is available & recommended to use in order to ensure full support
|
9
10
|
update_period: 300 #check for updates every 300 secs = 5 minutes
|
10
11
|
shouts: 'says' # will be concatenated after the username, before the message: @mr_x says: awesome things on a photo!
|
11
|
-
loglevel:
|
12
|
+
loglevel: 1 # 0: debug, 1: info (default), 2: warn, 3: error, 4: fatal, 5: unknown
|
12
13
|
whitelist: #twitter accounts in that list will have their tweets published immediately. posts from others will be saved as drafts. a blank list will publish all tweets immediately
|
13
14
|
- whitey_mc_whitelist
|
14
|
-
- sven_kr
|
15
|
+
- sven_kr
|
@@ -0,0 +1,54 @@
|
|
1
|
+
require 'processors/twitter'
|
2
|
+
require 'processors/tumblr'
|
3
|
+
require 'processors/photo_service'
|
4
|
+
|
5
|
+
require 'log_aware'
|
6
|
+
|
7
|
+
module Combinators
  # Glue between the twitter and the tumblr/photo service processors:
  # turns a tweet (a hash with 'text', 'from_user', 'id' and 'created_at'
  # keys) into the data for a tumblr photo post.
  module TwitterTumblr
    include LogAware

    # Module-level logger accessor. `include` only mixes methods into
    # instances, so the `def self.` methods below cannot reach them and
    # delegate to LogAware explicitly instead.
    def self.log
      LogAware.log
    end

    # Extract a linked image file's url from a tweet. The first link that
    # resolves to something Processors::PhotoService.photo? accepts wins.
    #
    # tweet       - tweet hash, handed to Processors::Twitter.extract_links
    # embedly_key - optional embed.ly api key, passed through to the lookup
    #
    # Returns the first photo url; if no link resolves to a photo, returns
    # whatever the lookup of the last link produced (possibly nil).
    def self.extract_image_url(tweet, embedly_key=nil)
      links = Processors::Twitter::extract_links tweet
      image_url = nil
      if links
        links.each do |link|
          image_url = Processors::PhotoService::find_image_url(link, embedly_key)
          return image_url if Processors::PhotoService::photo? image_url
        end
      end
      image_url
    end

    # Generate the data for a tumblr photo entry by parsing a tweet.
    #
    # tweet   - tweet hash (see module comment)
    # options - :whitelist   => array of twitter user names whose tweets get
    #                           the state 'published'; everybody else's become
    #                           'draft'. nil publishes everything.
    #           :embedly_key => optional embed.ly api key
    #           :shouts      => optional text placed between the user name
    #                           and the message in the caption
    #
    # Returns a hash with :type, :date, :source, :tags, :state and :caption,
    # or nil when the tweet is a retweet.
    def self.generate_photo_post_from_tweet(tweet, options = {})
      log.debug "#{self}.generate_photo_post_from_tweet with options: #{options.inspect}"
      message = tweet['text']
      return nil if Processors::Twitter::retweet? message

      log.debug "tweet: #{tweet}"
      user = tweet['from_user']
      tweet_id = tweet['id']

      # Compare against a downcased *copy*: the list belongs to the caller's
      # configuration and must not be modified (it may also hold frozen strings).
      whitelist = options[:whitelist]
      whitelist = whitelist.map { |entry| entry.downcase } if whitelist
      # user.to_s keeps a tweet without 'from_user' from raising; such a
      # tweet simply never matches the whitelist.
      published = !whitelist || whitelist.member?(user.to_s.downcase)

      # @shouts is not assigned anywhere in this module, so the text has to
      # come in through the options; the ivar is only kept as a fallback.
      shouts = options[:shouts] || @shouts
      shouts = " #{shouts}" if shouts

      tumblr_post = {}
      tumblr_post[:type] = 'photo'
      tumblr_post[:date] = tweet['created_at']
      tumblr_post[:source] = extract_image_url tweet, options[:embedly_key]
      tumblr_post[:tags] = user
      tumblr_post[:state] = published ? 'published' : 'draft'
      tumblr_post[:caption] = %?<a href="http://twitter.com/#{user}/statuses/#{tweet_id}" alt="tweet">@#{user}</a>#{shouts}: #{tweet['text']}?
      #TODO make the caption a bigger matter of yml/ general configuration
      tumblr_post
    end
  end
end
|
data/lib/log_aware.rb
CHANGED
@@ -0,0 +1,45 @@
|
|
1
|
+
require 'curb'
|
2
|
+
require 'log_aware'
|
3
|
+
|
4
|
+
module Processors
  # Helpers for talking http via curb.
  module Http
    include LogAware

    USER_AGENT = %{Mozilla/5.0 (compatible; tweetlr; +http://tweetlr.5v3n.com)}

    # Module-level logger accessor, delegating to LogAware.
    def self.log
      LogAware.log
    end

    # Perform a GET on +request+ (a url string) using the tweetlr user agent
    # and parse the body as json.
    #
    # A Curl::Err::CurlError is logged and followed by a 3 second pause; the
    # call is attempted three times in total.
    #
    # Returns the parsed json, or nil when the body is not valid json or all
    # attempts failed.
    def self.http_get(request)
      attempts_left = 3
      begin
        client = Curl::Easy.new request
        client.useragent = USER_AGENT
        client.perform
        parse_json_body client.body_str, request
      rescue Curl::Err::CurlError => err
        log.error "Failure in Curl call: #{err}" if log
        attempts_left -= 1
        sleep 3
        retry if attempts_left > 0
        nil
      end
    end

    # Parse +body+ as json; on a parser error log a warning naming +request+
    # and return nil. Building that warning may itself raise an
    # Encoding::CompatibilityError, which is logged as well.
    def self.parse_json_body(body, request)
      JSON.parse body
    rescue JSON::ParserError => err
      begin
        log.warn "#{err}: Could not parse response for #{request} - this is probably not a json response: #{body}"
      rescue Encoding::CompatibilityError
        log.error "Trying to rescue a JSON::ParserError for '#{request}' we got stuck in a Encoding::CompatibilityError."
      end
      nil
    end
    private_class_method :parse_json_body
  end
end
|
@@ -0,0 +1,126 @@
|
|
1
|
+
require 'processors/http'
|
2
|
+
require 'log_aware'
|
3
|
+
|
4
|
+
module Processors
  # Utilities for resolving links to photo services (instagram, picplz,
  # twitpic, yfrog, img.ly, lockerz, t.co short links, embed.ly) into the url
  # of the actual image file.
  module PhotoService

    LOCATION_START_INDICATOR = 'Location: '
    LOCATION_STOP_INDICATOR  = "\r\n"
    PIC_REGEXP = /(.*?)\.(jpg|jpeg|png|gif)/i

    include LogAware

    # Module-level logger accessor, delegating to LogAware.
    def self.log
      LogAware.log
    end

    # Resolve +link+ to an image url.
    #
    # A link that already looks like a picture (see photo?) is returned as
    # is. Otherwise every service whose name occurs in the link is asked in
    # turn (a later match overrides an earlier one) and embed.ly is tried
    # when none of them produced a url.
    #
    # Returns the image url or nil.
    def self.find_image_url(link, embedly_key=nil)
      return nil unless link
      return link if photo? link

      url = nil
      url = image_url_instagram link if (link.index('instagr.am') || link.index('instagram.com'))
      url = image_url_picplz link if link.index 'picplz'
      url = image_url_twitpic link if link.index 'twitpic'
      url = image_url_yfrog link if link.index 'yfrog'
      url = image_url_imgly link if link.index 'img.ly'
      url = image_url_tco link, embedly_key if link.index 't.co'
      url = image_url_lockerz link if link.index 'lockerz.com'
      # just try embed.ly for anything else. could do all image url processing
      # w/ embedly, but there's probably some kind of rate limit involved.
      url = image_url_embedly link, embedly_key if url.nil?
      url
    end

    # Does +link+ contain a picture file extension (jpg, jpeg, png, gif)?
    # Returns the match position (truthy) or nil.
    def self.photo?(link)
      link =~ PIC_REGEXP
    end

    #find the image's url via embed.ly
    # Returns the 'url' of the oembed response if its type is 'photo', else nil.
    def self.image_url_embedly(link_url, key)
      request = "http://api.embed.ly/1/oembed?key=#{key}&url=#{link_url}"
      log.debug "embedly call: #{request}"
      response = Processors::Http::http_get request
      response['url'] if response && response['type'] == 'photo'
    end

    #find the image's url for a lockerz link
    def self.image_url_lockerz(link_url)
      response = Processors::Http::http_get "http://api.plixi.com/api/tpapi.svc/json/metadatafromurl?details=false&url=#{link_url}"
      response["BigImageUrl"] if response
    end

    #find the image's url for an twitter shortened link
    # Follows the short link and runs the regular lookup on the target.
    def self.image_url_tco(link_url, embedly_key = nil)
      service_url = link_url_redirect link_url
      find_image_url service_url, embedly_key
    end

    #find the image's url for an instagram link
    def self.image_url_instagram(link_url)
      # instagram's oembed does not work for .com links. Rewrite a copy: the
      # string belongs to the caller and may be frozen.
      oembed_link = link_url.sub('instagram.com', 'instagr.am')
      response = Processors::Http::http_get "http://api.instagram.com/oembed?url=#{oembed_link}"
      response['url'] if response
    end

    #find the image's url for a picplz short/longlink
    def self.image_url_picplz(link_url)
      id = extract_id link_url
      #try short url
      response = Processors::Http::http_get "http://picplz.com/api/v2/pic.json?shorturl_ids=#{id}"
      #if short url fails, try long url
      #response = HTTParty.get "http://picplz.com/api/v2/pic.json?longurl_ids=#{id}"
      #extract url: walk value -> pics -> first -> pic_files -> 640r, giving up
      #with nil as soon as a level is missing
      pics = response && response['value'] && response['value']['pics']
      pic_files = pics && pics.first && pics.first['pic_files']
      sized = pic_files && pic_files['640r']
      sized ? sized['img_url'] : nil
    end

    #find the image's url for a twitpic link
    def self.image_url_twitpic(link_url)
      image_url_redirect link_url, "http://twitpic.com/show/full/"
    end

    #find the image's url for a yfrog link
    def self.image_url_yfrog(link_url)
      response = Processors::Http::http_get("http://www.yfrog.com/api/oembed?url=#{link_url}")
      response['url'] if response
    end

    #find the image's url for a img.ly link
    def self.image_url_imgly(link_url)
      image_url_redirect link_url, "http://img.ly/show/full/", "\r\n"
    end

    # extract image url from services like twitpic & img.ly that do not offer oembed interfaces
    # by requesting <service_endpoint><id of the link> and reading the redirect target.
    def self.image_url_redirect(link_url, service_endpoint, stop_indicator = LOCATION_STOP_INDICATOR)
      link_url_redirect "#{service_endpoint}#{extract_id link_url}", stop_indicator
    end

    # GET +short_url+ (following redirects) and cut the first Location header
    # value out of the raw response headers.
    #
    # A Curl::Err::CurlError is logged and followed by a 3 second pause; the
    # request is attempted three times in total.
    #
    # Returns the text between 'Location: ' and the next +stop_indicator+, or
    # nil when the request failed or the headers contain no such header.
    def self.link_url_redirect(short_url, stop_indicator = LOCATION_STOP_INDICATOR)
      tries = 3
      begin
        resp = Curl::Easy.http_get(short_url) { |res| res.follow_location = true }
      rescue Curl::Err::CurlError => err
        log.error "Curl::Easy.http_get failed: #{err}"
        tries -= 1
        sleep 3
        retry if tries > 0
        return nil
      end

      headers = resp && resp.header_str
      return nil unless headers
      marker = headers.index(LOCATION_START_INDICATOR)
      return nil unless marker && headers.index(stop_indicator)

      start = marker + LOCATION_START_INDICATOR.size
      stop = headers.index(stop_indicator, start)
      # The stop indicator may only occur *before* the Location header; take
      # the remainder then instead of building a range with a nil end.
      stop ? headers[start...stop] : headers[start..-1]
    end

    #extract the pic id from a given <code>link</code>
    # (the part after the last '/'); nil for a nil link.
    def self.extract_id(link)
      link.split('/').last if link
    end
  end
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
require 'log_aware'
|
2
|
+
|
3
|
+
module Processors
  # Helpers for writing to tumblr.
  module Tumblr
    GENERATOR = %{tweetlr - http://tweetlr.5v3n.com}
    API_ENDPOINT_TUMBLR = 'http://www.tumblr.com'
    include LogAware

    # Module-level logger accessor, delegating to LogAware.
    def self.log
      LogAware.log
    end

    # Post a tumblr photo entry by POSTing to <endpoint>/api/write.
    #
    # Required options: :email, :password, :type, :date, :source, :caption, :state
    # Optional options: :api_endpoint_tumblr (defaults to API_ENDPOINT_TUMBLR), :tags
    #
    # A Curl::Err::CurlError is logged and followed by a 3 second pause; the
    # post is attempted three times in total.
    #
    # Returns what Curl::Easy.http_post returned, or nil when every attempt failed.
    def self.post(options={})
      endpoint = "#{options[:api_endpoint_tumblr] || API_ENDPOINT_TUMBLR}/api/write"
      attempts_left = 3
      begin
        # form fields in the order tumblr receives them: generator first,
        # then one field per option
        fields = [Curl::PostField.content('generator', GENERATOR)]
        [:email, :password, :type, :date, :source, :caption, :state, :tags].each do |key|
          fields << Curl::PostField.content(key.to_s, options[key])
        end
        response = Curl::Easy.http_post(endpoint, *fields)
      rescue Curl::Err::CurlError => err
        log.error "Failure in Curl call: #{err}"
        attempts_left -= 1
        sleep 3
        retry if attempts_left > 0
        response = nil
      end
      response
    end
  end
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
require 'processors/http'
|
2
|
+
require 'log_aware'
|
3
|
+
|
4
|
+
module Processors
  # Helpers for searching twitter and picking tweets apart.
  module Twitter
    include LogAware

    # Module-level logger accessor, delegating to LogAware.
    def self.log
      LogAware.log
    end

    # Is +message+ a retweet? Looks for 'RT @', '"@' and a left double
    # quotation mark followed by '@'.
    # Returns the position of the first marker found (truthy) or nil.
    def self.retweet?(message)
      ['RT @', %{"@}, "\u201c@"].each do |marker|
        position = message.index(marker)
        return position if position
      end
      nil
    end

    # Collect all http/https links from the 'text' of +tweet+.
    # Returns an array of url strings, or nil without a tweet or text.
    def self.extract_links(tweet)
      return nil unless tweet
      text = tweet['text']
      return nil unless text
      text.scan(/https?:\/\/[\S]+/)
    end

    # Fire a new search built from :api_endpoint_twitter, :search_term,
    # :result_type and :results_per_page of +config+.
    # Returns whatever Processors::Http.http_get returns for that url.
    def self.search(config)
      endpoint = config[:api_endpoint_twitter]
      query = "ors=#{config[:search_term]}&result_type=#{config[:result_type]}&rpp=#{config[:results_per_page]}"
      Processors::Http::http_get "#{endpoint}?#{query}"
    end

    # Lazy update: like search, but additionally restricted by :since_id.
    # Logs an error and returns nil when no config is given.
    def self.lazy_search(config)
      unless config
        log.error "#{self}.lazy_search: no config given!"
        return nil
      end
      search_url = "#{config[:api_endpoint_twitter]}?since_id=#{config[:since_id]}&ors=#{config[:search_term]}&result_type=#{config[:result_type]}&rpp=#{config[:results_per_page]}"
      log.info "lazy search using '#{search_url}'"
      Processors::Http::http_get search_url
    end
  end
end
|
data/lib/tweetlr.rb
CHANGED
@@ -3,17 +3,16 @@ require 'logger'
|
|
3
3
|
require 'yaml'
|
4
4
|
require 'curb'
|
5
5
|
require 'json'
|
6
|
-
require '
|
7
|
-
require '
|
8
|
-
require '
|
6
|
+
require 'processors/twitter'
|
7
|
+
require 'processors/http'
|
8
|
+
require 'processors/photo_service'
|
9
|
+
require 'processors/tumblr'
|
10
|
+
require 'combinators/twitter_tumblr'
|
9
11
|
require 'log_aware'
|
10
12
|
|
11
13
|
class Tweetlr
|
12
|
-
|
13
|
-
attr_accessor :twitter_config
|
14
14
|
|
15
|
-
VERSION = '0.1.
|
16
|
-
GENERATOR = %{tweetlr - http://tweetlr.5v3n.com}
|
15
|
+
VERSION = '0.1.7pre4'
|
17
16
|
|
18
17
|
API_ENDPOINT_TWITTER = 'http://search.twitter.com/search.json'
|
19
18
|
API_ENDPOINT_TUMBLR = 'http://www.tumblr.com'
|
@@ -21,27 +20,23 @@ class Tweetlr
|
|
21
20
|
TWITTER_RESULTS_TYPE = 'recent'
|
22
21
|
UPDATE_PERIOD = 600 #10 minutes
|
23
22
|
|
24
|
-
|
25
|
-
|
23
|
+
include LogAware
|
24
|
+
# Class-level logger accessor: returns the logger held by LogAware.
# `include` only affects instances of a class, not the class object itself,
# so class methods like crawl need this explicit delegator.
def self.log
LogAware.log
end
|
27
|
+
|
28
|
+
def initialize(args)
|
29
|
+
log = Logger.new(STDOUT)
|
26
30
|
if (Logger::DEBUG..Logger::UNKNOWN).to_a.index(args[:loglevel])
|
27
|
-
|
31
|
+
log.level = args[:loglevel]
|
28
32
|
else
|
29
|
-
|
33
|
+
log.level = Logger::INFO
|
30
34
|
end
|
31
|
-
|
32
|
-
LogAware.log
|
33
|
-
@twitter_config = {
|
34
|
-
:since_id => args[:since_id],
|
35
|
-
:search_term => args[:terms],
|
36
|
-
:results_per_page => args[:results_per_page] || TWITTER_RESULTS_PER_PAGE,
|
37
|
-
:result_type => args[:result_type] || TWITTER_RESULTS_TYPE,
|
38
|
-
:api_endpoint_twitter => args[:api_endpoint_twitter] || API_ENDPOINT_TWITTER
|
39
|
-
}
|
40
|
-
@twitter_config[:refresh_url] = "?ors=#{@twitter_config[:search_term]}&since_id=#{@twitter_config[:since_id]}&rpp=#{@twitter_config[:results_per_page]}&result_type=#{@twitter_config[:result_type]}" if (@twitter_config[:since_id] && @twitter_config[:search_term])
|
41
|
-
@twitter_config[:logger] = @log
|
35
|
+
log.debug "log level set to #{log.level}"
|
36
|
+
LogAware.log=log
|
42
37
|
|
43
|
-
@email =
|
44
|
-
@password =
|
38
|
+
@email = args[:tumblr_email]
|
39
|
+
@password = args[:tumblr_password]
|
45
40
|
@cookie = args[:cookie]
|
46
41
|
@api_endpoint_twitter =
|
47
42
|
@api_endpoint_tumblr = args[:api_endpoint_tumblr] || API_ENDPOINT_TUMBLR
|
@@ -51,79 +46,38 @@ class Tweetlr
|
|
51
46
|
@whitelist.each {|entry| entry.downcase!} if @whitelist
|
52
47
|
end
|
53
48
|
|
54
|
-
def
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
Curl::PostField.content('state', options[:state]),
|
74
|
-
Curl::PostField.content('tags', tags)
|
75
|
-
)
|
76
|
-
rescue Curl::Err::CurlError => err
|
77
|
-
@log.error "Failure in Curl call: #{err}"
|
78
|
-
tries -= 1
|
79
|
-
sleep 3
|
80
|
-
if tries > 0
|
81
|
-
retry
|
49
|
+
# Run one crawl: search twitter, turn every tweet with a resolvable image
# into a tumblr photo post and remember the highest tweet id.
#
# config - hash; keys read here: :since_id / :start_at_tweet_id,
#          :terms / :search_term, :results_per_page, :result_type,
#          :api_endpoint_twitter, :api_endpoint_tumblr, :whitelist, :shouts,
#          :embedly_key, :tumblr_email, :tumblr_password
#
# Returns the very same config hash; when the search delivered results its
# :since_id has been set to the response's 'max_id'.
def self.crawl(config)
  log.debug "#{self}.crawl() using config: #{config.inspect}"
  twitter_config = {
    :since_id => config[:since_id] || config[:start_at_tweet_id],
    :search_term => config[:terms] || config[:search_term],
    :results_per_page => config[:results_per_page] || TWITTER_RESULTS_PER_PAGE,
    :result_type => config[:result_type] || TWITTER_RESULTS_TYPE,
    :api_endpoint_twitter => config[:api_endpoint_twitter] || API_ENDPOINT_TWITTER
  }
  log.info "starting tweetlr crawl..."
  response = Processors::Twitter::lazy_search(twitter_config) #looks awkward, but the refresh url will come from the db soon and make sense then...
  if response
    tweets = response['results']
    if tweets
      tweets.each do |tweet|
        tumblr_post = Combinators::TwitterTumblr::generate_photo_post_from_tweet(tweet, {:whitelist => config[:whitelist], :embedly_key => config[:embedly_key], :shouts => config[:shouts]})
        if tumblr_post.nil? || tumblr_post[:source].nil?
          log.warn "could not get image source: tweet: #{tweet} --- tumblr post: #{tumblr_post.inspect}"
        else
          log.debug "tumblr post: #{tumblr_post}"
          # hand the configured endpoint on as well; Processors::Tumblr.post
          # falls back to its default when it is nil
          res = Processors::Tumblr.post tumblr_post.merge({:password => config[:tumblr_password], :email => config[:tumblr_email], :api_endpoint_tumblr => config[:api_endpoint_tumblr]})
          if res.nil?
            # Processors::Tumblr.post returns nil once all its attempts failed
            log.error "tumblr post failed: no response for #{tumblr_post.inspect}"
          elsif res.response_code != 201
            log.warn "tumblr response: #{res.header_str} #{res.body_str}"
          end
        end
      end
      # store the highest tweet id
      config[:since_id] = response['max_id']
    end
  else
    log.error "twitter search returned no response. hail the failwhale!"
  end
  log.info "finished tweetlr crawl."
  return config
end
|
129
83
|
end
|