tweetlr 0.1.7pre → 0.1.7pre4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.travis.yml +1 -1
- data/README.md +3 -2
- data/bin/tweetlr +12 -30
- data/config/tweetlr.yml +3 -2
- data/lib/combinators/twitter_tumblr.rb +54 -0
- data/lib/log_aware.rb +3 -2
- data/lib/processors/http.rb +45 -0
- data/lib/processors/photo_service.rb +126 -0
- data/lib/processors/tumblr.rb +46 -0
- data/lib/processors/twitter.rb +44 -0
- data/lib/tweetlr.rb +49 -95
- data/spec/combinators/twitter_tumblr_combinator_spec.rb +93 -0
- data/spec/{photo_services_processor_spec.rb → processors/photo_services_processor_spec.rb} +5 -5
- data/spec/{twitter_processor_spec.rb → processors/twitter_processor_spec.rb} +17 -3
- data/spec/spec_helper.rb +24 -2
- data/spec/tweetlr_spec.rb +21 -107
- data/tweetlr.gemspec +2 -2
- metadata +29 -26
- data/lib/http_processor.rb +0 -42
- data/lib/photo_service_processor.rb +0 -122
- data/lib/tumblr_processor.rb +0 -3
- data/lib/twitter_processor.rb +0 -39
data/.travis.yml
CHANGED
@@ -4,7 +4,7 @@ bundler_args: --binstubs
|
|
4
4
|
# Specify which ruby versions you wish to run your tests on, each version will be used
|
5
5
|
rvm:
|
6
6
|
- 1.9.2
|
7
|
-
|
7
|
+
#- 1.8.7 # (current default)
|
8
8
|
|
9
9
|
# Define how to run your tests (defaults to `bundle exec rake` or `rake` depending on whether you have a `Gemfile`)
|
10
10
|
script: "bundle exec rake test"
|
data/README.md
CHANGED
@@ -15,7 +15,7 @@ tweetlr supports
|
|
15
15
|
- imgly
|
16
16
|
- twitter / photobucket
|
17
17
|
- t.co shortened links to pictures
|
18
|
-
- every service accessible via embed.ly (see [photo providers](http://embed.ly/providers))
|
18
|
+
- every service accessible via embed.ly (see [photo providers](http://embed.ly/providers))
|
19
19
|
|
20
20
|
## Installation
|
21
21
|
|
@@ -34,10 +34,11 @@ api_endpoint_twitter: 'http://search.twitter.com/search.json'
|
|
34
34
|
api_endpoint_tumblr: 'http://www.tumblr.com'
|
35
35
|
tumblr_username: YOUR_TUMBLR_EMAIL
|
36
36
|
tumblr_password: YOUR_TUMBLR_PW
|
37
|
+
embedly_key: '' #tweetlr uses http://embedly.com for link processing. a free plan containing an api key is available & recommended to use in order to ensure full support
|
37
38
|
update_period: 300 #check for updates every 300 secs = 5 minutes
|
38
39
|
shouts: 'says' # will be concatenated after the username, before the message: @mr_x says: awesome things on a photo!
|
39
40
|
loglevel: 1 # 0: debug, 1: info (default), 2: warn, 3: error, 4: fatal, 5: unknown
|
40
|
-
whitelist: #twitter accounts in that list will have their tweets published immediately. post from others will be saved as drafts. blank list will publish all tweets immediately
|
41
|
+
whitelist: #twitter accounts in that list will have their tweets published immediately. post from others will be saved as drafts. blank list will publish all tweets immediately
|
41
42
|
- whitey_mc_whitelist
|
42
43
|
- sven_kr
|
43
44
|
```
|
data/bin/tweetlr
CHANGED
@@ -22,7 +22,9 @@ begin
|
|
22
22
|
|
23
23
|
UPDATE_PERIOD = CONFIG['update_period']
|
24
24
|
|
25
|
-
@
|
25
|
+
@tweetlr_config = {
|
26
|
+
:tumblr_email => CONFIG['tumblr_username'],
|
27
|
+
:tumblr_password => CONFIG['tumblr_password'],
|
26
28
|
:whitelist => CONFIG['whitelist'],
|
27
29
|
:shouts => CONFIG['shouts'],
|
28
30
|
:since_id => CONFIG['start_at_tweet_id'] ,
|
@@ -32,8 +34,9 @@ begin
|
|
32
34
|
:api_endpoint_tumblr => CONFIG['api_endpoint_tumblr'],
|
33
35
|
:api_endpoint_twitter => CONFIG['api_endpoint_twitter'],
|
34
36
|
:results_per_page => CONFIG['results_per_page'],
|
35
|
-
:result_type => CONFIG['result_type']
|
36
|
-
|
37
|
+
:result_type => CONFIG['result_type'],
|
38
|
+
:embedly_key => CONFIG['embedly_key']
|
39
|
+
}
|
37
40
|
rescue SystemCallError
|
38
41
|
$stderr.puts "Ooops - looks like there is no ./config/tweetlr.yml found. I'm affraid tweetlr won't work properly until you introduced that configuration file."
|
39
42
|
exit(1)
|
@@ -43,32 +46,11 @@ Daemons.run_proc('tweetlr', :dir_mode => :script, :dir => './', :backtrace => tr
|
|
43
46
|
@log = Logger.new(STDOUT)
|
44
47
|
@log.info "starting tweetlr daemon..."
|
45
48
|
@log.info "creating a new tweetlr instance using this config: #{CONFIG.inspect}"
|
46
|
-
EventMachine::run
|
47
|
-
EventMachine::add_periodic_timer( UPDATE_PERIOD )
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
if response
|
52
|
-
tweets = response['results']
|
53
|
-
if tweets
|
54
|
-
tweets.each do |tweet|
|
55
|
-
tumblr_post = @tweetlr.generate_tumblr_photo_post tweet
|
56
|
-
if tumblr_post.nil? || tumblr_post[:source].nil?
|
57
|
-
@log.warn "could not get image source: tweet: #{tweet} --- tumblr post: #{tumblr_post.inspect}"
|
58
|
-
else
|
59
|
-
@log.debug "tumblr post: #{tumblr_post}"
|
60
|
-
res = @tweetlr.post_to_tumblr tumblr_post
|
61
|
-
@log.warn "tumblr response: #{res.header_str} #{res.body_str}" unless res.response_code == 201
|
62
|
-
end
|
63
|
-
end
|
64
|
-
# store the highest tweet id
|
65
|
-
@tweetlr.twitter_config[:refresh_url]=response['refresh_url']
|
66
|
-
File.open(tid_file, "w+") { |io| io.write(response['max_id']) }
|
67
|
-
end
|
68
|
-
else
|
69
|
-
@log.error "twitter search returned no response. hail the failwhale!"
|
49
|
+
EventMachine::run do
|
50
|
+
EventMachine::add_periodic_timer( UPDATE_PERIOD ) do
|
51
|
+
response = Tweetlr.crawl(@tweetlr_config)
|
52
|
+
File.open(tid_file, "w+") { |io| io.write(response[:since_id]) }
|
53
|
+
@tweetlr_config.merge! response
|
70
54
|
end
|
71
|
-
|
72
|
-
}
|
73
|
-
}
|
55
|
+
end
|
74
56
|
end
|
data/config/tweetlr.yml
CHANGED
@@ -6,9 +6,10 @@ api_endpoint_twitter: 'http://search.twitter.com/search.json'
|
|
6
6
|
api_endpoint_tumblr: 'http://www.tumblr.com'
|
7
7
|
tumblr_username: YOUR_TUMBLR_EMAIL
|
8
8
|
tumblr_password: YOUR_TUMBLR_PW
|
9
|
+
embedly_key: '' #tweetlr uses http://embedly.com for link processing. a free plan containing an api key is available & recommended to use in order to ensure full support
|
9
10
|
update_period: 300 #check for updates every 300 secs = 5 minutes
|
10
11
|
shouts: 'says' # will be concatenated after the username, before the message: @mr_x says: awesome things on a photo!
|
11
|
-
loglevel:
|
12
|
+
loglevel: 1 # 0: debug, 1: info (default), 2: warn, 3: error, 4: fatal, 5: unknown
|
12
13
|
whitelist: #twitter accounts in that list will have their tweets published immediately. post from others will be saved as drafts. blank list will publish all tweets immediately
|
13
14
|
- whitey_mc_whitelist
|
14
|
-
- sven_kr
|
15
|
+
- sven_kr
|
@@ -0,0 +1,54 @@
|
|
1
|
+
require 'processors/twitter'
|
2
|
+
require 'processors/tumblr'
|
3
|
+
require 'processors/photo_service'
|
4
|
+
|
5
|
+
require 'log_aware'
|
6
|
+
|
7
|
+
module Combinators
|
8
|
+
module TwitterTumblr
|
9
|
+
include LogAware
|
10
|
+
# Module-level logger accessor: returns whatever LogAware.log returns.
def self.log
|
11
|
+
LogAware.log # `include` only adds instance methods, so module-level (singleton) code needs this explicit delegator
|
12
|
+
end
|
13
|
+
# Resolve the image a tweet links to.
#
# Every link found in the tweet is handed to Processors::PhotoService in
# order; the first resolved url that looks like a picture wins.
#
# @param tweet [Hash] tweet data as accepted by Processors::Twitter::extract_links
# @param embedly_key [String, nil] embed.ly api key, passed on to the photo service
# @return [String, nil] the first picture url; otherwise whatever the last
#   link resolved to (possibly nil), or nil when there are no links at all
def self.extract_image_url(tweet, embedly_key=nil)
  candidate = nil
  (Processors::Twitter::extract_links(tweet) || []).each do |link|
    candidate = Processors::PhotoService::find_image_url(link, embedly_key)
    # stop at the first hit, later links are not looked at anymore
    break if Processors::PhotoService::photo?(candidate)
  end
  candidate
end
|
25
|
+
# Build the data for a tumblr photo post from a tweet.
#
# @param tweet [Hash] tweet data; 'text', 'created_at', 'from_user' and 'id' are read
# @param options [Hash]
#   :whitelist   - user names whose posts get the state 'published'; everybody
#                  else is stored as 'draft'. Without a whitelist every post is published.
#   :embedly_key - passed on to extract_image_url
#   :shouts      - text placed between the user name and the message in the caption
# @return [Hash, nil] post data (:type, :date, :source, :tags, :state, :caption),
#   or nil when the tweet is detected as a retweet
def self.generate_photo_post_from_tweet(tweet, options = {})
  log.debug "#{self}.generate_photo_post_from_tweet with options: #{options.inspect}"
  message = tweet['text']
  return nil if Processors::Twitter::retweet? message
  log.debug "tweet: #{tweet}"

  # compare against lower-cased copies: the caller's list (which may be
  # frozen or reused between crawls) must not be modified in place
  whitelist = options[:whitelist]
  whitelist = whitelist.map { |entry| entry.downcase } if whitelist

  user = tweet['from_user']
  tweet_id = tweet['id']

  # @shouts is never assigned inside this module, so the option has to take
  # precedence; the instance variable is only kept as a fallback
  configured_shouts = options[:shouts] || @shouts
  shouts = " #{configured_shouts}" if configured_shouts

  tumblr_post = {}
  tumblr_post[:type] = 'photo'
  tumblr_post[:date] = tweet['created_at']
  tumblr_post[:source] = extract_image_url tweet, options[:embedly_key]
  tumblr_post[:tags] = user
  tumblr_post[:state] = (!whitelist || whitelist.member?(user.downcase)) ? 'published' : 'draft'
  tumblr_post[:caption] = %?<a href="http://twitter.com/#{user}/statuses/#{tweet_id}" alt="tweet">@#{user}</a>#{shouts}: #{tweet['text']}?
  #TODO make the caption a bigger matter of yml/ general configuration
  tumblr_post
end
|
53
|
+
end
|
54
|
+
end
|
data/lib/log_aware.rb
CHANGED
@@ -0,0 +1,45 @@
|
|
1
|
+
require 'curb'
|
2
|
+
require 'log_aware'
|
3
|
+
|
4
|
+
module Processors
|
5
|
+
#utilities for handling http
|
6
|
+
module Http
|
7
|
+
include LogAware
|
8
|
+
|
9
|
+
USER_AGENT = %{Mozilla/5.0 (compatible; tweetlr; +http://tweetlr.5v3n.com)}
|
10
|
+
|
11
|
+
# Module-level logger accessor: returns whatever LogAware.log returns.
def self.log
|
12
|
+
LogAware.log # `include` only adds instance methods, so module-level (singleton) code needs this explicit delegator
|
13
|
+
end
|
14
|
+
|
15
|
+
# Convenience method: perform an http GET via curl and parse the body as json.
#
# A failing curl call is attempted up to three times in total, pausing three
# seconds between attempts (but not after the last one - there is nothing
# left to wait for).
#
# @param request [String] the url to fetch
# @return [Object, nil] the parsed json, or nil when the body is not valid
#   json or every attempt failed with a Curl::Err::CurlError
def self.http_get(request)
  tries = 3
  begin
    curl = Curl::Easy.new request
    curl.useragent = USER_AGENT
    curl.perform
    begin
      JSON.parse curl.body_str
    rescue JSON::ParserError => err
      begin
        log.warn "#{err}: Could not parse response for #{request} - this is probably not a json response: #{curl.body_str}"
      rescue Encoding::CompatibilityError
        # building the warning itself can fail when the body's encoding
        # does not mix with the message's encoding
        log.error "Trying to rescue a JSON::ParserError for '#{request}' we got stuck in a Encoding::CompatibilityError."
      end
      nil
    end
  rescue Curl::Err::CurlError => err
    log.error "Failure in Curl call: #{err}" if log
    tries -= 1
    if tries > 0
      sleep 3
      retry
    end
    nil
  end
end
|
44
|
+
end
|
45
|
+
end
|
@@ -0,0 +1,126 @@
|
|
1
|
+
require 'processors/http'
|
2
|
+
require 'log_aware'
|
3
|
+
|
4
|
+
module Processors
|
5
|
+
#utilities for dealing with photo services
|
6
|
+
module PhotoService
|
7
|
+
|
8
|
+
LOCATION_START_INDICATOR = 'Location: '
|
9
|
+
LOCATION_STOP_INDICATOR = "\r\n"
|
10
|
+
PIC_REGEXP = /(.*?)\.(jpg|jpeg|png|gif)/i
|
11
|
+
|
12
|
+
include LogAware
|
13
|
+
|
14
|
+
# Module-level logger accessor: returns whatever LogAware.log returns.
def self.log
|
15
|
+
LogAware.log # `include` only adds instance methods, so module-level (singleton) code needs this explicit delegator
|
16
|
+
end
|
17
|
+
|
18
|
+
# Find the url of the picture behind a link.
#
# A link that already looks like a picture is returned untouched. Otherwise
# every service whose marker occurs in the link is asked in the fixed order
# below (a later match overrides an earlier one) and embed.ly serves as the
# fallback when nothing has been found.
#
# @param link [String, nil] the link to inspect
# @param embedly_key [String, nil] embed.ly api key
# @return [String, nil] the picture's url, nil if none could be determined
def self.find_image_url(link, embedly_key=nil)
  return nil unless link
  return link if photo? link

  services = [
    [['instagr.am', 'instagram.com'], lambda { image_url_instagram link }],
    [['picplz'],                      lambda { image_url_picplz link }],
    [['twitpic'],                     lambda { image_url_twitpic link }],
    [['yfrog'],                       lambda { image_url_yfrog link }],
    [['img.ly'],                      lambda { image_url_imgly link }],
    [['t.co'],                        lambda { image_url_tco link, embedly_key }],
    [['lockerz.com'],                 lambda { image_url_lockerz link }]
  ]

  url = nil
  services.each do |markers, resolver|
    url = resolver.call if markers.any? { |marker| link.index marker }
  end
  # just try embed.ly for anything else. all image url processing could go
  # through embed.ly, but there is probably some kind of rate limit involved.
  url = image_url_embedly link, embedly_key if url.nil?
  url
end
|
34
|
+
|
35
|
+
# Does the link look like a picture?
#
# @param link [String, nil] the link to check against PIC_REGEXP
# @return [Integer, nil] position of the match (truthy), nil when nothing matches
def self.photo?(link)
  match_position = (link =~ PIC_REGEXP)
  match_position
end
|
38
|
+
|
39
|
+
# Look the picture's url up via embed.ly's oembed api.
#
# @param link_url [String] the link to resolve
# @param key [String, nil] embed.ly api key
# @return [String, nil] the 'url' of the response when its 'type' is 'photo', nil otherwise
def self.image_url_embedly(link_url, key)
  oembed_call = "http://api.embed.ly/1/oembed?key=#{key}&url=#{link_url}"
  response = Processors::Http::http_get oembed_call
  log.debug "embedly call: #{oembed_call}"
  return nil unless response && response['type'] == 'photo'
  response['url']
end
|
48
|
+
# Look up the picture's url for a lockerz link via the plixi api.
#
# @param link_url [String] the lockerz link
# @return [String, nil] the response's "BigImageUrl", nil without a response
def self.image_url_lockerz(link_url)
  metadata = Processors::Http::http_get("http://api.plixi.com/api/tpapi.svc/json/metadatafromurl?details=false&url=#{link_url}")
  metadata ? metadata["BigImageUrl"] : nil
end
|
53
|
+
# Look up the picture's url for a link shortened by twitter (t.co): the
# redirect target is determined first and then resolved like any other link.
#
# @param link_url [String] the shortened link
# @param embedly_key [String, nil] embed.ly api key, passed on to find_image_url
# @return [String, nil] whatever find_image_url yields for the redirect target
def self.image_url_tco(link_url, embedly_key = nil)
  find_image_url(link_url_redirect(link_url), embedly_key)
end
|
58
|
+
# Look up the picture's url for an instagram link via instagram's oembed api.
#
# @param link_url [String] the instagram link; it is not modified
# @return [String, nil] the response's 'url', nil without a response
def self.image_url_instagram(link_url)
  # per the original author, instagram's oembed does not work for .com links.
  # The substitution happens on a copy so the caller's string (which may be
  # frozen) is left alone.
  oembed_target = link_url.sub('instagram.com', 'instagr.am')
  response = Processors::Http::http_get "http://api.instagram.com/oembed?url=#{oembed_target}"
  response['url'] if response
end
|
64
|
+
|
65
|
+
# Look up the picture's url for a picplz link via the picplz api.
#
# Only the short url lookup is performed; a long url lookup was sketched by
# the original author but never enabled.
#
# @param link_url [String] the picplz link
# @return [String, nil] the 'img_url' of the first picture's '640r' file,
#   nil when any part of that path is missing from the response
def self.image_url_picplz(link_url)
  pic_id = extract_id link_url
  response = Processors::Http::http_get "http://picplz.com/api/v2/pic.json?shorturl_ids=#{pic_id}"
  # walk down the response step by step, any missing level ends the search
  pics = response && response['value'] && response['value']['pics']
  first_pic = pics && pics.first
  files = first_pic && first_pic['pic_files']
  rendition = files && files['640r']
  rendition ? rendition['img_url'] : nil
end
|
79
|
+
# Look up the picture's url for a twitpic link by following the redirect of
# twitpic's "show/full" endpoint.
#
# @param link_url [String] the twitpic link
# @return [String, nil] whatever image_url_redirect yields
def self.image_url_twitpic(link_url)
  twitpic_endpoint = "http://twitpic.com/show/full/"
  image_url_redirect(link_url, twitpic_endpoint)
end
|
83
|
+
# Look up the picture's url for a yfrog link via yfrog's oembed api.
#
# @param link_url [String] the yfrog link
# @return [String, nil] the response's 'url', nil without a response
def self.image_url_yfrog(link_url)
  oembed = Processors::Http::http_get("http://www.yfrog.com/api/oembed?url=#{link_url}")
  oembed ? oembed['url'] : nil
end
|
88
|
+
# Look up the picture's url for an img.ly link by following the redirect of
# img.ly's "show/full" endpoint.
#
# @param link_url [String] the img.ly link
# @return [String, nil] whatever image_url_redirect yields
def self.image_url_imgly(link_url)
  imgly_endpoint = "http://img.ly/show/full/"
  header_line_break = "\r\n"
  image_url_redirect(link_url, imgly_endpoint, header_line_break)
end
|
92
|
+
|
93
|
+
# Determine the picture's url for services like twitpic & img.ly that do not
# offer an oembed interface: the picture's id is appended to the service's
# endpoint and the redirect of that url is looked up.
#
# @param link_url [String] link to the picture's page
# @param service_endpoint [String] url prefix the picture id is appended to
# @param stop_indicator [String] marks the end of the Location header's value
# @return [String, nil] whatever link_url_redirect yields
def self.image_url_redirect(link_url, service_endpoint, stop_indicator = LOCATION_STOP_INDICATOR)
  picture_id = extract_id link_url
  link_url_redirect("#{service_endpoint}#{picture_id}", stop_indicator)
end
|
97
|
+
|
98
|
+
# Determine where a url redirects to by reading the first Location header
# of the (redirect following) curl response.
#
# A failing curl call is attempted up to three times in total, pausing three
# seconds between attempts (but not after the last one).
#
# @param short_url [String] the url to request
# @param stop_indicator [String] marks the end of the Location header's value
# @return [String, nil] the Location header's value; nil when every attempt
#   failed or the response headers contain no Location / stop indicator
def self.link_url_redirect(short_url, stop_indicator = LOCATION_STOP_INDICATOR)
  tries = 3
  begin
    resp = Curl::Easy.http_get(short_url) { |res| res.follow_location = true }
  rescue Curl::Err::CurlError => err
    log.error "Curl::Easy.http_get failed: #{err}"
    tries -= 1
    return nil unless tries > 0
    sleep 3
    retry
  end

  headers = resp && resp.header_str
  return nil unless headers && headers.index(stop_indicator)
  location = headers.index(LOCATION_START_INDICATOR)
  return nil unless location

  start = location + LOCATION_START_INDICATOR.size
  stop = headers.index(stop_indicator, start)
  # a Location header on the last, unterminated line has no stop indicator
  # behind it - take the rest of the headers in that case
  stop ? headers[start...stop] : headers[start..-1]
end
|
120
|
+
|
121
|
+
# Extract the pic id - the last path segment - from a given <code>link</code>.
#
# @param link [String, nil] the link to a picture's page
# @return [String, nil] the text behind the last '/', nil for a nil or empty link
def self.extract_id(link)
  # String#split always returns an array, so the only case that needs a
  # guard is a missing link
  return nil if link.nil?
  link.split('/').last
end
|
125
|
+
end
|
126
|
+
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
require 'log_aware'
|
2
|
+
|
3
|
+
module Processors
|
4
|
+
#utilities for handling tumblr
|
5
|
+
module Tumblr
|
6
|
+
GENERATOR = %{tweetlr - http://tweetlr.5v3n.com}
|
7
|
+
API_ENDPOINT_TUMBLR = 'http://www.tumblr.com'
|
8
|
+
include LogAware
|
9
|
+
# Module-level logger accessor: returns whatever LogAware.log returns.
def self.log
|
10
|
+
LogAware.log # `include` only adds instance methods, so module-level (singleton) code needs this explicit delegator
|
11
|
+
end
|
12
|
+
# Post a tumblr photo entry.
#
# required arguments are :email, :password, :type, :date, :source, :caption, :state
#
# optional arguments: :api_endpoint_tumblr, :tags
#
# A failing curl call is attempted up to three times in total, pausing three
# seconds between attempts (but not after the last one).
#
# @param options [Hash] see the argument lists above
# @return [Object, nil] what Curl::Easy.http_post returned, nil when every
#   attempt failed with a Curl::Err::CurlError
def self.post(options={})
  tries = 3
  tags = options[:tags]
  begin
    response = Curl::Easy.http_post("#{options[:api_endpoint_tumblr] || API_ENDPOINT_TUMBLR}/api/write",
      Curl::PostField.content('generator', GENERATOR),
      Curl::PostField.content('email', options[:email]),
      Curl::PostField.content('password', options[:password]),
      Curl::PostField.content('type', options[:type]),
      Curl::PostField.content('date', options[:date]),
      Curl::PostField.content('source', options[:source]),
      Curl::PostField.content('caption', options[:caption]),
      Curl::PostField.content('state', options[:state]),
      Curl::PostField.content('tags', tags)
    )
  rescue Curl::Err::CurlError => err
    log.error "Failure in Curl call: #{err}"
    tries -= 1
    if tries > 0
      sleep 3
      retry
    end
    response = nil
  end
  response
end
|
45
|
+
end
|
46
|
+
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
require 'processors/http'
|
2
|
+
require 'log_aware'
|
3
|
+
|
4
|
+
module Processors
|
5
|
+
#utilities for dealing with twitter
|
6
|
+
module Twitter
|
7
|
+
include LogAware
|
8
|
+
# Module-level logger accessor: returns whatever LogAware.log returns.
def self.log
|
9
|
+
LogAware.log # `include` only adds instance methods, so module-level (singleton) code needs this explicit delegator
|
10
|
+
end
|
11
|
+
|
12
|
+
# Checks if the message is a retweet, i.e. contains 'RT @' or a quoted
# mention opened by a straight or a typographic double quote.
#
# @param message [String] the tweet's text
# @return [Integer, nil] position of the first marker found (markers are
#   tried in the order listed), nil when the message contains none of them
def self.retweet?(message)
  ['RT @', %{"@}, "\u201c@"].each do |marker|
    position = message.index(marker)
    return position if position
  end
  nil
end
|
16
|
+
|
17
|
+
# Extract the links from a given tweet.
#
# @param tweet [Hash, nil] tweet data; only 'text' is read
# @return [Array<String>, nil] every http(s) link of the text in order of
#   appearance (empty when there is none), nil without a tweet or a text
def self.extract_links(tweet)
  return nil unless tweet
  text = tweet['text']
  return nil unless text
  text.scan(/https?:\/\/[\S]+/)
end
|
24
|
+
|
25
|
+
# Fire a new twitter search.
#
# @param config [Hash] :api_endpoint_twitter, :search_term, :result_type and
#   :results_per_page are read
# @return [Object, nil] whatever Processors::Http::http_get yields for the search url
def self.search(config)
  endpoint = config[:api_endpoint_twitter]
  query = "ors=#{config[:search_term]}&result_type=#{config[:result_type]}&rpp=#{config[:results_per_page]}"
  Processors::Http::http_get("#{endpoint}?#{query}")
end
|
30
|
+
|
31
|
+
# Lazy update - search for a term, starting behind the tweet id handed in
# as :since_id.
#
# @param config [Hash, nil] :api_endpoint_twitter, :since_id, :search_term,
#   :result_type and :results_per_page are read
# @return [Object, nil] whatever Processors::Http::http_get yields for the
#   search url; nil (plus an error log entry) when no config is given
def self.lazy_search(config)
  unless config
    log.error "#{self}.lazy_search: no config given!"
    return nil
  end
  search_url = "#{config[:api_endpoint_twitter]}?since_id=#{config[:since_id]}&ors=#{config[:search_term]}&result_type=#{config[:result_type]}&rpp=#{config[:results_per_page]}"
  log.info "lazy search using '#{search_url}'"
  Processors::Http::http_get search_url
end
|
43
|
+
end
|
44
|
+
end
|
data/lib/tweetlr.rb
CHANGED
@@ -3,17 +3,16 @@ require 'logger'
|
|
3
3
|
require 'yaml'
|
4
4
|
require 'curb'
|
5
5
|
require 'json'
|
6
|
-
require '
|
7
|
-
require '
|
8
|
-
require '
|
6
|
+
require 'processors/twitter'
|
7
|
+
require 'processors/http'
|
8
|
+
require 'processors/photo_service'
|
9
|
+
require 'processors/tumblr'
|
10
|
+
require 'combinators/twitter_tumblr'
|
9
11
|
require 'log_aware'
|
10
12
|
|
11
13
|
class Tweetlr
|
12
|
-
|
13
|
-
attr_accessor :twitter_config
|
14
14
|
|
15
|
-
VERSION = '0.1.
|
16
|
-
GENERATOR = %{tweetlr - http://tweetlr.5v3n.com}
|
15
|
+
VERSION = '0.1.7pre4'
|
17
16
|
|
18
17
|
API_ENDPOINT_TWITTER = 'http://search.twitter.com/search.json'
|
19
18
|
API_ENDPOINT_TUMBLR = 'http://www.tumblr.com'
|
@@ -21,27 +20,23 @@ class Tweetlr
|
|
21
20
|
TWITTER_RESULTS_TYPE = 'recent'
|
22
21
|
UPDATE_PERIOD = 600 #10 minutes
|
23
22
|
|
24
|
-
|
25
|
-
|
23
|
+
include LogAware
|
24
|
+
# Class-level logger accessor: returns whatever LogAware.log returns.
def self.log
|
25
|
+
LogAware.log # `include` only adds instance methods, so class-level (singleton) code needs this explicit delegator
|
26
|
+
end
|
27
|
+
|
28
|
+
def initialize(args)
|
29
|
+
log = Logger.new(STDOUT)
|
26
30
|
if (Logger::DEBUG..Logger::UNKNOWN).to_a.index(args[:loglevel])
|
27
|
-
|
31
|
+
log.level = args[:loglevel]
|
28
32
|
else
|
29
|
-
|
33
|
+
log.level = Logger::INFO
|
30
34
|
end
|
31
|
-
|
32
|
-
LogAware.log
|
33
|
-
@twitter_config = {
|
34
|
-
:since_id => args[:since_id],
|
35
|
-
:search_term => args[:terms],
|
36
|
-
:results_per_page => args[:results_per_page] || TWITTER_RESULTS_PER_PAGE,
|
37
|
-
:result_type => args[:result_type] || TWITTER_RESULTS_TYPE,
|
38
|
-
:api_endpoint_twitter => args[:api_endpoint_twitter] || API_ENDPOINT_TWITTER
|
39
|
-
}
|
40
|
-
@twitter_config[:refresh_url] = "?ors=#{@twitter_config[:search_term]}&since_id=#{@twitter_config[:since_id]}&rpp=#{@twitter_config[:results_per_page]}&result_type=#{@twitter_config[:result_type]}" if (@twitter_config[:since_id] && @twitter_config[:search_term])
|
41
|
-
@twitter_config[:logger] = @log
|
35
|
+
log.debug "log level set to #{log.level}"
|
36
|
+
LogAware.log=log
|
42
37
|
|
43
|
-
@email =
|
44
|
-
@password =
|
38
|
+
@email = args[:tumblr_email]
|
39
|
+
@password = args[:tumblr_password]
|
45
40
|
@cookie = args[:cookie]
|
46
41
|
@api_endpoint_twitter =
|
47
42
|
@api_endpoint_tumblr = args[:api_endpoint_tumblr] || API_ENDPOINT_TUMBLR
|
@@ -51,79 +46,38 @@ class Tweetlr
|
|
51
46
|
@whitelist.each {|entry| entry.downcase!} if @whitelist
|
52
47
|
end
|
53
48
|
|
54
|
-
def
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
Curl::PostField.content('state', options[:state]),
|
74
|
-
Curl::PostField.content('tags', tags)
|
75
|
-
)
|
76
|
-
rescue Curl::Err::CurlError => err
|
77
|
-
@log.error "Failure in Curl call: #{err}"
|
78
|
-
tries -= 1
|
79
|
-
sleep 3
|
80
|
-
if tries > 0
|
81
|
-
retry
|
49
|
+
def self.crawl(config)
|
50
|
+
log.debug "#{self}.crawl() using config: #{config.inspect}"
|
51
|
+
twitter_config = {
|
52
|
+
:since_id => config[:since_id] || config[:start_at_tweet_id],
|
53
|
+
:search_term => config[:terms] || config[:search_term] ,
|
54
|
+
:results_per_page => config[:results_per_page] || TWITTER_RESULTS_PER_PAGE,
|
55
|
+
:result_type => config[:result_type] || TWITTER_RESULTS_TYPE,
|
56
|
+
:api_endpoint_twitter => config[:api_endpoint_twitter] || API_ENDPOINT_TWITTER
|
57
|
+
}
|
58
|
+
log.info "starting tweetlr crawl..."
|
59
|
+
response = {}
|
60
|
+
response = Processors::Twitter::lazy_search(twitter_config) #looks awkward, but the refresh url will come from the db soon and make sense then...
|
61
|
+
if response
|
62
|
+
tweets = response['results']
|
63
|
+
if tweets
|
64
|
+
tweets.each do |tweet|
|
65
|
+
tumblr_post = Combinators::TwitterTumblr::generate_photo_post_from_tweet(tweet, {:whitelist => config[:whitelist], :embedly_key => config[:embedly_key]})
|
66
|
+
if tumblr_post.nil? || tumblr_post[:source].nil?
|
67
|
+
log.warn "could not get image source: tweet: #{tweet} --- tumblr post: #{tumblr_post.inspect}"
|
82
68
|
else
|
83
|
-
|
69
|
+
log.debug "tumblr post: #{tumblr_post}"
|
70
|
+
res = Processors::Tumblr.post tumblr_post.merge({:password => config[:tumblr_password], :email => config[:tumblr_email]})
|
71
|
+
log.warn "tumblr response: #{res.header_str} #{res.body_str}" unless res.response_code == 201
|
84
72
|
end
|
73
|
+
end
|
74
|
+
# store the highest tweet id
|
75
|
+
config[:since_id] = response['max_id']
|
85
76
|
end
|
77
|
+
else
|
78
|
+
log.error "twitter search returned no response. hail the failwhale!"
|
86
79
|
end
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
#generate the data for a tumblr photo entry by parsing a tweet
|
91
|
-
def generate_tumblr_photo_post tweet
|
92
|
-
tumblr_post = nil
|
93
|
-
message = tweet['text']
|
94
|
-
if !TwitterProcessor::retweet? message
|
95
|
-
@log.debug "tweet: #{tweet}"
|
96
|
-
tumblr_post = {}
|
97
|
-
tumblr_post[:type] = 'photo'
|
98
|
-
tumblr_post[:date] = tweet['created_at']
|
99
|
-
tumblr_post[:source] = extract_image_url tweet
|
100
|
-
user = tweet['from_user']
|
101
|
-
tumblr_post[:tags] = user
|
102
|
-
tweet_id = tweet['id']
|
103
|
-
if !@whitelist || @whitelist.member?(user.downcase)
|
104
|
-
state = 'published'
|
105
|
-
else
|
106
|
-
state = 'draft'
|
107
|
-
end
|
108
|
-
tumblr_post[:state] = state
|
109
|
-
shouts = " #{@shouts}" if @shouts
|
110
|
-
tumblr_post[:caption] = %?<a href="http://twitter.com/#{user}/statuses/#{tweet_id}" alt="tweet">@#{user}</a>#{shouts}: #{tweet['text']}?
|
111
|
-
#TODO make the caption a bigger matter of yml/ general configuration
|
112
|
-
end
|
113
|
-
tumblr_post
|
114
|
-
end
|
115
|
-
|
116
|
-
#extract a linked image file's url from a tweet. first found image will be used.
|
117
|
-
def extract_image_url(tweet)
|
118
|
-
links = TwitterProcessor::extract_links tweet
|
119
|
-
image_url = nil
|
120
|
-
if links
|
121
|
-
links.each do |link|
|
122
|
-
image_url = PhotoServiceProcessor::find_image_url(link)
|
123
|
-
return image_url if PhotoServiceProcessor::photo? image_url
|
124
|
-
end
|
125
|
-
end
|
126
|
-
image_url
|
127
|
-
end
|
128
|
-
|
80
|
+
log.info "finished tweetlr crawl."
|
81
|
+
return config
|
82
|
+
end
|
129
83
|
end
|