tweetlr 0.1.17 → 0.1.18
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +5 -3
- data/bin/tweetlr +24 -20
- data/config/tweetlr.yml +9 -9
- data/lib/{combinators → tweetlr/combinators}/twitter_tumblr.rb +12 -12
- data/lib/tweetlr/core.rb +93 -0
- data/lib/{log_aware.rb → tweetlr/log_aware.rb} +3 -1
- data/lib/{processors → tweetlr/processors}/http.rb +4 -4
- data/lib/{processors → tweetlr/processors}/photo_service.rb +22 -30
- data/lib/tweetlr/processors/tumblr.rb +55 -0
- data/lib/{processors → tweetlr/processors}/twitter.rb +8 -7
- data/lib/tweetlr.rb +5 -82
- data/spec/combinators/twitter_tumblr_combinator_spec.rb +15 -15
- data/spec/core_spec.rb +60 -0
- data/spec/processors/photo_services_processor_spec.rb +9 -9
- data/spec/processors/tumblr_processor_spec.rb +3 -3
- data/spec/processors/twitter_processor_spec.rb +4 -4
- data/spec/spec_helper.rb +5 -3
- data/tweetlr.gemspec +1 -1
- metadata +29 -28
- data/lib/processors/tumblr.rb +0 -51
- data/spec/tweetlr_spec.rb +0 -28
data/README.md
CHANGED
@@ -40,9 +40,11 @@ search_term: 'cat+dog+unicorn' #find tweets containing any of these terms
|
|
40
40
|
start_at_tweet_id: 61847783463854082 # the tweet id to start searching at
|
41
41
|
api_endpoint_twitter: 'http://search.twitter.com/search.json'
|
42
42
|
api_endpoint_tumblr: 'http://www.tumblr.com'
|
43
|
-
|
44
|
-
|
45
|
-
|
43
|
+
tumblr_oauth_api_key: YOUR APPS TUMBLR API TOKEN
|
44
|
+
tumblr_oauth_api_secret: YOUR APPS TUMBLR API SECRET
|
45
|
+
tumblr_oauth_access_token_key: YOUR BLOGS OAUTH ACCESS TOKEN KEY
|
46
|
+
tumblr_oauth_access_token_secret: YOUR BLOGS OAUTH ACCESS TOKEN SECRET
|
47
|
+
tumblr_blog_hostname: YOUR BLOGS HOSTNAME #e.g. myblog.tumblr.com
|
46
48
|
embedly_key: '' #tweetlr uses http://embedly.com for link processing. a free plan containing an api key is available & recommended to use in order to ensure full support
|
47
49
|
update_period: 300 #check for updates every 300 secs = 5 minutes
|
48
50
|
shouts: 'says' # will be concatenated after the username, before the message: @mr_x says: awesome things on a photo!
|
data/bin/tweetlr
CHANGED
@@ -22,25 +22,7 @@ begin
|
|
22
22
|
|
23
23
|
UPDATE_PERIOD = CONFIG['update_period']
|
24
24
|
|
25
|
-
@tweetlr_config =
|
26
|
-
:tumblr_blog_hostname => CONFIG['tumblr_blog_hostname'] || CONFIG['group'],
|
27
|
-
:tumblr_oauth_api_key => CONFIG['tumblr_oauth_api_key'],
|
28
|
-
:tumblr_oauth_api_secret => CONFIG['tumblr_oauth_api_secret'],
|
29
|
-
:tumblr_blog_hostname => CONFIG['tumblr_blog_hostname'],
|
30
|
-
:tumblr_oauth_access_token_key => CONFIG['tumblr_oauth_access_token_key'],
|
31
|
-
:tumblr_oauth_access_token_secret => CONFIG['tumblr_oauth_access_token_secret'],
|
32
|
-
:whitelist => CONFIG['whitelist'],
|
33
|
-
:shouts => CONFIG['shouts'],
|
34
|
-
:since_id => CONFIG['start_at_tweet_id'] ,
|
35
|
-
:terms => CONFIG['search_term'],
|
36
|
-
:loglevel => CONFIG['loglevel'],
|
37
|
-
:update_period => UPDATE_PERIOD,
|
38
|
-
:api_endpoint_tumblr => CONFIG['api_endpoint_tumblr'],
|
39
|
-
:api_endpoint_twitter => CONFIG['api_endpoint_twitter'],
|
40
|
-
:results_per_page => CONFIG['results_per_page'],
|
41
|
-
:result_type => CONFIG['result_type'],
|
42
|
-
:embedly_key => CONFIG['embedly_key']
|
43
|
-
}
|
25
|
+
@tweetlr_config = prepare_tweetlr_config CONFIG
|
44
26
|
rescue SystemCallError
|
45
27
|
$stderr.puts "Ooops - looks like there is no ./config/tweetlr.yml found. I'm affraid tweetlr won't work properly until you introduced that configuration file."
|
46
28
|
exit(1)
|
@@ -52,9 +34,31 @@ Daemons.run_proc('tweetlr', :dir_mode => :script, :dir => './', :backtrace => tr
|
|
52
34
|
@log.info "creating a new tweetlr instance using this config: #{@tweetlr_config.inspect}"
|
53
35
|
EventMachine::run do
|
54
36
|
EventMachine::add_periodic_timer( UPDATE_PERIOD ) do
|
55
|
-
response = Tweetlr.crawl(@tweetlr_config)
|
37
|
+
response = Tweetlr::Core.crawl(@tweetlr_config)
|
56
38
|
File.open(tid_file, "w+") { |io| io.write(response[:since_id]) }
|
57
39
|
@tweetlr_config.merge! response
|
58
40
|
end
|
59
41
|
end
|
42
|
+
end
|
43
|
+
|
44
|
+
def prepare_tweetlr_config(config)
|
45
|
+
{
|
46
|
+
:tumblr_blog_hostname => config['tumblr_blog_hostname'] || config['group'],
|
47
|
+
:tumblr_oauth_api_key => config['tumblr_oauth_api_key'],
|
48
|
+
:tumblr_oauth_api_secret => config['tumblr_oauth_api_secret'],
|
49
|
+
:tumblr_blog_hostname => config['tumblr_blog_hostname'],
|
50
|
+
:tumblr_oauth_access_token_key => config['tumblr_oauth_access_token_key'],
|
51
|
+
:tumblr_oauth_access_token_secret => config['tumblr_oauth_access_token_secret'],
|
52
|
+
:whitelist => config['whitelist'],
|
53
|
+
:shouts => config['shouts'],
|
54
|
+
:since_id => config['start_at_tweet_id'] ,
|
55
|
+
:terms => config['search_term'],
|
56
|
+
:loglevel => config['loglevel'],
|
57
|
+
:update_period => UPDATE_PERIOD,
|
58
|
+
:api_endpoint_tumblr => config['api_endpoint_tumblr'],
|
59
|
+
:api_endpoint_twitter => config['api_endpoint_twitter'],
|
60
|
+
:results_per_page => config['results_per_page'],
|
61
|
+
:result_type => config['result_type'],
|
62
|
+
:embedly_key => config['embedly_key']
|
63
|
+
}
|
60
64
|
end
|
data/config/tweetlr.yml
CHANGED
@@ -1,18 +1,18 @@
|
|
1
1
|
results_per_page: 100
|
2
2
|
result_type: recent
|
3
|
-
search_term: '
|
3
|
+
search_term: 'coffeediary' #find tweets containing any of these terms
|
4
4
|
start_at_tweet_id: 61847783463854082 # the tweet id to start searching at
|
5
5
|
api_endpoint_twitter: 'http://search.twitter.com/search.json'
|
6
6
|
api_endpoint_tumblr: 'http://www.tumblr.com'
|
7
|
-
tumblr_oauth_api_key:
|
8
|
-
tumblr_oauth_api_secret:
|
9
|
-
tumblr_oauth_access_token_key:
|
10
|
-
tumblr_oauth_access_token_secret:
|
11
|
-
|
12
|
-
embedly_key: '' #tweetlr uses http://embedly.com for link processing. a free plan containing an api key is available & recommended to use in order to ensure full support
|
13
|
-
update_period:
|
7
|
+
tumblr_oauth_api_key: 'Buq8j3koYLqrZEMmTM4GL32S0guZU2Qvoz8xSvFRumaWuaxAnG'
|
8
|
+
tumblr_oauth_api_secret: 'EnjBUAjUHo4Qi4d3BPUL5xsdc8qClDqRTLssz8Jzd2sKC7KZaq'
|
9
|
+
tumblr_oauth_access_token_key: 'MQES8SEqr3JogLyUYzcV68RFfQ0b3ClxbnUihChh8p9UMH3tkM'
|
10
|
+
tumblr_oauth_access_token_secret: 'FBnrEFW1p9RG7Zh1kztPjPDCbeE229fMPX5VwuzgZqUdD2hXSS'
|
11
|
+
tumblr_blog_hostname: 'tweetlr-testlr.tumblr.com' #e.g. mysubblog.tumblr.com
|
12
|
+
embedly_key: '9e6c2bb8372e11e1a92e4040d3dc5c07' #tweetlr uses http://embedly.com for link processing. a free plan containing an api key is available & recommended to use in order to ensure full support
|
13
|
+
update_period: 10 #check for updates every 10 secs
|
14
14
|
shouts: 'says' # will be concatenated after the username, before the message: @mr_x says: awesome things on a photo!
|
15
15
|
loglevel: 1 # 0: debug, 1: info (default), 2: warn, 3: error, 4: fatal, 5: unknown
|
16
16
|
whitelist: #twitter accounts in this list will have their tweets published immediately. posts from others will be saved as drafts. a blank list will publish all tweets immediately
|
17
17
|
- whitey_mc_whitelist
|
18
|
-
- sven_kr
|
18
|
+
- sven_kr
|
@@ -1,23 +1,23 @@
|
|
1
|
-
|
2
|
-
require
|
3
|
-
require
|
1
|
+
local_path=File.dirname(__FILE__)
|
2
|
+
require "#{local_path}/../processors/twitter"
|
3
|
+
require "#{local_path}/../processors/tumblr"
|
4
|
+
require "#{local_path}/../processors/photo_service"
|
5
|
+
require "#{local_path}/../log_aware"
|
4
6
|
|
5
|
-
|
6
|
-
|
7
|
-
module Combinators
|
7
|
+
module Tweetlr::Combinators
|
8
8
|
module TwitterTumblr
|
9
|
-
include LogAware
|
9
|
+
include Tweetlr::LogAware
|
10
10
|
def self.log
|
11
|
-
LogAware.log #TODO why doesn't the include make the log method accessible?
|
11
|
+
Tweetlr::LogAware.log #TODO why doesn't the include make the log method accessible?
|
12
12
|
end
|
13
13
|
#extract a linked image file's url from a tweet. first found image will be used.
|
14
14
|
def self.extract_image_url(tweet, embedly_key=nil)
|
15
|
-
links = Processors::Twitter::extract_links tweet
|
15
|
+
links = Tweetlr::Processors::Twitter::extract_links tweet
|
16
16
|
image_url = nil
|
17
17
|
if links
|
18
18
|
links.each do |link|
|
19
|
-
image_url = Processors::PhotoService::find_image_url(link, embedly_key)
|
20
|
-
return image_url if Processors::PhotoService::photo? image_url
|
19
|
+
image_url = Tweetlr::Processors::PhotoService::find_image_url(link, embedly_key)
|
20
|
+
return image_url if Tweetlr::Processors::PhotoService::photo? image_url
|
21
21
|
end
|
22
22
|
end
|
23
23
|
image_url
|
@@ -29,7 +29,7 @@ module Combinators
|
|
29
29
|
message = tweet['text']
|
30
30
|
whitelist = options[:whitelist]
|
31
31
|
whitelist.each {|entry| entry.downcase!} if (whitelist && whitelist.size != 0)
|
32
|
-
if !Processors::Twitter::retweet? message
|
32
|
+
if !Tweetlr::Processors::Twitter::retweet? message
|
33
33
|
log.debug "tweet: #{tweet}"
|
34
34
|
tumblr_post = {}
|
35
35
|
tumblr_post[:tumblr_blog_hostname] = options[:tumblr_blog_hostname] || options[:group]
|
data/lib/tweetlr/core.rb
ADDED
@@ -0,0 +1,93 @@
|
|
1
|
+
# encode: UTF-8
|
2
|
+
local_path=File.dirname(__FILE__)
|
3
|
+
require "#{local_path}/processors/twitter"
|
4
|
+
require "#{local_path}/processors/http"
|
5
|
+
require "#{local_path}/processors/photo_service"
|
6
|
+
require "#{local_path}/processors/tumblr"
|
7
|
+
require "#{local_path}/combinators/twitter_tumblr"
|
8
|
+
require "#{local_path}/log_aware"
|
9
|
+
require 'uri'
|
10
|
+
|
11
|
+
class Tweetlr::Core
|
12
|
+
include Tweetlr::LogAware
|
13
|
+
def self.log
|
14
|
+
Tweetlr::LogAware.log #TODO why doesn't the include make the log method accessible?
|
15
|
+
end
|
16
|
+
|
17
|
+
def initialize(args)
|
18
|
+
log = Logger.new(STDOUT)
|
19
|
+
if (Logger::DEBUG..Logger::UNKNOWN).to_a.index(args[:loglevel])
|
20
|
+
log.level = args[:loglevel]
|
21
|
+
else
|
22
|
+
log.level = Logger::INFO
|
23
|
+
end
|
24
|
+
log.debug "log level set to #{log.level}"
|
25
|
+
Tweetlr::LogAware.log=log
|
26
|
+
|
27
|
+
@email = args[:tumblr_email]
|
28
|
+
@password = args[:tumblr_password]
|
29
|
+
@cookie = args[:cookie]
|
30
|
+
@api_endpoint_twitter = args[:api_endpoint_twitter] || Tweetlr::API_ENDPOINT_TWITTER
|
31
|
+
@api_endpoint_tumblr = args[:api_endpoint_tumblr] || Tweetlr::API_ENDPOINT_TUMBLR
|
32
|
+
@whitelist = args[:whitelist]
|
33
|
+
@shouts = args[:shouts]
|
34
|
+
@update_period = args[:update_period] || Tweetlr::UPDATE_PERIOD
|
35
|
+
@whitelist.each {|entry| entry.downcase!} if @whitelist
|
36
|
+
log.info "Tweetlr #{Tweetlr::VERSION} initialized. Ready to roll."
|
37
|
+
end
|
38
|
+
|
39
|
+
def self.crawl(config)
|
40
|
+
log.debug "#{self}.crawl() using config: #{config.inspect}"
|
41
|
+
twitter_config = prepare_twitter_config config
|
42
|
+
tumblr_config = prepare_tumblr_config config
|
43
|
+
twitter_config[:search_term] = URI::escape(twitter_config[:search_term]) if twitter_config[:search_term]
|
44
|
+
log.info "starting tweetlr crawl..."
|
45
|
+
response = {}
|
46
|
+
response = Tweetlr::Processors::Twitter::lazy_search(twitter_config)
|
47
|
+
if response
|
48
|
+
tweets = response['results']
|
49
|
+
if tweets
|
50
|
+
tweets.each do |tweet|
|
51
|
+
tumblr_post = Tweetlr::Combinators::TwitterTumblr::generate_photo_post_from_tweet(tweet, {:whitelist => config[:whitelist], :embedly_key => config[:embedly_key], :group => config[:group]})
|
52
|
+
if tumblr_post.nil? || tumblr_post[:source].nil?
|
53
|
+
log.warn "could not get image source: tweet: #{tweet} --- tumblr post: #{tumblr_post.inspect}"
|
54
|
+
else
|
55
|
+
log.debug "tumblr post: #{tumblr_post}"
|
56
|
+
res = Tweetlr::Processors::Tumblr.post tumblr_post.merge(tumblr_config)
|
57
|
+
log.debug "tumblr response: #{res}"
|
58
|
+
if res.code == "201"
|
59
|
+
log.info "tumblr post created (tumblr response: #{res.header} #{res.body}"
|
60
|
+
else
|
61
|
+
log.warn "tumblr response: #{res.header} #{res.body}"
|
62
|
+
end
|
63
|
+
end
|
64
|
+
end
|
65
|
+
# store the highest tweet id
|
66
|
+
config[:since_id] = response['max_id']
|
67
|
+
end
|
68
|
+
else
|
69
|
+
log.error "twitter search returned no response. hail the failwhale!"
|
70
|
+
end
|
71
|
+
log.info "finished tweetlr crawl."
|
72
|
+
return config
|
73
|
+
end
|
74
|
+
private
|
75
|
+
def self.prepare_twitter_config(config)
|
76
|
+
{
|
77
|
+
:since_id => config[:since_id] || config[:start_at_tweet_id],
|
78
|
+
:search_term => config[:terms] || config[:search_term] ,
|
79
|
+
:results_per_page => config[:results_per_page] || Tweetlr::TWITTER_RESULTS_PER_PAGE,
|
80
|
+
:result_type => config[:result_type] || Tweetlr::TWITTER_RESULTS_TYPE,
|
81
|
+
:api_endpoint_twitter => config[:api_endpoint_twitter] || Tweetlr::API_ENDPOINT_TWITTER
|
82
|
+
}
|
83
|
+
end
|
84
|
+
def self.prepare_tumblr_config(config)
|
85
|
+
{
|
86
|
+
:tumblr_oauth_access_token_key => config[:tumblr_oauth_access_token_key],
|
87
|
+
:tumblr_oauth_access_token_secret => config[:tumblr_oauth_access_token_secret],
|
88
|
+
:tumblr_oauth_api_key => config[:tumblr_oauth_api_key],
|
89
|
+
:tumblr_oauth_api_secret => config[:tumblr_oauth_api_secret],
|
90
|
+
:tumblr_blog_hostname => config[:tumblr_blog_hostname] || config[:group]
|
91
|
+
}
|
92
|
+
end
|
93
|
+
end
|
@@ -1,16 +1,16 @@
|
|
1
1
|
require 'curb'
|
2
2
|
require 'json'
|
3
|
-
require
|
3
|
+
require "#{File.dirname(__FILE__)}/../log_aware"
|
4
4
|
|
5
|
-
module Processors
|
5
|
+
module Tweetlr::Processors
|
6
6
|
#utilities for handling http
|
7
7
|
module Http
|
8
|
-
include LogAware
|
8
|
+
include Tweetlr::LogAware
|
9
9
|
|
10
10
|
USER_AGENT = %{Mozilla/5.0 (compatible; tweetlr; +http://tweetlr.5v3n.com)}
|
11
11
|
|
12
12
|
def self.log
|
13
|
-
LogAware.log #TODO why doesn't the include make the log method accessible?
|
13
|
+
Tweetlr::LogAware.log #TODO why doesn't the include make the log method accessible?
|
14
14
|
end
|
15
15
|
#convenience method for curl http get calls
|
16
16
|
def self.http_get(request)
|
@@ -1,8 +1,9 @@
|
|
1
|
-
|
1
|
+
local_path=File.dirname(__FILE__)
|
2
|
+
require "#{local_path}/http"
|
3
|
+
require "#{local_path}/../log_aware"
|
2
4
|
require 'nokogiri'
|
3
|
-
require 'log_aware'
|
4
5
|
|
5
|
-
module Processors
|
6
|
+
module Tweetlr::Processors
|
6
7
|
#utilities for dealing with photo services
|
7
8
|
module PhotoService
|
8
9
|
|
@@ -10,10 +11,10 @@ module Processors
|
|
10
11
|
LOCATION_STOP_INDICATOR = "\r\n"
|
11
12
|
PIC_REGEXP = /(.*?)\.(jpg|jpeg|png|gif)/i
|
12
13
|
|
13
|
-
include LogAware
|
14
|
+
include Tweetlr::LogAware
|
14
15
|
|
15
16
|
def self.log
|
16
|
-
LogAware.log #TODO why doesn't the include make the log method accessible?
|
17
|
+
Tweetlr::LogAware.log #TODO why doesn't the include make the log method accessible?
|
17
18
|
end
|
18
19
|
|
19
20
|
def self.find_image_url(link, embedly_key=nil)
|
@@ -41,40 +42,24 @@ module Processors
|
|
41
42
|
link =~ PIC_REGEXP
|
42
43
|
end
|
43
44
|
def self.image_url_twimg(link_url)
|
44
|
-
|
45
|
-
link_url = service_url if service_url #if there's no redirect, service_url will be nil
|
46
|
-
response = Processors::Http::http_get(link_url)
|
47
|
-
image_url = parse_html_for '.twimg img', Nokogiri::HTML.parse(response.body_str)
|
48
|
-
return image_url
|
45
|
+
retrieve_image_url_by_css link_url, '.twimg img'
|
49
46
|
end
|
50
47
|
#extract the image of an eyeem.com pic
|
51
48
|
def self.image_url_eyeem(link_url)
|
52
|
-
|
53
|
-
link_url = service_url if service_url #if there's no redirect, service_url will be nil
|
54
|
-
response = Processors::Http::http_get link_url
|
55
|
-
image_url = parse_html_for '.viewport-pic img', Nokogiri::HTML.parse(response.body_str)
|
56
|
-
return image_url
|
49
|
+
retrieve_image_url_by_css link_url, '.viewport-pic img'
|
57
50
|
end
|
58
51
|
#extract the image of a foursquare.com pic
|
59
52
|
def self.image_url_foursqaure(link_url)
|
60
|
-
|
61
|
-
link_url = service_url if service_url #if there's no redirect, service_url will be nil
|
62
|
-
response = Processors::Http::http_get link_url
|
63
|
-
image_url = parse_html_for '.commentPhoto img', Nokogiri::HTML.parse(response.body_str)
|
64
|
-
return image_url
|
53
|
+
retrieve_image_url_by_css link_url, '.commentPhoto img'
|
65
54
|
end
|
66
55
|
#extract the image of a path.com pic
|
67
56
|
def self.image_url_path(link_url)
|
68
|
-
|
69
|
-
link_url = service_url if service_url #if there's no redirect, service_url will be nil
|
70
|
-
response = Processors::Http::http_get link_url
|
71
|
-
image_url = parse_html_for 'img.photo-image', Nokogiri::HTML.parse(response.body_str)
|
72
|
-
return image_url
|
57
|
+
retrieve_image_url_by_css link_url, 'img.photo-image'
|
73
58
|
end
|
74
59
|
|
75
60
|
#find the image's url via embed.ly
|
76
61
|
def self.image_url_embedly(link_url, key)
|
77
|
-
response = Processors::Http::http_get_json "http://api.embed.ly/1/oembed?key=#{key}&url=#{link_url}"
|
62
|
+
response = Tweetlr::Processors::Http::http_get_json "http://api.embed.ly/1/oembed?key=#{key}&url=#{link_url}"
|
78
63
|
log.debug "embedly call: http://api.embed.ly/1/oembed?key=#{key}&url=#{link_url}"
|
79
64
|
if response && response['type'] == 'photo'
|
80
65
|
image_url = response['url']
|
@@ -83,7 +68,7 @@ module Processors
|
|
83
68
|
end
|
84
69
|
#find the image's url for a lockerz link
|
85
70
|
def self.image_url_lockerz(link_url)
|
86
|
-
response = Processors::Http::http_get_json "http://api.plixi.com/api/tpapi.svc/json/metadatafromurl?details=false&url=#{link_url}"
|
71
|
+
response = Tweetlr::Processors::Http::http_get_json "http://api.plixi.com/api/tpapi.svc/json/metadatafromurl?details=false&url=#{link_url}"
|
87
72
|
response["BigImageUrl"] if response
|
88
73
|
end
|
89
74
|
#find the image's url for a twitter shortened link
|
@@ -94,7 +79,7 @@ module Processors
|
|
94
79
|
#find the image's url for an instagram link
|
95
80
|
def self.image_url_instagram(link_url)
|
96
81
|
link_url['instagram.com'] = 'instagr.am' if link_url.index 'instagram.com' #instagram's oembed does not work for .com links
|
97
|
-
response = Processors::Http::http_get_json "http://api.instagram.com/oembed?url=#{link_url}"
|
82
|
+
response = Tweetlr::Processors::Http::http_get_json "http://api.instagram.com/oembed?url=#{link_url}"
|
98
83
|
response['url'] if response
|
99
84
|
end
|
100
85
|
|
@@ -102,7 +87,7 @@ module Processors
|
|
102
87
|
def self.image_url_picplz(link_url)
|
103
88
|
id = extract_id link_url
|
104
89
|
#try short url
|
105
|
-
response = Processors::Http::http_get_json "http://picplz.com/api/v2/pic.json?shorturl_ids=#{id}"
|
90
|
+
response = Tweetlr::Processors::Http::http_get_json "http://picplz.com/api/v2/pic.json?shorturl_ids=#{id}"
|
106
91
|
#if short url fails, try long url
|
107
92
|
#response = HTTParty.get "http://picplz.com/api/v2/pic.json?longurl_ids=#{id}"
|
108
93
|
#extract url
|
@@ -118,7 +103,7 @@ module Processors
|
|
118
103
|
end
|
119
104
|
#find the image's url for a yfrog link
|
120
105
|
def self.image_url_yfrog(link_url)
|
121
|
-
response = Processors::Http::http_get_json("http://www.yfrog.com/api/oembed?url=#{link_url}")
|
106
|
+
response = Tweetlr::Processors::Http::http_get_json("http://www.yfrog.com/api/oembed?url=#{link_url}")
|
122
107
|
response['url'] if response
|
123
108
|
end
|
124
109
|
#find the image's url for a img.ly link
|
@@ -169,5 +154,12 @@ module Processors
|
|
169
154
|
end
|
170
155
|
image_url
|
171
156
|
end
|
157
|
+
def self.retrieve_image_url_by_css link_url, css_path
|
158
|
+
service_url = link_url_redirect link_url #follow possible redirects
|
159
|
+
link_url = service_url if service_url #if there's no redirect, service_url will be nil
|
160
|
+
response = Tweetlr::Processors::Http::http_get link_url
|
161
|
+
image_url = parse_html_for css_path, Nokogiri::HTML.parse(response.body_str)
|
162
|
+
return image_url
|
163
|
+
end
|
172
164
|
end
|
173
165
|
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
require "#{File.dirname(__FILE__)}/../log_aware"
|
2
|
+
require 'oauth'
|
3
|
+
|
4
|
+
module Tweetlr::Processors
|
5
|
+
#utilities for handling tumblr
|
6
|
+
module Tumblr
|
7
|
+
GENERATOR = %{tweetlr - http://tweetlr.5v3n.com}
|
8
|
+
API_ENDPOINT_TUMBLR = 'http://www.tumblr.com'
|
9
|
+
include Tweetlr::LogAware
|
10
|
+
def self.log
|
11
|
+
Tweetlr::LogAware.log #TODO why doesn't the include make the log method accessible?
|
12
|
+
end
|
13
|
+
#post a tumblr photo entry.
|
14
|
+
#
|
15
|
+
#required arguments are :tumblr_blog_hostname, :tumblr_oauth_api_key, :tumblr_oauth_api_secret, :tumblr_oauth_access_token_key, :tumblr_oauth_access_token_secret, :source, :caption, :state
|
16
|
+
#
|
17
|
+
#optional arguments: :tags, :type (default: 'photo')
|
18
|
+
#
|
19
|
+
def self.post(options={})
|
20
|
+
base_hostname = options[:tumblr_blog_hostname] || options[:group]
|
21
|
+
tumblr_oauth_api_key= options[:tumblr_oauth_api_key]
|
22
|
+
tumblr_oauth_api_secret= options[:tumblr_oauth_api_secret]
|
23
|
+
access_token_key = options[:tumblr_oauth_access_token_key]
|
24
|
+
access_token_secret = options[:tumblr_oauth_access_token_secret]
|
25
|
+
type = options[:type] || 'photo'
|
26
|
+
tags = options[:tags] || ''
|
27
|
+
post_response = nil
|
28
|
+
|
29
|
+
if base_hostname && access_token_key && access_token_secret
|
30
|
+
|
31
|
+
consumer = OAuth::Consumer.new(tumblr_oauth_api_key, tumblr_oauth_api_secret,
|
32
|
+
{ :site => 'http://www.tumblr.com',
|
33
|
+
:request_token_path => '/oauth/request_token',
|
34
|
+
:authorize_path => '/oauth/authorize',
|
35
|
+
:access_token_path => '/oauth/access_token',
|
36
|
+
:http_method => :post } )
|
37
|
+
|
38
|
+
access_token = OAuth::AccessToken.new(consumer, access_token_key, access_token_secret)
|
39
|
+
|
40
|
+
post_response = access_token.post(
|
41
|
+
"http://api.tumblr.com/v2/blog/#{base_hostname}/post", {
|
42
|
+
:type => type,
|
43
|
+
:source => options[:source],
|
44
|
+
:caption => options[:caption],
|
45
|
+
:date => options[:date],
|
46
|
+
:tags => tags,
|
47
|
+
:state => options[:state],
|
48
|
+
:generator => GENERATOR
|
49
|
+
}
|
50
|
+
)
|
51
|
+
end
|
52
|
+
post_response
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
@@ -1,12 +1,13 @@
|
|
1
|
-
|
2
|
-
require
|
1
|
+
local_path=File.dirname(__FILE__)
|
2
|
+
require "#{local_path}/http"
|
3
|
+
require "#{local_path}/../log_aware"
|
3
4
|
|
4
|
-
module Processors
|
5
|
+
module Tweetlr::Processors
|
5
6
|
#utilities for dealing with twitter
|
6
7
|
module Twitter
|
7
|
-
include LogAware
|
8
|
+
include Tweetlr::LogAware
|
8
9
|
def self.log
|
9
|
-
LogAware.log #TODO why doesn't the include make the log method accessible?
|
10
|
+
Tweetlr::LogAware.log #TODO why doesn't the include make the log method accessible?
|
10
11
|
end
|
11
12
|
|
12
13
|
#checks if the message is a retweet
|
@@ -25,7 +26,7 @@ module Processors
|
|
25
26
|
#fire a new search
|
26
27
|
def self.search(config)
|
27
28
|
search_call = "#{config[:api_endpoint_twitter]}?ors=#{config[:search_term]}&result_type=#{config[:result_type]}&rpp=#{config[:results_per_page]}"
|
28
|
-
Processors::Http::http_get_json search_call
|
29
|
+
Tweetlr::Processors::Http::http_get_json search_call
|
29
30
|
end
|
30
31
|
|
31
32
|
# lazy update - search for a term or refresh the search if a response is available already
|
@@ -34,7 +35,7 @@ module Processors
|
|
34
35
|
if config
|
35
36
|
search_url = "#{config[:api_endpoint_twitter]}?since_id=#{config[:since_id]}&ors=#{config[:search_term]}&result_type=#{config[:result_type]}&rpp=#{config[:results_per_page]}"
|
36
37
|
log.info "lazy search using '#{search_url}'"
|
37
|
-
response = Processors::Http::http_get_json search_url
|
38
|
+
response = Tweetlr::Processors::Http::http_get_json search_url
|
38
39
|
else
|
39
40
|
log.error "#{self}.lazy_search: no config given!"
|
40
41
|
end
|
data/lib/tweetlr.rb
CHANGED
@@ -1,89 +1,12 @@
|
|
1
|
-
|
2
|
-
require '
|
3
|
-
require 'processors/http'
|
4
|
-
require 'processors/photo_service'
|
5
|
-
require 'processors/tumblr'
|
6
|
-
require 'combinators/twitter_tumblr'
|
7
|
-
require 'log_aware'
|
8
|
-
require 'uri'
|
1
|
+
require 'tweetlr/log_aware'
|
2
|
+
require 'tweetlr/core'
|
9
3
|
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
4
|
+
module Tweetlr
|
5
|
+
VERSION = '0.1.18'
|
6
|
+
|
14
7
|
API_ENDPOINT_TWITTER = 'http://search.twitter.com/search.json'
|
15
8
|
API_ENDPOINT_TUMBLR = 'http://www.tumblr.com'
|
16
9
|
TWITTER_RESULTS_PER_PAGE = 100
|
17
10
|
TWITTER_RESULTS_TYPE = 'recent'
|
18
11
|
UPDATE_PERIOD = 600 #10 minutes
|
19
|
-
|
20
|
-
include LogAware
|
21
|
-
def self.log
|
22
|
-
LogAware.log #TODO why doesn't the include make the log method accessible?
|
23
|
-
end
|
24
|
-
|
25
|
-
def initialize(args)
|
26
|
-
log = Logger.new(STDOUT)
|
27
|
-
if (Logger::DEBUG..Logger::UNKNOWN).to_a.index(args[:loglevel])
|
28
|
-
log.level = args[:loglevel]
|
29
|
-
else
|
30
|
-
log.level = Logger::INFO
|
31
|
-
end
|
32
|
-
log.debug "log level set to #{log.level}"
|
33
|
-
LogAware.log=log
|
34
|
-
|
35
|
-
@email = args[:tumblr_email]
|
36
|
-
@password = args[:tumblr_password]
|
37
|
-
@cookie = args[:cookie]
|
38
|
-
@api_endpoint_twitter = args[:api_endpoint_twitter] || API_ENDPOINT_TWITTER
|
39
|
-
@api_endpoint_tumblr = args[:api_endpoint_tumblr] || API_ENDPOINT_TUMBLR
|
40
|
-
@whitelist = args[:whitelist]
|
41
|
-
@shouts = args[:shouts]
|
42
|
-
@update_period = args[:update_period] || UPDATE_PERIOD
|
43
|
-
@whitelist.each {|entry| entry.downcase!} if @whitelist
|
44
|
-
log.info "Tweetlr #{Tweetlr::VERSION} initialized. Ready to roll."
|
45
|
-
end
|
46
|
-
|
47
|
-
def self.crawl(config)
|
48
|
-
log.debug "#{self}.crawl() using config: #{config.inspect}"
|
49
|
-
twitter_config = {
|
50
|
-
:since_id => config[:since_id] || config[:start_at_tweet_id],
|
51
|
-
:search_term => config[:terms] || config[:search_term] ,
|
52
|
-
:results_per_page => config[:results_per_page] || TWITTER_RESULTS_PER_PAGE,
|
53
|
-
:result_type => config[:result_type] || TWITTER_RESULTS_TYPE,
|
54
|
-
:api_endpoint_twitter => config[:api_endpoint_twitter] || API_ENDPOINT_TWITTER
|
55
|
-
}
|
56
|
-
tumblr_config = { :tumblr_oauth_access_token_key => config[:tumblr_oauth_access_token_key],
|
57
|
-
:tumblr_oauth_access_token_secret => config[:tumblr_oauth_access_token_secret],
|
58
|
-
:tumblr_oauth_api_key => config[:tumblr_oauth_api_key],
|
59
|
-
:tumblr_oauth_api_secret => config[:tumblr_oauth_api_secret],
|
60
|
-
:tumblr_blog_hostname => config[:tumblr_blog_hostname] || config[:group]
|
61
|
-
}
|
62
|
-
|
63
|
-
twitter_config[:search_term] = URI::escape(twitter_config[:search_term]) if twitter_config[:search_term]
|
64
|
-
log.info "starting tweetlr crawl..."
|
65
|
-
response = {}
|
66
|
-
response = Processors::Twitter::lazy_search(twitter_config)
|
67
|
-
if response
|
68
|
-
tweets = response['results']
|
69
|
-
if tweets
|
70
|
-
tweets.each do |tweet|
|
71
|
-
tumblr_post = Combinators::TwitterTumblr::generate_photo_post_from_tweet(tweet, {:whitelist => config[:whitelist], :embedly_key => config[:embedly_key], :group => config[:group]})
|
72
|
-
if tumblr_post.nil? || tumblr_post[:source].nil?
|
73
|
-
log.warn "could not get image source: tweet: #{tweet} --- tumblr post: #{tumblr_post.inspect}"
|
74
|
-
else
|
75
|
-
log.debug "tumblr post: #{tumblr_post}"
|
76
|
-
res = Processors::Tumblr.post tumblr_post.merge(tumblr_config)
|
77
|
-
log.warn "tumblr response: #{res.header} #{res.body}" unless res.code == "201"
|
78
|
-
end
|
79
|
-
end
|
80
|
-
# store the highest tweet id
|
81
|
-
config[:since_id] = response['max_id']
|
82
|
-
end
|
83
|
-
else
|
84
|
-
log.error "twitter search returned no response. hail the failwhale!"
|
85
|
-
end
|
86
|
-
log.info "finished tweetlr crawl."
|
87
|
-
return config
|
88
|
-
end
|
89
12
|
end
|
@@ -1,6 +1,6 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
|
-
describe Combinators::TwitterTumblr do
|
3
|
+
describe Tweetlr::Combinators::TwitterTumblr do
|
4
4
|
before :each do
|
5
5
|
@first_link = "http://url.com"
|
6
6
|
@second_link = "http://instagr.am/p/DzCWn/"
|
@@ -38,60 +38,60 @@ describe Combinators::TwitterTumblr do
|
|
38
38
|
it "extracting their corresponding links" do
|
39
39
|
@tweets.each do |key,value|
|
40
40
|
send "stub_#{key}"
|
41
|
-
url = Combinators::TwitterTumblr.extract_image_url value
|
41
|
+
url = Tweetlr::Combinators::TwitterTumblr.extract_image_url value
|
42
42
|
url.should be, "service #{key} not working!"
|
43
43
|
check_pic_url_extraction key if [:instagram,:picplz,:yfrog,:imgly,:not_listed].index key
|
44
44
|
end
|
45
45
|
end
|
46
46
|
it "using the first image link found in a tweet with multiple links" do
|
47
47
|
stub_instagram
|
48
|
-
link = Combinators::TwitterTumblr.extract_image_url @twitter_response
|
48
|
+
link = Tweetlr::Combinators::TwitterTumblr.extract_image_url @twitter_response
|
49
49
|
link.should == 'http://distillery.s3.amazonaws.com/media/2011/05/02/d25df62b9cec4a138967a3ad027d055b_7.jpg'
|
50
50
|
end
|
51
51
|
it "not returning links that do not belong to images" do
|
52
52
|
stub_no_image_link
|
53
|
-
link = Combinators::TwitterTumblr.extract_image_url @twitter_response
|
53
|
+
link = Tweetlr::Combinators::TwitterTumblr.extract_image_url @twitter_response
|
54
54
|
link.should_not be
|
55
55
|
end
|
56
56
|
end
|
57
57
|
context "given a user whitelist" do
|
58
58
|
it "should mark whitelist users' tweets as published" do
|
59
59
|
stub_instagram
|
60
|
-
post = Combinators::TwitterTumblr::generate_photo_post_from_tweet @twitter_response, :whitelist => @whitelist
|
60
|
+
post = Tweetlr::Combinators::TwitterTumblr::generate_photo_post_from_tweet @twitter_response, :whitelist => @whitelist
|
61
61
|
post[:state].should == 'published'
|
62
62
|
end
|
63
63
|
it "should mark non whitelist users' tweets as drafts" do
|
64
64
|
stub_instagram
|
65
|
-
post = Combinators::TwitterTumblr::generate_photo_post_from_tweet @non_whitelist_tweet, :whitelist => @whitelist
|
65
|
+
post = Tweetlr::Combinators::TwitterTumblr::generate_photo_post_from_tweet @non_whitelist_tweet, :whitelist => @whitelist
|
66
66
|
post[:state].should == 'draft'
|
67
67
|
end
|
68
68
|
end
|
69
69
|
context "without a user whitelist (whitelist nil or empty)" do
|
70
70
|
it "should mark every users' posts as published" do
|
71
71
|
stub_instagram
|
72
|
-
post = Combinators::TwitterTumblr::generate_photo_post_from_tweet @twitter_response, :whitelist => nil
|
72
|
+
post = Tweetlr::Combinators::TwitterTumblr::generate_photo_post_from_tweet @twitter_response, :whitelist => nil
|
73
73
|
post[:state].should == 'published'
|
74
74
|
stub_instagram
|
75
|
-
post = Combinators::TwitterTumblr::generate_photo_post_from_tweet @non_whitelist_tweet, :whitelist => nil
|
75
|
+
post = Tweetlr::Combinators::TwitterTumblr::generate_photo_post_from_tweet @non_whitelist_tweet, :whitelist => nil
|
76
76
|
post[:state].should == 'published'
|
77
|
-
post = Combinators::TwitterTumblr::generate_photo_post_from_tweet @twitter_response, :whitelist => ""
|
77
|
+
post = Tweetlr::Combinators::TwitterTumblr::generate_photo_post_from_tweet @twitter_response, :whitelist => ""
|
78
78
|
post[:state].should == 'published'
|
79
79
|
stub_instagram
|
80
|
-
post = Combinators::TwitterTumblr::generate_photo_post_from_tweet @non_whitelist_tweet, :whitelist => ""
|
80
|
+
post = Tweetlr::Combinators::TwitterTumblr::generate_photo_post_from_tweet @non_whitelist_tweet, :whitelist => ""
|
81
81
|
post[:state].should == 'published'
|
82
82
|
end
|
83
83
|
end
|
84
84
|
it "should not use retweets which would produce double blog posts" do
|
85
|
-
post = Combinators::TwitterTumblr::generate_photo_post_from_tweet @retweet, :whitelist => @whitelist
|
85
|
+
post = Tweetlr::Combinators::TwitterTumblr::generate_photo_post_from_tweet @retweet, :whitelist => @whitelist
|
86
86
|
post.should_not be
|
87
87
|
end
|
88
88
|
context "should not use new style retweets which would produce double blog posts" do
|
89
89
|
it "for quotes in context" do
|
90
|
-
post = Combinators::TwitterTumblr::generate_photo_post_from_tweet @new_style_retweet, :whitelist => @whitelist
|
90
|
+
post = Tweetlr::Combinators::TwitterTumblr::generate_photo_post_from_tweet @new_style_retweet, :whitelist => @whitelist
|
91
91
|
post.should_not be
|
92
92
|
end
|
93
93
|
it "for quotes without further text addition" do
|
94
|
-
post = Combinators::TwitterTumblr::generate_photo_post_from_tweet @new_style_retweet_no_addition, :whitelist => @whitelist
|
94
|
+
post = Tweetlr::Combinators::TwitterTumblr::generate_photo_post_from_tweet @new_style_retweet_no_addition, :whitelist => @whitelist
|
95
95
|
post.should_not be
|
96
96
|
end
|
97
97
|
end
|
@@ -99,13 +99,13 @@ describe Combinators::TwitterTumblr do
|
|
99
99
|
it "uses a given blog via group option to post to" do
|
100
100
|
stub_instagram
|
101
101
|
desired_group = 'mygroup.tumblr.com'
|
102
|
-
tumblr_post = Combinators::TwitterTumblr.generate_photo_post_from_tweet @twitter_response, {:whitelist => @whitelist, :group => desired_group}
|
102
|
+
tumblr_post = Tweetlr::Combinators::TwitterTumblr.generate_photo_post_from_tweet @twitter_response, {:whitelist => @whitelist, :group => desired_group}
|
103
103
|
tumblr_post[:tumblr_blog_hostname].should eq desired_group
|
104
104
|
end
|
105
105
|
it "uses a given blog via tumblr_blog_hostname to post to" do
|
106
106
|
stub_instagram
|
107
107
|
desired_group = 'mygroup.tumblr.com'
|
108
|
-
tumblr_post = Combinators::TwitterTumblr.generate_photo_post_from_tweet @twitter_response, {:whitelist => @whitelist, :tumblr_blog_hostname => desired_group}
|
108
|
+
tumblr_post = Tweetlr::Combinators::TwitterTumblr.generate_photo_post_from_tweet @twitter_response, {:whitelist => @whitelist, :tumblr_blog_hostname => desired_group}
|
109
109
|
tumblr_post[:tumblr_blog_hostname].should eq desired_group
|
110
110
|
end
|
111
111
|
end
|
data/spec/core_spec.rb
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Tweetlr::Core do
|
4
|
+
|
5
|
+
config_file = File.join( Dir.pwd, 'config', 'tweetlr.yml')
|
6
|
+
config = YAML.load_file(config_file)
|
7
|
+
TIMESTAMP = config['twitter_timestamp']
|
8
|
+
WHITELIST = config['whitelist']
|
9
|
+
|
10
|
+
before :each do
|
11
|
+
@first_link = "http://url.com"
|
12
|
+
@second_link = "http://instagr.am/p/DzCWn/"
|
13
|
+
@third_link = "https://imageurl.com"
|
14
|
+
@twitter_response = {"from_user_id_str"=>"1915714", "profile_image_url"=>"http://a0.twimg.com/profile_images/386000279/2_normal.jpg", "created_at"=>"Sun, 17 Apr 2011 16:48:42 +0000", "from_user"=>"whitey_Mc_whIteLIst", "id_str"=>"59659561224765440", "metadata"=>{"result_type"=>"recent"}, "to_user_id"=>nil, "text"=>"Rigaer #wirsounterwegs #{@first_link} @ Augenarzt Dr. Lierow #{@second_link} #{@third_link}", "id"=>59659561224765440, "from_user_id"=>1915714, "geo"=>{"type"=>"Point", "coordinates"=>[52.5182, 13.454]}, "iso_language_code"=>"de", "place"=>{"id"=>"3078869807f9dd36", "type"=>"city", "full_name"=>"Berlin, Berlin"}, "to_user_id_str"=>nil, "source"=>"&lt;a href=&quot;http://instagr.am&quot; rel=&quot;nofollow&quot;&gt;instagram&lt;/a&gt;"}
|
15
|
+
@tweetlr_config = {
|
16
|
+
:since_id => 0,
|
17
|
+
:results_per_page => 3,
|
18
|
+
:search_term => 'coffeediary',
|
19
|
+
:result_type => 'recent',
|
20
|
+
:api_endpoint_twitter => Tweetlr::API_ENDPOINT_TWITTER,
|
21
|
+
:loglevel=>1,
|
22
|
+
:tumblr_oauth_access_token_key => config['tumblr_oauth_access_token_key'],
|
23
|
+
:tumblr_oauth_access_token_secret => config['tumblr_oauth_access_token_secret'],
|
24
|
+
:tumblr_oauth_api_secret => config['tumblr_oauth_api_secret'],
|
25
|
+
:tumblr_oauth_api_key => config['tumblr_oauth_api_key'],
|
26
|
+
:tumblr_blog_hostname => config['tumblr_blog_hostname']
|
27
|
+
}
|
28
|
+
stub_tumblr
|
29
|
+
stub_twitter
|
30
|
+
stub_oauth
|
31
|
+
end
|
32
|
+
it "crawls twitter and posts to tumblr" do
|
33
|
+
since_id_before = @tweetlr_config[:since_id]
|
34
|
+
result = Tweetlr::Core.crawl(@tweetlr_config)
|
35
|
+
since_id_before.should_not == result[:since_id]
|
36
|
+
end
|
37
|
+
it "copes with legacy config that use tumblr v1 api (basic auth)" do
|
38
|
+
legacy_config = {
|
39
|
+
:id=>16,
|
40
|
+
:search_term=>"booga",
|
41
|
+
:tumblr_email=>"wooga@booga.de",
|
42
|
+
:tumblr_password=>"boogawooga",
|
43
|
+
:since_id=>"246543935663661057",
|
44
|
+
:results_per_page=>3,
|
45
|
+
:result_type=>nil,
|
46
|
+
:api_endpoint_twitter=>nil,
|
47
|
+
:api_endpoint_tumblr=>nil,
|
48
|
+
:update_period=>900,
|
49
|
+
:shouts=>nil,
|
50
|
+
:loglevel=>1,
|
51
|
+
:whitelist=>["user1", "user2"],
|
52
|
+
:last_crawl=>"Fri, 14 Sep 2012 09:43:10 UTC +00:00",
|
53
|
+
:active=>true,
|
54
|
+
:tumblr_oauth_access_token_key=>nil,
|
55
|
+
:tumblr_oauth_access_token_secret=>nil}
|
56
|
+
since_id_before = legacy_config[:since_id]
|
57
|
+
result = Tweetlr::Core.crawl(legacy_config)
|
58
|
+
since_id_before.should_not == result[:since_id]
|
59
|
+
end
|
60
|
+
end
|
@@ -1,6 +1,6 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
|
-
describe Processors::PhotoService do
|
3
|
+
describe Tweetlr::Processors::PhotoService do
|
4
4
|
before :each do
|
5
5
|
@links = {
|
6
6
|
:twimg => 'http://twitter.com/KSilbereisen/status/228035435237097472',
|
@@ -20,44 +20,44 @@ describe Processors::PhotoService do
|
|
20
20
|
end
|
21
21
|
it "extracts images from eye em" do
|
22
22
|
stub_eyeem
|
23
|
-
link = Processors::PhotoService::find_image_url @links[:eyeem]
|
23
|
+
link = Tweetlr::Processors::PhotoService::find_image_url @links[:eyeem]
|
24
24
|
link.should be
|
25
25
|
link.should == "http://www.eyeem.com/thumb/h/1024/e35db836c5d3f02498ef60fc3d53837fbe621561-1334126483"
|
26
26
|
end
|
27
27
|
it "doesnt find images in embedly results that are not explicitly marked as 'Photo' via the response's 'thumbnail_url' attribute" do
|
28
28
|
stub_embedly_no_photo
|
29
|
-
link = Processors::PhotoService::find_image_url 'http://makersand.co/'
|
29
|
+
link = Tweetlr::Processors::PhotoService::find_image_url 'http://makersand.co/'
|
30
30
|
link.should be_nil
|
31
31
|
end
|
32
32
|
it "does find an image for foursquare that is not he profile pic" do
|
33
33
|
stub_foursquare
|
34
|
-
link = Processors::PhotoService::find_image_url @links[:foursquare]
|
34
|
+
link = Tweetlr::Processors::PhotoService::find_image_url @links[:foursquare]
|
35
35
|
link.index('userpix_thumbs').should_not be
|
36
36
|
end
|
37
37
|
it "should find a picture's url from the supported services" do
|
38
38
|
@links.each do |service,link|
|
39
39
|
send "stub_#{service}"
|
40
|
-
url = Processors::PhotoService::find_image_url link
|
40
|
+
url = Tweetlr::Processors::PhotoService::find_image_url link
|
41
41
|
url.should be, "service #{service} not working!"
|
42
42
|
check_pic_url_extraction service if [:twimg, :instagram,:picplz,:yfrog,:imgly,:foursqaure,:not_listed].index service
|
43
43
|
end
|
44
44
|
end
|
45
45
|
it "finds path images for redirected moments as well" do
|
46
46
|
stub_path_redirected
|
47
|
-
url = Processors::PhotoService::find_image_url @links[:path]
|
47
|
+
url = Tweetlr::Processors::PhotoService::find_image_url @links[:path]
|
48
48
|
url.should == 'https://s3-us-west-1.amazonaws.com/images.path.com/photos2/f90fd831-43c3-48fd-84cb-5c3bae52957a/2x.jpg'
|
49
49
|
end
|
50
50
|
it "should not crash if embedly fallback won't find a link" do
|
51
51
|
stub_bad_request
|
52
|
-
url = Processors::PhotoService::find_image_url "http://mopskopf"
|
52
|
+
url = Tweetlr::Processors::PhotoService::find_image_url "http://mopskopf"
|
53
53
|
end
|
54
54
|
it "should not crash with an encoding error when response is non-us-ascii" do
|
55
55
|
stub_utf8_response
|
56
|
-
url = Processors::PhotoService::find_image_url "http://api.instagram.com/oembed?url=http://instagr.am/p/Gx%E2%80%946/"
|
56
|
+
url = Tweetlr::Processors::PhotoService::find_image_url "http://api.instagram.com/oembed?url=http://instagr.am/p/Gx%E2%80%946/"
|
57
57
|
end
|
58
58
|
it "follows redirects" do
|
59
59
|
stub_imgly
|
60
|
-
link = Processors::PhotoService::link_url_redirect 'im mocked anyways'
|
60
|
+
link = Tweetlr::Processors::PhotoService::link_url_redirect 'im mocked anyways'
|
61
61
|
link.should == 'http://s3.amazonaws.com/imgly_production/899582/full.jpg'
|
62
62
|
end
|
63
63
|
end
|
@@ -1,6 +1,6 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
|
-
describe Processors::Tumblr do
|
3
|
+
describe Tweetlr::Processors::Tumblr do
|
4
4
|
before :all do
|
5
5
|
config_file = File.join( Dir.pwd, 'config', 'tweetlr.yml')
|
6
6
|
config = YAML.load_file(config_file)
|
@@ -21,10 +21,10 @@ describe Processors::Tumblr do
|
|
21
21
|
it "posts to tumblr" do
|
22
22
|
stub_tumblr
|
23
23
|
stub_oauth
|
24
|
-
tumblr_post = Combinators::TwitterTumblr::generate_photo_post_from_tweet @twitter_response, @tweetlr_config
|
24
|
+
tumblr_post = Tweetlr::Combinators::TwitterTumblr::generate_photo_post_from_tweet @twitter_response, @tweetlr_config
|
25
25
|
tumblr_post[:date] = Time.now.to_s
|
26
26
|
tumblr_post[:source] = 'http://distilleryimage6.instagram.com/db72627effde11e1b3f322000a1e8899_7.jpg'
|
27
|
-
response = Processors::Tumblr::post @tweetlr_config.merge(tumblr_post)
|
27
|
+
response = Tweetlr::Processors::Tumblr::post @tweetlr_config.merge(tumblr_post)
|
28
28
|
response.should be
|
29
29
|
response.code.should == "201"
|
30
30
|
end
|
@@ -1,6 +1,6 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
|
-
describe Processors::Twitter do
|
3
|
+
describe Tweetlr::Processors::Twitter do
|
4
4
|
before :each do
|
5
5
|
@first_link = "http://url.com"
|
6
6
|
@second_link = "http://instagr.am/p/DzCWn/"
|
@@ -16,15 +16,15 @@ describe Processors::Twitter do
|
|
16
16
|
end
|
17
17
|
it "should search twitter for a given term" do
|
18
18
|
stub_twitter
|
19
|
-
response = Processors::Twitter::lazy_search @twitter_config
|
19
|
+
response = Tweetlr::Processors::Twitter::lazy_search @twitter_config
|
20
20
|
tweets = response['results']
|
21
21
|
tweets.should be
|
22
22
|
tweets.should_not be_empty
|
23
23
|
end
|
24
24
|
it "extracts links" do
|
25
|
-
links = Processors::Twitter::extract_links ''
|
25
|
+
links = Tweetlr::Processors::Twitter::extract_links ''
|
26
26
|
links.should be_nil
|
27
|
-
links = Processors::Twitter::extract_links @twitter_response
|
27
|
+
links = Tweetlr::Processors::Twitter::extract_links @twitter_response
|
28
28
|
links[0].should == @first_link
|
29
29
|
links[1].should == @second_link
|
30
30
|
links[2].should == @third_link
|
data/spec/spec_helper.rb
CHANGED
@@ -1,15 +1,17 @@
|
|
1
1
|
#encoding: utf-8
|
2
2
|
require "bundler"
|
3
3
|
require "logger"
|
4
|
+
require "#{File.dirname(__FILE__)}/../lib/tweetlr"
|
5
|
+
|
4
6
|
Bundler.require :default, :development, :test
|
5
7
|
|
6
8
|
logger = Logger.new(STDOUT)
|
7
9
|
logger.level = Logger::FATAL
|
8
|
-
LogAware.log = logger
|
10
|
+
Tweetlr::LogAware.log = logger
|
9
11
|
|
10
12
|
def check_pic_url_extraction(service)
|
11
|
-
image_url = Processors::PhotoService::send "image_url_#{service}".to_sym, @links[service]
|
12
|
-
image_url.should =~ Processors::PhotoService::PIC_REGEXP
|
13
|
+
image_url = Tweetlr::Processors::PhotoService::send "image_url_#{service}".to_sym, @links[service]
|
14
|
+
image_url.should =~ Tweetlr::Processors::PhotoService::PIC_REGEXP
|
13
15
|
end
|
14
16
|
|
15
17
|
def stub_oauth
|
data/tweetlr.gemspec
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tweetlr
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.17
|
4
|
+
version: 0.1.18
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-09-
|
12
|
+
date: 2012-09-29 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: daemons
|
16
|
-
requirement: &
|
16
|
+
requirement: &2160420980 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '0'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *2160420980
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: eventmachine
|
27
|
-
requirement: &
|
27
|
+
requirement: &2160420140 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ! '>='
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: '0'
|
33
33
|
type: :runtime
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *2160420140
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: curb
|
38
|
-
requirement: &
|
38
|
+
requirement: &2160419680 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ! '>='
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: '0'
|
44
44
|
type: :runtime
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *2160419680
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: json
|
49
|
-
requirement: &
|
49
|
+
requirement: &2160419220 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ! '>='
|
@@ -54,10 +54,10 @@ dependencies:
|
|
54
54
|
version: '0'
|
55
55
|
type: :runtime
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *2160419220
|
58
58
|
- !ruby/object:Gem::Dependency
|
59
59
|
name: nokogiri
|
60
|
-
requirement: &
|
60
|
+
requirement: &2160418520 !ruby/object:Gem::Requirement
|
61
61
|
none: false
|
62
62
|
requirements:
|
63
63
|
- - ! '>='
|
@@ -65,10 +65,10 @@ dependencies:
|
|
65
65
|
version: '0'
|
66
66
|
type: :runtime
|
67
67
|
prerelease: false
|
68
|
-
version_requirements: *
|
68
|
+
version_requirements: *2160418520
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: oauth
|
71
|
-
requirement: &
|
71
|
+
requirement: &2160417840 !ruby/object:Gem::Requirement
|
72
72
|
none: false
|
73
73
|
requirements:
|
74
74
|
- - ! '>='
|
@@ -76,10 +76,10 @@ dependencies:
|
|
76
76
|
version: '0'
|
77
77
|
type: :runtime
|
78
78
|
prerelease: false
|
79
|
-
version_requirements: *
|
79
|
+
version_requirements: *2160417840
|
80
80
|
- !ruby/object:Gem::Dependency
|
81
81
|
name: rake
|
82
|
-
requirement: &
|
82
|
+
requirement: &2160417320 !ruby/object:Gem::Requirement
|
83
83
|
none: false
|
84
84
|
requirements:
|
85
85
|
- - ! '>='
|
@@ -87,10 +87,10 @@ dependencies:
|
|
87
87
|
version: '0'
|
88
88
|
type: :development
|
89
89
|
prerelease: false
|
90
|
-
version_requirements: *
|
90
|
+
version_requirements: *2160417320
|
91
91
|
- !ruby/object:Gem::Dependency
|
92
92
|
name: rspec
|
93
|
-
requirement: &
|
93
|
+
requirement: &2160416900 !ruby/object:Gem::Requirement
|
94
94
|
none: false
|
95
95
|
requirements:
|
96
96
|
- - ! '>='
|
@@ -98,10 +98,10 @@ dependencies:
|
|
98
98
|
version: '0'
|
99
99
|
type: :development
|
100
100
|
prerelease: false
|
101
|
-
version_requirements: *
|
101
|
+
version_requirements: *2160416900
|
102
102
|
- !ruby/object:Gem::Dependency
|
103
103
|
name: rdoc
|
104
|
-
requirement: &
|
104
|
+
requirement: &2160416480 !ruby/object:Gem::Requirement
|
105
105
|
none: false
|
106
106
|
requirements:
|
107
107
|
- - ! '>='
|
@@ -109,7 +109,7 @@ dependencies:
|
|
109
109
|
version: '0'
|
110
110
|
type: :development
|
111
111
|
prerelease: false
|
112
|
-
version_requirements: *
|
112
|
+
version_requirements: *2160416480
|
113
113
|
description: tweetlr crawls twitter for a given term, extracts photos out of the collected
|
114
114
|
tweets' short urls and posts the images to tumblr.
|
115
115
|
email: sven.kraeuter@gmail.com
|
@@ -130,19 +130,20 @@ files:
|
|
130
130
|
- Rakefile
|
131
131
|
- bin/tweetlr
|
132
132
|
- config/tweetlr.yml
|
133
|
-
- lib/combinators/twitter_tumblr.rb
|
134
|
-
- lib/log_aware.rb
|
135
|
-
- lib/processors/http.rb
|
136
|
-
- lib/processors/photo_service.rb
|
137
|
-
- lib/processors/tumblr.rb
|
138
|
-
- lib/processors/twitter.rb
|
139
133
|
- lib/tweetlr.rb
|
134
|
+
- lib/tweetlr/combinators/twitter_tumblr.rb
|
135
|
+
- lib/tweetlr/core.rb
|
136
|
+
- lib/tweetlr/log_aware.rb
|
137
|
+
- lib/tweetlr/processors/http.rb
|
138
|
+
- lib/tweetlr/processors/photo_service.rb
|
139
|
+
- lib/tweetlr/processors/tumblr.rb
|
140
|
+
- lib/tweetlr/processors/twitter.rb
|
140
141
|
- spec/combinators/twitter_tumblr_combinator_spec.rb
|
142
|
+
- spec/core_spec.rb
|
141
143
|
- spec/processors/photo_services_processor_spec.rb
|
142
144
|
- spec/processors/tumblr_processor_spec.rb
|
143
145
|
- spec/processors/twitter_processor_spec.rb
|
144
146
|
- spec/spec_helper.rb
|
145
|
-
- spec/tweetlr_spec.rb
|
146
147
|
- tweetlr.gemspec
|
147
148
|
homepage: http://tweetlr.5v3n.com
|
148
149
|
licenses: []
|
@@ -171,8 +172,8 @@ summary: tweetlr crawls twitter for a given term, extracts photos out of the col
|
|
171
172
|
tweets' short urls and posts the images to tumblr.
|
172
173
|
test_files:
|
173
174
|
- spec/combinators/twitter_tumblr_combinator_spec.rb
|
175
|
+
- spec/core_spec.rb
|
174
176
|
- spec/processors/photo_services_processor_spec.rb
|
175
177
|
- spec/processors/tumblr_processor_spec.rb
|
176
178
|
- spec/processors/twitter_processor_spec.rb
|
177
179
|
- spec/spec_helper.rb
|
178
|
-
- spec/tweetlr_spec.rb
|
data/lib/processors/tumblr.rb
DELETED
@@ -1,51 +0,0 @@
|
|
1
|
-
require 'log_aware'
|
2
|
-
require 'oauth'
|
3
|
-
|
4
|
-
module Processors
|
5
|
-
#utilities for handling tumblr
|
6
|
-
module Tumblr
|
7
|
-
GENERATOR = %{tweetlr - http://tweetlr.5v3n.com}
|
8
|
-
API_ENDPOINT_TUMBLR = 'http://www.tumblr.com'
|
9
|
-
include LogAware
|
10
|
-
def self.log
|
11
|
-
LogAware.log #TODO why doesn't the include make the log method accessible?
|
12
|
-
end
|
13
|
-
#post a tumblr photo entry.
|
14
|
-
#
|
15
|
-
#required arguments are :tumblr_blog_hostname, :tumblr_blog_hostname, :tumblr_oauth_api_secret, :tumblr_oauth_access_token_secret, :source, :caption, :state
|
16
|
-
#
|
17
|
-
#optional arguments: :tags, :type (default: 'photo')
|
18
|
-
#
|
19
|
-
def self.post(options={})
|
20
|
-
base_hostname = options[:tumblr_blog_hostname] || options[:group]
|
21
|
-
tumblr_oauth_api_key= options[:tumblr_oauth_api_key]
|
22
|
-
tumblr_oauth_api_secret= options[:tumblr_oauth_api_secret]
|
23
|
-
access_token_key = options[:tumblr_oauth_access_token_key]
|
24
|
-
access_token_secret = options[:tumblr_oauth_access_token_secret]
|
25
|
-
type = options[:type] || 'photo'
|
26
|
-
tags = options[:tags] || ''
|
27
|
-
|
28
|
-
consumer = OAuth::Consumer.new(tumblr_oauth_api_key, tumblr_oauth_api_secret,
|
29
|
-
{ :site => 'http://www.tumblr.com',
|
30
|
-
:request_token_path => '/oauth/request_token',
|
31
|
-
:authorize_path => '/oauth/authorize',
|
32
|
-
:access_token_path => '/oauth/access_token',
|
33
|
-
:http_method => :post } )
|
34
|
-
|
35
|
-
access_token = OAuth::AccessToken.new(consumer, access_token_key, access_token_secret)
|
36
|
-
|
37
|
-
post_response = access_token.post(
|
38
|
-
"http://api.tumblr.com/v2/blog/#{base_hostname}/post", {
|
39
|
-
:type => type,
|
40
|
-
:source => options[:source],
|
41
|
-
:caption => options[:caption],
|
42
|
-
:date => options[:date],
|
43
|
-
:tags => tags,
|
44
|
-
:state => options[:state],
|
45
|
-
:generator => GENERATOR
|
46
|
-
}
|
47
|
-
)
|
48
|
-
post_response
|
49
|
-
end
|
50
|
-
end
|
51
|
-
end
|
data/spec/tweetlr_spec.rb
DELETED
@@ -1,28 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
describe Tweetlr do
|
4
|
-
|
5
|
-
config_file = File.join( Dir.pwd, 'config', 'tweetlr.yml')
|
6
|
-
config = YAML.load_file(config_file)
|
7
|
-
TIMESTAMP = config['twitter_timestamp']
|
8
|
-
WHITELIST = config['whitelist']
|
9
|
-
|
10
|
-
before :each do
|
11
|
-
@first_link = "http://url.com"
|
12
|
-
@second_link = "http://instagr.am/p/DzCWn/"
|
13
|
-
@third_link = "https://imageurl.com"
|
14
|
-
@twitter_response = {"from_user_id_str"=>"1915714", "profile_image_url"=>"http://a0.twimg.com/profile_images/386000279/2_normal.jpg", "created_at"=>"Sun, 17 Apr 2011 16:48:42 +0000", "from_user"=>"whitey_Mc_whIteLIst", "id_str"=>"59659561224765440", "metadata"=>{"result_type"=>"recent"}, "to_user_id"=>nil, "text"=>"Rigaer #wirsounterwegs #{@first_link} @ Augenarzt Dr. Lierow #{@second_link} #{@third_link}", "id"=>59659561224765440, "from_user_id"=>1915714, "geo"=>{"type"=>"Point", "coordinates"=>[52.5182, 13.454]}, "iso_language_code"=>"de", "place"=>{"id"=>"3078869807f9dd36", "type"=>"city", "full_name"=>"Berlin, Berlin"}, "to_user_id_str"=>nil, "source"=>"&lt;a href=&quot;http://instagr.am&quot; rel=&quot;nofollow&quot;&gt;instagram&lt;/a&gt;"}
|
15
|
-
@tweetlr_config = {
|
16
|
-
:since_id => 0,
|
17
|
-
:search_term => 'moped',
|
18
|
-
:results_per_page => 100,
|
19
|
-
:result_type => 'recent',
|
20
|
-
:api_endpoint_twitter => Tweetlr::API_ENDPOINT_TWITTER
|
21
|
-
}
|
22
|
-
end
|
23
|
-
it "crawls twitter and posts to tumblr" do
|
24
|
-
stub_tumblr
|
25
|
-
stub_twitter
|
26
|
-
Tweetlr.crawl(@tweetlr_config)
|
27
|
-
end
|
28
|
-
end
|