tweetlr 0.1.17 → 0.1.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +5 -3
- data/bin/tweetlr +24 -20
- data/config/tweetlr.yml +9 -9
- data/lib/{combinators → tweetlr/combinators}/twitter_tumblr.rb +12 -12
- data/lib/tweetlr/core.rb +93 -0
- data/lib/{log_aware.rb → tweetlr/log_aware.rb} +3 -1
- data/lib/{processors → tweetlr/processors}/http.rb +4 -4
- data/lib/{processors → tweetlr/processors}/photo_service.rb +22 -30
- data/lib/tweetlr/processors/tumblr.rb +55 -0
- data/lib/{processors → tweetlr/processors}/twitter.rb +8 -7
- data/lib/tweetlr.rb +5 -82
- data/spec/combinators/twitter_tumblr_combinator_spec.rb +15 -15
- data/spec/core_spec.rb +60 -0
- data/spec/processors/photo_services_processor_spec.rb +9 -9
- data/spec/processors/tumblr_processor_spec.rb +3 -3
- data/spec/processors/twitter_processor_spec.rb +4 -4
- data/spec/spec_helper.rb +5 -3
- data/tweetlr.gemspec +1 -1
- metadata +29 -28
- data/lib/processors/tumblr.rb +0 -51
- data/spec/tweetlr_spec.rb +0 -28
data/README.md
CHANGED
@@ -40,9 +40,11 @@ search_term: 'cat+dog+unicorn' #find tweets containing any of these terms
|
|
40
40
|
start_at_tweet_id: 61847783463854082 # the tweet id to start searching at
|
41
41
|
api_endpoint_twitter: 'http://search.twitter.com/search.json'
|
42
42
|
api_endpoint_tumblr: 'http://www.tumblr.com'
|
43
|
-
|
44
|
-
|
45
|
-
|
43
|
+
tumblr_oauth_api_key: YOUR APPS TUMBLR API TOKEN
|
44
|
+
tumblr_oauth_api_secret: YOUR APPS TUMBLR API SECRET
|
45
|
+
tumblr_oauth_access_token_key: YOUR BLOGS OAUTH ACCESS TOKEN KEY
|
46
|
+
tumblr_oauth_access_token_secret: YOUR BLOGS OAUTH ACCESS TOKEN SECRET
|
47
|
+
tumblr_blog_hostname: YOUR BLOGS HOSTNAME #e.g. myblog.tumblr.com
|
46
48
|
embedly_key: '' #tweetlr uses http://embedly.com for link processing. a free plan containing an api key is available & recommended to use in order to ensure full support
|
47
49
|
update_period: 300 #check for updates every 300 secs = 5 minutes
|
48
50
|
shouts: 'says' # will be concatenated after the username, before the message: @mr_x says: awesome things on a photo!
|
data/bin/tweetlr
CHANGED
@@ -22,25 +22,7 @@ begin
|
|
22
22
|
|
23
23
|
UPDATE_PERIOD = CONFIG['update_period']
|
24
24
|
|
25
|
-
@tweetlr_config =
|
26
|
-
:tumblr_blog_hostname => CONFIG['tumblr_blog_hostname'] || CONFIG['group'],
|
27
|
-
:tumblr_oauth_api_key => CONFIG['tumblr_oauth_api_key'],
|
28
|
-
:tumblr_oauth_api_secret => CONFIG['tumblr_oauth_api_secret'],
|
29
|
-
:tumblr_blog_hostname => CONFIG['tumblr_blog_hostname'],
|
30
|
-
:tumblr_oauth_access_token_key => CONFIG['tumblr_oauth_access_token_key'],
|
31
|
-
:tumblr_oauth_access_token_secret => CONFIG['tumblr_oauth_access_token_secret'],
|
32
|
-
:whitelist => CONFIG['whitelist'],
|
33
|
-
:shouts => CONFIG['shouts'],
|
34
|
-
:since_id => CONFIG['start_at_tweet_id'] ,
|
35
|
-
:terms => CONFIG['search_term'],
|
36
|
-
:loglevel => CONFIG['loglevel'],
|
37
|
-
:update_period => UPDATE_PERIOD,
|
38
|
-
:api_endpoint_tumblr => CONFIG['api_endpoint_tumblr'],
|
39
|
-
:api_endpoint_twitter => CONFIG['api_endpoint_twitter'],
|
40
|
-
:results_per_page => CONFIG['results_per_page'],
|
41
|
-
:result_type => CONFIG['result_type'],
|
42
|
-
:embedly_key => CONFIG['embedly_key']
|
43
|
-
}
|
25
|
+
@tweetlr_config = prepare_tweetlr_config CONFIG
|
44
26
|
rescue SystemCallError
|
45
27
|
$stderr.puts "Ooops - looks like there is no ./config/tweetlr.yml found. I'm affraid tweetlr won't work properly until you introduced that configuration file."
|
46
28
|
exit(1)
|
@@ -52,9 +34,31 @@ Daemons.run_proc('tweetlr', :dir_mode => :script, :dir => './', :backtrace => tr
|
|
52
34
|
@log.info "creating a new tweetlr instance using this config: #{@tweetlr_config.inspect}"
|
53
35
|
EventMachine::run do
|
54
36
|
EventMachine::add_periodic_timer( UPDATE_PERIOD ) do
|
55
|
-
response = Tweetlr.crawl(@tweetlr_config)
|
37
|
+
response = Tweetlr::Core.crawl(@tweetlr_config)
|
56
38
|
File.open(tid_file, "w+") { |io| io.write(response[:since_id]) }
|
57
39
|
@tweetlr_config.merge! response
|
58
40
|
end
|
59
41
|
end
|
42
|
+
end
|
43
|
+
|
44
|
+
def prepare_tweetlr_config(config)
|
45
|
+
{
|
46
|
+
:tumblr_blog_hostname => config['tumblr_blog_hostname'] || config['group'],
|
47
|
+
:tumblr_oauth_api_key => config['tumblr_oauth_api_key'],
|
48
|
+
:tumblr_oauth_api_secret => config['tumblr_oauth_api_secret'],
|
49
|
+
:tumblr_blog_hostname => config['tumblr_blog_hostname'],
|
50
|
+
:tumblr_oauth_access_token_key => config['tumblr_oauth_access_token_key'],
|
51
|
+
:tumblr_oauth_access_token_secret => config['tumblr_oauth_access_token_secret'],
|
52
|
+
:whitelist => config['whitelist'],
|
53
|
+
:shouts => config['shouts'],
|
54
|
+
:since_id => config['start_at_tweet_id'] ,
|
55
|
+
:terms => config['search_term'],
|
56
|
+
:loglevel => config['loglevel'],
|
57
|
+
:update_period => UPDATE_PERIOD,
|
58
|
+
:api_endpoint_tumblr => config['api_endpoint_tumblr'],
|
59
|
+
:api_endpoint_twitter => config['api_endpoint_twitter'],
|
60
|
+
:results_per_page => config['results_per_page'],
|
61
|
+
:result_type => config['result_type'],
|
62
|
+
:embedly_key => config['embedly_key']
|
63
|
+
}
|
60
64
|
end
|
data/config/tweetlr.yml
CHANGED
@@ -1,18 +1,18 @@
|
|
1
1
|
results_per_page: 100
|
2
2
|
result_type: recent
|
3
|
-
search_term: '
|
3
|
+
search_term: 'coffeediary' #find tweets containing any of these terms
|
4
4
|
start_at_tweet_id: 61847783463854082 # the tweet id to start searching at
|
5
5
|
api_endpoint_twitter: 'http://search.twitter.com/search.json'
|
6
6
|
api_endpoint_tumblr: 'http://www.tumblr.com'
|
7
|
-
tumblr_oauth_api_key:
|
8
|
-
tumblr_oauth_api_secret:
|
9
|
-
tumblr_oauth_access_token_key:
|
10
|
-
tumblr_oauth_access_token_secret:
|
11
|
-
|
12
|
-
embedly_key: '' #tweetlr uses http://embedly.com for link processing. a free plan containing an api key is available & recommended to use in order to ensure full support
|
13
|
-
update_period:
|
7
|
+
tumblr_oauth_api_key: 'Buq8j3koYLqrZEMmTM4GL32S0guZU2Qvoz8xSvFRumaWuaxAnG'
|
8
|
+
tumblr_oauth_api_secret: 'EnjBUAjUHo4Qi4d3BPUL5xsdc8qClDqRTLssz8Jzd2sKC7KZaq'
|
9
|
+
tumblr_oauth_access_token_key: 'MQES8SEqr3JogLyUYzcV68RFfQ0b3ClxbnUihChh8p9UMH3tkM'
|
10
|
+
tumblr_oauth_access_token_secret: 'FBnrEFW1p9RG7Zh1kztPjPDCbeE229fMPX5VwuzgZqUdD2hXSS'
|
11
|
+
tumblr_blog_hostname: 'tweetlr-testlr.tumblr.com' #e.g. mysubblog.tumblr.com
|
12
|
+
embedly_key: '9e6c2bb8372e11e1a92e4040d3dc5c07' #tweetlr uses http://embedly.com for link processing. a free plan containing an api key is available & recommended to use in order to ensure full support
|
13
|
+
update_period: 10 #check for updates every 10 secs
|
14
14
|
shouts: 'says' # will be concatenated after the username, before the message: @mr_x says: awesome things on a photo!
|
15
15
|
loglevel: 1 # 0: debug, 1: info (default), 2: warn, 3: error, 5: fatal
|
16
16
|
whitelist: #twitter accounts in that list will have their tweets published immediately. posts from others will be saved as drafts. a blank list will publish all tweets immediately
|
17
17
|
- whitey_mc_whitelist
|
18
|
-
- sven_kr
|
18
|
+
- sven_kr
|
@@ -1,23 +1,23 @@
|
|
1
|
-
|
2
|
-
require
|
3
|
-
require
|
1
|
+
local_path=File.dirname(__FILE__)
|
2
|
+
require "#{local_path}/../processors/twitter"
|
3
|
+
require "#{local_path}/../processors/tumblr"
|
4
|
+
require "#{local_path}/../processors/photo_service"
|
5
|
+
require "#{local_path}/../log_aware"
|
4
6
|
|
5
|
-
|
6
|
-
|
7
|
-
module Combinators
|
7
|
+
module Tweetlr::Combinators
|
8
8
|
module TwitterTumblr
|
9
|
-
include LogAware
|
9
|
+
include Tweetlr::LogAware
|
10
10
|
def self.log
|
11
|
-
LogAware.log #TODO why doesn't the include make the log method accessible?
|
11
|
+
Tweetlr::LogAware.log #TODO why doesn't the include make the log method accessible?
|
12
12
|
end
|
13
13
|
#extract a linked image file's url from a tweet. first found image will be used.
|
14
14
|
def self.extract_image_url(tweet, embedly_key=nil)
|
15
|
-
links = Processors::Twitter::extract_links tweet
|
15
|
+
links = Tweetlr::Processors::Twitter::extract_links tweet
|
16
16
|
image_url = nil
|
17
17
|
if links
|
18
18
|
links.each do |link|
|
19
|
-
image_url = Processors::PhotoService::find_image_url(link, embedly_key)
|
20
|
-
return image_url if Processors::PhotoService::photo? image_url
|
19
|
+
image_url = Tweetlr::Processors::PhotoService::find_image_url(link, embedly_key)
|
20
|
+
return image_url if Tweetlr::Processors::PhotoService::photo? image_url
|
21
21
|
end
|
22
22
|
end
|
23
23
|
image_url
|
@@ -29,7 +29,7 @@ module Combinators
|
|
29
29
|
message = tweet['text']
|
30
30
|
whitelist = options[:whitelist]
|
31
31
|
whitelist.each {|entry| entry.downcase!} if (whitelist && whitelist.size != 0)
|
32
|
-
if !Processors::Twitter::retweet? message
|
32
|
+
if !Tweetlr::Processors::Twitter::retweet? message
|
33
33
|
log.debug "tweet: #{tweet}"
|
34
34
|
tumblr_post = {}
|
35
35
|
tumblr_post[:tumblr_blog_hostname] = options[:tumblr_blog_hostname] || options[:group]
|
data/lib/tweetlr/core.rb
ADDED
@@ -0,0 +1,93 @@
|
|
1
|
+
# encode: UTF-8
|
2
|
+
local_path=File.dirname(__FILE__)
|
3
|
+
require "#{local_path}/processors/twitter"
|
4
|
+
require "#{local_path}/processors/http"
|
5
|
+
require "#{local_path}/processors/photo_service"
|
6
|
+
require "#{local_path}/processors/tumblr"
|
7
|
+
require "#{local_path}/combinators/twitter_tumblr"
|
8
|
+
require "#{local_path}/log_aware"
|
9
|
+
require 'uri'
|
10
|
+
|
11
|
+
class Tweetlr::Core
|
12
|
+
include Tweetlr::LogAware
|
13
|
+
def self.log
|
14
|
+
Tweetlr::LogAware.log #TODO why doesn't the include make the log method accessible?
|
15
|
+
end
|
16
|
+
|
17
|
+
def initialize(args)
|
18
|
+
log = Logger.new(STDOUT)
|
19
|
+
if (Logger::DEBUG..Logger::UNKNOWN).to_a.index(args[:loglevel])
|
20
|
+
log.level = args[:loglevel]
|
21
|
+
else
|
22
|
+
log.level = Logger::INFO
|
23
|
+
end
|
24
|
+
log.debug "log level set to #{log.level}"
|
25
|
+
Tweetlr::LogAware.log=log
|
26
|
+
|
27
|
+
@email = args[:tumblr_email]
|
28
|
+
@password = args[:tumblr_password]
|
29
|
+
@cookie = args[:cookie]
|
30
|
+
@api_endpoint_twitter = args[:api_endpoint_twitter] || Tweetlr::API_ENDPOINT_TWITTER
|
31
|
+
@api_endpoint_tumblr = args[:api_endpoint_tumblr] || Tweetlr::API_ENDPOINT_TUMBLR
|
32
|
+
@whitelist = args[:whitelist]
|
33
|
+
@shouts = args[:shouts]
|
34
|
+
@update_period = args[:update_period] || Tweetlr::UPDATE_PERIOD
|
35
|
+
@whitelist.each {|entry| entry.downcase!} if @whitelist
|
36
|
+
log.info "Tweetlr #{Tweetlr::VERSION} initialized. Ready to roll."
|
37
|
+
end
|
38
|
+
|
39
|
+
def self.crawl(config)
|
40
|
+
log.debug "#{self}.crawl() using config: #{config.inspect}"
|
41
|
+
twitter_config = prepare_twitter_config config
|
42
|
+
tumblr_config = prepare_tumblr_config config
|
43
|
+
twitter_config[:search_term] = URI::escape(twitter_config[:search_term]) if twitter_config[:search_term]
|
44
|
+
log.info "starting tweetlr crawl..."
|
45
|
+
response = {}
|
46
|
+
response = Tweetlr::Processors::Twitter::lazy_search(twitter_config)
|
47
|
+
if response
|
48
|
+
tweets = response['results']
|
49
|
+
if tweets
|
50
|
+
tweets.each do |tweet|
|
51
|
+
tumblr_post = Tweetlr::Combinators::TwitterTumblr::generate_photo_post_from_tweet(tweet, {:whitelist => config[:whitelist], :embedly_key => config[:embedly_key], :group => config[:group]})
|
52
|
+
if tumblr_post.nil? || tumblr_post[:source].nil?
|
53
|
+
log.warn "could not get image source: tweet: #{tweet} --- tumblr post: #{tumblr_post.inspect}"
|
54
|
+
else
|
55
|
+
log.debug "tumblr post: #{tumblr_post}"
|
56
|
+
res = Tweetlr::Processors::Tumblr.post tumblr_post.merge(tumblr_config)
|
57
|
+
log.debug "tumblr response: #{res}"
|
58
|
+
if res.code == "201"
|
59
|
+
log.info "tumblr post created (tumblr response: #{res.header} #{res.body}"
|
60
|
+
else
|
61
|
+
log.warn "tumblr response: #{res.header} #{res.body}"
|
62
|
+
end
|
63
|
+
end
|
64
|
+
end
|
65
|
+
# store the highest tweet id
|
66
|
+
config[:since_id] = response['max_id']
|
67
|
+
end
|
68
|
+
else
|
69
|
+
log.error "twitter search returned no response. hail the failwhale!"
|
70
|
+
end
|
71
|
+
log.info "finished tweetlr crawl."
|
72
|
+
return config
|
73
|
+
end
|
74
|
+
private
|
75
|
+
def self.prepare_twitter_config(config)
|
76
|
+
{
|
77
|
+
:since_id => config[:since_id] || config[:start_at_tweet_id],
|
78
|
+
:search_term => config[:terms] || config[:search_term] ,
|
79
|
+
:results_per_page => config[:results_per_page] || Tweetlr::TWITTER_RESULTS_PER_PAGE,
|
80
|
+
:result_type => config[:result_type] || Tweetlr::TWITTER_RESULTS_TYPE,
|
81
|
+
:api_endpoint_twitter => config[:api_endpoint_twitter] || Tweetlr::API_ENDPOINT_TWITTER
|
82
|
+
}
|
83
|
+
end
|
84
|
+
def self.prepare_tumblr_config(config)
|
85
|
+
{
|
86
|
+
:tumblr_oauth_access_token_key => config[:tumblr_oauth_access_token_key],
|
87
|
+
:tumblr_oauth_access_token_secret => config[:tumblr_oauth_access_token_secret],
|
88
|
+
:tumblr_oauth_api_key => config[:tumblr_oauth_api_key],
|
89
|
+
:tumblr_oauth_api_secret => config[:tumblr_oauth_api_secret],
|
90
|
+
:tumblr_blog_hostname => config[:tumblr_blog_hostname] || config[:group]
|
91
|
+
}
|
92
|
+
end
|
93
|
+
end
|
@@ -1,16 +1,16 @@
|
|
1
1
|
require 'curb'
|
2
2
|
require 'json'
|
3
|
-
require
|
3
|
+
require "#{File.dirname(__FILE__)}/../log_aware"
|
4
4
|
|
5
|
-
module Processors
|
5
|
+
module Tweetlr::Processors
|
6
6
|
#utilities for handling http
|
7
7
|
module Http
|
8
|
-
include LogAware
|
8
|
+
include Tweetlr::LogAware
|
9
9
|
|
10
10
|
USER_AGENT = %{Mozilla/5.0 (compatible; tweetlr; +http://tweetlr.5v3n.com)}
|
11
11
|
|
12
12
|
def self.log
|
13
|
-
LogAware.log #TODO why doesn't the include make the log method accessible?
|
13
|
+
Tweetlr::LogAware.log #TODO why doesn't the include make the log method accessible?
|
14
14
|
end
|
15
15
|
#convenience method for curl http get calls
|
16
16
|
def self.http_get(request)
|
@@ -1,8 +1,9 @@
|
|
1
|
-
|
1
|
+
local_path=File.dirname(__FILE__)
|
2
|
+
require "#{local_path}/http"
|
3
|
+
require "#{local_path}/../log_aware"
|
2
4
|
require 'nokogiri'
|
3
|
-
require 'log_aware'
|
4
5
|
|
5
|
-
module Processors
|
6
|
+
module Tweetlr::Processors
|
6
7
|
#utilities for dealing with photo services
|
7
8
|
module PhotoService
|
8
9
|
|
@@ -10,10 +11,10 @@ module Processors
|
|
10
11
|
LOCATION_STOP_INDICATOR = "\r\n"
|
11
12
|
PIC_REGEXP = /(.*?)\.(jpg|jpeg|png|gif)/i
|
12
13
|
|
13
|
-
include LogAware
|
14
|
+
include Tweetlr::LogAware
|
14
15
|
|
15
16
|
def self.log
|
16
|
-
LogAware.log #TODO why doesn't the include make the log method accessible?
|
17
|
+
Tweetlr::LogAware.log #TODO why doesn't the include make the log method accessible?
|
17
18
|
end
|
18
19
|
|
19
20
|
def self.find_image_url(link, embedly_key=nil)
|
@@ -41,40 +42,24 @@ module Processors
|
|
41
42
|
link =~ PIC_REGEXP
|
42
43
|
end
|
43
44
|
def self.image_url_twimg(link_url)
|
44
|
-
|
45
|
-
link_url = service_url if service_url #if there's no redirect, service_url will be nil
|
46
|
-
response = Processors::Http::http_get(link_url)
|
47
|
-
image_url = parse_html_for '.twimg img', Nokogiri::HTML.parse(response.body_str)
|
48
|
-
return image_url
|
45
|
+
retrieve_image_url_by_css link_url, '.twimg img'
|
49
46
|
end
|
50
47
|
#extract the image of an eyeem.com pic
|
51
48
|
def self.image_url_eyeem(link_url)
|
52
|
-
|
53
|
-
link_url = service_url if service_url #if there's no redirect, service_url will be nil
|
54
|
-
response = Processors::Http::http_get link_url
|
55
|
-
image_url = parse_html_for '.viewport-pic img', Nokogiri::HTML.parse(response.body_str)
|
56
|
-
return image_url
|
49
|
+
retrieve_image_url_by_css link_url, '.viewport-pic img'
|
57
50
|
end
|
58
51
|
#extract the image of a foursquare.com pic
|
59
52
|
def self.image_url_foursqaure(link_url)
|
60
|
-
|
61
|
-
link_url = service_url if service_url #if there's no redirect, service_url will be nil
|
62
|
-
response = Processors::Http::http_get link_url
|
63
|
-
image_url = parse_html_for '.commentPhoto img', Nokogiri::HTML.parse(response.body_str)
|
64
|
-
return image_url
|
53
|
+
retrieve_image_url_by_css link_url, '.commentPhoto img'
|
65
54
|
end
|
66
55
|
#extract the image of a path.com pic
|
67
56
|
def self.image_url_path(link_url)
|
68
|
-
|
69
|
-
link_url = service_url if service_url #if there's no redirect, service_url will be nil
|
70
|
-
response = Processors::Http::http_get link_url
|
71
|
-
image_url = parse_html_for 'img.photo-image', Nokogiri::HTML.parse(response.body_str)
|
72
|
-
return image_url
|
57
|
+
retrieve_image_url_by_css link_url, 'img.photo-image'
|
73
58
|
end
|
74
59
|
|
75
60
|
#find the image's url via embed.ly
|
76
61
|
def self.image_url_embedly(link_url, key)
|
77
|
-
response = Processors::Http::http_get_json "http://api.embed.ly/1/oembed?key=#{key}&url=#{link_url}"
|
62
|
+
response = Tweetlr::Processors::Http::http_get_json "http://api.embed.ly/1/oembed?key=#{key}&url=#{link_url}"
|
78
63
|
log.debug "embedly call: http://api.embed.ly/1/oembed?key=#{key}&url=#{link_url}"
|
79
64
|
if response && response['type'] == 'photo'
|
80
65
|
image_url = response['url']
|
@@ -83,7 +68,7 @@ module Processors
|
|
83
68
|
end
|
84
69
|
#find the image's url for a lockerz link
|
85
70
|
def self.image_url_lockerz(link_url)
|
86
|
-
response = Processors::Http::http_get_json "http://api.plixi.com/api/tpapi.svc/json/metadatafromurl?details=false&url=#{link_url}"
|
71
|
+
response = Tweetlr::Processors::Http::http_get_json "http://api.plixi.com/api/tpapi.svc/json/metadatafromurl?details=false&url=#{link_url}"
|
87
72
|
response["BigImageUrl"] if response
|
88
73
|
end
|
89
74
|
#find the image's url for an twitter shortened link
|
@@ -94,7 +79,7 @@ module Processors
|
|
94
79
|
#find the image's url for an instagram link
|
95
80
|
def self.image_url_instagram(link_url)
|
96
81
|
link_url['instagram.com'] = 'instagr.am' if link_url.index 'instagram.com' #instagram's oembed does not work for .com links
|
97
|
-
response = Processors::Http::http_get_json "http://api.instagram.com/oembed?url=#{link_url}"
|
82
|
+
response = Tweetlr::Processors::Http::http_get_json "http://api.instagram.com/oembed?url=#{link_url}"
|
98
83
|
response['url'] if response
|
99
84
|
end
|
100
85
|
|
@@ -102,7 +87,7 @@ module Processors
|
|
102
87
|
def self.image_url_picplz(link_url)
|
103
88
|
id = extract_id link_url
|
104
89
|
#try short url
|
105
|
-
response = Processors::Http::http_get_json "http://picplz.com/api/v2/pic.json?shorturl_ids=#{id}"
|
90
|
+
response = Tweetlr::Processors::Http::http_get_json "http://picplz.com/api/v2/pic.json?shorturl_ids=#{id}"
|
106
91
|
#if short url fails, try long url
|
107
92
|
#response = HTTParty.get "http://picplz.com/api/v2/pic.json?longurl_ids=#{id}"
|
108
93
|
#extract url
|
@@ -118,7 +103,7 @@ module Processors
|
|
118
103
|
end
|
119
104
|
#find the image'S url for a yfrog link
|
120
105
|
def self.image_url_yfrog(link_url)
|
121
|
-
response = Processors::Http::http_get_json("http://www.yfrog.com/api/oembed?url=#{link_url}")
|
106
|
+
response = Tweetlr::Processors::Http::http_get_json("http://www.yfrog.com/api/oembed?url=#{link_url}")
|
122
107
|
response['url'] if response
|
123
108
|
end
|
124
109
|
#find the image's url for a img.ly link
|
@@ -169,5 +154,12 @@ module Processors
|
|
169
154
|
end
|
170
155
|
image_url
|
171
156
|
end
|
157
|
+
def self.retrieve_image_url_by_css link_url, css_path
|
158
|
+
service_url = link_url_redirect link_url #follow possible redirects
|
159
|
+
link_url = service_url if service_url #if there's no redirect, service_url will be nil
|
160
|
+
response = Tweetlr::Processors::Http::http_get link_url
|
161
|
+
image_url = parse_html_for css_path, Nokogiri::HTML.parse(response.body_str)
|
162
|
+
return image_url
|
163
|
+
end
|
172
164
|
end
|
173
165
|
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
require "#{File.dirname(__FILE__)}/../log_aware"
|
2
|
+
require 'oauth'
|
3
|
+
|
4
|
+
module Tweetlr::Processors
|
5
|
+
#utilities for handling tumblr
|
6
|
+
module Tumblr
|
7
|
+
GENERATOR = %{tweetlr - http://tweetlr.5v3n.com}
|
8
|
+
API_ENDPOINT_TUMBLR = 'http://www.tumblr.com'
|
9
|
+
include Tweetlr::LogAware
|
10
|
+
def self.log
|
11
|
+
Tweetlr::LogAware.log #TODO why doesn't the include make the log method accessible?
|
12
|
+
end
|
13
|
+
#post a tumblr photo entry.
|
14
|
+
#
|
15
|
+
#required arguments are :tumblr_blog_hostname, :tumblr_oauth_api_key, :tumblr_oauth_api_secret, :tumblr_oauth_access_token_key, :tumblr_oauth_access_token_secret, :source, :caption, :state
|
16
|
+
#
|
17
|
+
#optional arguments: :tags, :type (default: 'photo')
|
18
|
+
#
|
19
|
+
def self.post(options={})
|
20
|
+
base_hostname = options[:tumblr_blog_hostname] || options[:group]
|
21
|
+
tumblr_oauth_api_key= options[:tumblr_oauth_api_key]
|
22
|
+
tumblr_oauth_api_secret= options[:tumblr_oauth_api_secret]
|
23
|
+
access_token_key = options[:tumblr_oauth_access_token_key]
|
24
|
+
access_token_secret = options[:tumblr_oauth_access_token_secret]
|
25
|
+
type = options[:type] || 'photo'
|
26
|
+
tags = options[:tags] || ''
|
27
|
+
post_response = nil
|
28
|
+
|
29
|
+
if base_hostname && access_token_key && access_token_secret
|
30
|
+
|
31
|
+
consumer = OAuth::Consumer.new(tumblr_oauth_api_key, tumblr_oauth_api_secret,
|
32
|
+
{ :site => 'http://www.tumblr.com',
|
33
|
+
:request_token_path => '/oauth/request_token',
|
34
|
+
:authorize_path => '/oauth/authorize',
|
35
|
+
:access_token_path => '/oauth/access_token',
|
36
|
+
:http_method => :post } )
|
37
|
+
|
38
|
+
access_token = OAuth::AccessToken.new(consumer, access_token_key, access_token_secret)
|
39
|
+
|
40
|
+
post_response = access_token.post(
|
41
|
+
"http://api.tumblr.com/v2/blog/#{base_hostname}/post", {
|
42
|
+
:type => type,
|
43
|
+
:source => options[:source],
|
44
|
+
:caption => options[:caption],
|
45
|
+
:date => options[:date],
|
46
|
+
:tags => tags,
|
47
|
+
:state => options[:state],
|
48
|
+
:generator => GENERATOR
|
49
|
+
}
|
50
|
+
)
|
51
|
+
end
|
52
|
+
post_response
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
@@ -1,12 +1,13 @@
|
|
1
|
-
|
2
|
-
require
|
1
|
+
local_path=File.dirname(__FILE__)
|
2
|
+
require "#{local_path}/http"
|
3
|
+
require "#{local_path}/../log_aware"
|
3
4
|
|
4
|
-
module Processors
|
5
|
+
module Tweetlr::Processors
|
5
6
|
#utilities for dealing with twitter
|
6
7
|
module Twitter
|
7
|
-
include LogAware
|
8
|
+
include Tweetlr::LogAware
|
8
9
|
def self.log
|
9
|
-
LogAware.log #TODO why doesn't the include make the log method accessible?
|
10
|
+
Tweetlr::LogAware.log #TODO why doesn't the include make the log method accessible?
|
10
11
|
end
|
11
12
|
|
12
13
|
#checks if the message is a retweet
|
@@ -25,7 +26,7 @@ module Processors
|
|
25
26
|
#fire a new search
|
26
27
|
def self.search(config)
|
27
28
|
search_call = "#{config[:api_endpoint_twitter]}?ors=#{config[:search_term]}&result_type=#{config[:result_type]}&rpp=#{config[:results_per_page]}"
|
28
|
-
Processors::Http::http_get_json search_call
|
29
|
+
Tweetlr::Processors::Http::http_get_json search_call
|
29
30
|
end
|
30
31
|
|
31
32
|
# lazy update - search for a term or refresh the search if a response is available already
|
@@ -34,7 +35,7 @@ module Processors
|
|
34
35
|
if config
|
35
36
|
search_url = "#{config[:api_endpoint_twitter]}?since_id=#{config[:since_id]}&ors=#{config[:search_term]}&result_type=#{config[:result_type]}&rpp=#{config[:results_per_page]}"
|
36
37
|
log.info "lazy search using '#{search_url}'"
|
37
|
-
response = Processors::Http::http_get_json search_url
|
38
|
+
response = Tweetlr::Processors::Http::http_get_json search_url
|
38
39
|
else
|
39
40
|
log.error "#{self}.lazy_search: no config given!"
|
40
41
|
end
|
data/lib/tweetlr.rb
CHANGED
@@ -1,89 +1,12 @@
|
|
1
|
-
|
2
|
-
require '
|
3
|
-
require 'processors/http'
|
4
|
-
require 'processors/photo_service'
|
5
|
-
require 'processors/tumblr'
|
6
|
-
require 'combinators/twitter_tumblr'
|
7
|
-
require 'log_aware'
|
8
|
-
require 'uri'
|
1
|
+
require 'tweetlr/log_aware'
|
2
|
+
require 'tweetlr/core'
|
9
3
|
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
4
|
+
module Tweetlr
|
5
|
+
VERSION = '0.1.18'
|
6
|
+
|
14
7
|
API_ENDPOINT_TWITTER = 'http://search.twitter.com/search.json'
|
15
8
|
API_ENDPOINT_TUMBLR = 'http://www.tumblr.com'
|
16
9
|
TWITTER_RESULTS_PER_PAGE = 100
|
17
10
|
TWITTER_RESULTS_TYPE = 'recent'
|
18
11
|
UPDATE_PERIOD = 600 #10 minutes
|
19
|
-
|
20
|
-
include LogAware
|
21
|
-
def self.log
|
22
|
-
LogAware.log #TODO why doesn't the include make the log method accessible?
|
23
|
-
end
|
24
|
-
|
25
|
-
def initialize(args)
|
26
|
-
log = Logger.new(STDOUT)
|
27
|
-
if (Logger::DEBUG..Logger::UNKNOWN).to_a.index(args[:loglevel])
|
28
|
-
log.level = args[:loglevel]
|
29
|
-
else
|
30
|
-
log.level = Logger::INFO
|
31
|
-
end
|
32
|
-
log.debug "log level set to #{log.level}"
|
33
|
-
LogAware.log=log
|
34
|
-
|
35
|
-
@email = args[:tumblr_email]
|
36
|
-
@password = args[:tumblr_password]
|
37
|
-
@cookie = args[:cookie]
|
38
|
-
@api_endpoint_twitter = args[:api_endpoint_twitter] || API_ENDPOINT_TWITTER
|
39
|
-
@api_endpoint_tumblr = args[:api_endpoint_tumblr] || API_ENDPOINT_TUMBLR
|
40
|
-
@whitelist = args[:whitelist]
|
41
|
-
@shouts = args[:shouts]
|
42
|
-
@update_period = args[:update_period] || UPDATE_PERIOD
|
43
|
-
@whitelist.each {|entry| entry.downcase!} if @whitelist
|
44
|
-
log.info "Tweetlr #{Tweetlr::VERSION} initialized. Ready to roll."
|
45
|
-
end
|
46
|
-
|
47
|
-
def self.crawl(config)
|
48
|
-
log.debug "#{self}.crawl() using config: #{config.inspect}"
|
49
|
-
twitter_config = {
|
50
|
-
:since_id => config[:since_id] || config[:start_at_tweet_id],
|
51
|
-
:search_term => config[:terms] || config[:search_term] ,
|
52
|
-
:results_per_page => config[:results_per_page] || TWITTER_RESULTS_PER_PAGE,
|
53
|
-
:result_type => config[:result_type] || TWITTER_RESULTS_TYPE,
|
54
|
-
:api_endpoint_twitter => config[:api_endpoint_twitter] || API_ENDPOINT_TWITTER
|
55
|
-
}
|
56
|
-
tumblr_config = { :tumblr_oauth_access_token_key => config[:tumblr_oauth_access_token_key],
|
57
|
-
:tumblr_oauth_access_token_secret => config[:tumblr_oauth_access_token_secret],
|
58
|
-
:tumblr_oauth_api_key => config[:tumblr_oauth_api_key],
|
59
|
-
:tumblr_oauth_api_secret => config[:tumblr_oauth_api_secret],
|
60
|
-
:tumblr_blog_hostname => config[:tumblr_blog_hostname] || config[:group]
|
61
|
-
}
|
62
|
-
|
63
|
-
twitter_config[:search_term] = URI::escape(twitter_config[:search_term]) if twitter_config[:search_term]
|
64
|
-
log.info "starting tweetlr crawl..."
|
65
|
-
response = {}
|
66
|
-
response = Processors::Twitter::lazy_search(twitter_config)
|
67
|
-
if response
|
68
|
-
tweets = response['results']
|
69
|
-
if tweets
|
70
|
-
tweets.each do |tweet|
|
71
|
-
tumblr_post = Combinators::TwitterTumblr::generate_photo_post_from_tweet(tweet, {:whitelist => config[:whitelist], :embedly_key => config[:embedly_key], :group => config[:group]})
|
72
|
-
if tumblr_post.nil? || tumblr_post[:source].nil?
|
73
|
-
log.warn "could not get image source: tweet: #{tweet} --- tumblr post: #{tumblr_post.inspect}"
|
74
|
-
else
|
75
|
-
log.debug "tumblr post: #{tumblr_post}"
|
76
|
-
res = Processors::Tumblr.post tumblr_post.merge(tumblr_config)
|
77
|
-
log.warn "tumblr response: #{res.header} #{res.body}" unless res.code == "201"
|
78
|
-
end
|
79
|
-
end
|
80
|
-
# store the highest tweet id
|
81
|
-
config[:since_id] = response['max_id']
|
82
|
-
end
|
83
|
-
else
|
84
|
-
log.error "twitter search returned no response. hail the failwhale!"
|
85
|
-
end
|
86
|
-
log.info "finished tweetlr crawl."
|
87
|
-
return config
|
88
|
-
end
|
89
12
|
end
|
@@ -1,6 +1,6 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
|
-
describe Combinators::TwitterTumblr do
|
3
|
+
describe Tweetlr::Combinators::TwitterTumblr do
|
4
4
|
before :each do
|
5
5
|
@first_link = "http://url.com"
|
6
6
|
@second_link = "http://instagr.am/p/DzCWn/"
|
@@ -38,60 +38,60 @@ describe Combinators::TwitterTumblr do
|
|
38
38
|
it "extracting their corresponding links" do
|
39
39
|
@tweets.each do |key,value|
|
40
40
|
send "stub_#{key}"
|
41
|
-
url = Combinators::TwitterTumblr.extract_image_url value
|
41
|
+
url = Tweetlr::Combinators::TwitterTumblr.extract_image_url value
|
42
42
|
url.should be, "service #{key} not working!"
|
43
43
|
check_pic_url_extraction key if [:instagram,:picplz,:yfrog,:imgly,:not_listed].index key
|
44
44
|
end
|
45
45
|
end
|
46
46
|
it "using the first image link found in a tweet with multiple links" do
|
47
47
|
stub_instagram
|
48
|
-
link = Combinators::TwitterTumblr.extract_image_url @twitter_response
|
48
|
+
link = Tweetlr::Combinators::TwitterTumblr.extract_image_url @twitter_response
|
49
49
|
link.should == 'http://distillery.s3.amazonaws.com/media/2011/05/02/d25df62b9cec4a138967a3ad027d055b_7.jpg'
|
50
50
|
end
|
51
51
|
it "not returning links that do not belong to images" do
|
52
52
|
stub_no_image_link
|
53
|
-
link = Combinators::TwitterTumblr.extract_image_url @twitter_response
|
53
|
+
link = Tweetlr::Combinators::TwitterTumblr.extract_image_url @twitter_response
|
54
54
|
link.should_not be
|
55
55
|
end
|
56
56
|
end
|
57
57
|
context "given a user whitelist" do
|
58
58
|
it "should mark whitelist users' tweets as published" do
|
59
59
|
stub_instagram
|
60
|
-
post = Combinators::TwitterTumblr::generate_photo_post_from_tweet @twitter_response, :whitelist => @whitelist
|
60
|
+
post = Tweetlr::Combinators::TwitterTumblr::generate_photo_post_from_tweet @twitter_response, :whitelist => @whitelist
|
61
61
|
post[:state].should == 'published'
|
62
62
|
end
|
63
63
|
it "should mark non whitelist users' tweets as drafts" do
|
64
64
|
stub_instagram
|
65
|
-
post = Combinators::TwitterTumblr::generate_photo_post_from_tweet @non_whitelist_tweet, :whitelist => @whitelist
|
65
|
+
post = Tweetlr::Combinators::TwitterTumblr::generate_photo_post_from_tweet @non_whitelist_tweet, :whitelist => @whitelist
|
66
66
|
post[:state].should == 'draft'
|
67
67
|
end
|
68
68
|
end
|
69
69
|
context "without a user whitelist (whitelist nil or empty)" do
|
70
70
|
it "should mark every users' posts as published" do
|
71
71
|
stub_instagram
|
72
|
-
post = Combinators::TwitterTumblr::generate_photo_post_from_tweet @twitter_response, :whitelist => nil
|
72
|
+
post = Tweetlr::Combinators::TwitterTumblr::generate_photo_post_from_tweet @twitter_response, :whitelist => nil
|
73
73
|
post[:state].should == 'published'
|
74
74
|
stub_instagram
|
75
|
-
post = Combinators::TwitterTumblr::generate_photo_post_from_tweet @non_whitelist_tweet, :whitelist => nil
|
75
|
+
post = Tweetlr::Combinators::TwitterTumblr::generate_photo_post_from_tweet @non_whitelist_tweet, :whitelist => nil
|
76
76
|
post[:state].should == 'published'
|
77
|
-
post = Combinators::TwitterTumblr::generate_photo_post_from_tweet @twitter_response, :whitelist => ""
|
77
|
+
post = Tweetlr::Combinators::TwitterTumblr::generate_photo_post_from_tweet @twitter_response, :whitelist => ""
|
78
78
|
post[:state].should == 'published'
|
79
79
|
stub_instagram
|
80
|
-
post = Combinators::TwitterTumblr::generate_photo_post_from_tweet @non_whitelist_tweet, :whitelist => ""
|
80
|
+
post = Tweetlr::Combinators::TwitterTumblr::generate_photo_post_from_tweet @non_whitelist_tweet, :whitelist => ""
|
81
81
|
post[:state].should == 'published'
|
82
82
|
end
|
83
83
|
end
|
84
84
|
it "should not use retweets which would produce double blog posts" do
|
85
|
-
post = Combinators::TwitterTumblr::generate_photo_post_from_tweet @retweet, :whitelist => @whitelist
|
85
|
+
post = Tweetlr::Combinators::TwitterTumblr::generate_photo_post_from_tweet @retweet, :whitelist => @whitelist
|
86
86
|
post.should_not be
|
87
87
|
end
|
88
88
|
context "should not use new style retweets which would produce double blog posts" do
|
89
89
|
it "for quotes in context" do
|
90
|
-
post = Combinators::TwitterTumblr::generate_photo_post_from_tweet @new_style_retweet, :whitelist => @whitelist
|
90
|
+
post = Tweetlr::Combinators::TwitterTumblr::generate_photo_post_from_tweet @new_style_retweet, :whitelist => @whitelist
|
91
91
|
post.should_not be
|
92
92
|
end
|
93
93
|
it "for quotes without further text addition" do
|
94
|
-
post = Combinators::TwitterTumblr::generate_photo_post_from_tweet @new_style_retweet_no_addition, :whitelist => @whitelist
|
94
|
+
post = Tweetlr::Combinators::TwitterTumblr::generate_photo_post_from_tweet @new_style_retweet_no_addition, :whitelist => @whitelist
|
95
95
|
post.should_not be
|
96
96
|
end
|
97
97
|
end
|
@@ -99,13 +99,13 @@ describe Combinators::TwitterTumblr do
|
|
99
99
|
it "uses a given blog via group option to post to" do
|
100
100
|
stub_instagram
|
101
101
|
desired_group = 'mygroup.tumblr.com'
|
102
|
-
tumblr_post = Combinators::TwitterTumblr.generate_photo_post_from_tweet @twitter_response, {:whitelist => @whitelist, :group => desired_group}
|
102
|
+
tumblr_post = Tweetlr::Combinators::TwitterTumblr.generate_photo_post_from_tweet @twitter_response, {:whitelist => @whitelist, :group => desired_group}
|
103
103
|
tumblr_post[:tumblr_blog_hostname].should eq desired_group
|
104
104
|
end
|
105
105
|
it "uses a given blog via tumblr_blog_hostname to post to" do
|
106
106
|
stub_instagram
|
107
107
|
desired_group = 'mygroup.tumblr.com'
|
108
|
-
tumblr_post = Combinators::TwitterTumblr.generate_photo_post_from_tweet @twitter_response, {:whitelist => @whitelist, :tumblr_blog_hostname => desired_group}
|
108
|
+
tumblr_post = Tweetlr::Combinators::TwitterTumblr.generate_photo_post_from_tweet @twitter_response, {:whitelist => @whitelist, :tumblr_blog_hostname => desired_group}
|
109
109
|
tumblr_post[:tumblr_blog_hostname].should eq desired_group
|
110
110
|
end
|
111
111
|
end
|
data/spec/core_spec.rb
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Tweetlr::Core do
|
4
|
+
|
5
|
+
config_file = File.join( Dir.pwd, 'config', 'tweetlr.yml')
|
6
|
+
config = YAML.load_file(config_file)
|
7
|
+
TIMESTAMP = config['twitter_timestamp']
|
8
|
+
WHITELIST = config['whitelist']
|
9
|
+
|
10
|
+
before :each do
|
11
|
+
@first_link = "http://url.com"
|
12
|
+
@second_link = "http://instagr.am/p/DzCWn/"
|
13
|
+
@third_link = "https://imageurl.com"
|
14
|
+
@twitter_response = {"from_user_id_str"=>"1915714", "profile_image_url"=>"http://a0.twimg.com/profile_images/386000279/2_normal.jpg", "created_at"=>"Sun, 17 Apr 2011 16:48:42 +0000", "from_user"=>"whitey_Mc_whIteLIst", "id_str"=>"59659561224765440", "metadata"=>{"result_type"=>"recent"}, "to_user_id"=>nil, "text"=>"Rigaer #wirsounterwegs #{@first_link} @ Augenarzt Dr. Lierow #{@second_link} #{@third_link}", "id"=>59659561224765440, "from_user_id"=>1915714, "geo"=>{"type"=>"Point", "coordinates"=>[52.5182, 13.454]}, "iso_language_code"=>"de", "place"=>{"id"=>"3078869807f9dd36", "type"=>"city", "full_name"=>"Berlin, Berlin"}, "to_user_id_str"=>nil, "source"=>"&lt;a href=&quot;http://instagr.am&quot; rel=&quot;nofollow&quot;&gt;instagram&lt;/a&gt;"}
|
15
|
+
@tweetlr_config = {
|
16
|
+
:since_id => 0,
|
17
|
+
:results_per_page => 3,
|
18
|
+
:search_term => 'coffeediary',
|
19
|
+
:result_type => 'recent',
|
20
|
+
:api_endpoint_twitter => Tweetlr::API_ENDPOINT_TWITTER,
|
21
|
+
:loglevel=>1,
|
22
|
+
:tumblr_oauth_access_token_key => config['tumblr_oauth_access_token_key'],
|
23
|
+
:tumblr_oauth_access_token_secret => config['tumblr_oauth_access_token_secret'],
|
24
|
+
:tumblr_oauth_api_secret => config['tumblr_oauth_api_secret'],
|
25
|
+
:tumblr_oauth_api_key => config['tumblr_oauth_api_key'],
|
26
|
+
:tumblr_blog_hostname => config['tumblr_blog_hostname']
|
27
|
+
}
|
28
|
+
stub_tumblr
|
29
|
+
stub_twitter
|
30
|
+
stub_oauth
|
31
|
+
end
|
32
|
+
it "crawls twitter and posts to tumblr" do
|
33
|
+
since_id_before = @tweetlr_config[:since_id]
|
34
|
+
result = Tweetlr::Core.crawl(@tweetlr_config)
|
35
|
+
since_id_before.should_not == result[:since_id]
|
36
|
+
end
|
37
|
+
it "copes with legacy config that use tumblr v1 api (basic auth)" do
|
38
|
+
legacy_config = {
|
39
|
+
:id=>16,
|
40
|
+
:search_term=>"booga",
|
41
|
+
:tumblr_email=>"wooga@booga.de",
|
42
|
+
:tumblr_password=>"boogawooga",
|
43
|
+
:since_id=>"246543935663661057",
|
44
|
+
:results_per_page=>3,
|
45
|
+
:result_type=>nil,
|
46
|
+
:api_endpoint_twitter=>nil,
|
47
|
+
:api_endpoint_tumblr=>nil,
|
48
|
+
:update_period=>900,
|
49
|
+
:shouts=>nil,
|
50
|
+
:loglevel=>1,
|
51
|
+
:whitelist=>["user1", "user2"],
|
52
|
+
:last_crawl=>"Fri, 14 Sep 2012 09:43:10 UTC +00:00",
|
53
|
+
:active=>true,
|
54
|
+
:tumblr_oauth_access_token_key=>nil,
|
55
|
+
:tumblr_oauth_access_token_secret=>nil}
|
56
|
+
since_id_before = legacy_config[:since_id]
|
57
|
+
result = Tweetlr::Core.crawl(legacy_config)
|
58
|
+
since_id_before.should_not == result[:since_id]
|
59
|
+
end
|
60
|
+
end
|
@@ -1,6 +1,6 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
|
-
describe Processors::PhotoService do
|
3
|
+
describe Tweetlr::Processors::PhotoService do
|
4
4
|
before :each do
|
5
5
|
@links = {
|
6
6
|
:twimg => 'http://twitter.com/KSilbereisen/status/228035435237097472',
|
@@ -20,44 +20,44 @@ describe Processors::PhotoService do
|
|
20
20
|
end
|
21
21
|
it "extracts images from eye em" do
|
22
22
|
stub_eyeem
|
23
|
-
link = Processors::PhotoService::find_image_url @links[:eyeem]
|
23
|
+
link = Tweetlr::Processors::PhotoService::find_image_url @links[:eyeem]
|
24
24
|
link.should be
|
25
25
|
link.should == "http://www.eyeem.com/thumb/h/1024/e35db836c5d3f02498ef60fc3d53837fbe621561-1334126483"
|
26
26
|
end
|
27
27
|
it "doesnt find images in embedly results that are not explicitly marked as 'Photo' via the response's 'thumbnail_url' attribute" do
|
28
28
|
stub_embedly_no_photo
|
29
|
-
link = Processors::PhotoService::find_image_url 'http://makersand.co/'
|
29
|
+
link = Tweetlr::Processors::PhotoService::find_image_url 'http://makersand.co/'
|
30
30
|
link.should be_nil
|
31
31
|
end
|
32
32
|
it "does find an image for foursquare that is not he profile pic" do
|
33
33
|
stub_foursquare
|
34
|
-
link = Processors::PhotoService::find_image_url @links[:foursquare]
|
34
|
+
link = Tweetlr::Processors::PhotoService::find_image_url @links[:foursquare]
|
35
35
|
link.index('userpix_thumbs').should_not be
|
36
36
|
end
|
37
37
|
it "should find a picture's url from the supported services" do
|
38
38
|
@links.each do |service,link|
|
39
39
|
send "stub_#{service}"
|
40
|
-
url = Processors::PhotoService::find_image_url link
|
40
|
+
url = Tweetlr::Processors::PhotoService::find_image_url link
|
41
41
|
url.should be, "service #{service} not working!"
|
42
42
|
check_pic_url_extraction service if [:twimg, :instagram,:picplz,:yfrog,:imgly,:foursqaure,:not_listed].index service
|
43
43
|
end
|
44
44
|
end
|
45
45
|
it "finds path images for redirected moments as well" do
|
46
46
|
stub_path_redirected
|
47
|
-
url = Processors::PhotoService::find_image_url @links[:path]
|
47
|
+
url = Tweetlr::Processors::PhotoService::find_image_url @links[:path]
|
48
48
|
url.should == 'https://s3-us-west-1.amazonaws.com/images.path.com/photos2/f90fd831-43c3-48fd-84cb-5c3bae52957a/2x.jpg'
|
49
49
|
end
|
50
50
|
it "should not crash if embedly fallback won't find a link" do
|
51
51
|
stub_bad_request
|
52
|
-
url = Processors::PhotoService::find_image_url "http://mopskopf"
|
52
|
+
url = Tweetlr::Processors::PhotoService::find_image_url "http://mopskopf"
|
53
53
|
end
|
54
54
|
it "should not crash with an encoding error when response is non-us-ascii" do
|
55
55
|
stub_utf8_response
|
56
|
-
url = Processors::PhotoService::find_image_url "http://api.instagram.com/oembed?url=http://instagr.am/p/Gx%E2%80%946/"
|
56
|
+
url = Tweetlr::Processors::PhotoService::find_image_url "http://api.instagram.com/oembed?url=http://instagr.am/p/Gx%E2%80%946/"
|
57
57
|
end
|
58
58
|
it "follows redirects" do
|
59
59
|
stub_imgly
|
60
|
-
link = Processors::PhotoService::link_url_redirect 'im mocked anyways'
|
60
|
+
link = Tweetlr::Processors::PhotoService::link_url_redirect 'im mocked anyways'
|
61
61
|
link.should == 'http://s3.amazonaws.com/imgly_production/899582/full.jpg'
|
62
62
|
end
|
63
63
|
end
|
@@ -1,6 +1,6 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
|
-
describe Processors::Tumblr do
|
3
|
+
describe Tweetlr::Processors::Tumblr do
|
4
4
|
before :all do
|
5
5
|
config_file = File.join( Dir.pwd, 'config', 'tweetlr.yml')
|
6
6
|
config = YAML.load_file(config_file)
|
@@ -21,10 +21,10 @@ describe Processors::Tumblr do
|
|
21
21
|
it "posts to tumblr" do
|
22
22
|
stub_tumblr
|
23
23
|
stub_oauth
|
24
|
-
tumblr_post = Combinators::TwitterTumblr::generate_photo_post_from_tweet @twitter_response, @tweetlr_config
|
24
|
+
tumblr_post = Tweetlr::Combinators::TwitterTumblr::generate_photo_post_from_tweet @twitter_response, @tweetlr_config
|
25
25
|
tumblr_post[:date] = Time.now.to_s
|
26
26
|
tumblr_post[:source] = 'http://distilleryimage6.instagram.com/db72627effde11e1b3f322000a1e8899_7.jpg'
|
27
|
-
response = Processors::Tumblr::post @tweetlr_config.merge(tumblr_post)
|
27
|
+
response = Tweetlr::Processors::Tumblr::post @tweetlr_config.merge(tumblr_post)
|
28
28
|
response.should be
|
29
29
|
response.code.should == "201"
|
30
30
|
end
|
@@ -1,6 +1,6 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
|
-
describe Processors::Twitter do
|
3
|
+
describe Tweetlr::Processors::Twitter do
|
4
4
|
before :each do
|
5
5
|
@first_link = "http://url.com"
|
6
6
|
@second_link = "http://instagr.am/p/DzCWn/"
|
@@ -16,15 +16,15 @@ describe Processors::Twitter do
|
|
16
16
|
end
|
17
17
|
it "should search twitter for a given term" do
|
18
18
|
stub_twitter
|
19
|
-
response = Processors::Twitter::lazy_search @twitter_config
|
19
|
+
response = Tweetlr::Processors::Twitter::lazy_search @twitter_config
|
20
20
|
tweets = response['results']
|
21
21
|
tweets.should be
|
22
22
|
tweets.should_not be_empty
|
23
23
|
end
|
24
24
|
it "extracts links" do
|
25
|
-
links = Processors::Twitter::extract_links ''
|
25
|
+
links = Tweetlr::Processors::Twitter::extract_links ''
|
26
26
|
links.should be_nil
|
27
|
-
links = Processors::Twitter::extract_links @twitter_response
|
27
|
+
links = Tweetlr::Processors::Twitter::extract_links @twitter_response
|
28
28
|
links[0].should == @first_link
|
29
29
|
links[1].should == @second_link
|
30
30
|
links[2].should == @third_link
|
data/spec/spec_helper.rb
CHANGED
@@ -1,15 +1,17 @@
|
|
1
1
|
#encoding: utf-8
|
2
2
|
require "bundler"
|
3
3
|
require "logger"
|
4
|
+
require "#{File.dirname(__FILE__)}/../lib/tweetlr"
|
5
|
+
|
4
6
|
Bundler.require :default, :development, :test
|
5
7
|
|
6
8
|
logger = Logger.new(STDOUT)
|
7
9
|
logger.level = Logger::FATAL
|
8
|
-
LogAware.log = logger
|
10
|
+
Tweetlr::LogAware.log = logger
|
9
11
|
|
10
12
|
def check_pic_url_extraction(service)
|
11
|
-
image_url = Processors::PhotoService::send "image_url_#{service}".to_sym, @links[service]
|
12
|
-
image_url.should =~ Processors::PhotoService::PIC_REGEXP
|
13
|
+
image_url = Tweetlr::Processors::PhotoService::send "image_url_#{service}".to_sym, @links[service]
|
14
|
+
image_url.should =~ Tweetlr::Processors::PhotoService::PIC_REGEXP
|
13
15
|
end
|
14
16
|
|
15
17
|
def stub_oauth
|
data/tweetlr.gemspec
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tweetlr
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.17
|
4
|
+
version: 0.1.18
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-09-
|
12
|
+
date: 2012-09-29 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: daemons
|
16
|
-
requirement: &
|
16
|
+
requirement: &2160420980 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '0'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *2160420980
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: eventmachine
|
27
|
-
requirement: &
|
27
|
+
requirement: &2160420140 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ! '>='
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: '0'
|
33
33
|
type: :runtime
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *2160420140
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: curb
|
38
|
-
requirement: &
|
38
|
+
requirement: &2160419680 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ! '>='
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: '0'
|
44
44
|
type: :runtime
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *2160419680
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: json
|
49
|
-
requirement: &
|
49
|
+
requirement: &2160419220 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ! '>='
|
@@ -54,10 +54,10 @@ dependencies:
|
|
54
54
|
version: '0'
|
55
55
|
type: :runtime
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *2160419220
|
58
58
|
- !ruby/object:Gem::Dependency
|
59
59
|
name: nokogiri
|
60
|
-
requirement: &
|
60
|
+
requirement: &2160418520 !ruby/object:Gem::Requirement
|
61
61
|
none: false
|
62
62
|
requirements:
|
63
63
|
- - ! '>='
|
@@ -65,10 +65,10 @@ dependencies:
|
|
65
65
|
version: '0'
|
66
66
|
type: :runtime
|
67
67
|
prerelease: false
|
68
|
-
version_requirements: *
|
68
|
+
version_requirements: *2160418520
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: oauth
|
71
|
-
requirement: &
|
71
|
+
requirement: &2160417840 !ruby/object:Gem::Requirement
|
72
72
|
none: false
|
73
73
|
requirements:
|
74
74
|
- - ! '>='
|
@@ -76,10 +76,10 @@ dependencies:
|
|
76
76
|
version: '0'
|
77
77
|
type: :runtime
|
78
78
|
prerelease: false
|
79
|
-
version_requirements: *
|
79
|
+
version_requirements: *2160417840
|
80
80
|
- !ruby/object:Gem::Dependency
|
81
81
|
name: rake
|
82
|
-
requirement: &
|
82
|
+
requirement: &2160417320 !ruby/object:Gem::Requirement
|
83
83
|
none: false
|
84
84
|
requirements:
|
85
85
|
- - ! '>='
|
@@ -87,10 +87,10 @@ dependencies:
|
|
87
87
|
version: '0'
|
88
88
|
type: :development
|
89
89
|
prerelease: false
|
90
|
-
version_requirements: *
|
90
|
+
version_requirements: *2160417320
|
91
91
|
- !ruby/object:Gem::Dependency
|
92
92
|
name: rspec
|
93
|
-
requirement: &
|
93
|
+
requirement: &2160416900 !ruby/object:Gem::Requirement
|
94
94
|
none: false
|
95
95
|
requirements:
|
96
96
|
- - ! '>='
|
@@ -98,10 +98,10 @@ dependencies:
|
|
98
98
|
version: '0'
|
99
99
|
type: :development
|
100
100
|
prerelease: false
|
101
|
-
version_requirements: *
|
101
|
+
version_requirements: *2160416900
|
102
102
|
- !ruby/object:Gem::Dependency
|
103
103
|
name: rdoc
|
104
|
-
requirement: &
|
104
|
+
requirement: &2160416480 !ruby/object:Gem::Requirement
|
105
105
|
none: false
|
106
106
|
requirements:
|
107
107
|
- - ! '>='
|
@@ -109,7 +109,7 @@ dependencies:
|
|
109
109
|
version: '0'
|
110
110
|
type: :development
|
111
111
|
prerelease: false
|
112
|
-
version_requirements: *
|
112
|
+
version_requirements: *2160416480
|
113
113
|
description: tweetlr crawls twitter for a given term, extracts photos out of the collected
|
114
114
|
tweets' short urls and posts the images to tumblr.
|
115
115
|
email: sven.kraeuter@gmail.com
|
@@ -130,19 +130,20 @@ files:
|
|
130
130
|
- Rakefile
|
131
131
|
- bin/tweetlr
|
132
132
|
- config/tweetlr.yml
|
133
|
-
- lib/combinators/twitter_tumblr.rb
|
134
|
-
- lib/log_aware.rb
|
135
|
-
- lib/processors/http.rb
|
136
|
-
- lib/processors/photo_service.rb
|
137
|
-
- lib/processors/tumblr.rb
|
138
|
-
- lib/processors/twitter.rb
|
139
133
|
- lib/tweetlr.rb
|
134
|
+
- lib/tweetlr/combinators/twitter_tumblr.rb
|
135
|
+
- lib/tweetlr/core.rb
|
136
|
+
- lib/tweetlr/log_aware.rb
|
137
|
+
- lib/tweetlr/processors/http.rb
|
138
|
+
- lib/tweetlr/processors/photo_service.rb
|
139
|
+
- lib/tweetlr/processors/tumblr.rb
|
140
|
+
- lib/tweetlr/processors/twitter.rb
|
140
141
|
- spec/combinators/twitter_tumblr_combinator_spec.rb
|
142
|
+
- spec/core_spec.rb
|
141
143
|
- spec/processors/photo_services_processor_spec.rb
|
142
144
|
- spec/processors/tumblr_processor_spec.rb
|
143
145
|
- spec/processors/twitter_processor_spec.rb
|
144
146
|
- spec/spec_helper.rb
|
145
|
-
- spec/tweetlr_spec.rb
|
146
147
|
- tweetlr.gemspec
|
147
148
|
homepage: http://tweetlr.5v3n.com
|
148
149
|
licenses: []
|
@@ -171,8 +172,8 @@ summary: tweetlr crawls twitter for a given term, extracts photos out of the col
|
|
171
172
|
tweets' short urls and posts the images to tumblr.
|
172
173
|
test_files:
|
173
174
|
- spec/combinators/twitter_tumblr_combinator_spec.rb
|
175
|
+
- spec/core_spec.rb
|
174
176
|
- spec/processors/photo_services_processor_spec.rb
|
175
177
|
- spec/processors/tumblr_processor_spec.rb
|
176
178
|
- spec/processors/twitter_processor_spec.rb
|
177
179
|
- spec/spec_helper.rb
|
178
|
-
- spec/tweetlr_spec.rb
|
data/lib/processors/tumblr.rb
DELETED
@@ -1,51 +0,0 @@
|
|
1
|
-
require 'log_aware'
|
2
|
-
require 'oauth'
|
3
|
-
|
4
|
-
module Processors
|
5
|
-
#utilities for handling tumblr
|
6
|
-
module Tumblr
|
7
|
-
GENERATOR = %{tweetlr - http://tweetlr.5v3n.com}
|
8
|
-
API_ENDPOINT_TUMBLR = 'http://www.tumblr.com'
|
9
|
-
include LogAware
|
10
|
-
def self.log
|
11
|
-
LogAware.log #TODO why doesn't the include make the log method accessible?
|
12
|
-
end
|
13
|
-
#post a tumblr photo entry.
|
14
|
-
#
|
15
|
-
#required arguments are :tumblr_blog_hostname, :tumblr_blog_hostname, :tumblr_oauth_api_secret, :tumblr_oauth_access_token_secret, :source, :caption, :state
|
16
|
-
#
|
17
|
-
#optional arguments: :tags, :type (default: 'photo')
|
18
|
-
#
|
19
|
-
def self.post(options={})
|
20
|
-
base_hostname = options[:tumblr_blog_hostname] || options[:group]
|
21
|
-
tumblr_oauth_api_key= options[:tumblr_oauth_api_key]
|
22
|
-
tumblr_oauth_api_secret= options[:tumblr_oauth_api_secret]
|
23
|
-
access_token_key = options[:tumblr_oauth_access_token_key]
|
24
|
-
access_token_secret = options[:tumblr_oauth_access_token_secret]
|
25
|
-
type = options[:type] || 'photo'
|
26
|
-
tags = options[:tags] || ''
|
27
|
-
|
28
|
-
consumer = OAuth::Consumer.new(tumblr_oauth_api_key, tumblr_oauth_api_secret,
|
29
|
-
{ :site => 'http://www.tumblr.com',
|
30
|
-
:request_token_path => '/oauth/request_token',
|
31
|
-
:authorize_path => '/oauth/authorize',
|
32
|
-
:access_token_path => '/oauth/access_token',
|
33
|
-
:http_method => :post } )
|
34
|
-
|
35
|
-
access_token = OAuth::AccessToken.new(consumer, access_token_key, access_token_secret)
|
36
|
-
|
37
|
-
post_response = access_token.post(
|
38
|
-
"http://api.tumblr.com/v2/blog/#{base_hostname}/post", {
|
39
|
-
:type => type,
|
40
|
-
:source => options[:source],
|
41
|
-
:caption => options[:caption],
|
42
|
-
:date => options[:date],
|
43
|
-
:tags => tags,
|
44
|
-
:state => options[:state],
|
45
|
-
:generator => GENERATOR
|
46
|
-
}
|
47
|
-
)
|
48
|
-
post_response
|
49
|
-
end
|
50
|
-
end
|
51
|
-
end
|
data/spec/tweetlr_spec.rb
DELETED
@@ -1,28 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
describe Tweetlr do
|
4
|
-
|
5
|
-
config_file = File.join( Dir.pwd, 'config', 'tweetlr.yml')
|
6
|
-
config = YAML.load_file(config_file)
|
7
|
-
TIMESTAMP = config['twitter_timestamp']
|
8
|
-
WHITELIST = config['whitelist']
|
9
|
-
|
10
|
-
before :each do
|
11
|
-
@first_link = "http://url.com"
|
12
|
-
@second_link = "http://instagr.am/p/DzCWn/"
|
13
|
-
@third_link = "https://imageurl.com"
|
14
|
-
@twitter_response = {"from_user_id_str"=>"1915714", "profile_image_url"=>"http://a0.twimg.com/profile_images/386000279/2_normal.jpg", "created_at"=>"Sun, 17 Apr 2011 16:48:42 +0000", "from_user"=>"whitey_Mc_whIteLIst", "id_str"=>"59659561224765440", "metadata"=>{"result_type"=>"recent"}, "to_user_id"=>nil, "text"=>"Rigaer #wirsounterwegs #{@first_link} @ Augenarzt Dr. Lierow #{@second_link} #{@third_link}", "id"=>59659561224765440, "from_user_id"=>1915714, "geo"=>{"type"=>"Point", "coordinates"=>[52.5182, 13.454]}, "iso_language_code"=>"de", "place"=>{"id"=>"3078869807f9dd36", "type"=>"city", "full_name"=>"Berlin, Berlin"}, "to_user_id_str"=>nil, "source"=>"&lt;a href=&quot;http://instagr.am&quot; rel=&quot;nofollow&quot;&gt;instagram&lt;/a&gt;"}
|
15
|
-
@tweetlr_config = {
|
16
|
-
:since_id => 0,
|
17
|
-
:search_term => 'moped',
|
18
|
-
:results_per_page => 100,
|
19
|
-
:result_type => 'recent',
|
20
|
-
:api_endpoint_twitter => Tweetlr::API_ENDPOINT_TWITTER
|
21
|
-
}
|
22
|
-
end
|
23
|
-
it "crawls twitter and posts to tumblr" do
|
24
|
-
stub_tumblr
|
25
|
-
stub_twitter
|
26
|
-
Tweetlr.crawl(@tweetlr_config)
|
27
|
-
end
|
28
|
-
end
|