tweetlr 0.1.7pre → 0.1.7pre4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.travis.yml +1 -1
- data/README.md +3 -2
- data/bin/tweetlr +12 -30
- data/config/tweetlr.yml +3 -2
- data/lib/combinators/twitter_tumblr.rb +54 -0
- data/lib/log_aware.rb +3 -2
- data/lib/processors/http.rb +45 -0
- data/lib/processors/photo_service.rb +126 -0
- data/lib/processors/tumblr.rb +46 -0
- data/lib/processors/twitter.rb +44 -0
- data/lib/tweetlr.rb +49 -95
- data/spec/combinators/twitter_tumblr_combinator_spec.rb +93 -0
- data/spec/{photo_services_processor_spec.rb → processors/photo_services_processor_spec.rb} +5 -5
- data/spec/{twitter_processor_spec.rb → processors/twitter_processor_spec.rb} +17 -3
- data/spec/spec_helper.rb +24 -2
- data/spec/tweetlr_spec.rb +21 -107
- data/tweetlr.gemspec +2 -2
- metadata +29 -26
- data/lib/http_processor.rb +0 -42
- data/lib/photo_service_processor.rb +0 -122
- data/lib/tumblr_processor.rb +0 -3
- data/lib/twitter_processor.rb +0 -39
@@ -0,0 +1,93 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Combinators::TwitterTumblr do
|
4
|
+
before :each do
|
5
|
+
@first_link = "http://url.com"
|
6
|
+
@second_link = "http://instagr.am/p/DzCWn/"
|
7
|
+
@third_link = "https://imageurl.com"
|
8
|
+
@twitter_response = {"from_user_id_str"=>"1915714", "profile_image_url"=>"http://a0.twimg.com/profile_images/386000279/2_normal.jpg", "created_at"=>"Sun, 17 Apr 2011 16:48:42 +0000", "from_user"=>"whitey_Mc_whIteLIst", "id_str"=>"59659561224765440", "metadata"=>{"result_type"=>"recent"}, "to_user_id"=>nil, "text"=>"Rigaer #wirsounterwegs #{@first_link} @ Augenarzt Dr. Lierow #{@second_link} #{@third_link}", "id"=>59659561224765440, "from_user_id"=>1915714, "geo"=>{"type"=>"Point", "coordinates"=>[52.5182, 13.454]}, "iso_language_code"=>"de", "place"=>{"id"=>"3078869807f9dd36", "type"=>"city", "full_name"=>"Berlin, Berlin"}, "to_user_id_str"=>nil, "source"=>"<a href="http://instagr.am" rel="nofollow">instagram</a>"}
|
9
|
+
@retweet = @twitter_response.merge "text" => "bla bla RT @fgd: tueddelkram"
|
10
|
+
@new_style_retweet = @twitter_response.merge "text" => "and it scales! \u201c@moeffju: http://t.co/8gUSPKu #hktbl1 #origami success! :)\u201d"
|
11
|
+
@new_style_retweet_no_addition = @twitter_response.merge "text" => "\u201c@moeffju: http://t.co/8gUSPKu #hktbl1 #origami success! :)\u201d"
|
12
|
+
@non_whitelist_tweet = @twitter_response.merge 'from_user' => 'nonwhitelist user'
|
13
|
+
@whitelist = ['whitey_mc_whitelist']
|
14
|
+
@tweets = {
|
15
|
+
:instagram => {'text' => "jadda jadda http://instagr.am/p/DzCWn/"},
|
16
|
+
:twitpic => {'text' => "jadda jadda http://twitpic.com/449o2x"},
|
17
|
+
:yfrog => {'text' => "jadda jadda http://yfrog.com/h4vlfp"},
|
18
|
+
:picplz => {'text' => "jadda jadda http://picplz.com/2hWv"},
|
19
|
+
:imgly => {'text' => "jadda jadda http://img.ly/3M1o"},
|
20
|
+
:tco => {'text' => "jadda jadda http://t.co/MUGNayA"},
|
21
|
+
:lockerz => {'text' => "jadda jadda http://lockerz.com/s/100269159"},
|
22
|
+
:embedly => {'text' => "jadda jadda http://flic.kr/p/973hTv"},
|
23
|
+
:twitter_pics => {'text' => "jadda jadda http://t.co/FmyBGfyY"}
|
24
|
+
}
|
25
|
+
@links = {
|
26
|
+
:instagram => "http://instagr.am/p/DzCWn/",
|
27
|
+
:twitpic => "http://twitpic.com/449o2x",
|
28
|
+
:yfrog => "http://yfrog.com/h4vlfp",
|
29
|
+
:picplz => "http://picplz.com/2hWv",
|
30
|
+
:imgly => "http://img.ly/3M1o",
|
31
|
+
:tco => 'http://t.co/MUGNayA',
|
32
|
+
:lockerz => 'http://lockerz.com/s/100269159',
|
33
|
+
:embedly => 'http://flic.kr/p/973hTv',
|
34
|
+
:twitter_pics => 'http://t.co/FmyBGfyY'
|
35
|
+
}
|
36
|
+
end
|
37
|
+
context "handles pictures in tweets" do
|
38
|
+
it "extracting their corresponding links" do
|
39
|
+
@tweets.each do |key,value|
|
40
|
+
send "stub_#{key}"
|
41
|
+
url = Combinators::TwitterTumblr.extract_image_url value
|
42
|
+
url.should be, "service #{key} not working!"
|
43
|
+
check_pic_url_extraction key if [:instagram,:picplz,:yfrog,:imgly,:not_listed].index key
|
44
|
+
end
|
45
|
+
end
|
46
|
+
it "using the first image link found in a tweet with multiple links" do
|
47
|
+
stub_instagram
|
48
|
+
link = Combinators::TwitterTumblr.extract_image_url @twitter_response
|
49
|
+
link.should == 'http://distillery.s3.amazonaws.com/media/2011/05/02/d25df62b9cec4a138967a3ad027d055b_7.jpg'
|
50
|
+
end
|
51
|
+
it "not returning links that do not belong to images" do
|
52
|
+
stub_no_image_link
|
53
|
+
link = Combinators::TwitterTumblr.extract_image_url @twitter_response
|
54
|
+
link.should_not be
|
55
|
+
end
|
56
|
+
end
|
57
|
+
context "given a user whitelist" do
|
58
|
+
it "should mark whitelist users' tweets as published" do
|
59
|
+
stub_instagram
|
60
|
+
post = Combinators::TwitterTumblr::generate_photo_post_from_tweet @twitter_response, :whitelist => @whitelist
|
61
|
+
post[:state].should == 'published'
|
62
|
+
end
|
63
|
+
it "should mark non whitelist users' tweets as drafts" do
|
64
|
+
stub_instagram
|
65
|
+
post = Combinators::TwitterTumblr::generate_photo_post_from_tweet @non_whitelist_tweet, :whitelist => @whitelist
|
66
|
+
post[:state].should == 'draft'
|
67
|
+
end
|
68
|
+
end
|
69
|
+
context "without a user whitelist" do
|
70
|
+
it "should mark every users' posts as published" do
|
71
|
+
stub_instagram
|
72
|
+
post = Combinators::TwitterTumblr::generate_photo_post_from_tweet @twitter_response, :whitelist => nil
|
73
|
+
post[:state].should == 'published'
|
74
|
+
stub_instagram
|
75
|
+
post = Combinators::TwitterTumblr::generate_photo_post_from_tweet @non_whitelist_tweet, :whitelist => nil
|
76
|
+
post[:state].should == 'published'
|
77
|
+
end
|
78
|
+
end
|
79
|
+
it "should not use retweets which would produce double blog posts" do
|
80
|
+
post = Combinators::TwitterTumblr::generate_photo_post_from_tweet @retweet, :whitelist => @whitelist
|
81
|
+
post.should_not be
|
82
|
+
end
|
83
|
+
context "should not use new style retweets which would produce double blog posts" do
|
84
|
+
it "for quotes in context" do
|
85
|
+
post = Combinators::TwitterTumblr::generate_photo_post_from_tweet @new_style_retweet, :whitelist => @whitelist
|
86
|
+
post.should_not be
|
87
|
+
end
|
88
|
+
it "for quotes without further text addition" do
|
89
|
+
post = Combinators::TwitterTumblr::generate_photo_post_from_tweet @new_style_retweet_no_addition, :whitelist => @whitelist
|
90
|
+
post.should_not be
|
91
|
+
end
|
92
|
+
end
|
93
|
+
end
|
@@ -1,6 +1,6 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
|
-
describe
|
3
|
+
describe Processors::PhotoService do
|
4
4
|
before :each do
|
5
5
|
@links = {
|
6
6
|
:instagram => "http://instagr.am/p/DzCWn/",
|
@@ -17,22 +17,22 @@ describe PhotoServiceProcessor do
|
|
17
17
|
it "should find a picture's url from the supported services" do
|
18
18
|
@links.each do |service,link|
|
19
19
|
send "stub_#{service}"
|
20
|
-
url =
|
20
|
+
url = Processors::PhotoService::find_image_url link
|
21
21
|
url.should be, "service #{service} not working!"
|
22
22
|
check_pic_url_extraction service if [:instagram,:picplz,:yfrog,:imgly,:not_listed].index service
|
23
23
|
end
|
24
24
|
end
|
25
25
|
it "should not crash if embedly fallback won't find a link" do
|
26
26
|
stub_bad_request
|
27
|
-
url =
|
27
|
+
url = Processors::PhotoService::find_image_url "http://mopskopf"
|
28
28
|
end
|
29
29
|
it "should not crash with an encoding error when response is non-us-ascii" do
|
30
30
|
stub_utf8_response
|
31
|
-
url =
|
31
|
+
url = Processors::PhotoService::find_image_url "http://api.instagram.com/oembed?url=http://instagr.am/p/Gx%E2%80%946/"
|
32
32
|
end
|
33
33
|
it "follows redirects" do
|
34
34
|
stub_imgly
|
35
|
-
link =
|
35
|
+
link = Processors::PhotoService::link_url_redirect 'im mocked anyways'
|
36
36
|
link.should == 'http://s3.amazonaws.com/imgly_production/899582/full.jpg'
|
37
37
|
end
|
38
38
|
end
|
@@ -1,16 +1,30 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
|
-
describe
|
3
|
+
describe Processors::Twitter do
|
4
4
|
before :each do
|
5
5
|
@first_link = "http://url.com"
|
6
6
|
@second_link = "http://instagr.am/p/DzCWn/"
|
7
7
|
@third_link = "https://imageurl.com"
|
8
8
|
@twitter_response = {"from_user_id_str"=>"1915714", "profile_image_url"=>"http://a0.twimg.com/profile_images/386000279/2_normal.jpg", "created_at"=>"Sun, 17 Apr 2011 16:48:42 +0000", "from_user"=>"whitey_Mc_whIteLIst", "id_str"=>"59659561224765440", "metadata"=>{"result_type"=>"recent"}, "to_user_id"=>nil, "text"=>"Rigaer #wirsounterwegs #{@first_link} @ Augenarzt Dr. Lierow #{@second_link} #{@third_link}", "id"=>59659561224765440, "from_user_id"=>1915714, "geo"=>{"type"=>"Point", "coordinates"=>[52.5182, 13.454]}, "iso_language_code"=>"de", "place"=>{"id"=>"3078869807f9dd36", "type"=>"city", "full_name"=>"Berlin, Berlin"}, "to_user_id_str"=>nil, "source"=>"<a href="http://instagr.am" rel="nofollow">instagram</a>"}
|
9
|
+
@twitter_config = {
|
10
|
+
:since_id => 0,
|
11
|
+
:search_term => 'moped',
|
12
|
+
:results_per_page => 100,
|
13
|
+
:result_type => 'recent',
|
14
|
+
:api_endpoint_twitter => Tweetlr::API_ENDPOINT_TWITTER
|
15
|
+
}
|
16
|
+
end
|
17
|
+
it "should search twitter for a given term" do
|
18
|
+
stub_twitter
|
19
|
+
response = Processors::Twitter::lazy_search @twitter_config
|
20
|
+
tweets = response['results']
|
21
|
+
tweets.should be
|
22
|
+
tweets.should_not be_empty
|
9
23
|
end
|
10
24
|
it "extracts links" do
|
11
|
-
links =
|
25
|
+
links = Processors::Twitter::extract_links ''
|
12
26
|
links.should be_nil
|
13
|
-
links =
|
27
|
+
links = Processors::Twitter::extract_links @twitter_response
|
14
28
|
links[0].should == @first_link
|
15
29
|
links[1].should == @second_link
|
16
30
|
links[2].should == @third_link
|
data/spec/spec_helper.rb
CHANGED
@@ -2,9 +2,31 @@
|
|
2
2
|
require "bundler"
|
3
3
|
Bundler.require :default, :development, :test
|
4
4
|
|
5
|
+
logger = Logger.new(STDOUT)
|
6
|
+
logger.level = Logger::FATAL
|
7
|
+
LogAware.log = logger
|
8
|
+
|
5
9
|
def check_pic_url_extraction(service)
|
6
|
-
image_url =
|
7
|
-
image_url.should =~
|
10
|
+
image_url = Processors::PhotoService::send "image_url_#{service}".to_sym, @links[service]
|
11
|
+
image_url.should =~ Processors::PhotoService::PIC_REGEXP
|
12
|
+
end
|
13
|
+
|
14
|
+
def stub_tumblr
|
15
|
+
Curl::Easy.any_instance.stub(:response_code).and_return 201
|
16
|
+
Curl::Easy.any_instance.stub(:header_str).and_return %|HTTP/1.1 201 Created
|
17
|
+
Date: Sun, 13 Nov 2011 16:56:02 GMT
|
18
|
+
Server: Apache
|
19
|
+
P3P: CP="ALL ADM DEV PSAi COM OUR OTRo STP IND ONL"
|
20
|
+
Vary: Accept-Encoding
|
21
|
+
X-Tumblr-Usec: D=2600406
|
22
|
+
Content-Length: 11
|
23
|
+
Connection: close
|
24
|
+
Content-Type: text/plain; charset=utf-8
|
25
|
+
|
26
|
+
|
|
27
|
+
Curl::Easy.any_instance.stub(:body_str).and_return %|12742797055|
|
28
|
+
Curl::Easy.stub!(:http_post).and_return Curl::Easy.new
|
29
|
+
stub_instagram
|
8
30
|
end
|
9
31
|
|
10
32
|
def stub_twitter
|
data/spec/tweetlr_spec.rb
CHANGED
@@ -10,118 +10,32 @@ describe Tweetlr do
|
|
10
10
|
WHITELIST = config['whitelist']
|
11
11
|
|
12
12
|
before :each do
|
13
|
-
@credentials = {:email => USER, :password => PW}
|
14
|
-
@searchterm = 'fail'
|
15
|
-
@tweets = {
|
16
|
-
:instagram => {'text' => "jadda jadda http://instagr.am/p/DzCWn/"},
|
17
|
-
:twitpic => {'text' => "jadda jadda http://twitpic.com/449o2x"},
|
18
|
-
:yfrog => {'text' => "jadda jadda http://yfrog.com/h4vlfp"},
|
19
|
-
:picplz => {'text' => "jadda jadda http://picplz.com/2hWv"},
|
20
|
-
:imgly => {'text' => "jadda jadda http://img.ly/3M1o"},
|
21
|
-
:tco => {'text' => "jadda jadda http://t.co/MUGNayA"},
|
22
|
-
:lockerz => {'text' => "jadda jadda http://lockerz.com/s/100269159"},
|
23
|
-
:embedly => {'text' => "jadda jadda http://flic.kr/p/973hTv"},
|
24
|
-
:twitter_pics => {'text' => "jadda jadda http://t.co/FmyBGfyY"}
|
25
|
-
}
|
26
|
-
@links = {
|
27
|
-
:instagram => "http://instagr.am/p/DzCWn/",
|
28
|
-
:twitpic => "http://twitpic.com/449o2x",
|
29
|
-
:yfrog => "http://yfrog.com/h4vlfp",
|
30
|
-
:picplz => "http://picplz.com/2hWv",
|
31
|
-
:imgly => "http://img.ly/3M1o",
|
32
|
-
:tco => 'http://t.co/MUGNayA',
|
33
|
-
:lockerz => 'http://lockerz.com/s/100269159',
|
34
|
-
:embedly => 'http://flic.kr/p/973hTv',
|
35
|
-
:twitter_pics => 'http://t.co/FmyBGfyY'
|
36
|
-
}
|
37
13
|
@first_link = "http://url.com"
|
38
14
|
@second_link = "http://instagr.am/p/DzCWn/"
|
39
15
|
@third_link = "https://imageurl.com"
|
40
16
|
@twitter_response = {"from_user_id_str"=>"1915714", "profile_image_url"=>"http://a0.twimg.com/profile_images/386000279/2_normal.jpg", "created_at"=>"Sun, 17 Apr 2011 16:48:42 +0000", "from_user"=>"whitey_Mc_whIteLIst", "id_str"=>"59659561224765440", "metadata"=>{"result_type"=>"recent"}, "to_user_id"=>nil, "text"=>"Rigaer #wirsounterwegs #{@first_link} @ Augenarzt Dr. Lierow #{@second_link} #{@third_link}", "id"=>59659561224765440, "from_user_id"=>1915714, "geo"=>{"type"=>"Point", "coordinates"=>[52.5182, 13.454]}, "iso_language_code"=>"de", "place"=>{"id"=>"3078869807f9dd36", "type"=>"city", "full_name"=>"Berlin, Berlin"}, "to_user_id_str"=>nil, "source"=>"<a href="http://instagr.am" rel="nofollow">instagram</a>"}
|
41
|
-
@
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
17
|
+
@tweetlr_config = {
|
18
|
+
:tumblr_email => USER,
|
19
|
+
:tumblr_pw => PW,
|
20
|
+
:whitelist => WHITELIST,
|
21
|
+
:since_id => 0,
|
22
|
+
:search_term => 'moped',
|
23
|
+
:results_per_page => 100,
|
24
|
+
:result_type => 'recent',
|
25
|
+
:api_endpoint_twitter => Tweetlr::API_ENDPOINT_TWITTER
|
26
|
+
}
|
47
27
|
end
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
it "should search twitter for a given term" do
|
56
|
-
stub_twitter
|
57
|
-
tweetlr = @tweetlr
|
58
|
-
response = tweetlr.lazy_search_twitter
|
59
|
-
tweets = response['results']
|
60
|
-
tweets.should be
|
61
|
-
tweets.should_not be_empty
|
28
|
+
it "should post to tumblr" do
|
29
|
+
stub_tumblr
|
30
|
+
tumblr_post = Combinators::TwitterTumblr::generate_photo_post_from_tweet @twitter_response
|
31
|
+
tumblr_post[:date] = Time.now.to_s
|
32
|
+
response = Processors::Tumblr::post tumblr_post.merge({:email => USER, :password => PW})
|
33
|
+
response.should be
|
34
|
+
response.response_code.should be 201
|
62
35
|
end
|
63
|
-
|
64
|
-
|
65
|
-
stub_instagram
|
66
|
-
post = @tweetlr.generate_tumblr_photo_post @twitter_response
|
67
|
-
post[:state].should == 'published'
|
68
|
-
end
|
69
|
-
it "should mark non whitelist users' tweets as drafts" do
|
70
|
-
stub_instagram
|
71
|
-
post = @tweetlr.generate_tumblr_photo_post @non_whitelist_tweet
|
72
|
-
post[:state].should == 'draft'
|
73
|
-
end
|
74
|
-
end
|
75
|
-
context "without a user whitelist" do
|
76
|
-
before :each do
|
77
|
-
@tweetlr = Tweetlr.new(USER, PW, {
|
78
|
-
:whitelist => nil,
|
79
|
-
:results_per_page => 5,
|
80
|
-
:since_id => TIMESTAMP,
|
81
|
-
:terms => @searchterm,
|
82
|
-
:loglevel => 4})
|
83
|
-
end
|
84
|
-
it "should mark every users' posts as published" do
|
85
|
-
stub_instagram
|
86
|
-
post = @tweetlr.generate_tumblr_photo_post @twitter_response
|
87
|
-
post[:state].should == 'published'
|
88
|
-
stub_instagram
|
89
|
-
post = @tweetlr.generate_tumblr_photo_post @non_whitelist_tweet
|
90
|
-
post[:state].should == 'published'
|
91
|
-
end
|
92
|
-
end
|
93
|
-
it "should not use retweets which would produce double blog posts" do
|
94
|
-
post = @tweetlr.generate_tumblr_photo_post @retweet
|
95
|
-
post.should_not be
|
96
|
-
end
|
97
|
-
context "should not use new style retweets which would produce double blog posts" do
|
98
|
-
it "for quotes in context" do
|
99
|
-
post = @tweetlr.generate_tumblr_photo_post @new_style_retweet
|
100
|
-
post.should_not be
|
101
|
-
end
|
102
|
-
it "for quotes without further text addition" do
|
103
|
-
post = @tweetlr.generate_tumblr_photo_post @new_style_retweet_no_addition
|
104
|
-
post.should_not be
|
105
|
-
end
|
106
|
-
end
|
107
|
-
context "handles pictures in tweets" do
|
108
|
-
it "extracting their corresponding links" do
|
109
|
-
@tweets.each do |key,value|
|
110
|
-
send "stub_#{key}"
|
111
|
-
url = @tweetlr.extract_image_url value
|
112
|
-
url.should be, "service #{key} not working!"
|
113
|
-
check_pic_url_extraction key if [:instagram,:picplz,:yfrog,:imgly,:not_listed].index key
|
114
|
-
end
|
115
|
-
end
|
116
|
-
it "using the first image link found in a tweet with multiple links" do
|
117
|
-
stub_instagram
|
118
|
-
link = @tweetlr.extract_image_url @twitter_response
|
119
|
-
link.should == 'http://distillery.s3.amazonaws.com/media/2011/05/02/d25df62b9cec4a138967a3ad027d055b_7.jpg'
|
120
|
-
end
|
121
|
-
it "not returning links that do not belong to images" do
|
122
|
-
stub_no_image_link
|
123
|
-
link = @tweetlr.extract_image_url @twitter_response
|
124
|
-
link.should_not be
|
125
|
-
end
|
36
|
+
it "crawls twitter and posts to tumblr" do
|
37
|
+
Tweetlr.crawl(config)
|
126
38
|
end
|
39
|
+
|
40
|
+
|
127
41
|
end
|
data/tweetlr.gemspec
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
s.name = "tweetlr"
|
3
|
-
s.version = "0.1.
|
3
|
+
s.version = "0.1.7pre4"
|
4
4
|
s.author = "Sven Kraeuter"
|
5
5
|
s.email = "sven.kraeuter@gmail.com"
|
6
6
|
s.homepage = "http://tweetlr.5v3n.com"
|
@@ -15,7 +15,7 @@ Gem::Specification.new do |s|
|
|
15
15
|
s.add_dependency "curb"
|
16
16
|
s.add_dependency "json"
|
17
17
|
|
18
|
-
s.add_development_dependency "rake"
|
18
|
+
s.add_development_dependency "rake"
|
19
19
|
s.add_development_dependency "rspec"
|
20
20
|
s.add_development_dependency "rdoc"
|
21
21
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tweetlr
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.7pre4
|
5
5
|
prerelease: 5
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2011-11-
|
12
|
+
date: 2011-11-18 00:00:00.000000000Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: daemons
|
16
|
-
requirement: &
|
16
|
+
requirement: &2153761960 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '0'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *2153761960
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: eventmachine
|
27
|
-
requirement: &
|
27
|
+
requirement: &2153761540 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ! '>='
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: '0'
|
33
33
|
type: :runtime
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *2153761540
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: curb
|
38
|
-
requirement: &
|
38
|
+
requirement: &2153761120 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ! '>='
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: '0'
|
44
44
|
type: :runtime
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *2153761120
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: json
|
49
|
-
requirement: &
|
49
|
+
requirement: &2153760700 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ! '>='
|
@@ -54,21 +54,21 @@ dependencies:
|
|
54
54
|
version: '0'
|
55
55
|
type: :runtime
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *2153760700
|
58
58
|
- !ruby/object:Gem::Dependency
|
59
59
|
name: rake
|
60
|
-
requirement: &
|
60
|
+
requirement: &2153760280 !ruby/object:Gem::Requirement
|
61
61
|
none: false
|
62
62
|
requirements:
|
63
|
-
- -
|
63
|
+
- - ! '>='
|
64
64
|
- !ruby/object:Gem::Version
|
65
|
-
version: 0
|
65
|
+
version: '0'
|
66
66
|
type: :development
|
67
67
|
prerelease: false
|
68
|
-
version_requirements: *
|
68
|
+
version_requirements: *2153760280
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: rspec
|
71
|
-
requirement: &
|
71
|
+
requirement: &2153759860 !ruby/object:Gem::Requirement
|
72
72
|
none: false
|
73
73
|
requirements:
|
74
74
|
- - ! '>='
|
@@ -76,10 +76,10 @@ dependencies:
|
|
76
76
|
version: '0'
|
77
77
|
type: :development
|
78
78
|
prerelease: false
|
79
|
-
version_requirements: *
|
79
|
+
version_requirements: *2153759860
|
80
80
|
- !ruby/object:Gem::Dependency
|
81
81
|
name: rdoc
|
82
|
-
requirement: &
|
82
|
+
requirement: &2153759440 !ruby/object:Gem::Requirement
|
83
83
|
none: false
|
84
84
|
requirements:
|
85
85
|
- - ! '>='
|
@@ -87,7 +87,7 @@ dependencies:
|
|
87
87
|
version: '0'
|
88
88
|
type: :development
|
89
89
|
prerelease: false
|
90
|
-
version_requirements: *
|
90
|
+
version_requirements: *2153759440
|
91
91
|
description: tweetlr crawls twitter for a given term, extracts photos out of the collected
|
92
92
|
tweets' short urls and posts the images to tumblr.
|
93
93
|
email: sven.kraeuter@gmail.com
|
@@ -108,16 +108,18 @@ files:
|
|
108
108
|
- Rakefile
|
109
109
|
- bin/tweetlr
|
110
110
|
- config/tweetlr.yml
|
111
|
-
- lib/
|
111
|
+
- lib/combinators/twitter_tumblr.rb
|
112
112
|
- lib/log_aware.rb
|
113
|
-
- lib/
|
114
|
-
- lib/
|
113
|
+
- lib/processors/http.rb
|
114
|
+
- lib/processors/photo_service.rb
|
115
|
+
- lib/processors/tumblr.rb
|
116
|
+
- lib/processors/twitter.rb
|
115
117
|
- lib/tweetlr.rb
|
116
|
-
-
|
117
|
-
- spec/photo_services_processor_spec.rb
|
118
|
+
- spec/combinators/twitter_tumblr_combinator_spec.rb
|
119
|
+
- spec/processors/photo_services_processor_spec.rb
|
120
|
+
- spec/processors/twitter_processor_spec.rb
|
118
121
|
- spec/spec_helper.rb
|
119
122
|
- spec/tweetlr_spec.rb
|
120
|
-
- spec/twitter_processor_spec.rb
|
121
123
|
- tweetlr.gemspec
|
122
124
|
homepage: http://tweetlr.5v3n.com
|
123
125
|
licenses: []
|
@@ -145,7 +147,8 @@ specification_version: 3
|
|
145
147
|
summary: tweetlr crawls twitter for a given term, extracts photos out of the collected
|
146
148
|
tweets' short urls and posts the images to tumblr.
|
147
149
|
test_files:
|
148
|
-
- spec/
|
150
|
+
- spec/combinators/twitter_tumblr_combinator_spec.rb
|
151
|
+
- spec/processors/photo_services_processor_spec.rb
|
152
|
+
- spec/processors/twitter_processor_spec.rb
|
149
153
|
- spec/spec_helper.rb
|
150
154
|
- spec/tweetlr_spec.rb
|
151
|
-
- spec/twitter_processor_spec.rb
|
data/lib/http_processor.rb
DELETED
@@ -1,42 +0,0 @@
|
|
1
|
-
require 'curb'
|
2
|
-
require 'log_aware'
|
3
|
-
|
4
|
-
module HttpProcessor
|
5
|
-
include LogAware
|
6
|
-
|
7
|
-
USER_AGENT = %{Mozilla/5.0 (compatible; tweetlr; +http://tweetlr.5v3n.com)}
|
8
|
-
|
9
|
-
#convenience method for curl http get calls and parsing them to json.
|
10
|
-
def HttpProcessor::http_get(request, log=nil)
|
11
|
-
tries = 3
|
12
|
-
begin
|
13
|
-
curl = Curl::Easy.new request
|
14
|
-
curl.useragent = USER_AGENT
|
15
|
-
curl.perform
|
16
|
-
begin
|
17
|
-
JSON.parse curl.body_str
|
18
|
-
rescue JSON::ParserError => err
|
19
|
-
begin
|
20
|
-
if log
|
21
|
-
log.warn "#{err}: Could not parse response for #{request} - this is probably not a json response: #{curl.body_str}"
|
22
|
-
end
|
23
|
-
return nil
|
24
|
-
rescue Encoding::CompatibilityError => err
|
25
|
-
if log
|
26
|
-
log.error "Trying to rescue a JSON::ParserError for '#{request}' we got stuck in a Encoding::CompatibilityError."
|
27
|
-
end
|
28
|
-
return nil
|
29
|
-
end
|
30
|
-
end
|
31
|
-
rescue Curl::Err::CurlError => err
|
32
|
-
log.error "Failure in Curl call: #{err}" if log
|
33
|
-
tries -= 1
|
34
|
-
sleep 3
|
35
|
-
if tries > 0
|
36
|
-
retry
|
37
|
-
else
|
38
|
-
nil
|
39
|
-
end
|
40
|
-
end
|
41
|
-
end
|
42
|
-
end
|
@@ -1,122 +0,0 @@
|
|
1
|
-
require 'log_aware'
|
2
|
-
|
3
|
-
module PhotoServiceProcessor
|
4
|
-
|
5
|
-
LOCATION_START_INDICATOR = 'Location: '
|
6
|
-
LOCATION_STOP_INDICATOR = "\r\n"
|
7
|
-
PIC_REGEXP = /(.*?)\.(jpg|jpeg|png|gif)/i
|
8
|
-
|
9
|
-
include LogAware
|
10
|
-
|
11
|
-
def self.find_image_url(link)
|
12
|
-
url = nil
|
13
|
-
if link && !(photo? link)
|
14
|
-
url = image_url_instagram link if (link.index('instagr.am') || link.index('instagram.com'))
|
15
|
-
url = image_url_picplz link if link.index 'picplz'
|
16
|
-
url = image_url_twitpic link if link.index 'twitpic'
|
17
|
-
url = image_url_yfrog link if link.index 'yfrog'
|
18
|
-
url = image_url_imgly link if link.index 'img.ly'
|
19
|
-
url = image_url_tco link if link.index 't.co'
|
20
|
-
url = image_url_lockerz link if link.index 'lockerz.com'
|
21
|
-
url = image_url_foursquare link if link.index '4sq.com'
|
22
|
-
url = image_url_embedly link if url.nil? #just try embed.ly for anything else. could do all image url processing w/ embedly, but there's probably some kind of rate limit invovled.
|
23
|
-
elsif photo? link
|
24
|
-
url = link
|
25
|
-
end
|
26
|
-
url
|
27
|
-
end
|
28
|
-
|
29
|
-
def self.photo?(link)
|
30
|
-
link =~ PIC_REGEXP
|
31
|
-
end
|
32
|
-
|
33
|
-
#find the image's url via embed.ly
|
34
|
-
def self.image_url_embedly(link_url)
|
35
|
-
response = HttpProcessor::http_get "http://api.embed.ly/1/oembed?url=#{link_url}"
|
36
|
-
if response && response['type'] == 'photo'
|
37
|
-
image_url = response['url']
|
38
|
-
end
|
39
|
-
image_url
|
40
|
-
end
|
41
|
-
#find the image's url for a foursquare link
|
42
|
-
def self.image_url_foursquare(link_url)
|
43
|
-
image_url_embedly link_url
|
44
|
-
end
|
45
|
-
#find the image's url for a lockerz link
|
46
|
-
def self.image_url_lockerz(link_url)
|
47
|
-
response = HttpProcessor::http_get "http://api.plixi.com/api/tpapi.svc/json/metadatafromurl?details=false&url=#{link_url}"
|
48
|
-
response["BigImageUrl"] if response
|
49
|
-
end
|
50
|
-
#find the image's url for an twitter shortened link
|
51
|
-
def self.image_url_tco(link_url)
|
52
|
-
service_url = link_url_redirect link_url
|
53
|
-
find_image_url service_url
|
54
|
-
end
|
55
|
-
#find the image's url for an instagram link
|
56
|
-
def self.image_url_instagram(link_url)
|
57
|
-
link_url['instagram.com'] = 'instagr.am' if link_url.index 'instagram.com' #instagram's oembed does not work for .com links
|
58
|
-
response = HttpProcessor::http_get "http://api.instagram.com/oembed?url=#{link_url}"
|
59
|
-
response['url'] if response
|
60
|
-
end
|
61
|
-
|
62
|
-
#find the image's url for a picplz short/longlink
|
63
|
-
def self.image_url_picplz(link_url)
|
64
|
-
id = extract_id link_url
|
65
|
-
#try short url
|
66
|
-
response = HttpProcessor::http_get "http://picplz.com/api/v2/pic.json?shorturl_ids=#{id}"
|
67
|
-
#if short url fails, try long url
|
68
|
-
#response = HTTParty.get "http://picplz.com/api/v2/pic.json?longurl_ids=#{id}"
|
69
|
-
#extract url
|
70
|
-
if response && response['value'] && response['value']['pics'] && response['value']['pics'].first && response['value']['pics'].first['pic_files'] && response['value']['pics'].first['pic_files']['640r']
|
71
|
-
response['value']['pics'].first['pic_files']['640r']['img_url']
|
72
|
-
else
|
73
|
-
nil
|
74
|
-
end
|
75
|
-
end
|
76
|
-
#find the image's url for a twitpic link
|
77
|
-
def self.image_url_twitpic(link_url)
|
78
|
-
image_url_redirect link_url, "http://twitpic.com/show/full/"
|
79
|
-
end
|
80
|
-
#find the image'S url for a yfrog link
|
81
|
-
def self.image_url_yfrog(link_url)
|
82
|
-
response = HttpProcessor::http_get("http://www.yfrog.com/api/oembed?url=#{link_url}")
|
83
|
-
response['url'] if response
|
84
|
-
end
|
85
|
-
#find the image's url for a img.ly link
|
86
|
-
def self.image_url_imgly(link_url)
|
87
|
-
image_url_redirect link_url, "http://img.ly/show/full/", "\r\n"
|
88
|
-
end
|
89
|
-
|
90
|
-
# extract image url from services like twitpic & img.ly that do not offer oembed interfaces
|
91
|
-
def self.image_url_redirect(link_url, service_endpoint, stop_indicator = LOCATION_STOP_INDICATOR)
|
92
|
-
link_url_redirect "#{service_endpoint}#{extract_id link_url}", stop_indicator
|
93
|
-
end
|
94
|
-
|
95
|
-
def self.link_url_redirect(short_url, stop_indicator = LOCATION_STOP_INDICATOR)
|
96
|
-
tries = 3
|
97
|
-
begin
|
98
|
-
resp = Curl::Easy.http_get(short_url) { |res| res.follow_location = true }
|
99
|
-
rescue Curl::Err::CurlError => err
|
100
|
-
log.error "Curl::Easy.http_get failed: #{err}"
|
101
|
-
tries -= 1
|
102
|
-
sleep 3
|
103
|
-
if tries > 0
|
104
|
-
retry
|
105
|
-
else
|
106
|
-
return nil
|
107
|
-
end
|
108
|
-
end
|
109
|
-
if(resp && resp.header_str.index(LOCATION_START_INDICATOR) && resp.header_str.index(stop_indicator))
|
110
|
-
start = resp.header_str.index(LOCATION_START_INDICATOR) + LOCATION_START_INDICATOR.size
|
111
|
-
stop = resp.header_str.index(stop_indicator, start)
|
112
|
-
resp.header_str[start...stop]
|
113
|
-
else
|
114
|
-
nil
|
115
|
-
end
|
116
|
-
end
|
117
|
-
|
118
|
-
#extract the pic id from a given <code>link</code>
|
119
|
-
def self.extract_id(link)
|
120
|
-
link.split('/').last if link.split('/')
|
121
|
-
end
|
122
|
-
end
|