strawman 0.2 → 0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +25 -8
- data/Rakefile +1 -1
- data/examples/example.rb +15 -6
- data/lib/strawman/http_request.rb +8 -2
- data/lib/strawman/proxy.rb +22 -14
- data/lib/strawman/proxy_list.rb +44 -17
- data/lib/strawman.rb +1 -0
- data/strawman.gemspec +1 -1
- data.tar.gz.sig +0 -0
- metadata +1 -1
- metadata.gz.sig +2 -1
data/README.rdoc
CHANGED
@@ -16,25 +16,42 @@ RDoc: http://rdoc.info/projects/MattColyer/strawman
|
|
16
16
|
require 'eventmachine'
|
17
17
|
require 'em-http'
|
18
18
|
require 'strawman'
|
19
|
+
require 'logger'
|
20
|
+
|
21
|
+
log = Logger.new(STDOUT)
|
22
|
+
log.level = Logger::INFO
|
19
23
|
|
20
24
|
EventMachine.run {
|
21
|
-
proxy_list = Strawman::ProxyList.new
|
22
|
-
|
25
|
+
proxy_list = Strawman::ProxyList.new("http://whatismyip.org")
|
26
|
+
proxy_list.load("proxies") if File.exist?("proxies")
|
27
|
+
sources_set = proxy_list.add_sources([Strawman::TwitterSource.new("proxy_sites")])
|
28
|
+
|
23
29
|
sources_set.callback{
|
24
|
-
|
25
|
-
http.
|
26
|
-
|
27
|
-
|
30
|
+
proxy_list.save("proxies")
|
31
|
+
request = Strawman::HttpRequest.new(proxy_list, 'http://goingtorain.com/')
|
32
|
+
request.callback {
|
33
|
+
http = request.get
|
34
|
+
http.callback {
|
35
|
+
log.info http.response_header.inspect
|
36
|
+
log.info http.response
|
37
|
+
EventMachine.stop
|
38
|
+
}
|
39
|
+
}
|
40
|
+
request.errback{
|
41
|
+
log.error "No available proxies"
|
28
42
|
}
|
29
43
|
}
|
44
|
+
|
45
|
+
sources_set.errback{
|
46
|
+
log.error "Something went wrong"
|
47
|
+
EventMachine.stop
|
48
|
+
}
|
30
49
|
}
|
31
50
|
|
32
51
|
== TODO
|
33
52
|
|
34
53
|
Patches are happily accepted; please open a GitHub ticket and attach the patch.
|
35
54
|
|
36
|
-
* Store proxies across runs, so that the list can grow larger than the status list
|
37
|
-
* Optionally, store proxy verification across runs
|
38
55
|
* Undo link encoding if Glype adds it
|
39
56
|
* Strip annoying stuff that Glype adds as a footer and header
|
40
57
|
* POST doesn't work
|
data/Rakefile
CHANGED
@@ -3,7 +3,7 @@ require 'spec/rake/spectask'
|
|
3
3
|
require 'rake'
|
4
4
|
require 'echoe'
|
5
5
|
|
6
|
-
Echoe.new('strawman', '0.
|
6
|
+
Echoe.new('strawman', '0.3') do |p|
|
7
7
|
p.description = "Allows you fetch pages using glype proxies."
|
8
8
|
p.url = "http://github.com/mattcolyer/strawman"
|
9
9
|
p.author = "Matt Colyer"
|
data/examples/example.rb
CHANGED
@@ -3,20 +3,29 @@ require 'eventmachine'
|
|
3
3
|
require 'em-http'
|
4
4
|
$LOAD_PATH << "../lib/"
|
5
5
|
require 'strawman'
|
6
|
+
require 'logger'
|
6
7
|
|
7
8
|
log = Logger.new(STDOUT)
|
8
9
|
log.level = Logger::INFO
|
9
10
|
|
10
11
|
EventMachine.run {
|
11
12
|
proxy_list = Strawman::ProxyList.new("http://whatismyip.org")
|
12
|
-
|
13
|
+
proxy_list.load("proxies") if File.exist?("proxies")
|
14
|
+
sources_set = proxy_list.add_sources([Strawman::TwitterSource.new("proxy_sites")])
|
13
15
|
|
14
16
|
sources_set.callback{
|
15
|
-
|
16
|
-
http.
|
17
|
-
|
18
|
-
|
19
|
-
|
17
|
+
proxy_list.save("proxies")
|
18
|
+
request = Strawman::HttpRequest.new(proxy_list, 'http://goingtorain.com/')
|
19
|
+
request.callback {
|
20
|
+
http = request.get
|
21
|
+
http.callback {
|
22
|
+
log.info http.response_header.inspect
|
23
|
+
log.info http.response
|
24
|
+
EventMachine.stop
|
25
|
+
}
|
26
|
+
}
|
27
|
+
request.errback{
|
28
|
+
log.error "No available proxies"
|
20
29
|
}
|
21
30
|
}
|
22
31
|
|
@@ -4,9 +4,15 @@ module Strawman
|
|
4
4
|
# A simple wrapper for em-http-client's HttpRequest.
|
5
5
|
#
|
6
6
|
class HttpRequest
|
7
|
+
include EventMachine::Deferrable
|
8
|
+
|
7
9
|
def initialize(proxy_list, url)
|
8
|
-
|
9
|
-
|
10
|
+
proxy_response = proxy_list.proxy
|
11
|
+
proxy_response.callback do |proxy|
|
12
|
+
@proxy = proxy
|
13
|
+
@request = Transport.new(@proxy.proxy_url(url))
|
14
|
+
succeed
|
15
|
+
end
|
10
16
|
end
|
11
17
|
|
12
18
|
#
|
data/lib/strawman/proxy.rb
CHANGED
@@ -4,6 +4,14 @@ module Strawman
|
|
4
4
|
# of proxy.
|
5
5
|
#
|
6
6
|
class Proxy
|
7
|
+
attr_reader :root_url
|
8
|
+
attr_writer :valid
|
9
|
+
|
10
|
+
def initialize(url)
|
11
|
+
@root_url = url
|
12
|
+
@valid = false
|
13
|
+
end
|
14
|
+
|
7
15
|
#
|
8
16
|
# Returns the referer to use when making the proxied request.
|
9
17
|
#
|
@@ -15,7 +23,20 @@ module Strawman
|
|
15
23
|
# Returns the url to fetch the given url through this proxy.
|
16
24
|
#
|
17
25
|
def proxy_url(url)
|
18
|
-
URI.join @root_url, proxy_path(url)
|
26
|
+
uri = URI.join @root_url, proxy_path(url)
|
27
|
+
"#{uri.scheme}://#{uri.host}:#{uri.port}#{uri.path}?#{uri.query}"
|
28
|
+
end
|
29
|
+
|
30
|
+
#
|
31
|
+
# Used to determine whether this proxy is valid. This must be called from
|
32
|
+
# within the callback of the validate deferrable.
|
33
|
+
#
|
34
|
+
def valid?
|
35
|
+
@valid
|
36
|
+
end
|
37
|
+
|
38
|
+
def ==(other)
|
39
|
+
self.class == other.class && self.root_url == other.root_url
|
19
40
|
end
|
20
41
|
|
21
42
|
protected
|
@@ -29,11 +50,6 @@ module Strawman
|
|
29
50
|
# See: http://www.glype.com/ for more details.
|
30
51
|
#
|
31
52
|
class GlypeProxy < Proxy
|
32
|
-
def initialize(url)
|
33
|
-
@root_url = url
|
34
|
-
@valid = false
|
35
|
-
end
|
36
|
-
|
37
53
|
#
|
38
54
|
# Verifies whether this proxy is currently functional. Returns a deferrable.
|
39
55
|
#
|
@@ -52,14 +68,6 @@ module Strawman
|
|
52
68
|
http
|
53
69
|
end
|
54
70
|
|
55
|
-
#
|
56
|
-
# Used to determine whether this proxy is valid. This must be called from
|
57
|
-
# within the callback of the validate deferable.
|
58
|
-
#
|
59
|
-
def valid?
|
60
|
-
@valid
|
61
|
-
end
|
62
|
-
|
63
71
|
def to_s
|
64
72
|
"<GlypeProxy #{@root_url}>"
|
65
73
|
end
|
data/lib/strawman/proxy_list.rb
CHANGED
@@ -3,7 +3,7 @@ module Strawman
|
|
3
3
|
# Represents a group of proxy sources
|
4
4
|
#
|
5
5
|
class ProxyList
|
6
|
-
|
6
|
+
attr_accessor :proxies
|
7
7
|
|
8
8
|
#
|
9
9
|
# [verification_url] The url to use to verify that the proxy is valid. All
|
@@ -11,48 +11,75 @@ module Strawman
|
|
11
11
|
#
|
12
12
|
def initialize(verification_url)
|
13
13
|
@proxies = []
|
14
|
+
@dead_proxies = []
|
14
15
|
@verification_url = verification_url
|
15
16
|
end
|
16
17
|
|
17
18
|
#
|
18
19
|
# Takes a list of sources and returns a deferrable which will complete once
|
19
|
-
# all sources have been fetched
|
20
|
+
# all sources have been fetched.
|
20
21
|
#
|
21
|
-
def
|
22
|
+
def add_sources(sources)
|
22
23
|
sources_ready = EventMachine::MultiRequest.new
|
23
|
-
proxies_ready = EventMachine::MultiRequest.new
|
24
24
|
|
25
|
-
# Fetch all of the sources
|
26
25
|
sources.each do |source|
|
27
26
|
sources_ready.add(source)
|
28
27
|
end
|
29
28
|
|
30
|
-
# Verify all of the proxies
|
31
29
|
sources_ready.callback do
|
32
30
|
sources.each do |source|
|
33
31
|
source.proxies.each do |proxy|
|
34
|
-
|
32
|
+
@proxies << proxy unless @proxies.include? proxy
|
35
33
|
end
|
36
34
|
end
|
37
35
|
end
|
38
36
|
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
37
|
+
sources_ready
|
38
|
+
end
|
39
|
+
|
40
|
+
#
|
41
|
+
# Selects a random proxy from the list of available proxies and verifies
|
42
|
+
# it. If it isn't valid it keeps trying all available proxies before
|
43
|
+
# returning nil.
|
44
|
+
#
|
45
|
+
def proxy(deferrable=nil)
|
46
|
+
deferrable ||= EventMachine::DefaultDeferrable.new
|
47
|
+
|
48
|
+
proxy = @proxies.choice
|
49
|
+
deferrable.fail unless proxy
|
50
|
+
|
51
|
+
proxy_response = proxy.validate(@verification_url)
|
52
|
+
proxy_response.callback do
|
53
|
+
if proxy.valid?
|
54
|
+
deferrable.succeed(proxy)
|
55
|
+
else
|
56
|
+
self.proxy(deferrable)
|
45
57
|
end
|
46
58
|
end
|
47
59
|
|
48
|
-
|
60
|
+
proxy_response.errback do
|
61
|
+
@proxies.remove(proxy)
|
62
|
+
@dead_proxies.add(proxy)
|
63
|
+
end
|
64
|
+
|
65
|
+
deferrable
|
66
|
+
end
|
67
|
+
|
68
|
+
#
|
69
|
+
# Saves all proxies that were loaded into this instance, including proxies
|
70
|
+
# with errors.
|
71
|
+
#
|
72
|
+
def save(filepath)
|
73
|
+
File.open(filepath, "w") do |f|
|
74
|
+
f.write((@proxies + @dead_proxies).to_yaml)
|
75
|
+
end
|
49
76
|
end
|
50
77
|
|
51
78
|
#
|
52
|
-
#
|
79
|
+
# Loads all proxies from the given file
|
53
80
|
#
|
54
|
-
def
|
55
|
-
@proxies.
|
81
|
+
def load(filepath)
|
82
|
+
@proxies = YAML.load(File.read(filepath))
|
56
83
|
end
|
57
84
|
end
|
58
85
|
end
|
data/lib/strawman.rb
CHANGED
data/strawman.gemspec
CHANGED
data.tar.gz.sig
CHANGED
Binary file
|
metadata
CHANGED
metadata.gz.sig
CHANGED
@@ -1 +1,2 @@
|
|
1
|
-
|
1
|
+
B�¤�a���V�@� Iv�;��ZP*��TB+E�`G`���3�Ճ�P��P��6�̮���F�Kk�uKsj���u�o�I/�UZ=qHP�}��gܪ�-2Z�mHm��<\�I�/)�#ˑ�WHs���a���K�����^�Ct�VeH�`�$��V��Dn���.�)��C�-̍�j,�C��V���,��c�r&��l�)�(e�Å�?)���ុ=��m��9v~c��6
|
2
|
+
�wLt»r5C�Ii�s��BI�B@
|