strawman 0.2 → 0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +25 -8
- data/Rakefile +1 -1
- data/examples/example.rb +15 -6
- data/lib/strawman/http_request.rb +8 -2
- data/lib/strawman/proxy.rb +22 -14
- data/lib/strawman/proxy_list.rb +44 -17
- data/lib/strawman.rb +1 -0
- data/strawman.gemspec +1 -1
- data.tar.gz.sig +0 -0
- metadata +1 -1
- metadata.gz.sig +2 -1
data/README.rdoc
CHANGED
@@ -16,25 +16,42 @@ RDoc: http://rdoc.info/projects/MattColyer/strawman
|
|
16
16
|
require 'eventmachine'
|
17
17
|
require 'em-http'
|
18
18
|
require 'strawman'
|
19
|
+
require 'logger'
|
20
|
+
|
21
|
+
log = Logger.new(STDOUT)
|
22
|
+
log.level = Logger::INFO
|
19
23
|
|
20
24
|
EventMachine.run {
|
21
|
-
proxy_list = Strawman::ProxyList.new
|
22
|
-
|
25
|
+
proxy_list = Strawman::ProxyList.new("http://whatismyip.org")
|
26
|
+
proxy_list.load("proxies") if File.exist?("proxies")
|
27
|
+
sources_set = proxy_list.add_sources([Strawman::TwitterSource.new("proxy_sites")])
|
28
|
+
|
23
29
|
sources_set.callback{
|
24
|
-
|
25
|
-
http.
|
26
|
-
|
27
|
-
|
30
|
+
proxy_list.save("proxies")
|
31
|
+
request = Strawman::HttpRequest.new(proxy_list, 'http://goingtorain.com/')
|
32
|
+
request.callback {
|
33
|
+
http = request.get
|
34
|
+
http.callback {
|
35
|
+
log.info http.response_header.inspect
|
36
|
+
log.info http.response
|
37
|
+
EventMachine.stop
|
38
|
+
}
|
39
|
+
}
|
40
|
+
request.errback{
|
41
|
+
log.error "No available proxies"
|
28
42
|
}
|
29
43
|
}
|
44
|
+
|
45
|
+
sources_set.errback{
|
46
|
+
log.error "Something went wrong"
|
47
|
+
EventMachine.stop
|
48
|
+
}
|
30
49
|
}
|
31
50
|
|
32
51
|
== TODO
|
33
52
|
|
34
53
|
Patches happily accepted, please open a github ticket and attach the patch.
|
35
54
|
|
36
|
-
* Store proxies across runs, so that the list can grow larger than the status list
|
37
|
-
* Optionally, store proxy verification across runs
|
38
55
|
* Undo link encoding if Glype adds it
|
39
56
|
* Strip annoying stuff that Glype adds as a footer and header
|
40
57
|
* POST doesn't work
|
data/Rakefile
CHANGED
@@ -3,7 +3,7 @@ require 'spec/rake/spectask'
|
|
3
3
|
require 'rake'
|
4
4
|
require 'echoe'
|
5
5
|
|
6
|
-
Echoe.new('strawman', '0.
|
6
|
+
Echoe.new('strawman', '0.3') do |p|
|
7
7
|
p.description = "Allows you fetch pages using glype proxies."
|
8
8
|
p.url = "http://github.com/mattcolyer/strawman"
|
9
9
|
p.author = "Matt Colyer"
|
data/examples/example.rb
CHANGED
@@ -3,20 +3,29 @@ require 'eventmachine'
|
|
3
3
|
require 'em-http'
|
4
4
|
$LOAD_PATH << "../lib/"
|
5
5
|
require 'strawman'
|
6
|
+
require 'logger'
|
6
7
|
|
7
8
|
log = Logger.new(STDOUT)
|
8
9
|
log.level = Logger::INFO
|
9
10
|
|
10
11
|
EventMachine.run {
|
11
12
|
proxy_list = Strawman::ProxyList.new("http://whatismyip.org")
|
12
|
-
|
13
|
+
proxy_list.load("proxies") if File.exist?("proxies")
|
14
|
+
sources_set = proxy_list.add_sources([Strawman::TwitterSource.new("proxy_sites")])
|
13
15
|
|
14
16
|
sources_set.callback{
|
15
|
-
|
16
|
-
http.
|
17
|
-
|
18
|
-
|
19
|
-
|
17
|
+
proxy_list.save("proxies")
|
18
|
+
request = Strawman::HttpRequest.new(proxy_list, 'http://goingtorain.com/')
|
19
|
+
request.callback {
|
20
|
+
http = request.get
|
21
|
+
http.callback {
|
22
|
+
log.info http.response_header.inspect
|
23
|
+
log.info http.response
|
24
|
+
EventMachine.stop
|
25
|
+
}
|
26
|
+
}
|
27
|
+
request.errback{
|
28
|
+
log.error "No available proxies"
|
20
29
|
}
|
21
30
|
}
|
22
31
|
|
@@ -4,9 +4,15 @@ module Strawman
|
|
4
4
|
# A simple wrapper for em-http-client's HttpRequest.
|
5
5
|
#
|
6
6
|
class HttpRequest
|
7
|
+
include EventMachine::Deferrable
|
8
|
+
|
7
9
|
def initialize(proxy_list, url)
|
8
|
-
|
9
|
-
|
10
|
+
proxy_response = proxy_list.proxy
|
11
|
+
proxy_response.callback do |proxy|
|
12
|
+
@proxy = proxy
|
13
|
+
@request = Transport.new(@proxy.proxy_url(url))
|
14
|
+
succeed
|
15
|
+
end
|
10
16
|
end
|
11
17
|
|
12
18
|
#
|
data/lib/strawman/proxy.rb
CHANGED
@@ -4,6 +4,14 @@ module Strawman
|
|
4
4
|
# of proxy.
|
5
5
|
#
|
6
6
|
class Proxy
|
7
|
+
attr_reader :root_url
|
8
|
+
attr_writer :valid
|
9
|
+
|
10
|
+
def initialize(url)
|
11
|
+
@root_url = url
|
12
|
+
@valid = false
|
13
|
+
end
|
14
|
+
|
7
15
|
#
|
8
16
|
# Returns the referer to use when making the proxied request.
|
9
17
|
#
|
@@ -15,7 +23,20 @@ module Strawman
|
|
15
23
|
# Returns the url to fetch the given url through this proxy.
|
16
24
|
#
|
17
25
|
def proxy_url(url)
|
18
|
-
URI.join @root_url, proxy_path(url)
|
26
|
+
uri = URI.join @root_url, proxy_path(url)
|
27
|
+
"#{uri.scheme}://#{uri.host}:#{uri.port}#{uri.path}?#{uri.query}"
|
28
|
+
end
|
29
|
+
|
30
|
+
#
|
31
|
+
# Used to determine whether this proxy is valid. This must be called from
|
32
|
+
# within the callback of the validate deferable.
|
33
|
+
#
|
34
|
+
def valid?
|
35
|
+
@valid
|
36
|
+
end
|
37
|
+
|
38
|
+
def ==(other)
|
39
|
+
self.class == other.class && self.root_url == other.root_url
|
19
40
|
end
|
20
41
|
|
21
42
|
protected
|
@@ -29,11 +50,6 @@ module Strawman
|
|
29
50
|
# See: http://www.glype.com/ for more details.
|
30
51
|
#
|
31
52
|
class GlypeProxy < Proxy
|
32
|
-
def initialize(url)
|
33
|
-
@root_url = url
|
34
|
-
@valid = false
|
35
|
-
end
|
36
|
-
|
37
53
|
#
|
38
54
|
# Verifies whether this proxy is currently functional. Returns a deferable.
|
39
55
|
#
|
@@ -52,14 +68,6 @@ module Strawman
|
|
52
68
|
http
|
53
69
|
end
|
54
70
|
|
55
|
-
#
|
56
|
-
# Used to determine whether this proxy is valid. This must be called from
|
57
|
-
# within the callback of the validate deferable.
|
58
|
-
#
|
59
|
-
def valid?
|
60
|
-
@valid
|
61
|
-
end
|
62
|
-
|
63
71
|
def to_s
|
64
72
|
"<GlypeProxy #{@root_url}>"
|
65
73
|
end
|
data/lib/strawman/proxy_list.rb
CHANGED
@@ -3,7 +3,7 @@ module Strawman
|
|
3
3
|
# Represents a group of proxy sources
|
4
4
|
#
|
5
5
|
class ProxyList
|
6
|
-
|
6
|
+
attr_accessor :proxies
|
7
7
|
|
8
8
|
#
|
9
9
|
# [verification_url] The url to use to verify that the proxy is valid. All
|
@@ -11,48 +11,75 @@ module Strawman
|
|
11
11
|
#
|
12
12
|
def initialize(verification_url)
|
13
13
|
@proxies = []
|
14
|
+
@dead_proxies = []
|
14
15
|
@verification_url = verification_url
|
15
16
|
end
|
16
17
|
|
17
18
|
#
|
18
19
|
# Takes a list of sources and returns a deferrable which will complete once
|
19
|
-
# all sources have been fetched
|
20
|
+
# all sources have been fetched.
|
20
21
|
#
|
21
|
-
def
|
22
|
+
def add_sources(sources)
|
22
23
|
sources_ready = EventMachine::MultiRequest.new
|
23
|
-
proxies_ready = EventMachine::MultiRequest.new
|
24
24
|
|
25
|
-
# Fetch all of the sources
|
26
25
|
sources.each do |source|
|
27
26
|
sources_ready.add(source)
|
28
27
|
end
|
29
28
|
|
30
|
-
# Verify all of the proxies
|
31
29
|
sources_ready.callback do
|
32
30
|
sources.each do |source|
|
33
31
|
source.proxies.each do |proxy|
|
34
|
-
|
32
|
+
@proxies << proxy unless @proxies.include? proxy
|
35
33
|
end
|
36
34
|
end
|
37
35
|
end
|
38
36
|
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
37
|
+
sources_ready
|
38
|
+
end
|
39
|
+
|
40
|
+
#
|
41
|
+
# Selects a random proxy from the list of available proxies and verifies
|
42
|
+
# it. If it isn't valid it keeps trying all available proxies before
|
43
|
+
# returning nil.
|
44
|
+
#
|
45
|
+
def proxy(deferrable=nil)
|
46
|
+
deferrable ||= EventMachine::DefaultDeferrable.new
|
47
|
+
|
48
|
+
proxy = @proxies.choice
|
49
|
+
deferrable.fail unless proxy
|
50
|
+
|
51
|
+
proxy_response = proxy.validate(@verification_url)
|
52
|
+
proxy_response.callback do
|
53
|
+
if proxy.valid?
|
54
|
+
deferrable.succeed(proxy)
|
55
|
+
else
|
56
|
+
self.proxy(deferrable)
|
45
57
|
end
|
46
58
|
end
|
47
59
|
|
48
|
-
|
60
|
+
proxy_response.errback do
|
61
|
+
@proxies.remove(proxy)
|
62
|
+
@dead_proxies.add(proxy)
|
63
|
+
end
|
64
|
+
|
65
|
+
deferrable
|
66
|
+
end
|
67
|
+
|
68
|
+
#
|
69
|
+
# Saves all proxies that were loaded into this instance, including proxies
|
70
|
+
# with errors.
|
71
|
+
#
|
72
|
+
def save(filepath)
|
73
|
+
File.open(filepath, "w") do |f|
|
74
|
+
f.write((@proxies + @dead_proxies).to_yaml)
|
75
|
+
end
|
49
76
|
end
|
50
77
|
|
51
78
|
#
|
52
|
-
#
|
79
|
+
# Loads all proxies from the given file
|
53
80
|
#
|
54
|
-
def
|
55
|
-
@proxies.
|
81
|
+
def load(filepath)
|
82
|
+
@proxies = YAML.load(File.read(filepath))
|
56
83
|
end
|
57
84
|
end
|
58
85
|
end
|
data/lib/strawman.rb
CHANGED
data/strawman.gemspec
CHANGED
data.tar.gz.sig
CHANGED
Binary file
|
metadata
CHANGED
metadata.gz.sig
CHANGED
@@ -1 +1,2 @@
|
|
1
|
-
|
1
|
+
B�¤�a���V�@� Iv�;��ZP*��TB+E�`G`���3�Ճ�P��P��6�̮���F�Kk�uKsj���u�o�I/�UZ=qHP�}��gܪ�-2Z�mHm��<\�I�/)�#ˑ�WHs���a���K�����^�Ct�VeH�`�$��V��Dn���.�)��C�-̍�j,�C��V���,��c�r&��l�)�(e�Å�?)���ុ=��m��9v~c��6
|
2
|
+
�wLt»r5C�Ii�s��BI�B@
|