strawman 0.1 → 0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +12 -8
- data/Rakefile +8 -1
- data/examples/example.rb +12 -3
- data/lib/strawman/http_request.rb +38 -3
- data/lib/strawman/proxy.rb +42 -20
- data/lib/strawman/proxy_list.rb +24 -4
- data/lib/strawman/source.rb +54 -19
- data/strawman.gemspec +2 -2
- data.tar.gz.sig +0 -0
- metadata +2 -2
- metadata.gz.sig +0 -0
data/README.rdoc
CHANGED
@@ -3,6 +3,8 @@
|
|
3
3
|
A ruby gem which allows you to proxy EventMachine HTTP GET requests through
|
4
4
|
glype proxies on the net.
|
5
5
|
|
6
|
+
RDoc: http://rdoc.info/projects/MattColyer/strawman
|
7
|
+
|
6
8
|
== Getting started
|
7
9
|
|
8
10
|
# make sure you have gemcutter.org repos enabled
|
@@ -13,9 +15,8 @@ glype proxies on the net.
|
|
13
15
|
require 'rubygems'
|
14
16
|
require 'eventmachine'
|
15
17
|
require 'em-http'
|
16
|
-
$LOAD_PATH << "../lib/"
|
17
18
|
require 'strawman'
|
18
|
-
|
19
|
+
|
19
20
|
EventMachine.run {
|
20
21
|
proxy_list = Strawman::ProxyList.new
|
21
22
|
sources_set = proxy_list.set_sources([Strawman::TwitterSource.new("proxy_sites")])
|
@@ -32,12 +33,15 @@ glype proxies on the net.
|
|
32
33
|
|
33
34
|
Patches happily accepted, please open a github ticket and attach the patch.
|
34
35
|
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
36
|
+
* Store proxies across runs, so that the list can grow larger than the status list
|
37
|
+
* Optionally, store proxy verification across runs
|
38
|
+
* Undo link encoding if Glype adds it
|
39
|
+
* Strip annoying stuff that Glype adds as a footer and header
|
40
|
+
* POST doesn't work
|
41
|
+
* Cookies don't work
|
42
|
+
* SSL doesn't work
|
43
|
+
* Implement other proxy sources
|
40
44
|
|
41
45
|
== Limitations
|
42
46
|
|
43
|
-
|
47
|
+
* PUT and DELETE can't work due to the way Glype is implemented
|
data/Rakefile
CHANGED
@@ -1,8 +1,9 @@
|
|
1
1
|
require 'rubygems'
|
2
|
+
require 'spec/rake/spectask'
|
2
3
|
require 'rake'
|
3
4
|
require 'echoe'
|
4
5
|
|
5
|
-
Echoe.new('strawman', '0.
|
6
|
+
Echoe.new('strawman', '0.2') do |p|
|
6
7
|
p.description = "Allows you fetch pages using glype proxies."
|
7
8
|
p.url = "http://github.com/mattcolyer/strawman"
|
8
9
|
p.author = "Matt Colyer"
|
@@ -11,3 +12,9 @@ Echoe.new('strawman', '0.1') do |p|
|
|
11
12
|
p.development_dependencies = ["rspec"]
|
12
13
|
p.dependencies = ["eventmachine", "em-http-request", "json"]
|
13
14
|
end
|
15
|
+
|
16
|
+
|
17
|
+
desc "Run all examples"
|
18
|
+
Spec::Rake::SpecTask.new('tests') do |t|
|
19
|
+
t.spec_files = FileList['spec/*_spec.rb']
|
20
|
+
end
|
data/examples/example.rb
CHANGED
@@ -4,15 +4,24 @@ require 'em-http'
|
|
4
4
|
$LOAD_PATH << "../lib/"
|
5
5
|
require 'strawman'
|
6
6
|
|
7
|
+
log = Logger.new(STDOUT)
|
8
|
+
log.level = Logger::INFO
|
9
|
+
|
7
10
|
EventMachine.run {
|
8
|
-
proxy_list = Strawman::ProxyList.new
|
11
|
+
proxy_list = Strawman::ProxyList.new("http://whatismyip.org")
|
9
12
|
sources_set = proxy_list.set_sources([Strawman::TwitterSource.new("proxy_sites")])
|
13
|
+
|
10
14
|
sources_set.callback{
|
11
15
|
http = Strawman::HttpRequest.new(proxy_list, 'http://goingtorain.com/').get
|
12
16
|
http.callback {
|
13
|
-
|
14
|
-
|
17
|
+
log.info http.response_header.inspect
|
18
|
+
log.info http.response
|
15
19
|
EventMachine.stop
|
16
20
|
}
|
17
21
|
}
|
22
|
+
|
23
|
+
sources_set.errback{
|
24
|
+
log.error "Something went wrong"
|
25
|
+
EventMachine.stop
|
26
|
+
}
|
18
27
|
}
|
@@ -1,11 +1,18 @@
|
|
1
1
|
module Strawman
|
2
|
+
# = HttpRequest
|
3
|
+
#
|
4
|
+
# A simple wrapper for em-http-client's HttpRequest.
|
5
|
+
#
|
2
6
|
class HttpRequest
|
3
7
|
def initialize(proxy_list, url)
|
4
8
|
@proxy = proxy_list.proxy
|
5
|
-
|
6
|
-
@request = EventMachine::HttpRequest.new(proxied_url)
|
9
|
+
@request = Transport.new(@proxy.proxy_url(url))
|
7
10
|
end
|
8
|
-
|
11
|
+
|
12
|
+
#
|
13
|
+
# Handles get requests. Currently it accepts no arguments (ie query
|
14
|
+
# parameters, http headers etc...).
|
15
|
+
#
|
9
16
|
def get
|
10
17
|
http = @request.get :head => {"referer" => @proxy.referer}
|
11
18
|
http.callback {
|
@@ -14,5 +21,33 @@ module Strawman
|
|
14
21
|
}
|
15
22
|
http
|
16
23
|
end
|
24
|
+
|
25
|
+
#
|
26
|
+
# TODO: Implement this.
|
27
|
+
#
|
28
|
+
def post
|
29
|
+
raise NotImplementedError
|
30
|
+
end
|
31
|
+
|
32
|
+
#
|
33
|
+
# Can't and won't be implemented due to Glype not proxying these requests.
|
34
|
+
#
|
35
|
+
def put
|
36
|
+
raise NotImplementedError
|
37
|
+
end
|
38
|
+
|
39
|
+
#
|
40
|
+
# Can't and won't be implemented due to Glype not proxying these requests.
|
41
|
+
#
|
42
|
+
def delete
|
43
|
+
raise NotImplementedError
|
44
|
+
end
|
45
|
+
|
46
|
+
#
|
47
|
+
# Can't and won't be implemented due to Glype not proxying these requests.
|
48
|
+
#
|
49
|
+
def head
|
50
|
+
raise NotImplementedError
|
51
|
+
end
|
17
52
|
end
|
18
53
|
end
|
data/lib/strawman/proxy.rb
CHANGED
@@ -1,23 +1,50 @@
|
|
1
1
|
module Strawman
|
2
|
+
#
|
3
|
+
# The general proxy class, which contains functions not specific to any type
|
4
|
+
# of proxy.
|
5
|
+
#
|
2
6
|
class Proxy
|
7
|
+
#
|
8
|
+
# Returns the referer to use when making the proxied request.
|
9
|
+
#
|
10
|
+
def referer
|
11
|
+
@root_url
|
12
|
+
end
|
13
|
+
|
14
|
+
#
|
15
|
+
# Returns the url to fetch the given url through this proxy.
|
16
|
+
#
|
17
|
+
def proxy_url(url)
|
18
|
+
URI.join @root_url, proxy_path(url)
|
19
|
+
end
|
20
|
+
|
21
|
+
protected
|
22
|
+
def proxy_path(url)
|
23
|
+
raise NotImplementedError
|
24
|
+
end
|
3
25
|
end
|
4
26
|
|
27
|
+
#
|
28
|
+
# An implementation of the Proxy class which is specific to Glype proxies.
|
29
|
+
# See: http://www.glype.com/ for more details.
|
30
|
+
#
|
5
31
|
class GlypeProxy < Proxy
|
6
32
|
def initialize(url)
|
7
33
|
@root_url = url
|
8
34
|
@valid = false
|
9
35
|
end
|
10
36
|
|
11
|
-
|
12
|
-
|
13
|
-
|
37
|
+
#
|
38
|
+
# Verifies whether this proxy is currently functional. Returns a deferrable.
|
39
|
+
#
|
40
|
+
def validate(verification_url)
|
41
|
+
@valid = false
|
14
42
|
|
15
|
-
def validate
|
16
|
-
url = proxy_url("http://whatismyip.org")
|
17
43
|
# FIXME: This only validates proxies that don't require a unique session
|
18
44
|
# cookie which is retrieved by going to the root page and looking for the
|
19
45
|
# s cookie.
|
20
|
-
|
46
|
+
url = proxy_url(verification_url)
|
47
|
+
http = Transport.new(url).get :head => {'referer' => @root_url}
|
21
48
|
http.callback {
|
22
49
|
@valid = true if http.response_header.status == 200
|
23
50
|
}
|
@@ -25,27 +52,22 @@ module Strawman
|
|
25
52
|
http
|
26
53
|
end
|
27
54
|
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
def
|
33
|
-
@
|
55
|
+
#
|
56
|
+
# Used to determine whether this proxy is valid. This must be called from
|
57
|
+
# within the callback of the validate deferrable.
|
58
|
+
#
|
59
|
+
def valid?
|
60
|
+
@valid
|
34
61
|
end
|
35
62
|
|
36
|
-
def
|
37
|
-
|
63
|
+
def to_s
|
64
|
+
"<GlypeProxy #{@root_url}>"
|
38
65
|
end
|
39
66
|
|
67
|
+
protected
|
40
68
|
def proxy_path(url)
|
41
69
|
encoded_url = CGI.escape(Base64.encode64(url[4..-1]))
|
42
70
|
"/browse.php?u=#{encoded_url}&f=norefer"
|
43
71
|
end
|
44
72
|
end
|
45
|
-
|
46
|
-
class PhpProxy < Proxy
|
47
|
-
def valid?
|
48
|
-
false
|
49
|
-
end
|
50
|
-
end
|
51
73
|
end
|
data/lib/strawman/proxy_list.rb
CHANGED
@@ -1,25 +1,42 @@
|
|
1
1
|
module Strawman
|
2
|
+
#
|
3
|
+
# Represents a group of proxy sources
|
4
|
+
#
|
2
5
|
class ProxyList
|
3
|
-
|
6
|
+
attr_reader :proxies
|
7
|
+
|
8
|
+
#
|
9
|
+
# [verification_url] The url to use to verify that the proxy is valid. All
|
10
|
+
# it needs to do is return an HTTP status of 200.
|
11
|
+
#
|
12
|
+
def initialize(verification_url)
|
4
13
|
@proxies = []
|
14
|
+
@verification_url = verification_url
|
5
15
|
end
|
6
16
|
|
17
|
+
#
|
18
|
+
# Takes a list of sources and returns a deferrable which will complete once
|
19
|
+
# all sources have been fetched and all proxies have been verified.
|
20
|
+
#
|
7
21
|
def set_sources(sources)
|
8
22
|
sources_ready = EventMachine::MultiRequest.new
|
23
|
+
proxies_ready = EventMachine::MultiRequest.new
|
9
24
|
|
25
|
+
# Fetch all of the sources
|
10
26
|
sources.each do |source|
|
11
27
|
sources_ready.add(source)
|
12
28
|
end
|
13
29
|
|
14
|
-
|
30
|
+
# Verify all of the proxies
|
15
31
|
sources_ready.callback do
|
16
32
|
sources.each do |source|
|
17
33
|
source.proxies.each do |proxy|
|
18
|
-
proxies_ready.add(proxy.validate)
|
34
|
+
proxies_ready.add(proxy.validate(@verification_url))
|
19
35
|
end
|
20
36
|
end
|
21
37
|
end
|
22
38
|
|
39
|
+
# Include proxies that are verified
|
23
40
|
proxies_ready.callback do
|
24
41
|
sources.each do |source|
|
25
42
|
source.proxies.each do |proxy|
|
@@ -31,8 +48,11 @@ module Strawman
|
|
31
48
|
proxies_ready
|
32
49
|
end
|
33
50
|
|
51
|
+
#
|
52
|
+
# Selects a random proxy from the list of available proxies
|
53
|
+
#
|
34
54
|
def proxy
|
35
|
-
@proxies.choice
|
55
|
+
@proxies.choice
|
36
56
|
end
|
37
57
|
end
|
38
58
|
end
|
data/lib/strawman/source.rb
CHANGED
@@ -2,35 +2,68 @@ module Strawman
|
|
2
2
|
class Source
|
3
3
|
end
|
4
4
|
|
5
|
+
#
|
6
|
+
# A source that parses a twitter feed for urls which point to proxies, like
|
7
|
+
# http://twitter.com/proxy_lists. The class is deferrable itself and fires
|
8
|
+
# its callback once the feed has been fetched and parsed.
|
9
|
+
#
|
10
|
+
# By default it caches the feed to disk (cache/twitter-username.json), and
|
11
|
+
# fetches a new copy after that file is an hour old. To disable this, simply
|
12
|
+
# pass in false to the constructor.
|
13
|
+
#
|
5
14
|
class TwitterSource < Source
|
6
15
|
include EventMachine::Deferrable
|
7
16
|
ONE_HOUR = 60*60
|
8
17
|
attr_reader :proxies
|
9
18
|
|
10
|
-
|
19
|
+
#
|
20
|
+
# [twitter_username] Just the twitter user's username. Not the full url.
|
21
|
+
#
|
22
|
+
# [cache] Whether to enable or disable caching. Defaults to enabling
|
23
|
+
# caching.
|
24
|
+
#
|
25
|
+
def initialize(twitter_username, cache=true)
|
11
26
|
@id = twitter_username
|
27
|
+
@cache = cache
|
28
|
+
|
29
|
+
if cache
|
30
|
+
fetched = update_cache
|
31
|
+
else
|
32
|
+
fetched = fetch
|
33
|
+
end
|
12
34
|
|
13
|
-
fetched = update_cache
|
14
35
|
fetched.callback do
|
15
|
-
@
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
36
|
+
if @cache
|
37
|
+
data = read_cache
|
38
|
+
else
|
39
|
+
data = fetched.response
|
40
|
+
end
|
41
|
+
|
42
|
+
@proxies = parse(data)
|
43
|
+
|
44
|
+
succeed
|
24
45
|
end
|
25
46
|
end
|
26
47
|
|
27
48
|
private
|
49
|
+
def parse(data)
|
50
|
+
JSON.parse(data).map do |status|
|
51
|
+
match = /.*(http:\/\/.*)/.match(status["text"])
|
52
|
+
|
53
|
+
if match
|
54
|
+
GlypeProxy.new(match[1])
|
55
|
+
else
|
56
|
+
nil
|
57
|
+
end
|
58
|
+
end.compact
|
59
|
+
end
|
60
|
+
|
28
61
|
def cache_dir
|
29
62
|
"cache"
|
30
63
|
end
|
31
64
|
|
32
65
|
def cache_file_path
|
33
|
-
File.join cache_dir, "
|
66
|
+
File.join cache_dir, "twitter-#{@id}.json"
|
34
67
|
end
|
35
68
|
|
36
69
|
def cache_file_url
|
@@ -55,14 +88,16 @@ module Strawman
|
|
55
88
|
fetch
|
56
89
|
end
|
57
90
|
end
|
58
|
-
|
91
|
+
|
59
92
|
def fetch
|
60
|
-
http =
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
93
|
+
http = Transport.new(cache_file_url).get
|
94
|
+
|
95
|
+
if @cache
|
96
|
+
http.callback do
|
97
|
+
FileUtils.mkdir(cache_dir) unless File.exist? cache_dir
|
98
|
+
open(cache_file_path, "w") do |f|
|
99
|
+
f.write(http.response)
|
100
|
+
end
|
66
101
|
end
|
67
102
|
end
|
68
103
|
|
data/strawman.gemspec
CHANGED
@@ -2,12 +2,12 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |s|
|
4
4
|
s.name = %q{strawman}
|
5
|
-
s.version = "0.
|
5
|
+
s.version = "0.2"
|
6
6
|
|
7
7
|
s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
|
8
8
|
s.authors = ["Matt Colyer"]
|
9
9
|
s.cert_chain = ["/home/mcolyer/.ssh/gem-public_cert.pem"]
|
10
|
-
s.date = %q{2010-02-
|
10
|
+
s.date = %q{2010-02-19}
|
11
11
|
s.description = %q{Allows you fetch pages using glype proxies.}
|
12
12
|
s.email = %q{matt @nospam@ colyer.name}
|
13
13
|
s.extra_rdoc_files = ["README.rdoc", "lib/strawman.rb", "lib/strawman/http_request.rb", "lib/strawman/proxy.rb", "lib/strawman/proxy_list.rb", "lib/strawman/source.rb"]
|
data.tar.gz.sig
CHANGED
Binary file
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: strawman
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: "0.
|
4
|
+
version: "0.2"
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Matt Colyer
|
@@ -30,7 +30,7 @@ cert_chain:
|
|
30
30
|
anU=
|
31
31
|
-----END CERTIFICATE-----
|
32
32
|
|
33
|
-
date: 2010-02-
|
33
|
+
date: 2010-02-19 00:00:00 -08:00
|
34
34
|
default_executable:
|
35
35
|
dependencies:
|
36
36
|
- !ruby/object:Gem::Dependency
|
metadata.gz.sig
CHANGED
Binary file
|