strawman 0.1 → 0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +12 -8
- data/Rakefile +8 -1
- data/examples/example.rb +12 -3
- data/lib/strawman/http_request.rb +38 -3
- data/lib/strawman/proxy.rb +42 -20
- data/lib/strawman/proxy_list.rb +24 -4
- data/lib/strawman/source.rb +54 -19
- data/strawman.gemspec +2 -2
- data.tar.gz.sig +0 -0
- metadata +2 -2
- metadata.gz.sig +0 -0
data/README.rdoc
CHANGED
@@ -3,6 +3,8 @@
|
|
3
3
|
A ruby gem which allows you to proxy EventMachine HTTP GET requests through
|
4
4
|
glype proxies on the net.
|
5
5
|
|
6
|
+
RDoc: http://rdoc.info/projects/MattColyer/strawman
|
7
|
+
|
6
8
|
== Getting started
|
7
9
|
|
8
10
|
# make sure you have gemcutter.org repos enabled
|
@@ -13,9 +15,8 @@ glype proxies on the net.
|
|
13
15
|
require 'rubygems'
|
14
16
|
require 'eventmachine'
|
15
17
|
require 'em-http'
|
16
|
-
$LOAD_PATH << "../lib/"
|
17
18
|
require 'strawman'
|
18
|
-
|
19
|
+
|
19
20
|
EventMachine.run {
|
20
21
|
proxy_list = Strawman::ProxyList.new
|
21
22
|
sources_set = proxy_list.set_sources([Strawman::TwitterSource.new("proxy_sites")])
|
@@ -32,12 +33,15 @@ glype proxies on the net.
|
|
32
33
|
|
33
34
|
Patches happily accepted, please open a github ticket and attach the patch.
|
34
35
|
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
36
|
+
* Store proxies across runs, so that the list can grow larger than the status list
|
37
|
+
* Optionally, store proxy verification across runs
|
38
|
+
* Undo link encoding if Glype adds it
|
39
|
+
* Strip annoying stuff that Glype adds as a footer and header
|
40
|
+
* POST doesn't work
|
41
|
+
* Cookies don't work
|
42
|
+
* SSL doesn't work
|
43
|
+
* Implement other proxy sources
|
40
44
|
|
41
45
|
== Limitations
|
42
46
|
|
43
|
-
|
47
|
+
* PUT and DELETE can't work due to the way Glype is implemented
|
data/Rakefile
CHANGED
@@ -1,8 +1,9 @@
|
|
1
1
|
require 'rubygems'
|
2
|
+
require 'spec/rake/spectask'
|
2
3
|
require 'rake'
|
3
4
|
require 'echoe'
|
4
5
|
|
5
|
-
Echoe.new('strawman', '0.1') do |p|
|
6
|
+
Echoe.new('strawman', '0.2') do |p|
|
6
7
|
p.description = "Allows you fetch pages using glype proxies."
|
7
8
|
p.url = "http://github.com/mattcolyer/strawman"
|
8
9
|
p.author = "Matt Colyer"
|
@@ -11,3 +12,9 @@ Echoe.new('strawman', '0.1') do |p|
|
|
11
12
|
p.development_dependencies = ["rspec"]
|
12
13
|
p.dependencies = ["eventmachine", "em-http-request", "json"]
|
13
14
|
end
|
15
|
+
|
16
|
+
|
17
|
+
desc "Run all examples"
|
18
|
+
Spec::Rake::SpecTask.new('tests') do |t|
|
19
|
+
t.spec_files = FileList['spec/*_spec.rb']
|
20
|
+
end
|
data/examples/example.rb
CHANGED
@@ -4,15 +4,24 @@ require 'em-http'
|
|
4
4
|
$LOAD_PATH << "../lib/"
|
5
5
|
require 'strawman'
|
6
6
|
|
7
|
+
log = Logger.new(STDOUT)
|
8
|
+
log.level = Logger::INFO
|
9
|
+
|
7
10
|
EventMachine.run {
|
8
|
-
proxy_list = Strawman::ProxyList.new
|
11
|
+
proxy_list = Strawman::ProxyList.new("http://whatismyip.org")
|
9
12
|
sources_set = proxy_list.set_sources([Strawman::TwitterSource.new("proxy_sites")])
|
13
|
+
|
10
14
|
sources_set.callback{
|
11
15
|
http = Strawman::HttpRequest.new(proxy_list, 'http://goingtorain.com/').get
|
12
16
|
http.callback {
|
13
|
-
|
14
|
-
|
17
|
+
log.info http.response_header.inspect
|
18
|
+
log.info http.response
|
15
19
|
EventMachine.stop
|
16
20
|
}
|
17
21
|
}
|
22
|
+
|
23
|
+
sources_set.errback{
|
24
|
+
log.error "Something went wrong"
|
25
|
+
EventMachine.stop
|
26
|
+
}
|
18
27
|
}
|
@@ -1,11 +1,18 @@
|
|
1
1
|
module Strawman
|
2
|
+
# = HttpRequest
|
3
|
+
#
|
4
|
+
# A simple wrapper for em-http-client's HttpRequest.
|
5
|
+
#
|
2
6
|
class HttpRequest
|
3
7
|
def initialize(proxy_list, url)
|
4
8
|
@proxy = proxy_list.proxy
|
5
|
-
|
6
|
-
@request = EventMachine::HttpRequest.new(proxied_url)
|
9
|
+
@request = Transport.new(@proxy.proxy_url(url))
|
7
10
|
end
|
8
|
-
|
11
|
+
|
12
|
+
#
|
13
|
+
# Handles get requests. Currently it accepts no arguments (ie query
|
14
|
+
# parameters, http headers etc...).
|
15
|
+
#
|
9
16
|
def get
|
10
17
|
http = @request.get :head => {"referer" => @proxy.referer}
|
11
18
|
http.callback {
|
@@ -14,5 +21,33 @@ module Strawman
|
|
14
21
|
}
|
15
22
|
http
|
16
23
|
end
|
24
|
+
|
25
|
+
#
|
26
|
+
# TODO: Implement this.
|
27
|
+
#
|
28
|
+
def post
|
29
|
+
raise NotImplementedError
|
30
|
+
end
|
31
|
+
|
32
|
+
#
|
33
|
+
# Can't and won't be implemented due to Glype not proxying these requests.
|
34
|
+
#
|
35
|
+
def put
|
36
|
+
raise NotImplementedError
|
37
|
+
end
|
38
|
+
|
39
|
+
#
|
40
|
+
# Can't and won't be implemented due to Glype not proxying these requests.
|
41
|
+
#
|
42
|
+
def delete
|
43
|
+
raise NotImplementedError
|
44
|
+
end
|
45
|
+
|
46
|
+
#
|
47
|
+
# Can't and won't be implemented due to Glype not proxying these requests.
|
48
|
+
#
|
49
|
+
def head
|
50
|
+
raise NotImplementedError
|
51
|
+
end
|
17
52
|
end
|
18
53
|
end
|
data/lib/strawman/proxy.rb
CHANGED
@@ -1,23 +1,50 @@
|
|
1
1
|
module Strawman
|
2
|
+
#
|
3
|
+
# The general proxy class, which contains functions not specific to any type
|
4
|
+
# of proxy.
|
5
|
+
#
|
2
6
|
class Proxy
|
7
|
+
#
|
8
|
+
# Returns the referer to use when making the proxied request.
|
9
|
+
#
|
10
|
+
def referer
|
11
|
+
@root_url
|
12
|
+
end
|
13
|
+
|
14
|
+
#
|
15
|
+
# Returns the url to fetch the given url through this proxy.
|
16
|
+
#
|
17
|
+
def proxy_url(url)
|
18
|
+
URI.join @root_url, proxy_path(url)
|
19
|
+
end
|
20
|
+
|
21
|
+
protected
|
22
|
+
def proxy_path(url)
|
23
|
+
raise NotImplementedError
|
24
|
+
end
|
3
25
|
end
|
4
26
|
|
27
|
+
#
|
28
|
+
# An implementation of the Proxy class which is specific to Glype proxies.
|
29
|
+
# See: http://www.glype.com/ for more details.
|
30
|
+
#
|
5
31
|
class GlypeProxy < Proxy
|
6
32
|
def initialize(url)
|
7
33
|
@root_url = url
|
8
34
|
@valid = false
|
9
35
|
end
|
10
36
|
|
11
|
-
|
12
|
-
|
13
|
-
|
37
|
+
#
|
38
|
+
# Verifies whether this proxy is currently functional. Returns a deferrable.
|
39
|
+
#
|
40
|
+
def validate(verification_url)
|
41
|
+
@valid = false
|
14
42
|
|
15
|
-
def validate
|
16
|
-
url = proxy_url("http://whatismyip.org")
|
17
43
|
# FIXME: This only validates proxies that don't require a unique session
|
18
44
|
# cookie which is retrieved by going to the root page and looking for the
|
19
45
|
# s cookie.
|
20
|
-
|
46
|
+
url = proxy_url(verification_url)
|
47
|
+
http = Transport.new(url).get :head => {'referer' => @root_url}
|
21
48
|
http.callback {
|
22
49
|
@valid = true if http.response_header.status == 200
|
23
50
|
}
|
@@ -25,27 +52,22 @@ module Strawman
|
|
25
52
|
http
|
26
53
|
end
|
27
54
|
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
def
|
33
|
-
@
|
55
|
+
#
|
56
|
+
# Used to determine whether this proxy is valid. This must be called from
|
57
|
+
# within the callback of the validate deferrable.
|
58
|
+
#
|
59
|
+
def valid?
|
60
|
+
@valid
|
34
61
|
end
|
35
62
|
|
36
|
-
def
|
37
|
-
|
63
|
+
def to_s
|
64
|
+
"<GlypeProxy #{@root_url}>"
|
38
65
|
end
|
39
66
|
|
67
|
+
protected
|
40
68
|
def proxy_path(url)
|
41
69
|
encoded_url = CGI.escape(Base64.encode64(url[4..-1]))
|
42
70
|
"/browse.php?u=#{encoded_url}&f=norefer"
|
43
71
|
end
|
44
72
|
end
|
45
|
-
|
46
|
-
class PhpProxy < Proxy
|
47
|
-
def valid?
|
48
|
-
false
|
49
|
-
end
|
50
|
-
end
|
51
73
|
end
|
data/lib/strawman/proxy_list.rb
CHANGED
@@ -1,25 +1,42 @@
|
|
1
1
|
module Strawman
|
2
|
+
#
|
3
|
+
# Represents a group of proxy sources
|
4
|
+
#
|
2
5
|
class ProxyList
|
3
|
-
|
6
|
+
attr_reader :proxies
|
7
|
+
|
8
|
+
#
|
9
|
+
# [verification_url] The url to use to verify that the proxy is valid. All
|
10
|
+
# it needs to do is return an HTTP status of 200.
|
11
|
+
#
|
12
|
+
def initialize(verification_url)
|
4
13
|
@proxies = []
|
14
|
+
@verification_url = verification_url
|
5
15
|
end
|
6
16
|
|
17
|
+
#
|
18
|
+
# Takes a list of sources and returns a deferrable which will complete once
|
19
|
+
# all sources have been fetched and all proxies have been verified.
|
20
|
+
#
|
7
21
|
def set_sources(sources)
|
8
22
|
sources_ready = EventMachine::MultiRequest.new
|
23
|
+
proxies_ready = EventMachine::MultiRequest.new
|
9
24
|
|
25
|
+
# Fetch all of the sources
|
10
26
|
sources.each do |source|
|
11
27
|
sources_ready.add(source)
|
12
28
|
end
|
13
29
|
|
14
|
-
|
30
|
+
# Verify all of the proxies
|
15
31
|
sources_ready.callback do
|
16
32
|
sources.each do |source|
|
17
33
|
source.proxies.each do |proxy|
|
18
|
-
proxies_ready.add(proxy.validate)
|
34
|
+
proxies_ready.add(proxy.validate(@verification_url))
|
19
35
|
end
|
20
36
|
end
|
21
37
|
end
|
22
38
|
|
39
|
+
# Include proxies that are verified
|
23
40
|
proxies_ready.callback do
|
24
41
|
sources.each do |source|
|
25
42
|
source.proxies.each do |proxy|
|
@@ -31,8 +48,11 @@ module Strawman
|
|
31
48
|
proxies_ready
|
32
49
|
end
|
33
50
|
|
51
|
+
#
|
52
|
+
# Selects a random proxy from the list of available proxies
|
53
|
+
#
|
34
54
|
def proxy
|
35
|
-
@proxies.choice
|
55
|
+
@proxies.choice
|
36
56
|
end
|
37
57
|
end
|
38
58
|
end
|
data/lib/strawman/source.rb
CHANGED
@@ -2,35 +2,68 @@ module Strawman
|
|
2
2
|
class Source
|
3
3
|
end
|
4
4
|
|
5
|
+
#
|
6
|
+
# A source that parses a twitter feed for urls which point to proxies, like
|
7
|
+
# http://twitter.com/proxy_lists. The class is deferrable itself and fires
|
8
|
+
# its callback once the feed has been fetched and parsed.
|
9
|
+
#
|
10
|
+
# By default it caches the feed to disk (cache/twitter-username.json), and
|
11
|
+
# fetches a new copy after that file is an hour old. To disable this, simply
|
12
|
+
# pass in false to the constructor.
|
13
|
+
#
|
5
14
|
class TwitterSource < Source
|
6
15
|
include EventMachine::Deferrable
|
7
16
|
ONE_HOUR = 60*60
|
8
17
|
attr_reader :proxies
|
9
18
|
|
10
|
-
|
19
|
+
#
|
20
|
+
# [twitter_username] Just the twitter user's username. Not the full url.
|
21
|
+
#
|
22
|
+
# [cache] Whether to enable or disable caching. Defaults to enabling
|
23
|
+
# caching.
|
24
|
+
#
|
25
|
+
def initialize(twitter_username, cache=true)
|
11
26
|
@id = twitter_username
|
27
|
+
@cache = cache
|
28
|
+
|
29
|
+
if cache
|
30
|
+
fetched = update_cache
|
31
|
+
else
|
32
|
+
fetched = fetch
|
33
|
+
end
|
12
34
|
|
13
|
-
fetched = update_cache
|
14
35
|
fetched.callback do
|
15
|
-
@
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
36
|
+
if @cache
|
37
|
+
data = read_cache
|
38
|
+
else
|
39
|
+
data = fetched.response
|
40
|
+
end
|
41
|
+
|
42
|
+
@proxies = parse(data)
|
43
|
+
|
44
|
+
succeed
|
24
45
|
end
|
25
46
|
end
|
26
47
|
|
27
48
|
private
|
49
|
+
def parse(data)
|
50
|
+
JSON.parse(data).map do |status|
|
51
|
+
match = /.*(http:\/\/.*)/.match(status["text"])
|
52
|
+
|
53
|
+
if match
|
54
|
+
GlypeProxy.new(match[1])
|
55
|
+
else
|
56
|
+
nil
|
57
|
+
end
|
58
|
+
end.compact
|
59
|
+
end
|
60
|
+
|
28
61
|
def cache_dir
|
29
62
|
"cache"
|
30
63
|
end
|
31
64
|
|
32
65
|
def cache_file_path
|
33
|
-
File.join cache_dir, "
|
66
|
+
File.join cache_dir, "twitter-#{@id}.json"
|
34
67
|
end
|
35
68
|
|
36
69
|
def cache_file_url
|
@@ -55,14 +88,16 @@ module Strawman
|
|
55
88
|
fetch
|
56
89
|
end
|
57
90
|
end
|
58
|
-
|
91
|
+
|
59
92
|
def fetch
|
60
|
-
http =
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
93
|
+
http = Transport.new(cache_file_url).get
|
94
|
+
|
95
|
+
if @cache
|
96
|
+
http.callback do
|
97
|
+
FileUtils.mkdir(cache_dir) unless File.exist? cache_dir
|
98
|
+
open(cache_file_path, "w") do |f|
|
99
|
+
f.write(http.response)
|
100
|
+
end
|
66
101
|
end
|
67
102
|
end
|
68
103
|
|
data/strawman.gemspec
CHANGED
@@ -2,12 +2,12 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |s|
|
4
4
|
s.name = %q{strawman}
|
5
|
-
s.version = "0.1"
|
5
|
+
s.version = "0.2"
|
6
6
|
|
7
7
|
s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
|
8
8
|
s.authors = ["Matt Colyer"]
|
9
9
|
s.cert_chain = ["/home/mcolyer/.ssh/gem-public_cert.pem"]
|
10
|
-
s.date = %q{2010-02-
|
10
|
+
s.date = %q{2010-02-19}
|
11
11
|
s.description = %q{Allows you fetch pages using glype proxies.}
|
12
12
|
s.email = %q{matt @nospam@ colyer.name}
|
13
13
|
s.extra_rdoc_files = ["README.rdoc", "lib/strawman.rb", "lib/strawman/http_request.rb", "lib/strawman/proxy.rb", "lib/strawman/proxy_list.rb", "lib/strawman/source.rb"]
|
data.tar.gz.sig
CHANGED
Binary file
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: strawman
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: "0.1"
|
4
|
+
version: "0.2"
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Matt Colyer
|
@@ -30,7 +30,7 @@ cert_chain:
|
|
30
30
|
anU=
|
31
31
|
-----END CERTIFICATE-----
|
32
32
|
|
33
|
-
date: 2010-02-
|
33
|
+
date: 2010-02-19 00:00:00 -08:00
|
34
34
|
default_executable:
|
35
35
|
dependencies:
|
36
36
|
- !ruby/object:Gem::Dependency
|
metadata.gz.sig
CHANGED
Binary file
|