strawman 0.1 → 0.2

Sign up to get free protection for your applications and to get access to all the features.
data/README.rdoc CHANGED
@@ -3,6 +3,8 @@
3
3
  A ruby gem which allows you to proxy EventMachine HTTP GET requests through
4
4
  glype proxies on the net.
5
5
 
6
+ RDoc: http://rdoc.info/projects/MattColyer/strawman
7
+
6
8
  == Getting started
7
9
 
8
10
  # make sure you have gemcutter.org repos enabled
@@ -13,9 +15,8 @@ glype proxies on the net.
13
15
  require 'rubygems'
14
16
  require 'eventmachine'
15
17
  require 'em-http'
16
- $LOAD_PATH << "../lib/"
17
18
  require 'strawman'
18
-
19
+
19
20
  EventMachine.run {
20
21
  proxy_list = Strawman::ProxyList.new
21
22
  sources_set = proxy_list.set_sources([Strawman::TwitterSource.new("proxy_sites")])
@@ -32,12 +33,15 @@ glype proxies on the net.
32
33
 
33
34
  Patches happily accepted, please open a github ticket and attach the patch.
34
35
 
35
- * specs need to be implemented
36
- * POST doesn't work
37
- * Cookies don't work
38
- * SSL doesn't work
39
- * Implement other sources
36
+ * Store proxies across runs, so that the list can grow larger than the status list
37
+ * Optionally, store proxy verification across runs
38
+ * Undo link encoding if Glype adds it
39
+ * Strip annoying stuff that Glype adds as a footer and header
40
+ * POST doesn't work
41
+ * Cookies don't work
42
+ * SSL doesn't work
43
+ * Implement other proxy sources
40
44
 
41
45
  == Limitations
42
46
 
43
- * PUT and DELETE can't work do to the way Glype is implemented
47
+ * PUT and DELETE can't work due to the way Glype is implemented
data/Rakefile CHANGED
@@ -1,8 +1,9 @@
1
1
  require 'rubygems'
2
+ require 'spec/rake/spectask'
2
3
  require 'rake'
3
4
  require 'echoe'
4
5
 
5
- Echoe.new('strawman', '0.1') do |p|
6
+ Echoe.new('strawman', '0.2') do |p|
6
7
  p.description = "Allows you fetch pages using glype proxies."
7
8
  p.url = "http://github.com/mattcolyer/strawman"
8
9
  p.author = "Matt Colyer"
@@ -11,3 +12,9 @@ Echoe.new('strawman', '0.1') do |p|
11
12
  p.development_dependencies = ["rspec"]
12
13
  p.dependencies = ["eventmachine", "em-http-request", "json"]
13
14
  end
15
+
16
+
17
+ desc "Run all examples"
18
+ Spec::Rake::SpecTask.new('tests') do |t|
19
+ t.spec_files = FileList['spec/*_spec.rb']
20
+ end
data/examples/example.rb CHANGED
@@ -4,15 +4,24 @@ require 'em-http'
4
4
  $LOAD_PATH << "../lib/"
5
5
  require 'strawman'
6
6
 
7
+ log = Logger.new(STDOUT)
8
+ log.level = Logger::INFO
9
+
7
10
  EventMachine.run {
8
- proxy_list = Strawman::ProxyList.new
11
+ proxy_list = Strawman::ProxyList.new("http://whatismyip.org")
9
12
  sources_set = proxy_list.set_sources([Strawman::TwitterSource.new("proxy_sites")])
13
+
10
14
  sources_set.callback{
11
15
  http = Strawman::HttpRequest.new(proxy_list, 'http://goingtorain.com/').get
12
16
  http.callback {
13
- p http.response_headers.inspect
14
- p http.response
17
+ log.info http.response_header.inspect
18
+ log.info http.response
15
19
  EventMachine.stop
16
20
  }
17
21
  }
22
+
23
+ sources_set.errback{
24
+ log.error "Something went wrong"
25
+ EventMachine.stop
26
+ }
18
27
  }
@@ -1,11 +1,18 @@
1
1
  module Strawman
2
+ # = HttpRequest
3
+ #
4
+ # A simple wrapper for em-http-request's HttpRequest.
5
+ #
2
6
  class HttpRequest
3
7
  def initialize(proxy_list, url)
4
8
  @proxy = proxy_list.proxy
5
- proxied_url = @proxy.proxy_url(url)
6
- @request = EventMachine::HttpRequest.new(proxied_url)
9
+ @request = Transport.new(@proxy.proxy_url(url))
7
10
  end
8
-
11
+
12
+ #
13
+ # Handles GET requests. Currently it accepts no arguments (i.e. query
14
+ # parameters, http headers etc...).
15
+ #
9
16
  def get
10
17
  http = @request.get :head => {"referer" => @proxy.referer}
11
18
  http.callback {
@@ -14,5 +21,33 @@ module Strawman
14
21
  }
15
22
  http
16
23
  end
24
+
25
+ #
26
+ # TODO: Implement this.
27
+ #
28
+ def post
29
+ raise NotImplementedError
30
+ end
31
+
32
+ #
33
+ # Can't and won't be implemented due to Glype not proxying these requests.
34
+ #
35
+ def put
36
+ raise NotImplementedError
37
+ end
38
+
39
+ #
40
+ # Can't and won't be implemented due to Glype not proxying these requests.
41
+ #
42
+ def delete
43
+ raise NotImplementedError
44
+ end
45
+
46
+ #
47
+ # Can't and won't be implemented due to Glype not proxying these requests.
48
+ #
49
+ def head
50
+ raise NotImplementedError
51
+ end
17
52
  end
18
53
  end
@@ -1,23 +1,50 @@
1
1
  module Strawman
2
+ #
3
+ # The general proxy class, which contains functions not specific to any type
4
+ # of proxy.
5
+ #
2
6
  class Proxy
7
+ #
8
+ # Returns the referer to use when making the proxied request.
9
+ #
10
+ def referer
11
+ @root_url
12
+ end
13
+
14
+ #
15
+ # Returns the url to fetch the given url through this proxy.
16
+ #
17
+ def proxy_url(url)
18
+ URI.join @root_url, proxy_path(url)
19
+ end
20
+
21
+ protected
22
+ def proxy_path(url)
23
+ raise NotImplementedError
24
+ end
3
25
  end
4
26
 
27
+ #
28
+ # An implementation of the Proxy class which is specific to Glype proxies.
29
+ # See: http://www.glype.com/ for more details.
30
+ #
5
31
  class GlypeProxy < Proxy
6
32
  def initialize(url)
7
33
  @root_url = url
8
34
  @valid = false
9
35
  end
10
36
 
11
- def valid?
12
- @valid
13
- end
37
+ #
38
+ # Verifies whether this proxy is currently functional. Returns a deferrable.
39
+ #
40
+ def validate(verification_url)
41
+ @valid = false
14
42
 
15
- def validate
16
- url = proxy_url("http://whatismyip.org")
17
43
  # FIXME: This only validates proxies that don't require a unique session
18
44
  # cookie which is retrieved by going to the root page and looking for the
19
45
  # s cookie.
20
- http = EventMachine::HttpRequest.new(url).get :head => {'referer' => @root_url}
46
+ url = proxy_url(verification_url)
47
+ http = Transport.new(url).get :head => {'referer' => @root_url}
21
48
  http.callback {
22
49
  @valid = true if http.response_header.status == 200
23
50
  }
@@ -25,27 +52,22 @@ module Strawman
25
52
  http
26
53
  end
27
54
 
28
- def to_s
29
- "<GlypeProxy #{@root_url}>"
30
- end
31
-
32
- def referer
33
- @root_url
55
+ #
56
+ # Used to determine whether this proxy is valid. This must be called from
57
+ # within the callback of the validate deferrable.
58
+ #
59
+ def valid?
60
+ @valid
34
61
  end
35
62
 
36
- def proxy_url(url)
37
- URI.join @root_url, proxy_path(url)
63
+ def to_s
64
+ "<GlypeProxy #{@root_url}>"
38
65
  end
39
66
 
67
+ protected
40
68
  def proxy_path(url)
41
69
  encoded_url = CGI.escape(Base64.encode64(url[4..-1]))
42
70
  "/browse.php?u=#{encoded_url}&f=norefer"
43
71
  end
44
72
  end
45
-
46
- class PhpProxy < Proxy
47
- def valid?
48
- false
49
- end
50
- end
51
73
  end
@@ -1,25 +1,42 @@
1
1
  module Strawman
2
+ #
3
+ # Represents a group of proxy sources
4
+ #
2
5
  class ProxyList
3
- def initialize
6
+ attr_reader :proxies
7
+
8
+ #
9
+ # [verification_url] The url to use to verify that the proxy is valid. All
10
+ # it needs to do is return an HTTP status of 200.
11
+ #
12
+ def initialize(verification_url)
4
13
  @proxies = []
14
+ @verification_url = verification_url
5
15
  end
6
16
 
17
+ #
18
+ # Takes a list of sources and returns a deferrable which will complete once
19
+ # all sources have been fetched and all proxies have been verified.
20
+ #
7
21
  def set_sources(sources)
8
22
  sources_ready = EventMachine::MultiRequest.new
23
+ proxies_ready = EventMachine::MultiRequest.new
9
24
 
25
+ # Fetch all of the sources
10
26
  sources.each do |source|
11
27
  sources_ready.add(source)
12
28
  end
13
29
 
14
- proxies_ready = EventMachine::MultiRequest.new
30
+ # Verify all of the proxies
15
31
  sources_ready.callback do
16
32
  sources.each do |source|
17
33
  source.proxies.each do |proxy|
18
- proxies_ready.add(proxy.validate)
34
+ proxies_ready.add(proxy.validate(@verification_url))
19
35
  end
20
36
  end
21
37
  end
22
38
 
39
+ # Include proxies that are verified
23
40
  proxies_ready.callback do
24
41
  sources.each do |source|
25
42
  source.proxies.each do |proxy|
@@ -31,8 +48,11 @@ module Strawman
31
48
  proxies_ready
32
49
  end
33
50
 
51
+ #
52
+ # Selects a random proxy from the list of available proxies
53
+ #
34
54
  def proxy
35
- @proxies.choice
55
+ @proxies.choice
36
56
  end
37
57
  end
38
58
  end
@@ -2,35 +2,68 @@ module Strawman
2
2
  class Source
3
3
  end
4
4
 
5
+ #
6
+ # A source that parses a Twitter feed for URLs which point to proxies, like
7
+ # http://twitter.com/proxy_lists. The class is deferrable itself and fires
8
+ # its callback once the feed has been fetched and parsed.
9
+ #
10
+ # By default it caches the feed to disk (cache/twitter-username.json), and
11
+ # fetches a new copy after that file is an hour old. To disable this, simply
12
+ # pass in false to the constructor.
13
+ #
5
14
  class TwitterSource < Source
6
15
  include EventMachine::Deferrable
7
16
  ONE_HOUR = 60*60
8
17
  attr_reader :proxies
9
18
 
10
- def initialize(twitter_username)
19
+ #
20
+ # [twitter_username] Just the twitter user's username. Not the full url.
21
+ #
22
+ # [cache] Whether to enable or disable caching. Defaults to enabling
23
+ # caching.
24
+ #
25
+ def initialize(twitter_username, cache=true)
11
26
  @id = twitter_username
27
+ @cache = cache
28
+
29
+ if cache
30
+ fetched = update_cache
31
+ else
32
+ fetched = fetch
33
+ end
12
34
 
13
- fetched = update_cache
14
35
  fetched.callback do
15
- @proxies = JSON.parse(read_cache).map do |status|
16
- match = /.*(http:\/\/.*)/.match(status["text"])
17
- if match
18
- GlypeProxy.new(match[1])
19
- else
20
- nil
21
- end
22
- end.compact
23
- set_deferred_status :succeeded
36
+ if @cache
37
+ data = read_cache
38
+ else
39
+ data = fetched.response
40
+ end
41
+
42
+ @proxies = parse(data)
43
+
44
+ succeed
24
45
  end
25
46
  end
26
47
 
27
48
  private
49
+ def parse(data)
50
+ JSON.parse(data).map do |status|
51
+ match = /.*(http:\/\/.*)/.match(status["text"])
52
+
53
+ if match
54
+ GlypeProxy.new(match[1])
55
+ else
56
+ nil
57
+ end
58
+ end.compact
59
+ end
60
+
28
61
  def cache_dir
29
62
  "cache"
30
63
  end
31
64
 
32
65
  def cache_file_path
33
- File.join cache_dir, "#{@id}.json"
66
+ File.join cache_dir, "twitter-#{@id}.json"
34
67
  end
35
68
 
36
69
  def cache_file_url
@@ -55,14 +88,16 @@ module Strawman
55
88
  fetch
56
89
  end
57
90
  end
58
-
91
+
59
92
  def fetch
60
- http = EventMachine::HttpRequest.new(cache_file_url).get
61
-
62
- http.callback do
63
- FileUtils.mkdir(cache_dir) unless File.exist? cache_dir
64
- open(cache_file_path, "w") do |f|
65
- f.write(http.response)
93
+ http = Transport.new(cache_file_url).get
94
+
95
+ if @cache
96
+ http.callback do
97
+ FileUtils.mkdir(cache_dir) unless File.exist? cache_dir
98
+ open(cache_file_path, "w") do |f|
99
+ f.write(http.response)
100
+ end
66
101
  end
67
102
  end
68
103
 
data/strawman.gemspec CHANGED
@@ -2,12 +2,12 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = %q{strawman}
5
- s.version = "0.1"
5
+ s.version = "0.2"
6
6
 
7
7
  s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
8
8
  s.authors = ["Matt Colyer"]
9
9
  s.cert_chain = ["/home/mcolyer/.ssh/gem-public_cert.pem"]
10
- s.date = %q{2010-02-13}
10
+ s.date = %q{2010-02-19}
11
11
  s.description = %q{Allows you fetch pages using glype proxies.}
12
12
  s.email = %q{matt @nospam@ colyer.name}
13
13
  s.extra_rdoc_files = ["README.rdoc", "lib/strawman.rb", "lib/strawman/http_request.rb", "lib/strawman/proxy.rb", "lib/strawman/proxy_list.rb", "lib/strawman/source.rb"]
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: strawman
3
3
  version: !ruby/object:Gem::Version
4
- version: "0.1"
4
+ version: "0.2"
5
5
  platform: ruby
6
6
  authors:
7
7
  - Matt Colyer
@@ -30,7 +30,7 @@ cert_chain:
30
30
  anU=
31
31
  -----END CERTIFICATE-----
32
32
 
33
- date: 2010-02-13 00:00:00 -08:00
33
+ date: 2010-02-19 00:00:00 -08:00
34
34
  default_executable:
35
35
  dependencies:
36
36
  - !ruby/object:Gem::Dependency
metadata.gz.sig CHANGED
Binary file