strawman 0.1 → 0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.rdoc CHANGED
@@ -3,6 +3,8 @@
3
3
  A ruby gem which allows you to proxy EventMachine HTTP GET requests through
4
4
  glype proxies on the net.
5
5
 
6
+ RDoc: http://rdoc.info/projects/MattColyer/strawman
7
+
6
8
  == Getting started
7
9
 
8
10
  # make sure you have gemcutter.org repos enabled
@@ -13,9 +15,8 @@ glype proxies on the net.
13
15
  require 'rubygems'
14
16
  require 'eventmachine'
15
17
  require 'em-http'
16
- $LOAD_PATH << "../lib/"
17
18
  require 'strawman'
18
-
19
+
19
20
  EventMachine.run {
20
21
  proxy_list = Strawman::ProxyList.new
21
22
  sources_set = proxy_list.set_sources([Strawman::TwitterSource.new("proxy_sites")])
@@ -32,12 +33,15 @@ glype proxies on the net.
32
33
 
33
34
  Patches happily accepted, please open a github ticket and attach the patch.
34
35
 
35
- * specs need to be implemented
36
- * POST doesn't work
37
- * Cookies don't work
38
- * SSL doesn't work
39
- * Implement other sources
36
+ * Store proxies across runs, so that the list can grow larger than the status list
37
+ * Optionally, store proxy verification across runs
38
+ * Undo link encoding if Glype adds it
39
+ * Strip annoying stuff that Glype adds as a footer and header
40
+ * POST doesn't work
41
+ * Cookies don't work
42
+ * SSL doesn't work
43
+ * Implement other proxy sources
40
44
 
41
45
  == Limitations
42
46
 
43
- * PUT and DELETE can't work do to the way Glype is implemented
47
+ * PUT and DELETE can't work due to the way Glype is implemented
data/Rakefile CHANGED
@@ -1,8 +1,9 @@
1
1
  require 'rubygems'
2
+ require 'spec/rake/spectask'
2
3
  require 'rake'
3
4
  require 'echoe'
4
5
 
5
- Echoe.new('strawman', '0.1') do |p|
6
+ Echoe.new('strawman', '0.2') do |p|
6
7
  p.description = "Allows you fetch pages using glype proxies."
7
8
  p.url = "http://github.com/mattcolyer/strawman"
8
9
  p.author = "Matt Colyer"
@@ -11,3 +12,9 @@ Echoe.new('strawman', '0.1') do |p|
11
12
  p.development_dependencies = ["rspec"]
12
13
  p.dependencies = ["eventmachine", "em-http-request", "json"]
13
14
  end
15
+
16
+
17
+ desc "Run all examples"
18
+ Spec::Rake::SpecTask.new('tests') do |t|
19
+ t.spec_files = FileList['spec/*_spec.rb']
20
+ end
data/examples/example.rb CHANGED
@@ -4,15 +4,24 @@ require 'em-http'
4
4
  $LOAD_PATH << "../lib/"
5
5
  require 'strawman'
6
6
 
7
+ log = Logger.new(STDOUT)
8
+ log.level = Logger::INFO
9
+
7
10
  EventMachine.run {
8
- proxy_list = Strawman::ProxyList.new
11
+ proxy_list = Strawman::ProxyList.new("http://whatismyip.org")
9
12
  sources_set = proxy_list.set_sources([Strawman::TwitterSource.new("proxy_sites")])
13
+
10
14
  sources_set.callback{
11
15
  http = Strawman::HttpRequest.new(proxy_list, 'http://goingtorain.com/').get
12
16
  http.callback {
13
- p http.response_headers.inspect
14
- p http.response
17
+ log.info http.response_header.inspect
18
+ log.info http.response
15
19
  EventMachine.stop
16
20
  }
17
21
  }
22
+
23
+ sources_set.errback{
24
+ log.error "Something went wrong"
25
+ EventMachine.stop
26
+ }
18
27
  }
@@ -1,11 +1,18 @@
1
1
  module Strawman
2
+ # = HttpRequest
3
+ #
4
+ # A simple wrapper for em-http-client's HttpRequest.
5
+ #
2
6
  class HttpRequest
3
7
  def initialize(proxy_list, url)
4
8
  @proxy = proxy_list.proxy
5
- proxied_url = @proxy.proxy_url(url)
6
- @request = EventMachine::HttpRequest.new(proxied_url)
9
+ @request = Transport.new(@proxy.proxy_url(url))
7
10
  end
8
-
11
+
12
+ #
13
+ # Handles get requests. Currently it accepts no arguments (ie query
14
+ # parameters, http headers etc...).
15
+ #
9
16
  def get
10
17
  http = @request.get :head => {"referer" => @proxy.referer}
11
18
  http.callback {
@@ -14,5 +21,33 @@ module Strawman
14
21
  }
15
22
  http
16
23
  end
24
+
25
+ #
26
+ # TODO: Implement this.
27
+ #
28
+ def post
29
+ raise NotImplementedError
30
+ end
31
+
32
+ #
33
+ # Can't and won't be implemented due to Glype not proxying these requests.
34
+ #
35
+ def put
36
+ raise NotImplementedError
37
+ end
38
+
39
+ #
40
+ # Can't and won't be implemented due to Glype not proxying these requests.
41
+ #
42
+ def delete
43
+ raise NotImplementedError
44
+ end
45
+
46
+ #
47
+ # Can't and won't be implemented due to Glype not proxying these requests.
48
+ #
49
+ def head
50
+ raise NotImplementedError
51
+ end
17
52
  end
18
53
  end
@@ -1,23 +1,50 @@
1
1
  module Strawman
2
+ #
3
+ # The general proxy class, which contains functions not specific to any type
4
+ # of proxy.
5
+ #
2
6
  class Proxy
7
+ #
8
+ # Returns the referer to use when making the proxied request.
9
+ #
10
+ def referer
11
+ @root_url
12
+ end
13
+
14
+ #
15
+ # Returns the url to fetch the given url through this proxy.
16
+ #
17
+ def proxy_url(url)
18
+ URI.join @root_url, proxy_path(url)
19
+ end
20
+
21
+ protected
22
+ def proxy_path(url)
23
+ raise NotImplementedError
24
+ end
3
25
  end
4
26
 
27
+ #
28
+ # An implementation of the Proxy class which is specific to Glype proxies.
29
+ # See: http://www.glype.com/ for more details.
30
+ #
5
31
  class GlypeProxy < Proxy
6
32
  def initialize(url)
7
33
  @root_url = url
8
34
  @valid = false
9
35
  end
10
36
 
11
- def valid?
12
- @valid
13
- end
37
+ #
38
+ # Verifies whether this proxy is currently functional. Returns a deferrable.
39
+ #
40
+ def validate(verification_url)
41
+ @valid = false
14
42
 
15
- def validate
16
- url = proxy_url("http://whatismyip.org")
17
43
  # FIXME: This only validates proxies that don't require a unique session
18
44
  # cookie which is retrieved by going to the root page and looking for the
19
45
  # s cookie.
20
- http = EventMachine::HttpRequest.new(url).get :head => {'referer' => @root_url}
46
+ url = proxy_url(verification_url)
47
+ http = Transport.new(url).get :head => {'referer' => @root_url}
21
48
  http.callback {
22
49
  @valid = true if http.response_header.status == 200
23
50
  }
@@ -25,27 +52,22 @@ module Strawman
25
52
  http
26
53
  end
27
54
 
28
- def to_s
29
- "<GlypeProxy #{@root_url}>"
30
- end
31
-
32
- def referer
33
- @root_url
55
+ #
56
+ # Used to determine whether this proxy is valid. This must be called from
57
+ # within the callback of the validate deferrable.
58
+ #
59
+ def valid?
60
+ @valid
34
61
  end
35
62
 
36
- def proxy_url(url)
37
- URI.join @root_url, proxy_path(url)
63
+ def to_s
64
+ "<GlypeProxy #{@root_url}>"
38
65
  end
39
66
 
67
+ protected
40
68
  def proxy_path(url)
41
69
  encoded_url = CGI.escape(Base64.encode64(url[4..-1]))
42
70
  "/browse.php?u=#{encoded_url}&f=norefer"
43
71
  end
44
72
  end
45
-
46
- class PhpProxy < Proxy
47
- def valid?
48
- false
49
- end
50
- end
51
73
  end
@@ -1,25 +1,42 @@
1
1
  module Strawman
2
+ #
3
+ # Represents a group of proxy sources
4
+ #
2
5
  class ProxyList
3
- def initialize
6
+ attr_reader :proxies
7
+
8
+ #
9
+ # [verification_url] The url to use to verify that the proxy is valid. All
10
+ # it needs to do is return an HTTP status of 200.
11
+ #
12
+ def initialize(verification_url)
4
13
  @proxies = []
14
+ @verification_url = verification_url
5
15
  end
6
16
 
17
+ #
18
+ # Takes a list of sources and returns a deferrable which will complete once
19
+ # all sources have been fetched and all proxies have been verified.
20
+ #
7
21
  def set_sources(sources)
8
22
  sources_ready = EventMachine::MultiRequest.new
23
+ proxies_ready = EventMachine::MultiRequest.new
9
24
 
25
+ # Fetch all of the sources
10
26
  sources.each do |source|
11
27
  sources_ready.add(source)
12
28
  end
13
29
 
14
- proxies_ready = EventMachine::MultiRequest.new
30
+ # Verify all of the proxies
15
31
  sources_ready.callback do
16
32
  sources.each do |source|
17
33
  source.proxies.each do |proxy|
18
- proxies_ready.add(proxy.validate)
34
+ proxies_ready.add(proxy.validate(@verification_url))
19
35
  end
20
36
  end
21
37
  end
22
38
 
39
+ # Include proxies that are verified
23
40
  proxies_ready.callback do
24
41
  sources.each do |source|
25
42
  source.proxies.each do |proxy|
@@ -31,8 +48,11 @@ module Strawman
31
48
  proxies_ready
32
49
  end
33
50
 
51
+ #
52
+ # Selects a random proxy from the list of available proxies
53
+ #
34
54
  def proxy
35
- @proxies.choice
55
+ @proxies.choice
36
56
  end
37
57
  end
38
58
  end
@@ -2,35 +2,68 @@ module Strawman
2
2
  class Source
3
3
  end
4
4
 
5
+ #
6
+ # A source that parses a twitter feed for urls which point to proxies, like
7
+ # http://twitter.com/proxy_lists. The class is deferrable itself and fires
8
+ # its callback once the feed has been fetched and parsed.
9
+ #
10
+ # By default it caches the feed to disk (cache/twitter-username.json), and
11
+ # fetches a new copy after that file is an hour old. To disable this, simply
12
+ # pass in false to the constructor.
13
+ #
5
14
  class TwitterSource < Source
6
15
  include EventMachine::Deferrable
7
16
  ONE_HOUR = 60*60
8
17
  attr_reader :proxies
9
18
 
10
- def initialize(twitter_username)
19
+ #
20
+ # [twitter_username] Just the twitter user's username. Not the full url.
21
+ #
22
+ # [cache] Whether to enable or disable caching. Defaults to enabling
23
+ # caching.
24
+ #
25
+ def initialize(twitter_username, cache=true)
11
26
  @id = twitter_username
27
+ @cache = cache
28
+
29
+ if cache
30
+ fetched = update_cache
31
+ else
32
+ fetched = fetch
33
+ end
12
34
 
13
- fetched = update_cache
14
35
  fetched.callback do
15
- @proxies = JSON.parse(read_cache).map do |status|
16
- match = /.*(http:\/\/.*)/.match(status["text"])
17
- if match
18
- GlypeProxy.new(match[1])
19
- else
20
- nil
21
- end
22
- end.compact
23
- set_deferred_status :succeeded
36
+ if @cache
37
+ data = read_cache
38
+ else
39
+ data = fetched.response
40
+ end
41
+
42
+ @proxies = parse(data)
43
+
44
+ succeed
24
45
  end
25
46
  end
26
47
 
27
48
  private
49
+ def parse(data)
50
+ JSON.parse(data).map do |status|
51
+ match = /.*(http:\/\/.*)/.match(status["text"])
52
+
53
+ if match
54
+ GlypeProxy.new(match[1])
55
+ else
56
+ nil
57
+ end
58
+ end.compact
59
+ end
60
+
28
61
  def cache_dir
29
62
  "cache"
30
63
  end
31
64
 
32
65
  def cache_file_path
33
- File.join cache_dir, "#{@id}.json"
66
+ File.join cache_dir, "twitter-#{@id}.json"
34
67
  end
35
68
 
36
69
  def cache_file_url
@@ -55,14 +88,16 @@ module Strawman
55
88
  fetch
56
89
  end
57
90
  end
58
-
91
+
59
92
  def fetch
60
- http = EventMachine::HttpRequest.new(cache_file_url).get
61
-
62
- http.callback do
63
- FileUtils.mkdir(cache_dir) unless File.exist? cache_dir
64
- open(cache_file_path, "w") do |f|
65
- f.write(http.response)
93
+ http = Transport.new(cache_file_url).get
94
+
95
+ if @cache
96
+ http.callback do
97
+ FileUtils.mkdir(cache_dir) unless File.exist? cache_dir
98
+ open(cache_file_path, "w") do |f|
99
+ f.write(http.response)
100
+ end
66
101
  end
67
102
  end
68
103
 
data/strawman.gemspec CHANGED
@@ -2,12 +2,12 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = %q{strawman}
5
- s.version = "0.1"
5
+ s.version = "0.2"
6
6
 
7
7
  s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
8
8
  s.authors = ["Matt Colyer"]
9
9
  s.cert_chain = ["/home/mcolyer/.ssh/gem-public_cert.pem"]
10
- s.date = %q{2010-02-13}
10
+ s.date = %q{2010-02-19}
11
11
  s.description = %q{Allows you fetch pages using glype proxies.}
12
12
  s.email = %q{matt @nospam@ colyer.name}
13
13
  s.extra_rdoc_files = ["README.rdoc", "lib/strawman.rb", "lib/strawman/http_request.rb", "lib/strawman/proxy.rb", "lib/strawman/proxy_list.rb", "lib/strawman/source.rb"]
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: strawman
3
3
  version: !ruby/object:Gem::Version
4
- version: "0.1"
4
+ version: "0.2"
5
5
  platform: ruby
6
6
  authors:
7
7
  - Matt Colyer
@@ -30,7 +30,7 @@ cert_chain:
30
30
  anU=
31
31
  -----END CERTIFICATE-----
32
32
 
33
- date: 2010-02-13 00:00:00 -08:00
33
+ date: 2010-02-19 00:00:00 -08:00
34
34
  default_executable:
35
35
  dependencies:
36
36
  - !ruby/object:Gem::Dependency
metadata.gz.sig CHANGED
Binary file