RubyGems - pretty_proxy - Versions diffs - 0.1.0 - Mend

pretty_proxy 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

checksums.yaml ADDED Viewed

@@ -0,0 +1,7 @@
+---
+SHA1:
+  metadata.gz: 55e1a99924c4f5f41b6e78e0c26d8be032b59076
+  data.tar.gz: 2917669d77edead2513f8abfabbe232bd258a8e5
+SHA512:
+  metadata.gz: 6c47f72b5f7542ac0d2f6b7e0753721960e7552d7a6ffc518be2c48c3779e05f351e2d3c60f8faa1e0fbd2c185e4ee27fc18e66077ad5482fe0ae004edd6e793
+  data.tar.gz: 25c966ed96630729a914288da0df7db998130ba1485504607179a09643e9c164b342e3ceb77eb6bc1bcf685dd7e8efb902106d91766f2b3c732b1fe2dd7a99a1

data/Rakefile ADDED Viewed

@@ -0,0 +1,21 @@
+require 'rspec/core/rake_task'
+RSpec::Core::RakeTask.new :spec
+task :default => [:spec]
+desc 'run a sample of the horrors this class is capable of (in localhost:9292/proxy)'
+task :heresy_example do
+  sh 'rackup ./example/heresy.ru'
+end
+desc 'run a multithread example in http://localhost:9292/{p1,proxy/p1} with thin'
+task :run_example do
+  sh 'thin start --threaded -p 9292 --rackup ./example/example.ru'
+end
+desc "run the specs of the multithread example, run 'rake :run_example' before"
+task :test_example do
+  sh 'rspec ./example/example_spec.rb'
+end

data/example/example.ru ADDED Viewed

@@ -0,0 +1,35 @@
+require 'rack'
+require 'json'
+require 'open-uri'
+require 'pretty_proxy'
+# the json path below is relative to the Rakefile, call rake or change it
+config = JSON.parse(open('example/example_conf.json').read)
+pretty_proxy_new_args = config['pretty_proxy_new_args']
+proxy_path = pretty_proxy_new_args['proxy_path']
+original_domain = pretty_proxy_new_args['original_domain']
+original_paths = pretty_proxy_new_args['original_paths']
+original_html = config['xhtml_template'].join("\n")
+                  .gsub('PROXY_PATH', proxy_path)
+                  .gsub('ORIGINAL_DOMAIN', original_domain)
+pp = PrettyProxy.new(proxy_path, original_domain, original_paths)
+headers = { 'content-type' => 'application/xhtml+xml',
+            'content-encoding' => 'identity',
+            'content-length' => original_html.bytesize.to_s }
+app = Rack::Builder.new do
+  map config['content_path'] do
+    run (->(env) { [200, headers, [original_html]] })
+  end
+  map Pathname.new(proxy_path).join('.' + config['content_path']).to_s do
+    run pp
+  end
+end.to_app
+run app

data/example/example_conf.json ADDED Viewed

@@ -0,0 +1,32 @@
+{
+  "pretty_proxy_new_args": {
+    "proxy_path": "/proxy/",
+    "original_domain": "http://localhost:9292",
+    "__comment": "if you change the 'Original paths' field you have to edit the 'Content path' and the 'XHTML Template' fields by hand",
+    "original_paths": ["/p1", "/p2/p2_2"]
+  },
+  "content_path": "/p1",
+  "xhtml_template": [
+    "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\"",
+    "\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">",
+    "<html xmlns=\"http://www.w3.org/1999/xhtml\">",
+    "<head>",
+    "  <title>A title</title>",
+    "  <meta http-equiv=\"content-type\" content=\"application/xhtml+xml; charset=UTF-8\" />",
+    "</head>",
+    "<body>",
+    "  <a href=\"ORIGINAL_DOMAIN/p2/p2_2/\" >a link </a>",
+    "  <p><a href=\"http://othersite.net\" >other link</a></p>",
+    "  <div>",
+    "    <a href=\"../p3\" >another link</a>",
+    "    <p><a href=\"../p2/p2_2/\" >yet another link</a></p>",
+    "  </div>",
+    "  <div>",
+    "    <a href=\"ORIGINAL_DOMAIN/PROXY_PATH/p1\" >and yet another link</a>",
+    "    <p><a href=\"../PROXY_PATH/p1\" >the last link</a></p>",
+    "  </div>",
+    "</body>",
+    "</html>"
+  ]
+}

data/example/example_spec.rb ADDED Viewed

@@ -0,0 +1,22 @@
+require 'open-uri'
+require 'equivalent-xml'
+require 'json'
+require 'pretty_proxy'
+# the json path below is relative to the Rakefile, call rake or change it
+config = JSON.parse(open('example/example_conf.json').read)
+# this is ugly, but simple and clear, and this is a example
+pretty_proxy_new_args = config['pretty_proxy_new_args']
+proxy_path = pretty_proxy_new_args['proxy_path']
+original_domain = pretty_proxy_new_args['original_domain']
+original_paths = pretty_proxy_new_args['original_paths']
+original_url = original_domain + config['content_path']
+proxy_url = original_domain + Pathname.new(proxy_path).join('.' + config['content_path']).to_s
+describe 'PrettyProxy example' do
+  let (:pp) { PrettyProxy.new(proxy_path, original_domain, original_paths) }
+  it { expect(open(proxy_url)).to be_equivalent_to(pp.proxify_html(open(original_url), proxy_url)) }
+end

data/example/heresy.ru ADDED Viewed

@@ -0,0 +1,12 @@
+require 'pretty_proxy'
+class Heresy < PrettyProxy
+  def sugared_rewrite_response(triplet, requested_to_proxy_env, rewritten_env)
+    status, headers, page = triplet
+    page = page.gsub(/(MTG )?Magic(: The Gathering)?/, 'Yu-Gi-Oh')
+    [status, headers, page]
+  end
+end
+run Heresy.new('/proxy/', 'http://magiccards.info', '/')

data/lib/pretty_proxy.rb ADDED Viewed

@@ -0,0 +1,452 @@
+require 'pathname'
+require 'stringio'
+require 'uri'
+require 'zlib'
+require 'nokogiri'
+require 'rack'
+require 'rack-proxy'
+# The PrettyProxy class aggregates and validates the configuration of a
+# proxy based on simple, pretty-url-oriented rewriting rules. It is also
+# a rack app, and offers an abstract method to rewrite the responses
+# returned by the proxy. The (X)HTML responses are rewritten to make
+# the hyperlinks point to the proxy version of the page, if it exists.
+#
+# @example A terrible example
+# require 'pretty_proxy'
+#
+# class Heresy < PrettyProxy
+#   def sugared_rewrite_response(triplet, requested_to_proxy_env, rewritten_env)
+#     status, headers, page = triplet
+#     page = page.gsub(/(MTG )?Magic(: The Gathering)?/, 'Yu-Gi-Oh')
+#     [status, headers, page]
+#   end
+# end
+#
+# run Heresy.new('/proxy/', 'http://magiccards.info', '/')
+#
+# You can see the result in http://localhost:9292/proxy/ (if you use the
+# command 'rake heresy_example' in the gem folder).
+#
+# @note: If you want to make a Rack app that uses the proxy to point to
+#   another path of the same app you have to use a server in multithread
+#   mode, otherwise requests to the proxy will end in a deadlock.
+# The proxy requests the original page, but the server doesn't respond because
+# it is waiting for the proxy request to be resolved. The proxy request doesn't
+# end because it needs the original page. A timeout error occurs.
+#
+# What this class can't do yet, but may do in the future: smart
+# handling of 3xx status responses and chunked encoding (the chunks are
+# concatenated in the proxy and the transfer-encoding header removed);
+# support for encodings other than deflate and gzip; exception classes
+# with more than a message.
+#
+# Glossary:
+#   'a valid proxy url/path': The path (or the path of the url) starts with
+#     the proxy_path and is followed by an original_path.
+#   'in(side)/out(side) the proxy control': The url has (or doesn't have) a
+#     path starting with an original_path and the same scheme, port and host
+#     as the original_domain.
+#
+# The exception classes (except Error) inherit from Error, and Error inherits
+# from ArgumentError. They are still empty: they only carry a message.
+#
+#   @see PrettyProxy::Error
+#   @see PrettyProxy::ConfigError
+#   @see PrettyProxy::ProxyError
+#
+# @author: Henrique Becker
+class PrettyProxy < Rack::Proxy
+  # The supertype of any exceptions explicitly raised by the methods
+  class Error < ArgumentError; end
+  # Class of exceptions thrown when trying to set the internal state
+  # of the class to a invalid value
+  class ConfigError < Error; end
+  # Class of exceptions thrown when the arguments of the method
+  # are invalid for the proxy configuration
+  class ProxyError < Error; end
+  @proxy_path = nil
+  @original_domain = nil
+  @original_paths = nil
+  # Create a new PrettyProxy instance or raise a ConfigError. Clone the arguments.
+  # @param proxy_path [String] Start and end with slashes, represent the
+  #   path in the proxy site who map to the proxy app (and, in consequence,
+  #   to another path in the same or another site).
+  # @param original_domain [String, URI] A URL without path (no trailing slash),
+  #   query or fragment (can have scheme (http[s]), domain and port), the site
+  #   to where the proxy map.
+  # @param original_paths [String, #each] The path (or the paths) to be mapped
+  #   right inside the proxy_path (has to begin with slash).
+  # @note See the specs {file:../spec/pretty_proxy_spec.rb} for examples and
+  #   complete definition of invalid args.
+  # @return [PrettyProxy] a new instance
+  # @raise PrettyProxy::ConfigError
+  def initialize(proxy_path, original_domain, original_paths)
+    Utils.validate_proxy_path(proxy_path)
+    Utils.validate_original_domain_and_paths(original_domain, original_paths)
+    @proxy_path = proxy_path.clone
+    @original_domain = URI(original_domain.clone)
+    if original_paths.respond_to? :each
+      @original_paths = original_paths.clone
+    else
+      @original_paths = [original_paths.clone]
+    end
+  end
+  # !@attribute proxy_path
+  #   @param a input who will be validated as in the initialize
+  #   @return the clone of the internal value
+  # !@attribute original_domain
+  #   @param a input who will be validated as in the initialize
+  #   @return the clone of the internal value
+  # !@attribute original_paths
+  #   @param a input who will be validated as in the initialize
+  #   @return the clone of the internal value
+  [:proxy_path, :original_domain, :original_paths].each do | reader |
+    define_method(reader) { instance_variable_get("@#{reader.to_s}").clone }
+  end
+  def proxy_path=(proxy_path)
+    Utils.validate_proxy_path(proxy_path)
+    @proxy_path = proxy_path
+  end
+  def original_domain=(original_domain)
+    Utils.validate_original_domain_and_paths(original_domain, @original_paths)
+    @original_domain = original_domain
+  end
+  def original_paths=(original_paths)
+    Utils.validate_original_domain_and_paths(@original_domain, original_paths)
+    @original_paths = original_paths
+  end
+  # Take a proxy url and give back the URL of the original page behind the
+  #   proxy. The query and the fragment, if any, are kept. For the rewrite of
+  #   a request @see rewrite_env.
+  # @param [String, URI::HTTP, URI::HTTPS] A URL.
+  # @return [URI::HTTP, URI::HTTPS] A URI object.
+  # @raise PrettyProxy::ProxyError
+  # @raise ArgumentError if the argument can't be parsed as an URI
+  # NOTE(review): the original_paths are compared here by plain prefix, so
+  #   '/about' accepts '/about_us' too; confirm if this is intended.
+  def unproxify_url(url)
+    original = URI(url.clone)
+    if !original.path.start_with?(@proxy_path)
+      fail ProxyError, "url path has to be prefixed by proxy_path (#{@proxy_path})"
+    end
+    # the last slash of the proxy_path is kept as the first of the new path
+    original.path = original.path[(@proxy_path.size - 1)..-1]
+    if @original_paths.none? { | prefix | original.path.start_with?(prefix) }
+      fail ProxyError, "the proxy only responds to paths in the original_paths (#{@original_paths})"
+    end
+    asks_for_proxy_page = original.host == @original_domain.host &&
+                            original.path.start_with?(@proxy_path)
+    fail ProxyError, 'this is a request for the proxy for a proxy page (recursive request)' if asks_for_proxy_page
+    original.host = @original_domain.host
+    original.scheme = @original_domain.scheme
+    original.port = @original_domain.port
+    original
+  rescue URI::InvalidURIError
+    raise ArgumentError, "the url argument isn't a valid uri"
+  rescue URI::Error => e
+    raise ProxyError, "an unexpected URI exception has been thrown, the message is '#{e.message}'"
+  end
+  # Take a hyperlink and the url of the proxy page (not the original page)
+  #   where it come from and return the rewritten hyperlink. If the page
+  #   pointed vy the hyperlink is in the proxy control the rewritten hyperlink
+  #   gonna point to the proxyfied version, otherwise gonna point to the original
+  #   version.
+  # @param hyperlink [String, URI::HTTP, URI::HTTPS] A string with a relative
+  #   path or an url (string or URI).
+  # @param proxy_page_url [String, URI::HTTP, URI::HTTPS] The url from the
+  #   proxy page where the hyperlink come from.
+  # @return [String] A relative path or an url.
+  # @raise PrettyProxy::ProxyError
+  def proxify_hyperlink(hyperlink, proxy_page_url)
+    hyperlink = URI(hyperlink.clone)
+    proxy_page_url = URI(proxy_page_url)
+    if Utils.relative_path? hyperlink
+      # recreate the original site url from the relative path
+      absolute_link = unproxify_url proxy_page_url
+      absolute_link.path = Pathname.new(absolute_link.path).join(hyperlink.path).to_s
+      if inside_proxy_control? absolute_link
+        if same_domain_as_original?(proxy_page_url) &&
+             valid_path_for_proxy?(absolute_link.path)
+          # in the case of a relative path in the original page who points
+          # to a proxy page, and the proxy page is inside the proxy control
+          # we have to use the absolute_link or the page will be double proxified
+          # example: ../proxy/content in http://example.com/proxy/content, with
+          # original_path as '/' is http://example.com/proxy/proxy/content
+          hyperlink = absolute_link
+        end
+      else
+        hyperlink = absolute_link
+      end
+    else
+      if inside_proxy_control? hyperlink
+        unless point_to_a_proxy_page?(hyperlink, proxy_page_url)
+          hyperlink.scheme = proxy_page_url.scheme
+          hyperlink.host = proxy_page_url.host
+          hyperlink.port = proxy_page_url.port
+          hyperlink.path = @proxy_path + hyperlink.path[1..-1]
+        end
+      end
+    end
+    hyperlink.to_s
+  end
+  # Take a (X)HTML Document and apply proxify_hyperlink to the 'href'
+  #   attribute of each 'a' element.
+  # @param html [String] A (X)HTML document.
+  # @param proxy_url [String, URI::HTTP, URI::HTTPS] The url where the
+  #   the proxified version of the page will be displayed.
+  # @return [String] A copy of the document with the changes applied.
+  # @raise PrettyProxy::ProxyError
+  def proxify_html(html, proxy_url)
+    parsed_html = nil
+    # If you parse XHTML as HTML with Nokogiri and use to_s after the markup can be messed up
+		#
+    # Example:     <meta name="description" content="not important" />
+    #   becomes    <meta name="description" content="not important" >
+    # To avoid this we parse a document who is XML valid as XML, and, otherwise as HTML
+    begin
+      # this also isn't a great way to do this
+      # the Nokogiri don't have exception classes, this way any StandardError will be silenced
+      options = Nokogiri::XML::ParseOptions::DEFAULT_XML &
+                  Nokogiri::XML::ParseOptions::STRICT &
+                  Nokogiri::XML::ParseOptions::DTDVALID
+      parsed_html = Nokogiri::XML::Document.parse(html, nil, nil, options)
+    rescue
+      parsed_html = Nokogiri::HTML(html)
+    end
+    parsed_html.css('a').each do | hyperlink |
+      hyperlink['href'] = proxify_hyperlink(hyperlink['href'], proxy_url)
+    end
+    parsed_html.to_s
+  end
+  # Turn the Rack environment hash of a request to the proxy version of
+  #   a page into a request to the original page. As in Rack::Proxy, it is
+  #   used by #call to request the original page before rewrite_response is
+  #   called over the response. If you want to use your own rewrite rules maybe
+  #   it is wiser to subclass Rack::Proxy instead of this class. The purpose
+  #   of this class is mainly to implement and enforce these rules for you.
+  # @param env [Hash{String => String}] A Rack environment hash.
+  #   (see: {http://rack.rubyforge.org/doc/SPEC.html})
+  # @return [Hash{String => String}] A unproxified copy of the argument.
+  # @raise PrettyProxy::ProxyError
+  def rewrite_env(env)
+    env = env.clone # work over a copy, the hash of the caller is not changed
+    url_requested_to_proxy = Rack::Request.new(env).url
+    unproxified_url = unproxify_url(url_requested_to_proxy)
+    # HTTP_HOST is only rewritten if the request had it
+    if env['HTTP_HOST']
+      env['HTTP_HOST'] = unproxified_url.host
+    end
+    env['SERVER_NAME'] = unproxified_url.host
+    env['SERVER_PORT'] = unproxified_url.port.to_s
+    # if only one of SCRIPT_NAME and PATH_INFO is in use, it gets the full path
+    if env['SCRIPT_NAME'].empty? && !env['PATH_INFO'].empty?
+      env['PATH_INFO'] = unproxified_url.path
+    end
+    if !env['SCRIPT_NAME'].empty? && env['PATH_INFO'].empty?
+      env['SCRIPT_NAME'] = unproxified_url.path
+    end
+    # There is no known way to split the unproxified path again between the two, so PATH_INFO is going to have the full path
+    if (!env['SCRIPT_NAME'].empty? && !env['PATH_INFO'].empty?) ||
+        (env['SCRIPT_NAME'].empty? && env['PATH_INFO'].empty?)
+      env['PATH_INFO'] = unproxified_url.path
+      env['SCRIPT_NAME'] = ''
+    end
+    env['REQUEST_PATH'] = unproxified_url.path
+    # NOTE(review): only the path is stored here, without the query string; confirm if this is intended
+    env['REQUEST_URI'] = unproxified_url.path
+    env
+  end
+  # Mainly apply the proxify_html to the body of the response if it is a html.
+  #   Raise an error if the 'content-encoding' is other than deflate, gzip or
+  #   identity. Change the 'content-length' header for the new body bytesize.
+  #   Remove the 'transfer-encoding' if it is chunked, and act as not chunked.
+  #   This method is inherited of Rack::Proxy, but in the original it have only
+  #   the first parameter (the triplet). This version have the request Rack env
+  #   to the proxy and the rewritten Rack env as second and third parameters,
+  #   respectively.
+  # @param triplet [Array<(Integer, Hash{String => String}, #each)>] A Rack
+  #   response (see {http://rack.rubyforge.org/doc/SPEC.html}) for the request
+  #   to the original site.
+  # @param [Hash{String => String}] A Rack environment hash. The requested to
+  #   the proxy version.
+  # @param [Hash{String => String}] A Rack environment hash. The rewritten by
+  #   the proxy to point to the original version.
+  # @return [Array<(Integer, Hash{String => String}, #each)>] A unproxified
+  #   copy of the first argument.
+  # @raise PrettyProxy::ProxyError
+  def rewrite_response(triplet, requested_to_proxy_env, rewritten_env)
+    status, headers, body = triplet
+    content_type = headers['content-type']
+    return triplet unless %r{text/html} =~ content_type ||
+                          %r{application/xhtml\+xml} =~ content_type
+    # the #each method of body can't be called twice, but we need to call it here and it is called
+    # after this method return, so we fake the body with a array of one string
+    # we can't return a string (even it responds to #each) see: http://rack.rubyforge.org/doc/SPEC.html (section 'The Body')
+    page = ''
+    body.each do | chunk |
+      page << chunk
+    end
+    case headers['content-encoding']
+    when 'gzip' then page = Zlib::GzipReader.new(StringIO.new(page)).read
+    when 'deflate' then page = Zlib::Inflate.inflate(page)
+    when 'identity' then page = page
+    else
+      fail ProxyError, 'unknown content-encoding, only encodings known are gzip, deflate and identity'
+    end
+    page = proxify_html(page, Rack::Request.new(requested_to_proxy_env).url)
+    status, headers, page = sugared_rewrite_response([status, headers, page],
+                                                      requested_to_proxy_env,
+                                                      rewritten_env)
+    case headers['content-encoding']
+    when 'gzip'
+      page_ = page.clone
+      gzip_stream = Zlib::GzipWriter.new(StringIO.new(page_))
+      gzip_stream.write page
+      gzip_stream.close
+      page = page_
+    when 'deflate' then page = Zlib::Deflate.deflate(page)
+    end
+    headers['content-length'] = page.bytesize.to_s if headers['content-length']
+    # TODO: find a way to make the code work with chunked encoding
+    if 'chunked' == headers['transfer-encoding']
+      headers.delete('transfer-encoding')
+      headers['content-length'] = page.bytesize.to_s
+    end
+    [status, headers, [page]]
+  end
+  # @abstract This method is called by rewrite_response, only over (X)HTML
+  #   responses, after they are decompressed and the hyperlinks proxified,
+  #   and before they are compressed again and the new content-length is
+  #   calculated. The body of the triplet is a String and not an object who
+  #   responds to #each, the same has to be true in the return. Return a
+  #   modified clone of the response, don't change the argument.
+  #   This default version returns the triplet as it came.
+  # @param triplet [Array<(Integer, Hash{String => String}, String)>] Not a
+  #   valid Rack response, the third element is a string with the response body.
+  # @param [Hash{String => String}] A Rack environment hash. The requested to
+  #   the proxy version.
+  # @param [Hash{String => String}] A Rack environment hash. The rewritten by
+  #   the proxy to point to the original version.
+  # @return [Array<(Integer, Hash{String => String}, String)>] The triplet
+  #   to be compressed and sent by rewrite_response.
+  def sugared_rewrite_response(triplet, requested_to_proxy_env, rewritten_env)
+    triplet
+  end
+  # Make this class a Rack app. Is overriden to repass to the rewrite_response
+  #   the original Rack environment (request to the proxy) and the rewritten env
+  #   (modified to point the original page request).
+  #   If you don't know the parameters and return of this method, please read
+  #   {http://rack.rubyforge.org/doc/SPEC.html}.
+  def call(env)
+    # in theory we only need to repass the rewritten_env, any original env info
+    #  needed can be passed as a environment application variable
+    #  example: (env['app_name.original_path'] = env['PATH_INFO'])
+    #  but to avoid this to be a common idiom we repass the original env too
+    rewritten_env = rewrite_env(env)
+    rewrite_response(perform_request(rewritten_env), env, rewritten_env)
+  end
+  # Check if the #scheme, #host and #port of the argument are equal to the
+  # ones of the original_domain.
+  # @param uri [#scheme, #host, #port] Usually an URI object.
+  # @return [Boolean]
+  def same_domain_as_original?(uri)
+    Utils.same_domain?(@original_domain, uri)
+  end
+  # Check if the URI::HTTP(S) is a page who can be accessed through the proxy
+  def inside_proxy_control?(uri)
+    same_domain_as_original?(uri) &&
+      valid_path_for_proxy?(@proxy_path + uri.path[1..-1])
+  end
+  # Check if the absolute path begin with a proxy_path and is followed by a
+  # original_paths element.
+  def valid_path_for_proxy?(absolute_path)
+    path_without_proxy_prefix = absolute_path[(@proxy_path.size-1)..-1]
+    # if we don't add the trailing slash '/about' and '/about_us' match
+    original_paths_with_trailing_slash = []
+    @original_paths.each do | path |
+      original_paths_with_trailing_slash << (path.end_with?('/') ? path : "#{path}/")
+    end
+    absolute_path.start_with?(@proxy_path) &&
+      original_paths_with_trailing_slash.any? do | original_path |
+        path_without_proxy_prefix.start_with? original_path
+      end
+  end
+  # Take a url and the proxy domain and return if the url points to a valid
+  # proxy page: same scheme, host and port of the proxy domain, and a path
+  # who is valid for the proxy.
+  # @param hyperlink [URI::HTTP, URI::HTTPS]
+  # @param proxy_domain [URI::HTTP, URI::HTTPS] Only its scheme, host and
+  #   port are compared (proxify_hyperlink gives the proxy page url here).
+  # @return [Boolean]
+  def point_to_a_proxy_page?(hyperlink, proxy_domain)
+    Utils.same_domain?(hyperlink, proxy_domain) &&
+      valid_path_for_proxy?(hyperlink.path)
+  end
+  # api private Don't use the methods of this class. They are for internal use only.
+  class Utils
+    def self.relative_path?(hyperlink)
+      ! hyperlink.scheme
+    end
+    def self.same_domain?(u1, u2)
+      u1.scheme == u2.scheme &&
+        u1.host == u2.host &&
+        u1.port == u2.port
+    end
+    def self.validate_proxy_path(proxy_path)
+      fail ConfigError, "proxy_path argument don't end with a '/'" unless proxy_path.end_with? '/'
+      # NOTE: if the user want to proxify 'www.site.net', and not 'www.site.net/'?
+      # Well, majority of the internet answers for this are 'the right way is to use the trailing slash'
+      # See:  http://tim-stanley.com/post/pretty-good-urls/
+      #       http://www.w3.org/Provider/Style/URI.html
+      #       http://stackoverflow.com/questions/7355305/preventing-trailing-slash-on-domain-name
+      #       http://alistapart.com/article/slashforward
+      #       http://www.searchenginejournal.com/linking-issues-why-a-trailing-slash-in-the-url-does-matter/13021/?ModPagespeed=noscript
+    end
+    def self.validate_original_domain_and_paths(original_domain, original_paths)
+      fail ConfigError, 'original_paths is empty' if original_paths.empty?
+      original_domain = URI(original_domain) # can raise URI:Error's
+      fail ConfigError, 'the original_domain has to have no query or fragment' if original_domain.query || original_domain.fragment
+      # can raise URI:Error's
+      test_uri = original_domain.clone
+      if original_paths.respond_to?(:each)
+        original_paths.each { | path | test_uri.path = path }
+      else
+        test_uri.path = original_paths
+      end
+    rescue URI::InvalidComponentError => e
+      raise ConfigError, "the original_paths contain a invalid path, message of the URI exception: '#{e.message}'"
+    rescue URI::InvalidURIError => e
+      raise ConfigError, "the original_domain isn't a valid URI, message of the URI exception: '#{e.message}'"
+    rescue URI::Error => e
+      raise ConfigError, "a unexpected URI::Error exception was raised, message of the exception: '#{e.message}'"
+    end
+  end
+  private_constant :Utils
+end

data/spec/pretty_proxy_spec.rb ADDED Viewed

@@ -0,0 +1,357 @@
+require 'pretty_proxy'
+require 'equivalent-xml' # needed for be_equivalent_to xml rspec matcher
+require 'zlib'
+shared_examples 'an reader method who encapsulate a mutable variable' do
+  context 'when the return is changed' do
+    it 'does not change the next return value' do
+      instance = described_class.new(*new_args)
+      first_return = instance.send reader_method_name
+      if change_return.respond_to? :call
+        change_return.call first_return
+      else
+        first_return.send change_return
+      end
+      second_return = instance.send reader_method_name
+      expect(second_return).to_not eq first_return
+    end
+  end
+end
+describe PrettyProxy do
+  def generate_html_for_test(hyperlinks) # builds the XHTML fixture page; hyperlinks[i] becomes the href marked ARG_i
+    doc = <<-END
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
+"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+	<head>
+		<title>A title</title>
+		<meta http-equiv="content-type" content="application/xhtml+xml; charset=UTF-8" />
+	</head>
+	<body>
+    <a href="ARG_0" >a link </a>
+		<p><a href="ARG_1" >other link</a></p>
+		<div>
+      <a href="ARG_2" >another link</a>
+      <p><a href="ARG_3" >yet another link</a></p>
+    </div>
+		<div>
+      <a href="ARG_4" >and yet another link</a>
+      <p><a href="ARG_5" >the last link</a></p>
+    </div>
+	</body>
+</html>
+    END
+    doc.gsub!(/ARG_\d+/) { | match | hyperlinks[match[4..-1].to_i] } # match[4..-1] is the index that follows 'ARG_'
+    doc # returned explicitly: gsub! gives nil when it replaces nothing
+  end
+  let(:original_html) { generate_html_for_test(['http://site.net/p2/p2_2/',
+                                                'http://othersite.net',
+                                                '../p3', '../p2/p2_2/',
+                                                'http://site.net/proxy/p1',
+                                                '../proxy/p1']) }
+  let(:proxified_html) { generate_html_for_test(['http://site.net/proxy/p2/p2_2/',
+                                                  'http://othersite.net',
+                                                  'http://site.net/p3', '../p2/p2_2/',
+                                                  'http://site.net/proxy/p1',
+                                                  'http://site.net/proxy/p1']) }
+  let (:correct_new_args_example) { ['/proxy/', 'http://myoriginalsite.com', '/content'] }
+  let (:pp) { described_class.new(*correct_new_args_example) }
+  describe '.new' do
+    subject (:new) { described_class.method :new }
+    [ {desc: 'accept original_paths as a String',
+        args: ['/proxy/', 'http://myoriginalsite.com', '/content']},
+      {desc: 'accept original_paths as an object who yelds strings with #each',
+        args: ['/proxy/', 'http://myoriginalsite.com', ['/content', '/other_content']]},
+      {desc: 'accept https in the original_domain',
+        args: ['/proxy/', 'https://myoriginalsite.com', ['/content']]},
+      {desc: 'accept port in the original_domain',
+        args: ['/proxy/', 'https://myoriginalsite.com:8080', ['/content']]}
+    ].each do | happy_case |
+      it happy_case[:desc] do
+        expect(new.call(*happy_case[:args])).to be_a_instance_of described_class
+      end
+    end
+    # TODO: Add specs for '/' in the start of the proxy_path
+    let (:right_args) { correct_new_args_example }
+    context "when proxy_path doesn't end with a '/'" do
+      it { expect {new.call('/proxy', right_args[1], right_args[2])}.to raise_error(PrettyProxy::ConfigError) }
+    end
+    context 'when the original_domain is invalid' do
+      it { expect {new.call(right_args[0], 'http://myoriginalsite.com/%%%/', right_args[2])}.to raise_error(PrettyProxy::ConfigError)}
+    end
+    context 'when the original_domain has a query' do
+      it { expect {new.call(right_args[0], 'http://myoriginalsite.com/?q=error', right_args[2])}.to raise_error(PrettyProxy::ConfigError)}
+    end
+    context 'when the original_domain has a fragment' do
+      it { expect {new.call(right_args[0], 'http://myoriginalsite.com/#id', right_args[2])}.to raise_error(PrettyProxy::ConfigError)}
+    end
+    context "when the original_paths don't begin with a '/'" do
+      it { expect {new.call(right_args[0], right_args[1], ['content'])}.to raise_error(PrettyProxy::ConfigError) }
+    end
+  end
+  #NOTE: save ten lines of the not metaprogrammed way
+  [:proxy_path, :original_domain, :original_paths].each do | reader_method |
+    describe "##{reader_method.to_s}" do
+      return_changers = { proxy_path: :chop!,
+                          original_domain: ->(uri){ uri.host = 'otherdomain.com'},
+                          original_paths: :shift }
+      it_behaves_like 'an reader method who encapsulate a mutable variable' do
+        let(:reader_method_name) { reader_method }
+        let(:new_args) { ['/proxy/', 'http://myoriginalsite.com', '/content'] }
+        let(:change_return) { return_changers[reader_method] }
+      end
+    end
+  end
+  # NOTE: excessive metaprogramming? only save 3~6 lines
+  [ [ :proxy_path=, "when proxy_path doesn't end with a '/'", '/proxy'],
+    [ :original_domain=, 'when the original_domain is invalid', 'http://myoriginalsite.com/%%%/'],
+    [ :original_paths=, "when the original_paths don't begin with a '/'", 'content']
+  ].each do | error_case |
+    writter, context_desc, invalid_input = *error_case
+    describe "##{writter.to_s}" do
+      context context_desc do
+        it { expect {pp.send(writter, invalid_input)}.to raise_error(PrettyProxy::ConfigError) }
+      end
+    end
+  end
+  describe '#unproxify_url' do
+    new_args = ['/proxys/sitez/', 'http://site.net', ['/p1', '/p2/p2_2/']]
+    let (:pp) { described_class.new(*new_args) }
+    context 'when the original_path has no trailing slash' do
+      it 'allow no trailing slash in the url' do
+        expect(pp.unproxify_url('http://myproxy.net/proxys/sitez/p1')).to eq URI('http://site.net/p1')
+      end
+      it 'allow trailing slash in the url' do
+        expect(pp.unproxify_url('http://myproxy.net/proxys/sitez/p1/')).to eq URI('http://site.net/p1/')
+      end
+    end
+    context 'when the original_path has a trailing slash' do
+      it 'allow trailing slash in the url' do
+        expect(pp.unproxify_url('http://myproxy.net/proxys/sitez/p2/p2_2/')).to eq URI('http://site.net/p2/p2_2/')
+      end
+      it "don't allow no trailing slash" do
+        expect { pp.unproxify_url('http://myproxy.net/proxys/sitez/p2/p2_2') }.to raise_error(PrettyProxy::ProxyError)
+      end
+    end
+    it 'allow subdirectories inside that path' do
+      expect(pp.unproxify_url('http://myproxy.net/proxys/sitez/p1/a/b/c/')).to eq URI('http://site.net/p1/a/b/c/')
+    end
+    it 'preserve querys in the url' do
+      expect(pp.unproxify_url('http://myproxy.net/proxys/sitez/p1/?q=error&l=pt')).to eq URI('http://site.net/p1/?q=error&l=pt')
+    end
+    it 'preserve fragments in the url' do
+      expect(pp.unproxify_url('http://myproxy.net/proxys/sitez/p1/#id')).to eq URI('http://site.net/p1/#id')
+    end
+    it 'change the port to the original' do
+      expect(pp.unproxify_url('http://myproxy.net:9292/proxys/sitez/p1/#id').port).to eq 80
+    end
+    context 'when the url redirect to the own proxy' do
+      let (:pp) { described_class.new('/', 'http://myoriginalsite.com/', '/content') }
+      it { expect {pp.unproxify_url('http://myproxysite.com/proxy/proxy/')}.to raise_error(PrettyProxy::ProxyError) }
+    end
+    context "when the url don't begin with the proxy_path" do
+      it { expect {pp.unproxify_url('http://myproxysite.com/no_proxy/content')}.to raise_error(PrettyProxy::ProxyError) }
+    end
+    context "when the proxy_path in the url isn't followed by a original_paths" do
+      it { expect {pp.unproxify_url('http://myproxysite.com/proxy/other_content')}.to raise_error(PrettyProxy::ProxyError) }
+    end
+  end
+  describe '#proxify_hyperlink' do
+    let (:pp) { described_class.new('/proxy/', 'http://site.net', ['/p1', '/p2/p2_2/']) }
+    it "proxify absolute hyperlinks to inside the proxy control" do
+      expect(pp.proxify_hyperlink('http://site.net/p2/p2_2/', 'http://theproxy.net/proxy/p1')).to eq 'http://theproxy.net/proxy/p2/p2_2/'
+    end
+    it "don't change absolute hyperlinks to ouside the proxy control" do
+      expect(pp.proxify_hyperlink('http://othersite.net', 'http://theproxy.net/proxy/p1')).to eq 'http://othersite.net'
+    end
+    it 'change to absolute hyperlinks the relative paths to outside the proxy control' do
+      expect(pp.proxify_hyperlink('../p3', 'http://theproxy.net/proxy/p1')).to eq 'http://site.net/p3'
+      expect(pp.proxify_hyperlink('../p2/p2_2', 'http://theproxy.net/proxy/p1')).to eq 'http://site.net/p2/p2_2' # without the trailing '/'
+    end
+    it "don't change relative paths to inside the proxy control" do
+      expect(pp.proxify_hyperlink('../p2/p2_2/', 'http://theproxy.net/proxy/p1')).to eq '../p2/p2_2/'
+    end
+    context 'when the proxy itself is inside the proxy control' do
+      let (:pp) { described_class.new('/proxy/', 'http://site.net', '/') }
+      it "dont't change absolute hyperlinks to the proxy itself" do
+        expect(pp.proxify_hyperlink('http://site.net/proxy/p1', 'http://site.net/proxy/p1')).to eq 'http://site.net/proxy/p1'
+        expect(pp.proxify_hyperlink('http://site.net/proxy/p1', 'http://site.net/proxy/p2/p2_2/')).to eq 'http://site.net/proxy/p1'
+      end
+      it 'change to absolute hyperlinks the relative paths to the proxy itself' do
+        expect(pp.proxify_hyperlink('../proxy/p1', 'http://site.net/proxy/p1')).to eq 'http://site.net/proxy/p1'
+        expect(pp.proxify_hyperlink('../../proxy/p1', 'http://site.net/proxy/p2/p2_2/')).to eq 'http://site.net/proxy/p1'
+      end
+    end
+  end
+  describe '#proxify_html' do
+    let (:pp) { described_class.new('/proxy/', 'http://site.net', ['/p1', '/p2/p2_2/']) }
+    it 'apply #proxify_hyperlink in all hyperlinks in the page' do
+      # We aren't really testing with HTML, but with XHTML, what is a XML
+      # This is because we dont have a matcher to test HTML equivalence, only XML equivalence
+      # This test is not guaranteed to pass if the input is a HTML non-XHTML
+      # The parse and unparse of the HTML can output a value who is not XML equivalent to the input
+      # Maybe the way is use regex instead of Nokogiri to this work
+      expect(pp.proxify_html(original_html, 'http://site.net/proxy/p1')).to be_equivalent_to(proxified_html)
+    end
+  end
+  describe '#rewrite_env' do
+    # See http://rack.rubyforge.org/doc/SPEC.html for the rack env hash fields spec
+    example_request = {'HTTP_HOST' => 'myproxysite.com',
+                      'SCRIPT_NAME' => '',
+                      'PATH_INFO' => '/proxy/content',
+                      'QUERY_STRING' => '',
+                      'SERVER_NAME' => 'myproxysite.com',
+                      'SERVER_PORT' => '9292',
+                      'rack.url_scheme' => 'http'}
+    context "when the request is not prefixed by proxy_path" do
+      let (:request_to_outside_content) { example_request.clone.update({'PATH_INFO' => '/no_proxy/content'}) }
+      it { expect {pp.rewrite_env(request_to_outside_content)}.to raise_error(PrettyProxy::ProxyError) }
+    end
+    context "when the request don't point to a original_path" do
+      let (:request_to_not_a_proxy) { example_request.clone.update({'PATH_INFO' => '/no_proxy/content'}) }
+      it { expect {pp.rewrite_env(request_to_not_a_proxy)}.to raise_error(PrettyProxy::ProxyError) }
+    end
+    let (:by_proxy_request) { example_request.clone }
+    let (:rewritten_env) { pp.rewrite_env by_proxy_request }
+    context 'when the HTTP_HOST is not empty' do
+      it 'change the HTTP_HOST and SERVER_NAME to the unproxyfied version' do
+        expect(rewritten_env['HTTP_HOST']).to eq 'myoriginalsite.com'
+        expect(rewritten_env['SERVER_NAME']).to eq 'myoriginalsite.com'
+      end
+    end
+    context 'when the HTTP_HOST is empty' do
+      let (:by_proxy_request) { t = example_request.clone; t.delete('HTTP_HOST'); t }
+      it 'change the SERVER_NAME to the unproxyfied version' do
+        expect(rewritten_env.has_key? 'HTTP_HOST').to be_false
+        expect(rewritten_env['SERVER_NAME']).to eq 'myoriginalsite.com'
+      end
+    end
+    context 'when the SCRIPT_NAME is not empty and the PATH_INFO is empty' do
+      let (:by_proxy_request) { example_request.clone.update({'SCRIPT_NAME' => '/proxy/content',
+                                                              'PATH_INFO' => ''}) }
+      it 'changes only the SCRIPT_NAME' do
+        expect(rewritten_env['SCRIPT_NAME']).to eq '/content'
+        expect(rewritten_env['PATH_INFO']).to eq ''
+      end
+    end
+    context 'when the PATH_INFO is not empty and the SCRIPT_NAME is empty' do
+      it 'changes only the PATH_INFO' do
+        expect(rewritten_env['PATH_INFO']).to eq '/content'
+        expect(rewritten_env['SCRIPT_NAME']).to eq ''
+      end
+    end
+    context 'when the SCRIPT_NAME and the PATH_INFO are not empty' do
+      # NOTE: in a real request the SCRIPT_NAME have a trailing slash?
+      #       even if the PATH_INFO start with a slash?
+      let (:by_proxy_request) { example_request.update({'SCRIPT_NAME' => '/proxy',
+                                                        'PATH_INFO' => '/content'}) }
+      it 'change the SCRIPT_NAME to empty and the PATH_INFO has the full path' do
+        expect(rewritten_env['PATH_INFO']).to eq '/content'
+        expect(rewritten_env['SCRIPT_NAME']).to eq ''
+      end
+    end
+  end
+  describe '#rewrite_response' do
+    let (:pp) { described_class.new('/proxy/', 'http://site.net', ['/p1', '/p2/p2_2/']) }
+    # See http://rack.rubyforge.org/doc/SPEC.html for the rack env hash fields spec
+    let (:original_env) {{'HTTP_HOST' => 'site.net',
+                          'SCRIPT_NAME' => '',
+                          'PATH_INFO' => '/proxy/p1',
+                          'QUERY_STRING' => '',
+                          'SERVER_NAME' => 'site.net',
+                          'SERVER_PORT' => '80',
+                          'rack.url_scheme' => 'http'}}
+    let (:rewritten_env) { pp.rewrite_env(original_env) }
+    let (:response_example) { original_content =  [200,
+                                                  {'content-type' => 'application/xhtml+xml',
+                                                   'content-encoding' => 'identity',
+                                                   'content-length' => original_html.bytesize.to_s },
+                                                  [original_html]] }
+    context 'when the content-type is html or xhtml' do
+      let (:original_response) { response_example }
+      subject { pp.rewrite_response(original_response, original_env, rewritten_env) }
+      let (:rewritten_headers) { subject[1] }
+      let (:rewritten_body) { subject[2].join }
+      let (:original_url) { Rack::Request.new(original_env).url }
+      # NOTE: TESTING ONLY WITH XHTML, BY THE SAME MOTIVE EXPLAINED IN THE #proxify_html SPEC
+      it 'apply #proxify_html to the body' do
+        expect(rewritten_body).to be_equivalent_to pp.proxify_html(original_html, original_url)
+      end
+      it 'change the content-length header to the new size of the body' do
+        expect(rewritten_headers['content-length']).to eq rewritten_body.bytesize.to_s
+      end
+      context 'compressed with deflate' do
+        it 'decompress, make the changes, and return it compressed again' do
+          original_response[1].update({'content-encoding' => 'deflate'})
+          deflate = Zlib::Deflate.method :deflate
+          original_response[2] = [deflate.call(original_html)]
+          inflate = Zlib::Inflate.method :inflate
+          expect(inflate.call(rewritten_body)).to be_equivalent_to(proxified_html)
+        end
+      end
+      context 'compressed with gzip' do
+        it 'decompress, make the changes, and return it compressed again' do
+          original_response[1].update({'content-encoding' => 'gzip'})
+          gzip = ->(str) do
+            return_str = ''
+            gzip_stream = Zlib::GzipWriter.new(StringIO.new(return_str))
+            gzip_stream.write str
+            gzip_stream.close
+            return_str
+          end
+          ungzip = ->(str) do
+            Zlib::GzipReader.new(StringIO.new(str)).read
+          end
+          original_response[2] = [gzip.call(original_html)]
+          expect(ungzip.call(rewritten_body)).to be_equivalent_to proxified_html
+        end
+      end
+      context 'compressed with another method' do
+        let (:original_response) { response_example[1].update({'content-encoding' => 'unknown-encoding'}); response_example }
+        it { expect {subject}.to raise_error(PrettyProxy::ProxyError) }
+      end
+    end
+  end
+end

metadata ADDED Viewed

@@ -0,0 +1,179 @@
+--- !ruby/object:Gem::Specification
+name: pretty_proxy
+version: !ruby/object:Gem::Version
+  version: 0.1.0
+platform: ruby
+authors:
+- Henrique Becker
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2013-05-13 00:00:00.000000000 Z
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: nokogiri
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        version: '1.5'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        version: '1.5'
+- !ruby/object:Gem::Dependency
+  name: rack
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        version: '1.5'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        version: '1.5'
+- !ruby/object:Gem::Dependency
+  name: rack-proxy
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        version: '0.3'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        version: '0.3'
+- !ruby/object:Gem::Dependency
+  name: equivalent-xml
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        version: '0.3'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        version: '0.3'
+- !ruby/object:Gem::Dependency
+  name: thin
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        version: '1.5'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        version: '1.5'
+- !ruby/object:Gem::Dependency
+  name: json
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        version: '1.7'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        version: '1.7'
+- !ruby/object:Gem::Dependency
+  name: rspec-core
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        version: '2.13'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        version: '2.13'
+- !ruby/object:Gem::Dependency
+  name: rspec-expectations
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        version: '2.13'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        version: '2.13'
+- !ruby/object:Gem::Dependency
+  name: rake
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        version: '10.0'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        version: '10.0'
+description: If you want to replicate a site section with some change (like translation)
+  and maintain the url pretty maybe this is the right library.
+email: henriquebecker91@gmail.com
+executables: []
+extensions: []
+extra_rdoc_files: []
+files:
+- lib/pretty_proxy.rb
+- example/example_spec.rb
+- example/example_conf.json
+- example/example.ru
+- example/heresy.ru
+- spec/pretty_proxy_spec.rb
+- Rakefile
+homepage: http://rubygems.org/gems/pretty_proxy
+licenses:
+- Public domain
+metadata: {}
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - '>='
+    - !ruby/object:Gem::Version
+      version: '0'
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - '>='
+    - !ruby/object:Gem::Version
+      version: '0'
+requirements: []
+rubyforge_project:
+rubygems_version: 2.0.0
+signing_key:
+specification_version: 4
+summary: A Rack::Proxy child pretty url oriented
+test_files:
+- spec/pretty_proxy_spec.rb
+has_rdoc: