pretty_proxy 3.0.1 → 4.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/example/example.ru +3 -2
- data/example/example_spec.rb +3 -1
- data/lib/pretty_proxy.rb +120 -71
- data/spec/pretty_proxy_spec.rb +108 -55
- metadata +17 -3
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA1:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: 3d98ec7a74142c72c40fbc20de2aa4553829196e
         | 
| 4 | 
            +
              data.tar.gz: 671e81774cfc968511a1b9350538411cf935b50e
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: 38e3c0e99b16af783e0f34c488d94a161cb8a034eb9a5aef609c3a8117c0c8a6e051f0d8fb66cb4dc040cebc980da0a00edfeb718b186a34f1fc5a0b367ca843
         | 
| 7 | 
            +
              data.tar.gz: b204811390ada1dadf6b127d377dea989f9c475a577bc99ff9050ca0d79b074aab10afdb7e75c0e0dd46c759c813c9c7e471d2430477a3656af7b2702af8c760
         | 
    
        data/example/example.ru
    CHANGED
    
    | @@ -11,9 +11,10 @@ proxy_path = pretty_proxy_new_args['proxy_path'] | |
| 11 11 | 
             
            original_domain = pretty_proxy_new_args['original_domain']
         | 
| 12 12 | 
             
            original_paths = pretty_proxy_new_args['original_paths']
         | 
| 13 13 |  | 
| 14 | 
            +
            # replace the constants with the paths without trailing slashes
         | 
| 14 15 | 
             
            original_html = config['xhtml_template'].join("\n")
         | 
| 15 | 
            -
                              .gsub('PROXY_PATH', proxy_path)
         | 
| 16 | 
            -
                              .gsub('ORIGINAL_DOMAIN', original_domain)
         | 
| 16 | 
            +
                              .gsub('PROXY_PATH', proxy_path[1..-2])
         | 
| 17 | 
            +
                              .gsub('ORIGINAL_DOMAIN', original_domain[0..-2])
         | 
| 17 18 |  | 
| 18 19 | 
             
            pp = PrettyProxy.new(proxy_path, original_domain, original_paths)
         | 
| 19 20 |  | 
    
        data/example/example_spec.rb
    CHANGED
    
    | @@ -17,6 +17,8 @@ proxy_url = original_domain + Pathname.new(proxy_path).join('.' + config['conten | |
| 17 17 |  | 
| 18 18 | 
             
            describe 'PrettyProxy example' do
         | 
| 19 19 | 
             
              let (:pp) { PrettyProxy.new(proxy_path, original_domain, original_paths) }
         | 
| 20 | 
            -
               | 
| 20 | 
            +
              let (:original_page) { open(original_url) }
         | 
| 21 | 
            +
              let (:proxy_page) { open(proxy_url) }
         | 
| 22 | 
            +
              it { expect(proxy_page.read).to be_equivalent_to(pp.proxify_html(original_page.read, proxy_url, original_page.content_type)) }
         | 
| 21 23 | 
             
            end
         | 
| 22 24 |  | 
    
        data/lib/pretty_proxy.rb
    CHANGED
    
    | @@ -51,6 +51,19 @@ require 'addressable/uri' | |
| 51 51 | 
             
            # same of the original_domain.
         | 
| 52 52 | 
             
            #
         | 
| 53 53 | 
             
            # CHANGELOG:
         | 
| 54 | 
            +
            #   4.0.0
         | 
| 55 | 
            +
            #     * proxify_hyperlink doesn't take relative paths or urls anymore, only
         | 
| 56 | 
            +
            #       absolute urls. This is because the proxy url was used for a double
         | 
| 57 | 
            +
            #       purpose (know the proxy scheme+host+port and resolve relative
         | 
| 58 | 
            +
            #       hyperlinks). This can lead to the mistake of believing that the
         | 
| 59 | 
            +
            #       base url to resolve relative links in the page is the page url
         | 
| 60 | 
            +
            #       (what's false if the page has a base tag). See more in:
         | 
| 61 | 
            +
            #       http://www.w3.org/TR/html5/infrastructure.html#base-urls
         | 
| 62 | 
            +
            #     * proxify_html (and other methods who use it, as #call) use the base
         | 
| 63 | 
            +
            #       tag from the page to determine the base url, and add the base
         | 
| 64 | 
            +
            #       tag (if the page don't have one) to simplify the assets proxification.
         | 
| 65 | 
            +
            #       All a[href] are changed to absolute urls.
         | 
| 66 | 
            +
            #     * rspec-html-matchers added as development dependency
         | 
| 54 67 | 
             
            #   3.0.0
         | 
| 55 68 | 
             
            #     * return a String for unproxify_url (and not more a URI)
         | 
| 56 69 | 
             
            #        because this is a change in the API (and can break code) the major
         | 
| @@ -147,84 +160,81 @@ class PrettyProxy < Rack::Proxy | |
| 147 160 | 
             
                raise ArgumentError, "the url argument isn't a valid uri"
         | 
| 148 161 | 
             
              end
         | 
| 149 162 |  | 
| 150 | 
            -
              # Take a  | 
| 151 | 
            -
              #  | 
| 152 | 
            -
              #  | 
| 153 | 
            -
              #  | 
| 154 | 
            -
              #  | 
| 155 | 
            -
              #  | 
| 156 | 
            -
              # | 
| 157 | 
            -
              # @param  | 
| 158 | 
            -
              #    | 
| 159 | 
            -
              # @ | 
| 163 | 
            +
              # Take a absolute URL and the scheme://host[:port] of the proxy page
         | 
| 164 | 
            +
              # (can have path/?query#fragment, but they are ignored) and return the
         | 
| 165 | 
            +
              # rewritten hyperlink.
         | 
| 166 | 
            +
              # The url only is rewritten to point the proxyfied version if it's under
         | 
| 167 | 
            +
              # proxy control.
         | 
| 168 | 
            +
              # If the url is under proxy control, but it's also a url to the proxy, the
         | 
| 169 | 
            +
              # url isn't changed (to not double proxyfy, /proxy/ ~> /proxy/proxy/).
         | 
| 170 | 
            +
              # @param hyperlink [String, URI::HTTP, URI::HTTPS] A string or URI object
         | 
| 171 | 
            +
              #   with a absolute url.
         | 
| 172 | 
            +
              # @param proxy_site [String, URI::HTTP, URI::HTTPS] A URL with
         | 
| 173 | 
            +
              #   scheme://host[:port] to use in the hyperlink proxification.
         | 
| 174 | 
            +
              # @return [String] A absolute URL.
         | 
| 160 175 | 
             
              # @raise PrettyProxy::ProxyError
         | 
| 161 | 
            -
              def proxify_hyperlink(hyperlink,  | 
| 162 | 
            -
                hyperlink = Addressable::URI.parse(hyperlink | 
| 163 | 
            -
                 | 
| 164 | 
            -
             | 
| 165 | 
            -
                # this is URI relative ('//duckduckgo.com', '/path', '../path')
         | 
| 166 | 
            -
                if hyperlink.relative?
         | 
| 167 | 
            -
                  absolute_hyperlink = Addressable::URI.parse(unproxify_url(proxy_page_url))
         | 
| 168 | 
            -
                                                       .join(hyperlink)
         | 
| 169 | 
            -
                  if inside_proxy_control? absolute_hyperlink
         | 
| 170 | 
            -
                    # this is path relative ('../path', 'path', but not '//duckduckgo.com' or '/path')
         | 
| 171 | 
            -
                    if Pathname.new(hyperlink.path).relative?
         | 
| 172 | 
            -
                      if point_to_a_proxy_page?(absolute_hyperlink, proxy_page_url)
         | 
| 173 | 
            -
                        # in the case of a relative path in the original page who points
         | 
| 174 | 
            -
                        # to a proxy page, and the proxy page is inside the proxy control
         | 
| 175 | 
            -
                        # we have to use the absolute_hyperlink or the page will be double
         | 
| 176 | 
            -
                        # proxified. Example: ../proxy/content in http://example.com/proxy/content,
         | 
| 177 | 
            -
                        # with original_path as '/' is http://example.com/proxy/proxy/content
         | 
| 178 | 
            -
                        hyperlink = absolute_hyperlink
         | 
| 179 | 
            -
                      end
         | 
| 180 | 
            -
                    else
         | 
| 181 | 
            -
                      hyperlink.path = @proxy_path[0..-2] + absolute_hyperlink.path
         | 
| 182 | 
            -
                      hyperlink.host = proxy_page_url.host if hyperlink.host
         | 
| 183 | 
            -
                      hyperlink.port = proxy_page_url.port if hyperlink.port
         | 
| 184 | 
            -
                    end
         | 
| 185 | 
            -
                  else
         | 
| 186 | 
            -
                    hyperlink = absolute_hyperlink
         | 
| 187 | 
            -
                  end
         | 
| 188 | 
            -
                else # the hyperlink is absolute
         | 
| 189 | 
            -
                  if inside_proxy_control? hyperlink
         | 
| 190 | 
            -
                    # if points to the proxy itself we don't double-proxify
         | 
| 191 | 
            -
                    unless point_to_a_proxy_page?(hyperlink, proxy_page_url)
         | 
| 192 | 
            -
                      hyperlink = proxify_uri(hyperlink, proxy_page_url)
         | 
| 193 | 
            -
                    end
         | 
| 194 | 
            -
                  end
         | 
| 195 | 
            -
                end
         | 
| 176 | 
            +
              def proxify_hyperlink(hyperlink, proxy_site)
         | 
| 177 | 
            +
                hyperlink = Addressable::URI.parse(hyperlink)
         | 
| 178 | 
            +
                proxy_site = Addressable::URI.parse(proxy_site)
         | 
| 196 179 |  | 
| 197 | 
            -
                hyperlink | 
| 180 | 
            +
                if inside_proxy_control?(hyperlink) &&
         | 
| 181 | 
            +
                      ! point_to_a_proxy_page?(hyperlink, proxy_site)
         | 
| 182 | 
            +
                  proxify_uri(hyperlink, proxy_site).to_s
         | 
| 183 | 
            +
                else
         | 
| 184 | 
            +
                  hyperlink.to_s
         | 
| 185 | 
            +
                end
         | 
| 198 186 | 
             
              end
         | 
| 199 187 |  | 
| 200 | 
            -
              # Take a (X)HTML Document  | 
| 201 | 
            -
              # attribute of each 'a' element.
         | 
| 188 | 
            +
              # Take a (X)HTML Document add a base tag (if none) and apply
         | 
| 189 | 
            +
              # proxify_hyperlink to the 'href' attribute of each 'a' element.
         | 
| 190 | 
            +
              # If the page has a base tag leave it unchanged.
         | 
| 191 | 
            +
              # If a valid mime_type is passed as argument, but the html argument
         | 
| 192 | 
            +
              # can't be parsed by this mime-type it simply returns the first argument
         | 
| 193 | 
            +
              # unchanged.
         | 
| 202 194 | 
             
              # @param html [String] A (X)HTML document.
         | 
| 203 195 | 
             
              # @param proxy_url [String, URI::HTTP, URI::HTTPS] The url where the
         | 
| 204 196 | 
             
              #   the proxified version of the page will be displayed.
         | 
| 205 | 
            -
              # @ | 
| 197 | 
            +
              # @param mime_type [String] A string containing 'text/html' or
         | 
| 198 | 
            +
              #   'application/xhtml+xml' (insensitive to case and any characters
         | 
| 199 | 
            +
              #   before or after the type). Define if the content will be parsed as xml or
         | 
| 200 | 
            +
              #   html. See this link for more info: http://www.w3.org/TR/xhtml-media-types/.
         | 
| 201 | 
            +
              #   Raise an exception if an invalid value is provided.
         | 
| 202 | 
            +
              # @return [String] A copy of the document with the changes applied,
         | 
| 203 | 
            +
              #   or the original string, if the document can't be parsed.
         | 
| 206 204 | 
             
              # @raise PrettyProxy::ProxyError
         | 
| 207 | 
            -
              def proxify_html(html, proxy_url)
         | 
| 208 | 
            -
                parsed_html =  | 
| 209 | 
            -
             | 
| 210 | 
            -
                 | 
| 211 | 
            -
             | 
| 212 | 
            -
                # Example:     <meta name="description" content="not important" />
         | 
| 213 | 
            -
                #   becomes    <meta name="description" content="not important" >
         | 
| 214 | 
            -
                # To avoid this we parse a document who is XML valid as XML, and, otherwise as HTML
         | 
| 215 | 
            -
                begin
         | 
| 216 | 
            -
                  # this also isn't a great way to do this
         | 
| 217 | 
            -
                  # the Nokogiri don't have exception classes, this way any StandardError will be silenced
         | 
| 218 | 
            -
                  options = Nokogiri::XML::ParseOptions::DEFAULT_XML &
         | 
| 219 | 
            -
                              Nokogiri::XML::ParseOptions::STRICT &
         | 
| 220 | 
            -
                              Nokogiri::XML::ParseOptions::DTDVALID
         | 
| 221 | 
            -
                  parsed_html = Nokogiri::XML::Document.parse(html, nil, nil, options)
         | 
| 222 | 
            -
                rescue
         | 
| 223 | 
            -
                  parsed_html = Nokogiri::HTML(html)
         | 
| 205 | 
            +
              def proxify_html(html, proxy_url, mime_type)
         | 
| 206 | 
            +
                parsed_html = Utils.parse_html_or_xhtml(html, mime_type)
         | 
| 207 | 
            +
             | 
| 208 | 
            +
                if parsed_html.nil?
         | 
| 209 | 
            +
                  return html
         | 
| 224 210 | 
             
                end
         | 
| 225 211 |  | 
| 226 | 
            -
                 | 
| 227 | 
            -
             | 
| 212 | 
            +
                # This isn't in conformance with the following document
         | 
| 213 | 
            +
                # http://www.w3.org/TR/html5/infrastructure.html#base-urls
         | 
| 214 | 
            +
                # but support to frames is not a priority
         | 
| 215 | 
            +
                document_original_url = unproxify_url(proxy_url)
         | 
| 216 | 
            +
                # in theory base must have a href... but to avoid an exception by bad html
         | 
| 217 | 
            +
                base_tag = parsed_html.at_css('base[href]')
         | 
| 218 | 
            +
                base_url = nil
         | 
| 219 | 
            +
                if base_tag
         | 
| 220 | 
            +
                  base_url = Addressable::URI.parse(document_original_url)
         | 
| 221 | 
            +
                                             .join(base_tag['href']).to_s
         | 
| 222 | 
            +
                else
         | 
| 223 | 
            +
                  base_url = document_original_url
         | 
| 224 | 
            +
                end
         | 
| 225 | 
            +
             | 
| 226 | 
            +
                # the href isn't a mandatory attribute of an anchor element
         | 
| 227 | 
            +
                parsed_html.css('a[href]').each do | hyperlink |
         | 
| 228 | 
            +
                  absolute_hyperlink = Addressable::URI.parse(base_url)
         | 
| 229 | 
            +
                                                       .join(hyperlink['href']).to_s
         | 
| 230 | 
            +
                  hyperlink['href'] = proxify_hyperlink(absolute_hyperlink, proxy_url)
         | 
| 231 | 
            +
                end
         | 
| 232 | 
            +
             | 
| 233 | 
            +
                unless base_tag
         | 
| 234 | 
            +
                  is_XML = %r{application/xhtml\+xml}.match(mime_type)
         | 
| 235 | 
            +
                  base_tag = "<base href='#{document_original_url}' #{is_XML ? '/' : ''}>"
         | 
| 236 | 
            +
                  parsed_html.at_css('head').first_element_child
         | 
| 237 | 
            +
                             .add_previous_sibling(base_tag)
         | 
| 228 238 | 
             
                end
         | 
| 229 239 |  | 
| 230 240 | 
             
                parsed_html.to_s
         | 
| @@ -312,7 +322,8 @@ class PrettyProxy < Rack::Proxy | |
| 312 322 | 
             
                  fail ProxyError, 'unknown content-encoding, only encodings known are gzip, deflate and identity'
         | 
| 313 323 | 
             
                end
         | 
| 314 324 |  | 
| 315 | 
            -
                 | 
| 325 | 
            +
                request_to_proxy = Rack::Request.new(requested_to_proxy_env)
         | 
| 326 | 
            +
                page = proxify_html(page, request_to_proxy.url, content_type)
         | 
| 316 327 | 
             
                status, headers, page = sugared_rewrite_response([status, headers, page],
         | 
| 317 328 | 
             
                                                                  requested_to_proxy_env,
         | 
| 318 329 | 
             
                                                                  rewritten_env)
         | 
| @@ -338,6 +349,8 @@ class PrettyProxy < Rack::Proxy | |
| 338 349 | 
             
                [status, headers, [page]]
         | 
| 339 350 | 
             
              end
         | 
| 340 351 |  | 
| 352 | 
            +
              # The simplest way to make use of this class is subclass this class and
         | 
| 353 | 
            +
              # redefine this method.
         | 
| 341 354 | 
             
              # @abstract This method is called only over (X)HTML responses, after they are
         | 
| 342 355 | 
             
              #   decompressed and the hyperlinks proxified, before they are compressed
         | 
| 343 356 | 
             
              #   again and the new content-length calculated.
         | 
| @@ -412,7 +425,7 @@ class PrettyProxy < Rack::Proxy | |
| 412 425 | 
             
                uri = absolute_uri.clone
         | 
| 413 426 |  | 
| 414 427 | 
             
                uri.site = proxy_site.site
         | 
| 415 | 
            -
                uri.path = @proxy_path + uri.path | 
| 428 | 
            +
                uri.path = @proxy_path[0..-2] + uri.path
         | 
| 416 429 |  | 
| 417 430 | 
             
                uri
         | 
| 418 431 | 
             
              end
         | 
| @@ -424,9 +437,45 @@ class PrettyProxy < Rack::Proxy | |
| 424 437 | 
             
                def self.same_domain?(u1, u2)
         | 
| 425 438 | 
             
                  u1.normalized_scheme == u2.normalized_scheme &&
         | 
| 426 439 | 
             
                    u1.normalized_host == u2.normalized_host &&
         | 
| 427 | 
            -
                    u1. | 
| 440 | 
            +
                    u1.inferred_port == u2.inferred_port
         | 
| 428 441 | 
             
                end
         | 
| 429 442 |  | 
| 443 | 
            +
                def self.parse_html_or_xhtml(doc, mime_type)
         | 
| 444 | 
            +
                  # If you parse XHTML as HTML with Nokogiri, and use to_s after, the markup
         | 
| 445 | 
            +
                  # can be messed up, breaking the structural integrity of the xml
         | 
| 446 | 
            +
                  # 
         | 
| 447 | 
            +
                  # Example:     <meta name="description" content="not important" />
         | 
| 448 | 
            +
                  #   becomes    <meta name="description" content="not important" >
         | 
| 449 | 
            +
                  #
         | 
| 450 | 
            +
                  # In the other side if you parse HTML as a XML, and use to_s after, the
         | 
| 451 | 
            +
                  # Nokogiri makes empty content tags self-close
         | 
| 452 | 
            +
                  #
         | 
| 453 | 
            +
                  # Example:    <script type="text/javascript" src="/ballonizer.js"></script>
         | 
| 454 | 
            +
                  #   becomes:  <script type="text/javascript" src="/ballonizer.js" />
         | 
| 455 | 
            +
                  #
         | 
| 456 | 
            +
                  # What's even worse than the contrary (xml as html)
         | 
| 457 | 
            +
                  parsed_doc = nil
         | 
| 458 | 
            +
             | 
| 459 | 
            +
                  case mime_type
         | 
| 460 | 
            +
                  when /text\/html/i
         | 
| 461 | 
            +
                    parsed_doc = Nokogiri::HTML(doc)
         | 
| 462 | 
            +
                  when /application\/xhtml\+xml/i
         | 
| 463 | 
            +
                    options = Nokogiri::XML::ParseOptions::DEFAULT_XML &
         | 
| 464 | 
            +
                                Nokogiri::XML::ParseOptions::STRICT &
         | 
| 465 | 
            +
                                Nokogiri::XML::ParseOptions::NONET
         | 
| 466 | 
            +
                    begin
         | 
| 467 | 
            +
                      parsed_doc = Nokogiri::XML::Document.parse(doc, nil, nil, options)
         | 
| 468 | 
            +
                    rescue
         | 
| 469 | 
            +
                      return nil
         | 
| 470 | 
            +
                    end
         | 
| 471 | 
            +
                  else
         | 
| 472 | 
            +
                    fail ProxyError, "the only mime-types accepted are text/html and" +
         | 
| 473 | 
            +
                                     " application/xhtml+xml, the passed argument was " +
         | 
| 474 | 
            +
                                     "'#{mime_type}'"
         | 
| 475 | 
            +
                  end
         | 
| 476 | 
            +
             | 
| 477 | 
            +
                  parsed_doc
         | 
| 478 | 
            +
                end
         | 
| 430 479 | 
             
                def self.validate_proxy_path(proxy_path)
         | 
| 431 480 | 
             
                  fail ConfigError, "proxy_path argument don't start with a '/'" unless proxy_path.start_with? '/'
         | 
| 432 481 | 
             
                  fail ConfigError, "proxy_path argument don't end with a '/'" unless proxy_path.end_with? '/'
         | 
    
        data/spec/pretty_proxy_spec.rb
    CHANGED
    
    | @@ -1,5 +1,6 @@ | |
| 1 1 | 
             
            require 'pretty_proxy'
         | 
| 2 | 
            -
            require 'equivalent-xml' | 
| 2 | 
            +
            require 'equivalent-xml'
         | 
| 3 | 
            +
            require 'rspec-html-matchers'
         | 
| 3 4 | 
             
            require 'zlib'
         | 
| 4 5 | 
             
            require 'uri'
         | 
| 5 6 |  | 
| @@ -22,12 +23,13 @@ end | |
| 22 23 |  | 
| 23 24 | 
             
            describe PrettyProxy do
         | 
| 24 25 |  | 
| 25 | 
            -
              def generate_html_for_test(hyperlinks)
         | 
| 26 | 
            +
              def generate_html_for_test(base_url, hyperlinks)
         | 
| 26 27 | 
             
                doc = <<-END
         | 
| 27 28 | 
             
            <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
         | 
| 28 29 | 
             
            "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
         | 
| 29 30 | 
             
            <html xmlns="http://www.w3.org/1999/xhtml">
         | 
| 30 31 | 
             
            	<head>
         | 
| 32 | 
            +
                #{base_url ? "<base href='#{base_url}' />" : ''}
         | 
| 31 33 | 
             
            		<title>A title</title>
         | 
| 32 34 | 
             
            		<meta http-equiv="content-type" content="application/xhtml+xml; charset=UTF-8" />
         | 
| 33 35 | 
             
            	</head>
         | 
| @@ -51,18 +53,43 @@ describe PrettyProxy do | |
| 51 53 | 
             
                doc
         | 
| 52 54 | 
             
              end
         | 
| 53 55 |  | 
| 54 | 
            -
              let(: | 
| 55 | 
            -
             | 
| 56 | 
            -
             | 
| 57 | 
            -
             | 
| 58 | 
            -
                                                            '../proxy/p1', '/p1']) }
         | 
| 56 | 
            +
              let(:original_html_url) { 'http://site.net/p1' }
         | 
| 57 | 
            +
              let(:requested_to_proxy_url) do
         | 
| 58 | 
            +
                pp.proxify_hyperlink(original_html_url, 'http://site.net')
         | 
| 59 | 
            +
              end
         | 
| 59 60 |  | 
| 60 | 
            -
              let(: | 
| 61 | 
            -
             | 
| 62 | 
            -
             | 
| 63 | 
            -
             | 
| 64 | 
            -
             | 
| 65 | 
            -
             | 
| 61 | 
            +
              let (:link_examples) do
         | 
| 62 | 
            +
                [ 'http://site.net/p2/p2_2/', 'http://othersite.net', '../p3',
         | 
| 63 | 
            +
                  '../p2/p2_2/', 'http://site.net/proxy/p1', '../proxy/p1', '/p1'
         | 
| 64 | 
            +
                ]
         | 
| 65 | 
            +
              end
         | 
| 66 | 
            +
             | 
| 67 | 
            +
              let(:html_base_href) { 'http://othersite.net/p1' }
         | 
| 68 | 
            +
              let(:original_html_with_base) do
         | 
| 69 | 
            +
                generate_html_for_test(html_base_href, link_examples)
         | 
| 70 | 
            +
              end
         | 
| 71 | 
            +
              let(:proxified_html_with_base) do
         | 
| 72 | 
            +
                # in this case only the first link is proxified because all others are
         | 
| 73 | 
            +
                # pointing for a site outside proxy control (or for the proxy itself)
         | 
| 74 | 
            +
                generate_html_for_test(html_base_href, [
         | 
| 75 | 
            +
                  'http://site.net/proxy/p2/p2_2/', 'http://othersite.net',
         | 
| 76 | 
            +
                  'http://othersite.net/p3', 'http://othersite.net/p2/p2_2/',
         | 
| 77 | 
            +
                  'http://site.net/proxy/p1', 'http://othersite.net/proxy/p1',
         | 
| 78 | 
            +
                  'http://othersite.net/p1'
         | 
| 79 | 
            +
                ])
         | 
| 80 | 
            +
              end
         | 
| 81 | 
            +
             | 
| 82 | 
            +
              let(:original_html) do
         | 
| 83 | 
            +
                generate_html_for_test(nil, link_examples)
         | 
| 84 | 
            +
              end
         | 
| 85 | 
            +
              let(:proxified_html) do
         | 
| 86 | 
            +
                generate_html_for_test(original_html_url, [
         | 
| 87 | 
            +
                  'http://site.net/proxy/p2/p2_2/', 'http://othersite.net',
         | 
| 88 | 
            +
                  'http://site.net/p3', 'http://site.net/proxy/p2/p2_2/',
         | 
| 89 | 
            +
                  'http://site.net/proxy/p1', 'http://site.net/proxy/p1',
         | 
| 90 | 
            +
                  'http://site.net/proxy/p1'
         | 
| 91 | 
            +
                ])
         | 
| 92 | 
            +
              end
         | 
| 66 93 |  | 
| 67 94 | 
             
              let (:correct_new_args_example) { ['/proxy/', 'http://myoriginalsite.com', '/content'] }
         | 
| 68 95 | 
             
              let (:pp) { described_class.new(*correct_new_args_example) }
         | 
| @@ -84,7 +111,6 @@ describe PrettyProxy do | |
| 84 111 | 
             
                  end
         | 
| 85 112 | 
             
                end
         | 
| 86 113 |  | 
| 87 | 
            -
                # TODO: Add specs for '/' in the start of the proxy_path
         | 
| 88 114 | 
             
                let (:right_args) { correct_new_args_example }
         | 
| 89 115 | 
             
                context "when proxy_path doesn't start with a '/'" do
         | 
| 90 116 | 
             
                  it { expect {new.call('proxy/', right_args[1], right_args[2])}.to raise_error(PrettyProxy::ConfigError) }
         | 
| @@ -169,7 +195,7 @@ describe PrettyProxy do | |
| 169 195 | 
             
                  expect(pp.unproxify_url('http://myproxy.net/proxys/sitez/p1/#id')).to eq 'http://site.net/p1/#id'
         | 
| 170 196 | 
             
                end
         | 
| 171 197 | 
             
                it 'change the port to the original' do
         | 
| 172 | 
            -
                  expect(URI(pp.unproxify_url('http://myproxy.net:9292/proxys/sitez/p1/#id')).port).to eq 80
         | 
| 198 | 
            +
                  expect(URI.parse(pp.unproxify_url('http://myproxy.net:9292/proxys/sitez/p1/#id')).port).to eq 80
         | 
| 173 199 | 
             
                end
         | 
| 174 200 |  | 
| 175 201 | 
             
                context 'when the url redirect to the own proxy' do
         | 
| @@ -188,36 +214,26 @@ describe PrettyProxy do | |
| 188 214 | 
             
              describe '#proxify_hyperlink' do
         | 
| 189 215 | 
             
                let (:pp) { described_class.new('/proxy/', 'http://site.net', ['/p1', '/p2/p2_2/']) }
         | 
| 190 216 |  | 
| 191 | 
            -
                it  | 
| 192 | 
            -
                   | 
| 193 | 
            -
             | 
| 194 | 
            -
             | 
| 195 | 
            -
                  expect(pp.proxify_hyperlink( | 
| 196 | 
            -
                end
         | 
| 197 | 
            -
                it "proxify hyperlinks without scheme to inside the proxy control" do
         | 
| 198 | 
            -
                  expect(pp.proxify_hyperlink('//site.net/p2/p2_2/', 'http://theproxy.net/proxy/p1')).to eq '//theproxy.net/proxy/p2/p2_2/'
         | 
| 217 | 
            +
                it 'proxify urls that are inside the proxy control' do
         | 
| 218 | 
            +
                  original_link = 'http://site.net/p2/p2_2/'
         | 
| 219 | 
            +
                  url_proxy_page_with_the_link = 'http://theproxy.net/proxy/p1'
         | 
| 220 | 
            +
                  proxified_link = 'http://theproxy.net/proxy/p2/p2_2/'
         | 
| 221 | 
            +
                  expect(pp.proxify_hyperlink(original_link, url_proxy_page_with_the_link)).to eq proxified_link
         | 
| 199 222 | 
             
                end
         | 
| 200 | 
            -
                it "don't  | 
| 201 | 
            -
                   | 
| 202 | 
            -
             | 
| 203 | 
            -
             | 
| 204 | 
            -
                  expect(pp.proxify_hyperlink('../p3', 'http://theproxy.net/proxy/p1')).to eq 'http://site.net/p3'
         | 
| 205 | 
            -
                  expect(pp.proxify_hyperlink('../p2/p2_2', 'http://theproxy.net/proxy/p1')).to eq 'http://site.net/p2/p2_2' # without the trailing '/'
         | 
| 206 | 
            -
                end
         | 
| 207 | 
            -
                it "don't change relative paths to inside the proxy control" do
         | 
| 208 | 
            -
                  expect(pp.proxify_hyperlink('../p2/p2_2/', 'http://theproxy.net/proxy/p1')).to eq '../p2/p2_2/'
         | 
| 223 | 
            +
                it "don't proxify urls that are outside proxy control" do
         | 
| 224 | 
            +
                  outside_site = 'http://othersite.net'
         | 
| 225 | 
            +
                  proxy_url = 'http://theproxy.net/proxy/p1'
         | 
| 226 | 
            +
                  expect(pp.proxify_hyperlink(outside_site, proxy_url)).to eq outside_site
         | 
| 209 227 | 
             
                end
         | 
| 210 228 |  | 
| 211 229 | 
             
                context 'when the proxy itself is inside the proxy control' do
         | 
| 212 230 | 
             
                  let (:pp) { described_class.new('/proxy/', 'http://site.net', '/') }
         | 
| 213 231 |  | 
| 214 | 
            -
                  it " | 
| 215 | 
            -
                     | 
| 216 | 
            -
                     | 
| 217 | 
            -
             | 
| 218 | 
            -
             | 
| 219 | 
            -
                    expect(pp.proxify_hyperlink('../proxy/p1', 'http://site.net/proxy/p1')).to eq 'http://site.net/proxy/p1'
         | 
| 220 | 
            -
                    expect(pp.proxify_hyperlink('../../proxy/p1', 'http://site.net/proxy/p2/p2_2/')).to eq 'http://site.net/proxy/p1'
         | 
| 232 | 
            +
                  it "preserve urls to the proxy itself (don't double proxify)" do
         | 
| 233 | 
            +
                    proxy_url = 'http://site.net/proxy/p1'
         | 
| 234 | 
            +
                    another_proxy_url = 'http://site.net/proxy/p2/p2_2/'
         | 
| 235 | 
            +
                    expect(pp.proxify_hyperlink(proxy_url, proxy_url)).to eq proxy_url
         | 
| 236 | 
            +
                    expect(pp.proxify_hyperlink(proxy_url, another_proxy_url)).to eq proxy_url
         | 
| 221 237 | 
             
                  end
         | 
| 222 238 | 
             
                end
         | 
| 223 239 | 
             
              end
         | 
| @@ -225,13 +241,44 @@ describe PrettyProxy do | |
| 225 241 | 
             
              describe '#proxify_html' do
         | 
| 226 242 | 
             
                let (:pp) { described_class.new('/proxy/', 'http://site.net', ['/p1', '/p2/p2_2/']) }
         | 
| 227 243 |  | 
| 228 | 
            -
                 | 
| 244 | 
            +
                # valid mime_types are 'text/html' and 'application/xhtml+xml' (with any
         | 
| 245 | 
            +
                # other characters before or after)
         | 
| 246 | 
            +
                context 'when the mime_type is invalid' do
         | 
| 247 | 
            +
                  it { expect {pp.proxify_html(original_html, 'http://site.net/proxy/p1', 'not a valid mime-type')}.to raise_error(described_class::ProxyError) }
         | 
| 248 | 
            +
                end
         | 
| 249 | 
            +
             | 
| 250 | 
            +
                context "when the content can't be parsed" do
         | 
| 251 | 
            +
                  it 'return the original string' do
         | 
| 252 | 
            +
                    # without the stray </b> (which makes this xml invalid) the test fails
         | 
| 253 | 
            +
                    # (the href is changed)
         | 
| 254 | 
            +
                    page = '<a href="http://site.net/p2/p2_2/">test</a></b>'
         | 
| 255 | 
            +
                    expect(pp.proxify_html(page, 'http://proxy.net/proxy/p1', 'application/xhtml+xml')).to equal(page)
         | 
| 256 | 
            +
                  end
         | 
| 257 | 
            +
                end
         | 
| 258 | 
            +
             | 
| 259 | 
            +
                it 'apply #proxify_hyperlink in all anchors in the page' do
         | 
| 229 260 | 
             
                  # We aren't really testing with HTML, but with XHTML, which is XML
         | 
| 230 261 | 
             
                  # This is because we don't have a matcher to test HTML equivalence, only XML equivalence
         | 
| 231 262 | 
             
                  # This test is not guaranteed to pass if the input is HTML that is not XHTML
         | 
| 232 263 | 
             
                  # The parse and unparse of the HTML can output a value that is not XML equivalent to the input
         | 
| 233 264 | 
             
                  # Maybe the way is to use regex instead of Nokogiri for this work
         | 
| 234 | 
            -
                  expect(pp.proxify_html(original_html, ' | 
| 265 | 
            +
                  expect(pp.proxify_html(original_html, requested_to_proxy_url, 'application/xhtml+xml')).to be_equivalent_to(proxified_html)
         | 
| 266 | 
            +
                end
         | 
| 267 | 
            +
             | 
| 268 | 
            +
                context 'when the page has a base tag' do
         | 
| 269 | 
            +
                  subject do
         | 
| 270 | 
            +
                    pp.proxify_html(
         | 
| 271 | 
            +
                      original_html_with_base,
         | 
| 272 | 
            +
                      requested_to_proxy_url,
         | 
| 273 | 
            +
                      'application/xhtml+xml'
         | 
| 274 | 
            +
                    )
         | 
| 275 | 
            +
                  end
         | 
| 276 | 
            +
                  it 'do not alter the base tag' do
         | 
| 277 | 
            +
                    should have_tag("base[href='#{html_base_href}']")
         | 
| 278 | 
            +
                  end
         | 
| 279 | 
            +
                  it 'use the base tag href as base url for relative links' do
         | 
| 280 | 
            +
                    should be_equivalent_to(proxified_html_with_base)
         | 
| 281 | 
            +
                  end
         | 
| 235 282 | 
             
                end
         | 
| 236 283 | 
             
              end
         | 
| 237 284 |  | 
| @@ -299,19 +346,25 @@ describe PrettyProxy do | |
| 299 346 | 
             
              describe '#rewrite_response' do
         | 
| 300 347 | 
             
                let (:pp) { described_class.new('/proxy/', 'http://site.net', ['/p1', '/p2/p2_2/']) }
         | 
| 301 348 | 
             
                # See http://rack.rubyforge.org/doc/SPEC.html for the rack env hash fields spec
         | 
| 302 | 
            -
                let (:original_env)  | 
| 303 | 
            -
             | 
| 304 | 
            -
             | 
| 305 | 
            -
             | 
| 306 | 
            -
             | 
| 307 | 
            -
             | 
| 308 | 
            -
             | 
| 349 | 
            +
                let (:original_env) do
         | 
| 350 | 
            +
                  url = URI.parse(original_html_url)
         | 
| 351 | 
            +
                  { 'HTTP_HOST' => url.host,
         | 
| 352 | 
            +
                    'SCRIPT_NAME' => '',
         | 
| 353 | 
            +
                    'PATH_INFO' => pp.proxy_path[0..-2] + url.path,
         | 
| 354 | 
            +
                    'QUERY_STRING' => '',
         | 
| 355 | 
            +
                    'SERVER_NAME' => url.host,
         | 
| 356 | 
            +
                    'SERVER_PORT' => url.port,
         | 
| 357 | 
            +
                    'rack.url_scheme' => url.scheme
         | 
| 358 | 
            +
                  }
         | 
| 359 | 
            +
                end
         | 
| 309 360 | 
             
                let (:rewritten_env) { pp.rewrite_env(original_env) }
         | 
| 310 | 
            -
                let (:response_example)  | 
| 311 | 
            -
             | 
| 312 | 
            -
             | 
| 313 | 
            -
             | 
| 314 | 
            -
             | 
| 361 | 
            +
                let (:response_example) do
         | 
| 362 | 
            +
                  original_content = [200, {
         | 
| 363 | 
            +
                    'content-type' => 'application/xhtml+xml',
         | 
| 364 | 
            +
                    'content-encoding' => 'identity',
         | 
| 365 | 
            +
                    'content-length' => original_html.bytesize.to_s
         | 
| 366 | 
            +
                  }, [original_html]]
         | 
| 367 | 
            +
                end
         | 
| 315 368 |  | 
| 316 369 | 
             
                context 'when the content-type is html or xhtml' do
         | 
| 317 370 | 
             
                  let (:original_response) { response_example }
         | 
| @@ -319,11 +372,11 @@ describe PrettyProxy do | |
| 319 372 |  | 
| 320 373 | 
             
                  let (:rewritten_headers) { subject[1] }
         | 
| 321 374 | 
             
                  let (:rewritten_body) { subject[2].join }
         | 
| 322 | 
            -
                  let (: | 
| 375 | 
            +
                  let (:requested_to_proxy_url) { Rack::Request.new(original_env).url }
         | 
| 323 376 |  | 
| 324 377 | 
             
                  # NOTE: TESTING ONLY WITH XHTML, FOR THE SAME REASON EXPLAINED IN THE #proxify_html SPEC
         | 
| 325 378 | 
             
                  it 'apply #proxify_html to the body' do
         | 
| 326 | 
            -
                    expect(rewritten_body).to be_equivalent_to pp.proxify_html(original_html,  | 
| 379 | 
            +
                    expect(rewritten_body).to be_equivalent_to pp.proxify_html(original_html, requested_to_proxy_url, 'application/xhtml+xml')
         | 
| 327 380 | 
             
                  end
         | 
| 328 381 |  | 
| 329 382 | 
             
                  it 'change the content-length header to the new size of the body' do
         | 
    
        metadata
    CHANGED
    
    | @@ -1,14 +1,14 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: pretty_proxy
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version:  | 
| 4 | 
            +
              version: 4.0.1
         | 
| 5 5 | 
             
            platform: ruby
         | 
| 6 6 | 
             
            authors:
         | 
| 7 7 | 
             
            - Henrique Becker
         | 
| 8 8 | 
             
            autorequire: 
         | 
| 9 9 | 
             
            bindir: bin
         | 
| 10 10 | 
             
            cert_chain: []
         | 
| 11 | 
            -
            date: 2013- | 
| 11 | 
            +
            date: 2013-07-17 00:00:00.000000000 Z
         | 
| 12 12 | 
             
            dependencies:
         | 
| 13 13 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 14 14 | 
             
              name: nokogiri
         | 
| @@ -150,6 +150,20 @@ dependencies: | |
| 150 150 | 
             
                - - ~>
         | 
| 151 151 | 
             
                  - !ruby/object:Gem::Version
         | 
| 152 152 | 
             
                    version: '10.0'
         | 
| 153 | 
            +
            - !ruby/object:Gem::Dependency
         | 
| 154 | 
            +
              name: rspec-html-matchers
         | 
| 155 | 
            +
              requirement: !ruby/object:Gem::Requirement
         | 
| 156 | 
            +
                requirements:
         | 
| 157 | 
            +
                - - ~>
         | 
| 158 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 159 | 
            +
                    version: 0.4.1
         | 
| 160 | 
            +
              type: :development
         | 
| 161 | 
            +
              prerelease: false
         | 
| 162 | 
            +
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 163 | 
            +
                requirements:
         | 
| 164 | 
            +
                - - ~>
         | 
| 165 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 166 | 
            +
                    version: 0.4.1
         | 
| 153 167 | 
             
            description: If you want to replicate a site section with some change (like translation)
         | 
| 154 168 | 
             
              and mantain the url pretty maybe this is the right library.
         | 
| 155 169 | 
             
            email: henriquebecker91@gmail.com
         | 
| @@ -184,7 +198,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement | |
| 184 198 | 
             
                  version: '0'
         | 
| 185 199 | 
             
            requirements: []
         | 
| 186 200 | 
             
            rubyforge_project: 
         | 
| 187 | 
            -
            rubygems_version: 2.0. | 
| 201 | 
            +
            rubygems_version: 2.0.3
         | 
| 188 202 | 
             
            signing_key: 
         | 
| 189 203 | 
             
            specification_version: 4
         | 
| 190 204 | 
             
            summary: A Rack::Proxy child pretty url oriented
         |