pretty_proxy 3.0.1 → 4.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 4f0b7cd40975e96c78486559c56a14332c80c480
4
- data.tar.gz: 36e64c2a281cf96d487b4258a096204f4f1bc9da
3
+ metadata.gz: 3d98ec7a74142c72c40fbc20de2aa4553829196e
4
+ data.tar.gz: 671e81774cfc968511a1b9350538411cf935b50e
5
5
  SHA512:
6
- metadata.gz: dd0cd6022492f5b31122b58b52f8f907674b8c55c37f5d38dbc510d19f59904bdcab62714b35884670a0047f54169bce43c4c24ab7fc31646fd434a29079c650
7
- data.tar.gz: 164ea3a9431730971635076a2173799fa87e555f35f0f5fc2aeba73220e903154c4ac122da8b670c8cafdc7f7d7449938afdeb0300e8684211e13f780d46ab30
6
+ metadata.gz: 38e3c0e99b16af783e0f34c488d94a161cb8a034eb9a5aef609c3a8117c0c8a6e051f0d8fb66cb4dc040cebc980da0a00edfeb718b186a34f1fc5a0b367ca843
7
+ data.tar.gz: b204811390ada1dadf6b127d377dea989f9c475a577bc99ff9050ca0d79b074aab10afdb7e75c0e0dd46c759c813c9c7e471d2430477a3656af7b2702af8c760
data/example/example.ru CHANGED
@@ -11,9 +11,10 @@ proxy_path = pretty_proxy_new_args['proxy_path']
11
11
  original_domain = pretty_proxy_new_args['original_domain']
12
12
  original_paths = pretty_proxy_new_args['original_paths']
13
13
 
14
+ # replace the constants with the paths without trailing slashes (proxy_path also loses its leading slash)
14
15
  original_html = config['xhtml_template'].join("\n")
15
- .gsub('PROXY_PATH', proxy_path)
16
- .gsub('ORIGINAL_DOMAIN', original_domain)
16
+ .gsub('PROXY_PATH', proxy_path[1..-2])
17
+ .gsub('ORIGINAL_DOMAIN', original_domain[0..-2])
17
18
 
18
19
  pp = PrettyProxy.new(proxy_path, original_domain, original_paths)
19
20
 
@@ -17,6 +17,8 @@ proxy_url = original_domain + Pathname.new(proxy_path).join('.' + config['conten
17
17
 
18
18
  describe 'PrettyProxy example' do
19
19
  let (:pp) { PrettyProxy.new(proxy_path, original_domain, original_paths) }
20
- it { expect(open(proxy_url)).to be_equivalent_to(pp.proxify_html(open(original_url), proxy_url)) }
20
+ let (:original_page) { open(original_url) }
21
+ let (:proxy_page) { open(proxy_url) }
22
+ it { expect(proxy_page.read).to be_equivalent_to(pp.proxify_html(original_page.read, proxy_url, original_page.content_type)) }
21
23
  end
22
24
 
data/lib/pretty_proxy.rb CHANGED
@@ -51,6 +51,19 @@ require 'addressable/uri'
51
51
  # same of the original_domain.
52
52
  #
53
53
  # CHANGELOG:
54
+ # 4.0.0
55
+ # * proxify_hyperlink doesn't take relative paths or urls anymore, only
56
+ # absolute urls. This is because the proxy url was used for a double
57
+ # purpose (know the proxy scheme+host+port and resolve relative
58
+ # hyperlinks). This can lead to the mistake of believing that the
59
+ # base url to resolve relative links in the page is the page url
60
+ # (which is false if the page has a base tag). See more in:
61
+ # http://www.w3.org/TR/html5/infrastructure.html#base-urls
62
+ # * proxify_html (and other methods that use it, such as #call) uses the base
63
+ # tag from the page to determine the base url, and adds the base
64
+ # tag (if the page doesn't have one) to simplify the assets proxification.
65
+ # All a[href] are changed to absolute urls.
66
+ # * rspec-html-matchers added as development dependency
54
67
  # 3.0.0
55
68
  # * return a String for unproxify_url (and not more a URI)
56
69
  # because this is a change in the API (and can break code) the major
@@ -147,84 +160,81 @@ class PrettyProxy < Rack::Proxy
147
160
  raise ArgumentError, "the url argument isn't a valid uri"
148
161
  end
149
162
 
150
- # Take a hyperlink and the url of the proxy page (not the original page)
151
- # where it come from and return the rewritten hyperlink. If the page
152
- # pointed vy the hyperlink is in the proxy control the rewritten hyperlink
153
- # gonna point to the proxyfied version, otherwise gonna point to the original
154
- # version.
155
- # @param hyperlink [String, URI::HTTP, URI::HTTPS] A string with a relative
156
- # path or an url (string or URI).
157
- # @param proxy_page_url [String, URI::HTTP, URI::HTTPS] The url from the
158
- # proxy page where the hyperlink come from.
159
- # @return [String] A relative path or an url.
163
+ # Take an absolute URL and the scheme://host[:port] of the proxy page
164
+ # (can have path/?query#fragment, but they are ignored) and return the
165
+ # rewritten hyperlink.
166
+ # The url is only rewritten to point to the proxified version if it's under
167
+ # proxy control.
168
+ # If the url is under proxy control, but it's also a url to the proxy, the
169
+ # url isn't changed (to not double proxyfy, /proxy/ ~> /proxy/proxy/).
170
+ # @param hyperlink [String, URI::HTTP, URI::HTTPS] A string or URI object
171
+ # with an absolute url.
172
+ # @param proxy_site [String, URI::HTTP, URI::HTTPS] A URL with
173
+ # scheme://host[:port] to use in the hyperlink proxification.
174
+ # @return [String] An absolute URL.
160
175
  # @raise PrettyProxy::ProxyError
161
- def proxify_hyperlink(hyperlink, proxy_page_url)
162
- hyperlink = Addressable::URI.parse(hyperlink.clone)
163
- proxy_page_url = Addressable::URI.parse(proxy_page_url)
164
-
165
- # this is URI relative ('//duckduckgo.com', '/path', '../path')
166
- if hyperlink.relative?
167
- absolute_hyperlink = Addressable::URI.parse(unproxify_url(proxy_page_url))
168
- .join(hyperlink)
169
- if inside_proxy_control? absolute_hyperlink
170
- # this is path relative ('../path', 'path', but not '//duckduckgo.com' or '/path')
171
- if Pathname.new(hyperlink.path).relative?
172
- if point_to_a_proxy_page?(absolute_hyperlink, proxy_page_url)
173
- # in the case of a relative path in the original page who points
174
- # to a proxy page, and the proxy page is inside the proxy control
175
- # we have to use the absolute_hyperlink or the page will be double
176
- # proxified. Example: ../proxy/content in http://example.com/proxy/content,
177
- # with original_path as '/' is http://example.com/proxy/proxy/content
178
- hyperlink = absolute_hyperlink
179
- end
180
- else
181
- hyperlink.path = @proxy_path[0..-2] + absolute_hyperlink.path
182
- hyperlink.host = proxy_page_url.host if hyperlink.host
183
- hyperlink.port = proxy_page_url.port if hyperlink.port
184
- end
185
- else
186
- hyperlink = absolute_hyperlink
187
- end
188
- else # the hyperlink is absolute
189
- if inside_proxy_control? hyperlink
190
- # if points to the proxy itself we don't double-proxify
191
- unless point_to_a_proxy_page?(hyperlink, proxy_page_url)
192
- hyperlink = proxify_uri(hyperlink, proxy_page_url)
193
- end
194
- end
195
- end
176
+ def proxify_hyperlink(hyperlink, proxy_site)
177
+ hyperlink = Addressable::URI.parse(hyperlink)
178
+ proxy_site = Addressable::URI.parse(proxy_site)
196
179
 
197
- hyperlink.to_s
180
+ if inside_proxy_control?(hyperlink) &&
181
+ ! point_to_a_proxy_page?(hyperlink, proxy_site)
182
+ proxify_uri(hyperlink, proxy_site).to_s
183
+ else
184
+ hyperlink.to_s
185
+ end
198
186
  end
199
187
 
200
- # Take a (X)HTML Document and apply proxify_hyperlink to the 'href'
201
- # attribute of each 'a' element.
188
+ # Take a (X)HTML document, add a base tag (if none) and apply
189
+ # proxify_hyperlink to the 'href' attribute of each 'a' element.
190
+ # If the page has a base tag leave it unchanged.
191
+ # If a valid mime_type is passed as argument, but the html argument
192
+ # can't be parsed by this mime-type it simply returns the first argument
193
+ # unchanged.
202
194
  # @param html [String] A (X)HTML document.
203
195
  # @param proxy_url [String, URI::HTTP, URI::HTTPS] The url where the
204
196
  # the proxified version of the page will be displayed.
205
- # @return [String] A copy of the document with the changes applied.
197
+ # @param mime_type [String] A string containing 'text/html' or
198
+ # 'application/xhtml+xml' (insensitive to case and any characters
199
+ # before or after the type). Define if the content will be parsed as xml or
200
+ # html. See this link for more info: http://www.w3.org/TR/xhtml-media-types/.
201
+ # Raise an exception if an invalid value is provided.
202
+ # @return [String] A copy of the document with the changes applied,
203
+ # or the original string, if the document can't be parsed.
206
204
  # @raise PrettyProxy::ProxyError
207
- def proxify_html(html, proxy_url)
208
- parsed_html = nil
209
-
210
- # If you parse XHTML as HTML with Nokogiri and use to_s after the markup can be messed up
211
- #
212
- # Example: <meta name="description" content="not important" />
213
- # becomes <meta name="description" content="not important" >
214
- # To avoid this we parse a document who is XML valid as XML, and, otherwise as HTML
215
- begin
216
- # this also isn't a great way to do this
217
- # the Nokogiri don't have exception classes, this way any StandardError will be silenced
218
- options = Nokogiri::XML::ParseOptions::DEFAULT_XML &
219
- Nokogiri::XML::ParseOptions::STRICT &
220
- Nokogiri::XML::ParseOptions::DTDVALID
221
- parsed_html = Nokogiri::XML::Document.parse(html, nil, nil, options)
222
- rescue
223
- parsed_html = Nokogiri::HTML(html)
205
+ def proxify_html(html, proxy_url, mime_type)
206
+ parsed_html = Utils.parse_html_or_xhtml(html, mime_type)
207
+
208
+ if parsed_html.nil?
209
+ return html
224
210
  end
225
211
 
226
- parsed_html.css('a').each do | hyperlink |
227
- hyperlink['href'] = proxify_hyperlink(hyperlink['href'], proxy_url)
212
+ # This isn't in conformance with the following document
213
+ # http://www.w3.org/TR/html5/infrastructure.html#base-urls
214
+ # but support for frames is not a priority
215
+ document_original_url = unproxify_url(proxy_url)
216
+ # in theory base must have a href... but to avoid an exception by bad html
217
+ base_tag = parsed_html.at_css('base[href]')
218
+ base_url = nil
219
+ if base_tag
220
+ base_url = Addressable::URI.parse(document_original_url)
221
+ .join(base_tag['href']).to_s
222
+ else
223
+ base_url = document_original_url
224
+ end
225
+
226
+ # the href isn't a mandatory attribute of an anchor element
227
+ parsed_html.css('a[href]').each do | hyperlink |
228
+ absolute_hyperlink = Addressable::URI.parse(base_url)
229
+ .join(hyperlink['href']).to_s
230
+ hyperlink['href'] = proxify_hyperlink(absolute_hyperlink, proxy_url)
231
+ end
232
+
233
+ unless base_tag
234
+ is_XML = %r{application/xhtml\+xml}.match(mime_type)
235
+ base_tag = "<base href='#{document_original_url}' #{is_XML ? '/' : ''}>"
236
+ parsed_html.at_css('head').first_element_child
237
+ .add_previous_sibling(base_tag)
228
238
  end
229
239
 
230
240
  parsed_html.to_s
@@ -312,7 +322,8 @@ class PrettyProxy < Rack::Proxy
312
322
  fail ProxyError, 'unknown content-encoding, only encodings known are gzip, deflate and identity'
313
323
  end
314
324
 
315
- page = proxify_html(page, Rack::Request.new(requested_to_proxy_env).url)
325
+ request_to_proxy = Rack::Request.new(requested_to_proxy_env)
326
+ page = proxify_html(page, request_to_proxy.url, content_type)
316
327
  status, headers, page = sugared_rewrite_response([status, headers, page],
317
328
  requested_to_proxy_env,
318
329
  rewritten_env)
@@ -338,6 +349,8 @@ class PrettyProxy < Rack::Proxy
338
349
  [status, headers, [page]]
339
350
  end
340
351
 
352
+ # The simplest way to make use of this class is to subclass it and
353
+ # redefine this method.
341
354
  # @abstract This method is called only over (X)HTML responses, after they are
342
355
  # decompressed and the hyperlinks proxified, before they are compressed
343
356
  # again and the new content-length calculated.
@@ -412,7 +425,7 @@ class PrettyProxy < Rack::Proxy
412
425
  uri = absolute_uri.clone
413
426
 
414
427
  uri.site = proxy_site.site
415
- uri.path = @proxy_path + uri.path[1..-1]
428
+ uri.path = @proxy_path[0..-2] + uri.path
416
429
 
417
430
  uri
418
431
  end
@@ -424,9 +437,45 @@ class PrettyProxy < Rack::Proxy
424
437
  def self.same_domain?(u1, u2)
425
438
  u1.normalized_scheme == u2.normalized_scheme &&
426
439
  u1.normalized_host == u2.normalized_host &&
427
- u1.normalized_port == u2.normalized_port
440
+ u1.inferred_port == u2.inferred_port
428
441
  end
429
442
 
443
+ def self.parse_html_or_xhtml(doc, mime_type)
444
+ # If you parse XHTML as HTML with Nokogiri, and use to_s after, the markup
445
+ # can be messed up, breaking the structural integrity of the xml
446
+ #
447
+ # Example: <meta name="description" content="not important" />
448
+ # becomes <meta name="description" content="not important" >
449
+ #
450
+ # On the other hand, if you parse HTML as XML and use to_s after,
451
+ # Nokogiri makes tags with empty content self-close
452
+ #
453
+ # Example: <script type="text/javascript" src="/ballonizer.js"></script>
454
+ # becomes: <script type="text/javascript" src="/ballonizer.js" />
455
+ #
456
+ # Which is even worse than the opposite (xml as html)
457
+ parsed_doc = nil
458
+
459
+ case mime_type
460
+ when /text\/html/i
461
+ parsed_doc = Nokogiri::HTML(doc)
462
+ when /application\/xhtml\+xml/i
463
+ options = Nokogiri::XML::ParseOptions::DEFAULT_XML &
464
+ Nokogiri::XML::ParseOptions::STRICT &
465
+ Nokogiri::XML::ParseOptions::NONET
466
+ begin
467
+ parsed_doc = Nokogiri::XML::Document.parse(doc, nil, nil, options)
468
+ rescue
469
+ return nil
470
+ end
471
+ else
472
+ fail ProxyError, "the only mime-types accepted are text/html and" +
473
+ " application/xhtml+xml, the passed argument was " +
474
+ "'#{mime_type}'"
475
+ end
476
+
477
+ parsed_doc
478
+ end
430
479
  def self.validate_proxy_path(proxy_path)
431
480
  fail ConfigError, "proxy_path argument don't start with a '/'" unless proxy_path.start_with? '/'
432
481
  fail ConfigError, "proxy_path argument don't end with a '/'" unless proxy_path.end_with? '/'
@@ -1,5 +1,6 @@
1
1
  require 'pretty_proxy'
2
- require 'equivalent-xml' # needed for be_equivalent_to xml rspec matcher
2
+ require 'equivalent-xml'
3
+ require 'rspec-html-matchers'
3
4
  require 'zlib'
4
5
  require 'uri'
5
6
 
@@ -22,12 +23,13 @@ end
22
23
 
23
24
  describe PrettyProxy do
24
25
 
25
- def generate_html_for_test(hyperlinks)
26
+ def generate_html_for_test(base_url, hyperlinks)
26
27
  doc = <<-END
27
28
  <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
28
29
  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
29
30
  <html xmlns="http://www.w3.org/1999/xhtml">
30
31
  <head>
32
+ #{base_url ? "<base href='#{base_url}' />" : ''}
31
33
  <title>A title</title>
32
34
  <meta http-equiv="content-type" content="application/xhtml+xml; charset=UTF-8" />
33
35
  </head>
@@ -51,18 +53,43 @@ describe PrettyProxy do
51
53
  doc
52
54
  end
53
55
 
54
- let(:original_html) { generate_html_for_test(['http://site.net/p2/p2_2/',
55
- 'http://othersite.net',
56
- '../p3', '../p2/p2_2/',
57
- 'http://site.net/proxy/p1',
58
- '../proxy/p1', '/p1']) }
56
+ let(:original_html_url) { 'http://site.net/p1' }
57
+ let(:requested_to_proxy_url) do
58
+ pp.proxify_hyperlink(original_html_url, 'http://site.net')
59
+ end
59
60
 
60
- let(:proxified_html) { generate_html_for_test(['http://site.net/proxy/p2/p2_2/',
61
- 'http://othersite.net',
62
- 'http://site.net/p3', '../p2/p2_2/',
63
- 'http://site.net/proxy/p1',
64
- 'http://site.net/proxy/p1',
65
- '/proxy/p1']) }
61
+ let (:link_examples) do
62
+ [ 'http://site.net/p2/p2_2/', 'http://othersite.net', '../p3',
63
+ '../p2/p2_2/', 'http://site.net/proxy/p1', '../proxy/p1', '/p1'
64
+ ]
65
+ end
66
+
67
+ let(:html_base_href) { 'http://othersite.net/p1' }
68
+ let(:original_html_with_base) do
69
+ generate_html_for_test(html_base_href, link_examples)
70
+ end
71
+ let(:proxified_html_with_base) do
72
+ # in this case only the first link is proxified because all others are
73
+ # pointing for a site outside proxy control (or for the proxy itself)
74
+ generate_html_for_test(html_base_href, [
75
+ 'http://site.net/proxy/p2/p2_2/', 'http://othersite.net',
76
+ 'http://othersite.net/p3', 'http://othersite.net/p2/p2_2/',
77
+ 'http://site.net/proxy/p1', 'http://othersite.net/proxy/p1',
78
+ 'http://othersite.net/p1'
79
+ ])
80
+ end
81
+
82
+ let(:original_html) do
83
+ generate_html_for_test(nil, link_examples)
84
+ end
85
+ let(:proxified_html) do
86
+ generate_html_for_test(original_html_url, [
87
+ 'http://site.net/proxy/p2/p2_2/', 'http://othersite.net',
88
+ 'http://site.net/p3', 'http://site.net/proxy/p2/p2_2/',
89
+ 'http://site.net/proxy/p1', 'http://site.net/proxy/p1',
90
+ 'http://site.net/proxy/p1'
91
+ ])
92
+ end
66
93
 
67
94
  let (:correct_new_args_example) { ['/proxy/', 'http://myoriginalsite.com', '/content'] }
68
95
  let (:pp) { described_class.new(*correct_new_args_example) }
@@ -84,7 +111,6 @@ describe PrettyProxy do
84
111
  end
85
112
  end
86
113
 
87
- # TODO: Add specs for '/' in the start of the proxy_path
88
114
  let (:right_args) { correct_new_args_example }
89
115
  context "when proxy_path doesn't start with a '/'" do
90
116
  it { expect {new.call('proxy/', right_args[1], right_args[2])}.to raise_error(PrettyProxy::ConfigError) }
@@ -169,7 +195,7 @@ describe PrettyProxy do
169
195
  expect(pp.unproxify_url('http://myproxy.net/proxys/sitez/p1/#id')).to eq 'http://site.net/p1/#id'
170
196
  end
171
197
  it 'change the port to the original' do
172
- expect(URI(pp.unproxify_url('http://myproxy.net:9292/proxys/sitez/p1/#id')).port).to eq 80
198
+ expect(URI.parse(pp.unproxify_url('http://myproxy.net:9292/proxys/sitez/p1/#id')).port).to eq 80
173
199
  end
174
200
 
175
201
  context 'when the url redirect to the own proxy' do
@@ -188,36 +214,26 @@ describe PrettyProxy do
188
214
  describe '#proxify_hyperlink' do
189
215
  let (:pp) { described_class.new('/proxy/', 'http://site.net', ['/p1', '/p2/p2_2/']) }
190
216
 
191
- it "proxify absolute paths to inside the proxy control" do
192
- expect(pp.proxify_hyperlink('/p2/p2_2/', 'http://theproxy.net/proxy/p1')).to eq '/proxy/p2/p2_2/'
193
- end
194
- it "proxify hyperlinks with scheme to inside the proxy control" do
195
- expect(pp.proxify_hyperlink('http://site.net/p2/p2_2/', 'http://theproxy.net/proxy/p1')).to eq 'http://theproxy.net/proxy/p2/p2_2/'
196
- end
197
- it "proxify hyperlinks without scheme to inside the proxy control" do
198
- expect(pp.proxify_hyperlink('//site.net/p2/p2_2/', 'http://theproxy.net/proxy/p1')).to eq '//theproxy.net/proxy/p2/p2_2/'
217
+ it 'proxify urls that are inside the proxy control' do
218
+ original_link = 'http://site.net/p2/p2_2/'
219
+ url_proxy_page_with_the_link = 'http://theproxy.net/proxy/p1'
220
+ proxified_link = 'http://theproxy.net/proxy/p2/p2_2/'
221
+ expect(pp.proxify_hyperlink(original_link, url_proxy_page_with_the_link)).to eq proxified_link
199
222
  end
200
- it "don't change hyperlinks with scheme to ouside the proxy control" do
201
- expect(pp.proxify_hyperlink('http://othersite.net', 'http://theproxy.net/proxy/p1')).to eq 'http://othersite.net'
202
- end
203
- it 'change to urls the relative paths to outside the proxy control' do
204
- expect(pp.proxify_hyperlink('../p3', 'http://theproxy.net/proxy/p1')).to eq 'http://site.net/p3'
205
- expect(pp.proxify_hyperlink('../p2/p2_2', 'http://theproxy.net/proxy/p1')).to eq 'http://site.net/p2/p2_2' # without the trailing '/'
206
- end
207
- it "don't change relative paths to inside the proxy control" do
208
- expect(pp.proxify_hyperlink('../p2/p2_2/', 'http://theproxy.net/proxy/p1')).to eq '../p2/p2_2/'
223
+ it "don't proxify urls that are outside proxy control" do
224
+ outside_site = 'http://othersite.net'
225
+ proxy_url = 'http://theproxy.net/proxy/p1'
226
+ expect(pp.proxify_hyperlink(outside_site, proxy_url)).to eq outside_site
209
227
  end
210
228
 
211
229
  context 'when the proxy itself is inside the proxy control' do
212
230
  let (:pp) { described_class.new('/proxy/', 'http://site.net', '/') }
213
231
 
214
- it "dont't change urls to the proxy itself" do
215
- expect(pp.proxify_hyperlink('http://site.net/proxy/p1', 'http://site.net/proxy/p1')).to eq 'http://site.net/proxy/p1'
216
- expect(pp.proxify_hyperlink('http://site.net/proxy/p1', 'http://site.net/proxy/p2/p2_2/')).to eq 'http://site.net/proxy/p1'
217
- end
218
- it 'change to urls the relative paths to the proxy itself' do
219
- expect(pp.proxify_hyperlink('../proxy/p1', 'http://site.net/proxy/p1')).to eq 'http://site.net/proxy/p1'
220
- expect(pp.proxify_hyperlink('../../proxy/p1', 'http://site.net/proxy/p2/p2_2/')).to eq 'http://site.net/proxy/p1'
232
+ it "preserve urls to the proxy itself (don't double proxify)" do
233
+ proxy_url = 'http://site.net/proxy/p1'
234
+ another_proxy_url = 'http://site.net/proxy/p2/p2_2/'
235
+ expect(pp.proxify_hyperlink(proxy_url, proxy_url)).to eq proxy_url
236
+ expect(pp.proxify_hyperlink(proxy_url, another_proxy_url)).to eq proxy_url
221
237
  end
222
238
  end
223
239
  end
@@ -225,13 +241,44 @@ describe PrettyProxy do
225
241
  describe '#proxify_html' do
226
242
  let (:pp) { described_class.new('/proxy/', 'http://site.net', ['/p1', '/p2/p2_2/']) }
227
243
 
228
- it 'apply #proxify_hyperlink in all hyperlinks in the page' do
244
+ # valid mime_types are 'text/html' and 'application/xhtml+xml' (with any
245
+ # others characters before or after)
246
+ context 'when the mime_type is invalid' do
247
+ it { expect {pp.proxify_html(original_html, 'http://site.net/proxy/p1', 'not a valid mime-type')}.to raise_error(described_class::ProxyError) }
248
+ end
249
+
250
+ context "when the content can't be parsed" do
251
+ it 'return the original string' do
252
+ # without the </b> to make this xml invalid the test fail
253
+ # (the href is changed)
254
+ page = '<a href="http://site.net/p2/p2_2/">test</a></b>'
255
+ expect(pp.proxify_html(page, 'http://proxy.net/proxy/p1', 'application/xhtml+xml')).to equal(page)
256
+ end
257
+ end
258
+
259
+ it 'apply #proxify_hyperlink in all anchors in the page' do
229
260
  # We aren't really testing with HTML, but with XHTML, what is a XML
230
261
  # This is because we dont have a matcher to test HTML equivalence, only XML equivalence
231
262
  # This test is not guaranteed to pass if the input is a HTML non-XHTML
232
263
  # The parse and unparse of the HTML can output a value who is not XML equivalent to the input
233
264
  # Maybe the way is use regex instead of Nokogiri to this work
234
- expect(pp.proxify_html(original_html, 'http://site.net/proxy/p1')).to be_equivalent_to(proxified_html)
265
+ expect(pp.proxify_html(original_html, requested_to_proxy_url, 'application/xhtml+xml')).to be_equivalent_to(proxified_html)
266
+ end
267
+
268
+ context 'when the page has a base tag' do
269
+ subject do
270
+ pp.proxify_html(
271
+ original_html_with_base,
272
+ requested_to_proxy_url,
273
+ 'application/xhtml+xml'
274
+ )
275
+ end
276
+ it 'do not alter the base tag' do
277
+ should have_tag("base[href='#{html_base_href}']")
278
+ end
279
+ it 'use the base tag href as base url for relative links' do
280
+ should be_equivalent_to(proxified_html_with_base)
281
+ end
235
282
  end
236
283
  end
237
284
 
@@ -299,19 +346,25 @@ describe PrettyProxy do
299
346
  describe '#rewrite_response' do
300
347
  let (:pp) { described_class.new('/proxy/', 'http://site.net', ['/p1', '/p2/p2_2/']) }
301
348
  # See http://rack.rubyforge.org/doc/SPEC.html for the rack env hash fields spec
302
- let (:original_env) {{'HTTP_HOST' => 'site.net',
303
- 'SCRIPT_NAME' => '',
304
- 'PATH_INFO' => '/proxy/p1',
305
- 'QUERY_STRING' => '',
306
- 'SERVER_NAME' => 'site.net',
307
- 'SERVER_PORT' => '80',
308
- 'rack.url_scheme' => 'http'}}
349
+ let (:original_env) do
350
+ url = URI.parse(original_html_url)
351
+ { 'HTTP_HOST' => url.host,
352
+ 'SCRIPT_NAME' => '',
353
+ 'PATH_INFO' => pp.proxy_path[0..-2] + url.path,
354
+ 'QUERY_STRING' => '',
355
+ 'SERVER_NAME' => url.host,
356
+ 'SERVER_PORT' => url.port,
357
+ 'rack.url_scheme' => url.scheme
358
+ }
359
+ end
309
360
  let (:rewritten_env) { pp.rewrite_env(original_env) }
310
- let (:response_example) { original_content = [200,
311
- {'content-type' => 'application/xhtml+xml',
312
- 'content-encoding' => 'identity',
313
- 'content-length' => original_html.bytesize.to_s },
314
- [original_html]] }
361
+ let (:response_example) do
362
+ original_content = [200, {
363
+ 'content-type' => 'application/xhtml+xml',
364
+ 'content-encoding' => 'identity',
365
+ 'content-length' => original_html.bytesize.to_s
366
+ }, [original_html]]
367
+ end
315
368
 
316
369
  context 'when the content-type is html or xhtml' do
317
370
  let (:original_response) { response_example }
@@ -319,11 +372,11 @@ describe PrettyProxy do
319
372
 
320
373
  let (:rewritten_headers) { subject[1] }
321
374
  let (:rewritten_body) { subject[2].join }
322
- let (:original_url) { Rack::Request.new(original_env).url }
375
+ let (:requested_to_proxy_url) { Rack::Request.new(original_env).url }
323
376
 
324
377
  # NOTE: TESTING ONLY WITH XHTML, BY THE SAME MOTIVE EXPLAINED IN THE #proxify_html SPEC
325
378
  it 'apply #proxify_html to the body' do
326
- expect(rewritten_body).to be_equivalent_to pp.proxify_html(original_html, original_url)
379
+ expect(rewritten_body).to be_equivalent_to pp.proxify_html(original_html, requested_to_proxy_url, 'application/xhtml+xml')
327
380
  end
328
381
 
329
382
  it 'change the content-length header to the new size of the body' do
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pretty_proxy
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.1
4
+ version: 4.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Henrique Becker
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-05-30 00:00:00.000000000 Z
11
+ date: 2013-07-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -150,6 +150,20 @@ dependencies:
150
150
  - - ~>
151
151
  - !ruby/object:Gem::Version
152
152
  version: '10.0'
153
+ - !ruby/object:Gem::Dependency
154
+ name: rspec-html-matchers
155
+ requirement: !ruby/object:Gem::Requirement
156
+ requirements:
157
+ - - ~>
158
+ - !ruby/object:Gem::Version
159
+ version: 0.4.1
160
+ type: :development
161
+ prerelease: false
162
+ version_requirements: !ruby/object:Gem::Requirement
163
+ requirements:
164
+ - - ~>
165
+ - !ruby/object:Gem::Version
166
+ version: 0.4.1
153
167
  description: If you want to replicate a site section with some change (like translation)
154
168
  and mantain the url pretty maybe this is the right library.
155
169
  email: henriquebecker91@gmail.com
@@ -184,7 +198,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
184
198
  version: '0'
185
199
  requirements: []
186
200
  rubyforge_project:
187
- rubygems_version: 2.0.0
201
+ rubygems_version: 2.0.3
188
202
  signing_key:
189
203
  specification_version: 4
190
204
  summary: A Rack::Proxy child pretty url oriented