pretty_proxy 3.0.1 → 4.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 4f0b7cd40975e96c78486559c56a14332c80c480
4
- data.tar.gz: 36e64c2a281cf96d487b4258a096204f4f1bc9da
3
+ metadata.gz: 3d98ec7a74142c72c40fbc20de2aa4553829196e
4
+ data.tar.gz: 671e81774cfc968511a1b9350538411cf935b50e
5
5
  SHA512:
6
- metadata.gz: dd0cd6022492f5b31122b58b52f8f907674b8c55c37f5d38dbc510d19f59904bdcab62714b35884670a0047f54169bce43c4c24ab7fc31646fd434a29079c650
7
- data.tar.gz: 164ea3a9431730971635076a2173799fa87e555f35f0f5fc2aeba73220e903154c4ac122da8b670c8cafdc7f7d7449938afdeb0300e8684211e13f780d46ab30
6
+ metadata.gz: 38e3c0e99b16af783e0f34c488d94a161cb8a034eb9a5aef609c3a8117c0c8a6e051f0d8fb66cb4dc040cebc980da0a00edfeb718b186a34f1fc5a0b367ca843
7
+ data.tar.gz: b204811390ada1dadf6b127d377dea989f9c475a577bc99ff9050ca0d79b074aab10afdb7e75c0e0dd46c759c813c9c7e471d2430477a3656af7b2702af8c760
data/example/example.ru CHANGED
@@ -11,9 +11,10 @@ proxy_path = pretty_proxy_new_args['proxy_path']
11
11
  original_domain = pretty_proxy_new_args['original_domain']
12
12
  original_paths = pretty_proxy_new_args['original_paths']
13
13
 
14
+ # replace the constants with the paths without trailing slashes
14
15
  original_html = config['xhtml_template'].join("\n")
15
- .gsub('PROXY_PATH', proxy_path)
16
- .gsub('ORIGINAL_DOMAIN', original_domain)
16
+ .gsub('PROXY_PATH', proxy_path[1..-2])
17
+ .gsub('ORIGINAL_DOMAIN', original_domain[0..-2])
17
18
 
18
19
  pp = PrettyProxy.new(proxy_path, original_domain, original_paths)
19
20
 
@@ -17,6 +17,8 @@ proxy_url = original_domain + Pathname.new(proxy_path).join('.' + config['conten
17
17
 
18
18
  describe 'PrettyProxy example' do
19
19
  let (:pp) { PrettyProxy.new(proxy_path, original_domain, original_paths) }
20
- it { expect(open(proxy_url)).to be_equivalent_to(pp.proxify_html(open(original_url), proxy_url)) }
20
+ let (:original_page) { open(original_url) }
21
+ let (:proxy_page) { open(proxy_url) }
22
+ it { expect(proxy_page.read).to be_equivalent_to(pp.proxify_html(original_page.read, proxy_url, original_page.content_type)) }
21
23
  end
22
24
 
data/lib/pretty_proxy.rb CHANGED
@@ -51,6 +51,19 @@ require 'addressable/uri'
51
51
  # same of the original_domain.
52
52
  #
53
53
  # CHANGELOG:
54
+ # 4.0.0
55
+ # * proxify_hyperlink doesn't take relative paths or urls anymore, only
56
+ # absolute urls. This is because the proxy url was used for a double
57
+ # purpose (know the proxy scheme+host+port and resolve relative
58
+ # hyperlinks). This can lead to the mistake of believing that the
59
+ # base url to resolve relative links in the page is the page url
60
+ # (what's false if the page has a base tag). See more in:
61
+ # http://www.w3.org/TR/html5/infrastructure.html#base-urls
62
+ # * proxify_html (and other methods who use it, as #call) use the base
63
+ # tag from the page to determine the base url, and add the base
64
+ # tag (if the page doesn't have one) to simplify the assets proxification.
65
+ # All a[href] are changed to absolute urls.
66
+ # * rspec-html-matchers added as development dependency
54
67
  # 3.0.0
55
68
  # * return a String for unproxify_url (and not more a URI)
56
69
  # because this is a change in the API (and can break code) the major
@@ -147,84 +160,81 @@ class PrettyProxy < Rack::Proxy
147
160
  raise ArgumentError, "the url argument isn't a valid uri"
148
161
  end
149
162
 
150
- # Take a hyperlink and the url of the proxy page (not the original page)
151
- # where it come from and return the rewritten hyperlink. If the page
152
- # pointed vy the hyperlink is in the proxy control the rewritten hyperlink
153
- # gonna point to the proxyfied version, otherwise gonna point to the original
154
- # version.
155
- # @param hyperlink [String, URI::HTTP, URI::HTTPS] A string with a relative
156
- # path or an url (string or URI).
157
- # @param proxy_page_url [String, URI::HTTP, URI::HTTPS] The url from the
158
- # proxy page where the hyperlink come from.
159
- # @return [String] A relative path or an url.
163
+ # Take an absolute URL and the scheme://host[:port] of the proxy page
164
+ # (can have path/?query#fragment, but they are ignored) and return the
165
+ # rewritten hyperlink.
166
+ # The url only is rewritten to point the proxyfied version if it's under
167
+ # proxy control.
168
+ # If the url is under proxy control, but it's also a url to the proxy, the
169
+ # url isn't changed (to not double proxyfy, /proxy/ ~> /proxy/proxy/).
170
+ # @param hyperlink [String, URI::HTTP, URI::HTTPS] A string or URI object
171
+ # with an absolute url.
172
+ # @param proxy_site [String, URI::HTTP, URI::HTTPS] A URL with
173
+ # scheme://host[:port] to use in the hyperlink proxification.
174
+ # @return [String] An absolute URL.
160
175
  # @raise PrettyProxy::ProxyError
161
- def proxify_hyperlink(hyperlink, proxy_page_url)
162
- hyperlink = Addressable::URI.parse(hyperlink.clone)
163
- proxy_page_url = Addressable::URI.parse(proxy_page_url)
164
-
165
- # this is URI relative ('//duckduckgo.com', '/path', '../path')
166
- if hyperlink.relative?
167
- absolute_hyperlink = Addressable::URI.parse(unproxify_url(proxy_page_url))
168
- .join(hyperlink)
169
- if inside_proxy_control? absolute_hyperlink
170
- # this is path relative ('../path', 'path', but not '//duckduckgo.com' or '/path')
171
- if Pathname.new(hyperlink.path).relative?
172
- if point_to_a_proxy_page?(absolute_hyperlink, proxy_page_url)
173
- # in the case of a relative path in the original page who points
174
- # to a proxy page, and the proxy page is inside the proxy control
175
- # we have to use the absolute_hyperlink or the page will be double
176
- # proxified. Example: ../proxy/content in http://example.com/proxy/content,
177
- # with original_path as '/' is http://example.com/proxy/proxy/content
178
- hyperlink = absolute_hyperlink
179
- end
180
- else
181
- hyperlink.path = @proxy_path[0..-2] + absolute_hyperlink.path
182
- hyperlink.host = proxy_page_url.host if hyperlink.host
183
- hyperlink.port = proxy_page_url.port if hyperlink.port
184
- end
185
- else
186
- hyperlink = absolute_hyperlink
187
- end
188
- else # the hyperlink is absolute
189
- if inside_proxy_control? hyperlink
190
- # if points to the proxy itself we don't double-proxify
191
- unless point_to_a_proxy_page?(hyperlink, proxy_page_url)
192
- hyperlink = proxify_uri(hyperlink, proxy_page_url)
193
- end
194
- end
195
- end
176
+ def proxify_hyperlink(hyperlink, proxy_site)
177
+ hyperlink = Addressable::URI.parse(hyperlink)
178
+ proxy_site = Addressable::URI.parse(proxy_site)
196
179
 
197
- hyperlink.to_s
180
+ if inside_proxy_control?(hyperlink) &&
181
+ ! point_to_a_proxy_page?(hyperlink, proxy_site)
182
+ proxify_uri(hyperlink, proxy_site).to_s
183
+ else
184
+ hyperlink.to_s
185
+ end
198
186
  end
199
187
 
200
- # Take a (X)HTML Document and apply proxify_hyperlink to the 'href'
201
- # attribute of each 'a' element.
188
+ # Take a (X)HTML Document, add a base tag (if none) and apply
189
+ # proxify_hyperlink to the 'href' attribute of each 'a' element.
190
+ # If the page has a base tag leave it unchanged.
191
+ # If a valid mime_type is passed as argument, but the html argument
192
+ # can't be parsed by this mime-type it simply returns the first argument
193
+ # unchanged.
202
194
  # @param html [String] A (X)HTML document.
203
195
  # @param proxy_url [String, URI::HTTP, URI::HTTPS] The url where the
204
196
  # the proxified version of the page will be displayed.
205
- # @return [String] A copy of the document with the changes applied.
197
+ # @param mime_type [String] A string containing 'text/html' or
198
+ # 'application/xhtml+xml' (insensitive to case and any characters
199
+ # before or after the type). Define if the content will be parsed as xml or
200
+ # html. See this link for more info: http://www.w3.org/TR/xhtml-media-types/.
201
+ # Raise an exception if an invalid value is provided.
202
+ # @return [String] A copy of the document with the changes applied,
203
+ # or the original string, if the document can't be parsed.
206
204
  # @raise PrettyProxy::ProxyError
207
- def proxify_html(html, proxy_url)
208
- parsed_html = nil
209
-
210
- # If you parse XHTML as HTML with Nokogiri and use to_s after the markup can be messed up
211
- #
212
- # Example: <meta name="description" content="not important" />
213
- # becomes <meta name="description" content="not important" >
214
- # To avoid this we parse a document who is XML valid as XML, and, otherwise as HTML
215
- begin
216
- # this also isn't a great way to do this
217
- # the Nokogiri don't have exception classes, this way any StandardError will be silenced
218
- options = Nokogiri::XML::ParseOptions::DEFAULT_XML &
219
- Nokogiri::XML::ParseOptions::STRICT &
220
- Nokogiri::XML::ParseOptions::DTDVALID
221
- parsed_html = Nokogiri::XML::Document.parse(html, nil, nil, options)
222
- rescue
223
- parsed_html = Nokogiri::HTML(html)
205
+ def proxify_html(html, proxy_url, mime_type)
206
+ parsed_html = Utils.parse_html_or_xhtml(html, mime_type)
207
+
208
+ if parsed_html.nil?
209
+ return html
224
210
  end
225
211
 
226
- parsed_html.css('a').each do | hyperlink |
227
- hyperlink['href'] = proxify_hyperlink(hyperlink['href'], proxy_url)
212
+ # This isn't in conformance with the following document
213
+ # http://www.w3.org/TR/html5/infrastructure.html#base-urls
214
+ # but support to frames is not a priority
215
+ document_original_url = unproxify_url(proxy_url)
216
+ # in theory base must have a href... but to avoid an exception by bad html
217
+ base_tag = parsed_html.at_css('base[href]')
218
+ base_url = nil
219
+ if base_tag
220
+ base_url = Addressable::URI.parse(document_original_url)
221
+ .join(base_tag['href']).to_s
222
+ else
223
+ base_url = document_original_url
224
+ end
225
+
226
+ # the href isn't a mandatory attribute of an anchor element
227
+ parsed_html.css('a[href]').each do | hyperlink |
228
+ absolute_hyperlink = Addressable::URI.parse(base_url)
229
+ .join(hyperlink['href']).to_s
230
+ hyperlink['href'] = proxify_hyperlink(absolute_hyperlink, proxy_url)
231
+ end
232
+
233
+ unless base_tag
234
+ is_XML = %r{application/xhtml\+xml}.match(mime_type)
235
+ base_tag = "<base href='#{document_original_url}' #{is_XML ? '/' : ''}>"
236
+ parsed_html.at_css('head').first_element_child
237
+ .add_previous_sibling(base_tag)
228
238
  end
229
239
 
230
240
  parsed_html.to_s
@@ -312,7 +322,8 @@ class PrettyProxy < Rack::Proxy
312
322
  fail ProxyError, 'unknown content-encoding, only encodings known are gzip, deflate and identity'
313
323
  end
314
324
 
315
- page = proxify_html(page, Rack::Request.new(requested_to_proxy_env).url)
325
+ request_to_proxy = Rack::Request.new(requested_to_proxy_env)
326
+ page = proxify_html(page, request_to_proxy.url, content_type)
316
327
  status, headers, page = sugared_rewrite_response([status, headers, page],
317
328
  requested_to_proxy_env,
318
329
  rewritten_env)
@@ -338,6 +349,8 @@ class PrettyProxy < Rack::Proxy
338
349
  [status, headers, [page]]
339
350
  end
340
351
 
352
+ # The simplest way to make use of this class is subclass this class and
353
+ # redefine this method.
341
354
  # @abstract This method is called only over (X)HTML responses, after they are
342
355
  # decompressed and the hyperlinks proxified, before they are compressed
343
356
  # again and the new content-length calculated.
@@ -412,7 +425,7 @@ class PrettyProxy < Rack::Proxy
412
425
  uri = absolute_uri.clone
413
426
 
414
427
  uri.site = proxy_site.site
415
- uri.path = @proxy_path + uri.path[1..-1]
428
+ uri.path = @proxy_path[0..-2] + uri.path
416
429
 
417
430
  uri
418
431
  end
@@ -424,9 +437,45 @@ class PrettyProxy < Rack::Proxy
424
437
  def self.same_domain?(u1, u2)
425
438
  u1.normalized_scheme == u2.normalized_scheme &&
426
439
  u1.normalized_host == u2.normalized_host &&
427
- u1.normalized_port == u2.normalized_port
440
+ u1.inferred_port == u2.inferred_port
428
441
  end
429
442
 
443
+ def self.parse_html_or_xhtml(doc, mime_type)
444
+ # If you parse XHTML as HTML with Nokogiri, and use to_s after, the markup
445
+ # can be messed up, breaking the structural integrity of the xml
446
+ #
447
+ # Example: <meta name="description" content="not important" />
448
+ # becomes <meta name="description" content="not important" >
449
+ #
450
+ # On the other hand, if you parse HTML as XML, and use to_s after, the
451
+ # Nokogiri makes empty content tags self-close
452
+ #
453
+ # Example: <script type="text/javascript" src="/ballonizer.js"></script>
454
+ # becomes: <script type="text/javascript" src="/ballonizer.js" />
455
+ #
456
+ # Which is even worse than the opposite case (XML parsed as HTML)
457
+ parsed_doc = nil
458
+
459
+ case mime_type
460
+ when /text\/html/i
461
+ parsed_doc = Nokogiri::HTML(doc)
462
+ when /application\/xhtml\+xml/i
463
+ options = Nokogiri::XML::ParseOptions::DEFAULT_XML &
464
+ Nokogiri::XML::ParseOptions::STRICT &
465
+ Nokogiri::XML::ParseOptions::NONET
466
+ begin
467
+ parsed_doc = Nokogiri::XML::Document.parse(doc, nil, nil, options)
468
+ rescue
469
+ return nil
470
+ end
471
+ else
472
+ fail ProxyError, "the only mime-types accepted are text/html and" +
473
+ " application/xhtml+xml, the passed argument was " +
474
+ "'#{mime_type}'"
475
+ end
476
+
477
+ parsed_doc
478
+ end
430
479
  def self.validate_proxy_path(proxy_path)
431
480
  fail ConfigError, "proxy_path argument don't start with a '/'" unless proxy_path.start_with? '/'
432
481
  fail ConfigError, "proxy_path argument don't end with a '/'" unless proxy_path.end_with? '/'
@@ -1,5 +1,6 @@
1
1
  require 'pretty_proxy'
2
- require 'equivalent-xml' # needed for be_equivalent_to xml rspec matcher
2
+ require 'equivalent-xml'
3
+ require 'rspec-html-matchers'
3
4
  require 'zlib'
4
5
  require 'uri'
5
6
 
@@ -22,12 +23,13 @@ end
22
23
 
23
24
  describe PrettyProxy do
24
25
 
25
- def generate_html_for_test(hyperlinks)
26
+ def generate_html_for_test(base_url, hyperlinks)
26
27
  doc = <<-END
27
28
  <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
28
29
  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
29
30
  <html xmlns="http://www.w3.org/1999/xhtml">
30
31
  <head>
32
+ #{base_url ? "<base href='#{base_url}' />" : ''}
31
33
  <title>A title</title>
32
34
  <meta http-equiv="content-type" content="application/xhtml+xml; charset=UTF-8" />
33
35
  </head>
@@ -51,18 +53,43 @@ describe PrettyProxy do
51
53
  doc
52
54
  end
53
55
 
54
- let(:original_html) { generate_html_for_test(['http://site.net/p2/p2_2/',
55
- 'http://othersite.net',
56
- '../p3', '../p2/p2_2/',
57
- 'http://site.net/proxy/p1',
58
- '../proxy/p1', '/p1']) }
56
+ let(:original_html_url) { 'http://site.net/p1' }
57
+ let(:requested_to_proxy_url) do
58
+ pp.proxify_hyperlink(original_html_url, 'http://site.net')
59
+ end
59
60
 
60
- let(:proxified_html) { generate_html_for_test(['http://site.net/proxy/p2/p2_2/',
61
- 'http://othersite.net',
62
- 'http://site.net/p3', '../p2/p2_2/',
63
- 'http://site.net/proxy/p1',
64
- 'http://site.net/proxy/p1',
65
- '/proxy/p1']) }
61
+ let (:link_examples) do
62
+ [ 'http://site.net/p2/p2_2/', 'http://othersite.net', '../p3',
63
+ '../p2/p2_2/', 'http://site.net/proxy/p1', '../proxy/p1', '/p1'
64
+ ]
65
+ end
66
+
67
+ let(:html_base_href) { 'http://othersite.net/p1' }
68
+ let(:original_html_with_base) do
69
+ generate_html_for_test(html_base_href, link_examples)
70
+ end
71
+ let(:proxified_html_with_base) do
72
+ # in this case only the first link is proxified because all others are
73
+ # pointing for a site outside proxy control (or for the proxy itself)
74
+ generate_html_for_test(html_base_href, [
75
+ 'http://site.net/proxy/p2/p2_2/', 'http://othersite.net',
76
+ 'http://othersite.net/p3', 'http://othersite.net/p2/p2_2/',
77
+ 'http://site.net/proxy/p1', 'http://othersite.net/proxy/p1',
78
+ 'http://othersite.net/p1'
79
+ ])
80
+ end
81
+
82
+ let(:original_html) do
83
+ generate_html_for_test(nil, link_examples)
84
+ end
85
+ let(:proxified_html) do
86
+ generate_html_for_test(original_html_url, [
87
+ 'http://site.net/proxy/p2/p2_2/', 'http://othersite.net',
88
+ 'http://site.net/p3', 'http://site.net/proxy/p2/p2_2/',
89
+ 'http://site.net/proxy/p1', 'http://site.net/proxy/p1',
90
+ 'http://site.net/proxy/p1'
91
+ ])
92
+ end
66
93
 
67
94
  let (:correct_new_args_example) { ['/proxy/', 'http://myoriginalsite.com', '/content'] }
68
95
  let (:pp) { described_class.new(*correct_new_args_example) }
@@ -84,7 +111,6 @@ describe PrettyProxy do
84
111
  end
85
112
  end
86
113
 
87
- # TODO: Add specs for '/' in the start of the proxy_path
88
114
  let (:right_args) { correct_new_args_example }
89
115
  context "when proxy_path doesn't start with a '/'" do
90
116
  it { expect {new.call('proxy/', right_args[1], right_args[2])}.to raise_error(PrettyProxy::ConfigError) }
@@ -169,7 +195,7 @@ describe PrettyProxy do
169
195
  expect(pp.unproxify_url('http://myproxy.net/proxys/sitez/p1/#id')).to eq 'http://site.net/p1/#id'
170
196
  end
171
197
  it 'change the port to the original' do
172
- expect(URI(pp.unproxify_url('http://myproxy.net:9292/proxys/sitez/p1/#id')).port).to eq 80
198
+ expect(URI.parse(pp.unproxify_url('http://myproxy.net:9292/proxys/sitez/p1/#id')).port).to eq 80
173
199
  end
174
200
 
175
201
  context 'when the url redirect to the own proxy' do
@@ -188,36 +214,26 @@ describe PrettyProxy do
188
214
  describe '#proxify_hyperlink' do
189
215
  let (:pp) { described_class.new('/proxy/', 'http://site.net', ['/p1', '/p2/p2_2/']) }
190
216
 
191
- it "proxify absolute paths to inside the proxy control" do
192
- expect(pp.proxify_hyperlink('/p2/p2_2/', 'http://theproxy.net/proxy/p1')).to eq '/proxy/p2/p2_2/'
193
- end
194
- it "proxify hyperlinks with scheme to inside the proxy control" do
195
- expect(pp.proxify_hyperlink('http://site.net/p2/p2_2/', 'http://theproxy.net/proxy/p1')).to eq 'http://theproxy.net/proxy/p2/p2_2/'
196
- end
197
- it "proxify hyperlinks without scheme to inside the proxy control" do
198
- expect(pp.proxify_hyperlink('//site.net/p2/p2_2/', 'http://theproxy.net/proxy/p1')).to eq '//theproxy.net/proxy/p2/p2_2/'
217
+ it 'proxify urls that are inside the proxy control' do
218
+ original_link = 'http://site.net/p2/p2_2/'
219
+ url_proxy_page_with_the_link = 'http://theproxy.net/proxy/p1'
220
+ proxified_link = 'http://theproxy.net/proxy/p2/p2_2/'
221
+ expect(pp.proxify_hyperlink(original_link, url_proxy_page_with_the_link)).to eq proxified_link
199
222
  end
200
- it "don't change hyperlinks with scheme to ouside the proxy control" do
201
- expect(pp.proxify_hyperlink('http://othersite.net', 'http://theproxy.net/proxy/p1')).to eq 'http://othersite.net'
202
- end
203
- it 'change to urls the relative paths to outside the proxy control' do
204
- expect(pp.proxify_hyperlink('../p3', 'http://theproxy.net/proxy/p1')).to eq 'http://site.net/p3'
205
- expect(pp.proxify_hyperlink('../p2/p2_2', 'http://theproxy.net/proxy/p1')).to eq 'http://site.net/p2/p2_2' # without the trailing '/'
206
- end
207
- it "don't change relative paths to inside the proxy control" do
208
- expect(pp.proxify_hyperlink('../p2/p2_2/', 'http://theproxy.net/proxy/p1')).to eq '../p2/p2_2/'
223
+ it "don't proxify urls that are outside proxy control" do
224
+ outside_site = 'http://othersite.net'
225
+ proxy_url = 'http://theproxy.net/proxy/p1'
226
+ expect(pp.proxify_hyperlink(outside_site, proxy_url)).to eq outside_site
209
227
  end
210
228
 
211
229
  context 'when the proxy itself is inside the proxy control' do
212
230
  let (:pp) { described_class.new('/proxy/', 'http://site.net', '/') }
213
231
 
214
- it "dont't change urls to the proxy itself" do
215
- expect(pp.proxify_hyperlink('http://site.net/proxy/p1', 'http://site.net/proxy/p1')).to eq 'http://site.net/proxy/p1'
216
- expect(pp.proxify_hyperlink('http://site.net/proxy/p1', 'http://site.net/proxy/p2/p2_2/')).to eq 'http://site.net/proxy/p1'
217
- end
218
- it 'change to urls the relative paths to the proxy itself' do
219
- expect(pp.proxify_hyperlink('../proxy/p1', 'http://site.net/proxy/p1')).to eq 'http://site.net/proxy/p1'
220
- expect(pp.proxify_hyperlink('../../proxy/p1', 'http://site.net/proxy/p2/p2_2/')).to eq 'http://site.net/proxy/p1'
232
+ it "preserve urls to the proxy itself (don't double proxify)" do
233
+ proxy_url = 'http://site.net/proxy/p1'
234
+ another_proxy_url = 'http://site.net/proxy/p2/p2_2/'
235
+ expect(pp.proxify_hyperlink(proxy_url, proxy_url)).to eq proxy_url
236
+ expect(pp.proxify_hyperlink(proxy_url, another_proxy_url)).to eq proxy_url
221
237
  end
222
238
  end
223
239
  end
@@ -225,13 +241,44 @@ describe PrettyProxy do
225
241
  describe '#proxify_html' do
226
242
  let (:pp) { described_class.new('/proxy/', 'http://site.net', ['/p1', '/p2/p2_2/']) }
227
243
 
228
- it 'apply #proxify_hyperlink in all hyperlinks in the page' do
244
+ # valid mime_types are 'text/html' and 'application/xhtml+xml' (with any
245
+ # others characters before or after)
246
+ context 'when the mime_type is invalid' do
247
+ it { expect {pp.proxify_html(original_html, 'http://site.net/proxy/p1', 'not a valid mime-type')}.to raise_error(described_class::ProxyError) }
248
+ end
249
+
250
+ context "when the content can't be parsed" do
251
+ it 'return the original string' do
252
+ # without the </b> to make this xml invalid the test fail
253
+ # (the href is changed)
254
+ page = '<a href="http://site.net/p2/p2_2/">test</a></b>'
255
+ expect(pp.proxify_html(page, 'http://proxy.net/proxy/p1', 'application/xhtml+xml')).to equal(page)
256
+ end
257
+ end
258
+
259
+ it 'apply #proxify_hyperlink in all anchors in the page' do
229
260
  # We aren't really testing with HTML, but with XHTML, what is a XML
230
261
  # This is because we dont have a matcher to test HTML equivalence, only XML equivalence
231
262
  # This test is not guaranteed to pass if the input is a HTML non-XHTML
232
263
  # The parse and unparse of the HTML can output a value who is not XML equivalent to the input
233
264
  # Maybe the way is use regex instead of Nokogiri to this work
234
- expect(pp.proxify_html(original_html, 'http://site.net/proxy/p1')).to be_equivalent_to(proxified_html)
265
+ expect(pp.proxify_html(original_html, requested_to_proxy_url, 'application/xhtml+xml')).to be_equivalent_to(proxified_html)
266
+ end
267
+
268
+ context 'when the page has a base tag' do
269
+ subject do
270
+ pp.proxify_html(
271
+ original_html_with_base,
272
+ requested_to_proxy_url,
273
+ 'application/xhtml+xml'
274
+ )
275
+ end
276
+ it 'do not alter the base tag' do
277
+ should have_tag("base[href='#{html_base_href}']")
278
+ end
279
+ it 'use the base tag href as base url for relative links' do
280
+ should be_equivalent_to(proxified_html_with_base)
281
+ end
235
282
  end
236
283
  end
237
284
 
@@ -299,19 +346,25 @@ describe PrettyProxy do
299
346
  describe '#rewrite_response' do
300
347
  let (:pp) { described_class.new('/proxy/', 'http://site.net', ['/p1', '/p2/p2_2/']) }
301
348
  # See http://rack.rubyforge.org/doc/SPEC.html for the rack env hash fields spec
302
- let (:original_env) {{'HTTP_HOST' => 'site.net',
303
- 'SCRIPT_NAME' => '',
304
- 'PATH_INFO' => '/proxy/p1',
305
- 'QUERY_STRING' => '',
306
- 'SERVER_NAME' => 'site.net',
307
- 'SERVER_PORT' => '80',
308
- 'rack.url_scheme' => 'http'}}
349
+ let (:original_env) do
350
+ url = URI.parse(original_html_url)
351
+ { 'HTTP_HOST' => url.host,
352
+ 'SCRIPT_NAME' => '',
353
+ 'PATH_INFO' => pp.proxy_path[0..-2] + url.path,
354
+ 'QUERY_STRING' => '',
355
+ 'SERVER_NAME' => url.host,
356
+ 'SERVER_PORT' => url.port,
357
+ 'rack.url_scheme' => url.scheme
358
+ }
359
+ end
309
360
  let (:rewritten_env) { pp.rewrite_env(original_env) }
310
- let (:response_example) { original_content = [200,
311
- {'content-type' => 'application/xhtml+xml',
312
- 'content-encoding' => 'identity',
313
- 'content-length' => original_html.bytesize.to_s },
314
- [original_html]] }
361
+ let (:response_example) do
362
+ original_content = [200, {
363
+ 'content-type' => 'application/xhtml+xml',
364
+ 'content-encoding' => 'identity',
365
+ 'content-length' => original_html.bytesize.to_s
366
+ }, [original_html]]
367
+ end
315
368
 
316
369
  context 'when the content-type is html or xhtml' do
317
370
  let (:original_response) { response_example }
@@ -319,11 +372,11 @@ describe PrettyProxy do
319
372
 
320
373
  let (:rewritten_headers) { subject[1] }
321
374
  let (:rewritten_body) { subject[2].join }
322
- let (:original_url) { Rack::Request.new(original_env).url }
375
+ let (:requested_to_proxy_url) { Rack::Request.new(original_env).url }
323
376
 
324
377
  # NOTE: TESTING ONLY WITH XHTML, BY THE SAME MOTIVE EXPLAINED IN THE #proxify_html SPEC
325
378
  it 'apply #proxify_html to the body' do
326
- expect(rewritten_body).to be_equivalent_to pp.proxify_html(original_html, original_url)
379
+ expect(rewritten_body).to be_equivalent_to pp.proxify_html(original_html, requested_to_proxy_url, 'application/xhtml+xml')
327
380
  end
328
381
 
329
382
  it 'change the content-length header to the new size of the body' do
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pretty_proxy
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.1
4
+ version: 4.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Henrique Becker
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-05-30 00:00:00.000000000 Z
11
+ date: 2013-07-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -150,6 +150,20 @@ dependencies:
150
150
  - - ~>
151
151
  - !ruby/object:Gem::Version
152
152
  version: '10.0'
153
+ - !ruby/object:Gem::Dependency
154
+ name: rspec-html-matchers
155
+ requirement: !ruby/object:Gem::Requirement
156
+ requirements:
157
+ - - ~>
158
+ - !ruby/object:Gem::Version
159
+ version: 0.4.1
160
+ type: :development
161
+ prerelease: false
162
+ version_requirements: !ruby/object:Gem::Requirement
163
+ requirements:
164
+ - - ~>
165
+ - !ruby/object:Gem::Version
166
+ version: 0.4.1
153
167
  description: If you want to replicate a site section with some change (like translation)
154
168
  and mantain the url pretty maybe this is the right library.
155
169
  email: henriquebecker91@gmail.com
@@ -184,7 +198,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
184
198
  version: '0'
185
199
  requirements: []
186
200
  rubyforge_project:
187
- rubygems_version: 2.0.0
201
+ rubygems_version: 2.0.3
188
202
  signing_key:
189
203
  specification_version: 4
190
204
  summary: A Rack::Proxy child pretty url oriented