pretty_proxy 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Rakefile +21 -0
- data/example/example.ru +35 -0
- data/example/example_conf.json +32 -0
- data/example/example_spec.rb +22 -0
- data/example/heresy.ru +12 -0
- data/lib/pretty_proxy.rb +452 -0
- data/spec/pretty_proxy_spec.rb +357 -0
- metadata +179 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 55e1a99924c4f5f41b6e78e0c26d8be032b59076
|
4
|
+
data.tar.gz: 2917669d77edead2513f8abfabbe232bd258a8e5
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 6c47f72b5f7542ac0d2f6b7e0753721960e7552d7a6ffc518be2c48c3779e05f351e2d3c60f8faa1e0fbd2c185e4ee27fc18e66077ad5482fe0ae004edd6e793
|
7
|
+
data.tar.gz: 25c966ed96630729a914288da0df7db998130ba1485504607179a09643e9c164b342e3ceb77eb6bc1bcf685dd7e8efb902106d91766f2b3c732b1fe2dd7a99a1
|
data/Rakefile
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'rspec/core/rake_task'
|
2
|
+
|
3
|
+
# Defines the :spec task (runs the gem specs) and makes it the default one.
RSpec::Core::RakeTask.new :spec

task :default => [:spec]

desc 'run a sample of the horrors this class is capable of (in localhost:9292/proxy)'
task :heresy_example do
  sh 'rackup ./example/heresy.ru'
end

desc 'run a multithread example in http://localhost:9292/{p1,proxy/p1} with thin'
task :run_example do
  sh 'thin start --threaded -p 9292 --rackup ./example/example.ru'
end

# The task name is given without the leading colon: 'rake :run_example'
# is not a valid invocation of the task defined above.
desc "run the specs of the multithread example, run 'rake run_example' before"
task :test_example do
  sh 'rspec ./example/example_spec.rb'
end
|
21
|
+
|
data/example/example.ru
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
require 'rack'
|
2
|
+
require 'json'
|
3
|
+
require 'open-uri'
|
4
|
+
require 'pretty_proxy'
|
5
|
+
|
6
|
+
# the json path below is relative to the Rakefile, call rake or change it
# File.read opens, reads and closes the file in one step; the former
# open(...).read never closed the handle explicitly.
config = JSON.parse(File.read('example/example_conf.json'))

proxy_args = config['pretty_proxy_new_args']
proxy_path = proxy_args['proxy_path']
original_domain = proxy_args['original_domain']
original_paths = proxy_args['original_paths']

# fill the placeholders of the template with the configured values
original_html = config['xhtml_template'].join("\n")
                                        .gsub('PROXY_PATH', proxy_path)
                                        .gsub('ORIGINAL_DOMAIN', original_domain)

# named 'proxy' (and not 'pp') to avoid shadowing Kernel#pp
proxy = PrettyProxy.new(proxy_path, original_domain, original_paths)

headers = { 'content-type' => 'application/xhtml+xml',
            'content-encoding' => 'identity',
            'content-length' => original_html.bytesize.to_s }

app = Rack::Builder.new do
  # the original page
  map config['content_path'] do
    run ->(env) { [200, headers, [original_html]] }
  end

  # the proxified version of the original page
  map Pathname.new(proxy_path).join('.' + config['content_path']).to_s do
    run proxy
  end
end.to_app

run app
|
35
|
+
|
@@ -0,0 +1,32 @@
|
|
1
|
+
{
|
2
|
+
"pretty_proxy_new_args": {
|
3
|
+
"proxy_path": "/proxy/",
|
4
|
+
"original_domain": "http://localhost:9292",
|
5
|
+
"__comment": "if you change the 'Original paths' field you have to edit the 'Content path' and the 'XHTML Template' fields by hand",
|
6
|
+
"original_paths": ["/p1", "/p2/p2_2"]
|
7
|
+
},
|
8
|
+
"content_path": "/p1",
|
9
|
+
"xhtml_template": [
|
10
|
+
"<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\"",
|
11
|
+
"\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">",
|
12
|
+
"<html xmlns=\"http://www.w3.org/1999/xhtml\">",
|
13
|
+
"<head>",
|
14
|
+
" <title>A title</title>",
|
15
|
+
" <meta http-equiv=\"content-type\" content=\"application/xhtml+xml; charset=UTF-8\" />",
|
16
|
+
"</head>",
|
17
|
+
"<body>",
|
18
|
+
" <a href=\"ORIGINAL_DOMAIN/p2/p2_2/\" >a link </a>",
|
19
|
+
" <p><a href=\"http://othersite.net\" >other link</a></p>",
|
20
|
+
" <div>",
|
21
|
+
" <a href=\"../p3\" >another link</a>",
|
22
|
+
" <p><a href=\"../p2/p2_2/\" >yet another link</a></p>",
|
23
|
+
" </div>",
|
24
|
+
" <div>",
|
25
|
+
" <a href=\"ORIGINAL_DOMAIN/PROXY_PATH/p1\" >and yet another link</a>",
|
26
|
+
" <p><a href=\"../PROXY_PATH/p1\" >the last link</a></p>",
|
27
|
+
" </div>",
|
28
|
+
"</body>",
|
29
|
+
"</html>"
|
30
|
+
]
|
31
|
+
}
|
32
|
+
|
@@ -0,0 +1,22 @@
|
|
1
|
+
require 'open-uri'
|
2
|
+
require 'equivalent-xml'
|
3
|
+
require 'json'
|
4
|
+
require 'pretty_proxy'
|
5
|
+
|
6
|
+
# the json path below is relative to the Rakefile, call rake or change it
# File.read opens, reads and closes the file in one step; the former
# open(...).read never closed the handle explicitly.
config = JSON.parse(File.read('example/example_conf.json'))

# this is ugly, but simple and clear, and this is a example
pretty_proxy_new_args = config['pretty_proxy_new_args']
proxy_path = pretty_proxy_new_args['proxy_path']
original_domain = pretty_proxy_new_args['original_domain']
original_paths = pretty_proxy_new_args['original_paths']

original_url = original_domain + config['content_path']
proxy_url = original_domain + Pathname.new(proxy_path).join('.' + config['content_path']).to_s

# Compares the page served through the proxy with the original page
# proxified locally; the example server has to be running.
# NOTE(review): open(url) relies on open-uri patching Kernel#open - confirm
# the targeted Ruby versions still support it.
describe 'PrettyProxy example' do
  let(:pp) { PrettyProxy.new(proxy_path, original_domain, original_paths) }
  it { expect(open(proxy_url)).to be_equivalent_to(pp.proxify_html(open(original_url), proxy_url)) }
end
|
22
|
+
|
data/example/heresy.ru
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
require 'pretty_proxy'
|
2
|
+
|
3
|
+
# Example subclass: replaces every mention of Magic in the proxified
# (X)HTML pages by 'Yu-Gi-Oh'.
class Heresy < PrettyProxy
  # @param triplet [Array] status, headers and the page body as a String.
  # @return [Array] the same status and headers with the rewritten page.
  def sugared_rewrite_response(triplet, requested_to_proxy_env, rewritten_env)
    status, headers, page = triplet
    [status, headers, page.gsub(/(MTG )?Magic(: The Gathering)?/, 'Yu-Gi-Oh')]
  end
end
|
10
|
+
|
11
|
+
run Heresy.new('/proxy/', 'http://magiccards.info', '/')
|
12
|
+
|
data/lib/pretty_proxy.rb
ADDED
@@ -0,0 +1,452 @@
|
|
1
|
+
require 'pathname'
|
2
|
+
require 'uri'
|
3
|
+
require 'nokogiri'
|
4
|
+
require 'rack'
|
5
|
+
require 'rack-proxy'
|
6
|
+
|
7
|
+
# The PrettyProxy class aggregate and validate the configuration of a
|
8
|
+
# proxy based on simple pretty-url oriented rewriting rules. It's also
|
9
|
+
# a rack app, and offers an abstract method for rewriting the responses
|
10
|
+
# returned by the proxy. The (X)HTML responses are rewritten to make
|
11
|
+
# the hyperlinks point to the proxy version of the page if it exist.
|
12
|
+
#
|
13
|
+
# @example A terrible example
|
14
|
+
# require 'pretty_proxy'
|
15
|
+
#
|
16
|
+
# class Heresy < PrettyProxy
|
17
|
+
# def sugared_rewrite_response(triplet, requested_to_proxy_env, rewritten_env)
|
18
|
+
# status, headers, page = triplet
|
19
|
+
# page = page.gsub(/(MTG )?Magic(: The Gathering)?/, 'Yu-Gi-Oh')
|
20
|
+
# [status, headers, page]
|
21
|
+
# end
|
22
|
+
# end
|
23
|
+
#
|
24
|
+
# run Heresy.new('/proxy/', 'http://magiccards.info', '/')
|
25
|
+
#
|
26
|
+
# You can see the result in http://localhost:9292/proxy/ (if you use the
|
27
|
+
# command 'rake heresy_example' in the gem folder).
|
28
|
+
#
|
29
|
+
# @note: If you want to make a Rack app who use the proxy to point to
|
30
|
+
# another path of the same app you have to use a server in multithread
|
31
|
+
# mode, otherwise requests to the proxy will end in a deadlock.
|
32
|
+
# The proxy request the original page but the server don't respond because
|
33
|
+
# is waiting the proxy request to be resolved. The proxy request don't end
|
34
|
+
# because need the original page. A timeout error occur.
|
35
|
+
#
|
36
|
+
# What this class can't do but maybe will do in the future: smart
|
37
|
+
# handling of 3xx status response and chunked encoding (the chunks are
|
38
|
+
# concatenated in the proxy and the transfer-encoding header removed);
|
39
|
+
# support more than deflate and gzip; exception classes with more
|
40
|
+
# than a message;
|
41
|
+
#
|
42
|
+
# Glossary:
|
43
|
+
# 'a valid proxy url/path': The path (or the path of the url) start with
|
44
|
+
# the proxy_path and is followed by a original_path.
|
45
|
+
# 'in(side)/out(side) the proxy control': The url have (or not) the path
|
46
|
+
# starting with a original_path and the scheme, port and host are the
|
47
|
+
# same of the original_domain.
|
48
|
+
#
|
49
|
+
# The exception classes (except Error) inherit Error, and Error inherit
|
50
|
+
# ArgumentError. They are empty yet, only have a message.
|
51
|
+
#
|
52
|
+
# @see PrettyProxy::Error
|
53
|
+
# @see PrettyProxy::ConfigError
|
54
|
+
# @see PrettyProxy::ProxyError
|
55
|
+
#
|
56
|
+
# @author: Henrique Becker
|
57
|
+
class PrettyProxy < Rack::Proxy
|
58
|
+
# The supertype of every exception explicitly raised by the methods of
# this class. It inherits ArgumentError.
class Error < ArgumentError
end

# Raised when trying to set the internal state of the class to an
# invalid value.
class ConfigError < Error
end

# Raised when the arguments of a method are invalid for the proxy
# configuration.
class ProxyError < Error
end

# NOTE: these assignments run in the class body, so they set instance
# variables on the class object itself; the values of each instance are
# assigned in #initialize.
@proxy_path = nil
@original_domain = nil
@original_paths = nil
|
70
|
+
|
71
|
+
# Build a new PrettyProxy instance or raise a ConfigError. The arguments
# are validated first and then cloned (shallow clone) before being stored.
# @param proxy_path [String] Start and end with slashes, represent the
#   path in the proxy site who map to the proxy app (and, in consequence,
#   to another path in the same or another site).
# @param original_domain [String, URI] A URL without path (no trailing slash),
#   query or fragment (can have scheme (http[s]), domain and port), the site
#   to where the proxy map.
# @param original_paths [String, #each] The path (or the paths) to be mapped
#   right inside the proxy_path (has to begin with slash).
# @note See the specs {file:../spec/pretty_proxy_spec.rb} for examples and
#   complete definition of invalid args.
# @return [PrettyProxy] a new instance
# @raise PrettyProxy::ConfigError
def initialize(proxy_path, original_domain, original_paths)
  Utils.validate_proxy_path(proxy_path)
  Utils.validate_original_domain_and_paths(original_domain, original_paths)

  @proxy_path = proxy_path.clone
  @original_domain = URI(original_domain.clone)

  # a lone path is wrapped in an array, so @original_paths always
  # responds to #each
  paths_copy = original_paths.clone
  @original_paths = original_paths.respond_to?(:each) ? paths_copy : [paths_copy]
end
|
96
|
+
|
97
|
+
# !@attribute proxy_path
|
98
|
+
# @param a input who will be validated as in the initialize
|
99
|
+
# @return the clone of the internal value
|
100
|
+
# !@attribute original_domain
|
101
|
+
# @param a input who will be validated as in the initialize
|
102
|
+
# @return the clone of the internal value
|
103
|
+
# !@attribute original_paths
|
104
|
+
# @param a input who will be validated as in the initialize
|
105
|
+
# @return the clone of the internal value
|
106
|
+
# Generate one reader per attribute; each reader returns a clone of the
# instance variable, never the internal object itself.
[:proxy_path, :original_domain, :original_paths].each do |attribute|
  ivar_name = "@#{attribute}"
  define_method(attribute) do
    instance_variable_get(ivar_name).clone
  end
end
|
109
|
+
|
110
|
+
# Replace the proxy_path after validating it with the same rule used by
# #initialize. A clone of the argument is stored (as #initialize does), so
# later changes made by the caller to its own string do not reach the
# proxy configuration.
# @param proxy_path [String] The new proxy path.
# @raise PrettyProxy::ConfigError if the validation fails.
def proxy_path=(proxy_path)
  Utils.validate_proxy_path(proxy_path)
  @proxy_path = proxy_path.clone
end
|
114
|
+
|
115
|
+
# Replace the original_domain after validating it (together with the
# current original_paths) with the same rule used by #initialize. The
# value is stored as a URI built from a clone of the argument, exactly as
# #initialize does; the other methods call #host, #scheme and #port on it,
# so a raw String can't be stored.
# @param original_domain [String, URI] The new original domain.
# @raise PrettyProxy::ConfigError if the validation fails.
def original_domain=(original_domain)
  Utils.validate_original_domain_and_paths(original_domain, @original_paths)
  @original_domain = URI(original_domain.clone)
end
|
119
|
+
|
120
|
+
# Replace the original_paths after validating them (together with the
# current original_domain) with the same rule used by #initialize. As in
# #initialize, a clone is stored and a lone path is wrapped in an array,
# because the other methods iterate over the stored value.
# @param original_paths [String, #each] The new path (or paths).
# @raise PrettyProxy::ConfigError if the validation fails.
def original_paths=(original_paths)
  Utils.validate_original_domain_and_paths(@original_domain, original_paths)
  paths_copy = original_paths.clone
  @original_paths = original_paths.respond_to?(:each) ? paths_copy : [paths_copy]
end
|
124
|
+
|
125
|
+
# Map a proxy url to the original URL behind the proxy. The query and the
# fragment, if any, are preserved. For the rewrite of a whole request
# @see rewrite_env.
# @param url [String, URI::HTTP, URI::HTTPS] A URL of a proxy page.
# @return [URI::HTTP, URI::HTTPS] A URI object.
# @raise PrettyProxy::ProxyError if the path isn't prefixed by the
#   proxy_path, isn't followed by one of the original_paths or is a
#   request for a proxy page of the original domain.
# @raise ArgumentError if the argument isn't a valid uri.
def unproxify_url(url)
  url = URI(url.clone)
  unless url.path.start_with?(@proxy_path)
    fail ProxyError, "url path has to be prefixed by proxy_path (#{@proxy_path})"
  end

  # the slash who ends the proxy_path is kept as the start of the new path
  url.path = url.path[(proxy_path.size - 1)..-1]
  if original_paths.none? { |path| url.path.start_with?(path) }
    fail ProxyError, "the proxy only responds to paths in the original_paths (#{@original_paths})"
  end

  origin = original_domain
  if url.host == origin.host && url.path.start_with?(@proxy_path)
    fail ProxyError, 'this is a request for the proxy for a proxy page (recursive request)'
  end

  url.host = origin.host
  url.scheme = origin.scheme
  url.port = origin.port
  url
rescue URI::InvalidURIError
  raise ArgumentError, "the url argument isn't a valid uri"
rescue URI::Error => e
  raise ProxyError, "an unexpected URI exception has been thrown, the message is '#{e.message}'"
end
|
152
|
+
|
153
|
+
# Take a hyperlink and the url of the proxy page (not the original page)
# where it come from and return the rewritten hyperlink. If the page
# pointed by the hyperlink is in the proxy control the rewritten hyperlink
# gonna point to the proxified version, otherwise gonna point to the
# original version.
# @param hyperlink [String, URI::HTTP, URI::HTTPS] A string with a relative
#   path or an url (string or URI).
# @param proxy_page_url [String, URI::HTTP, URI::HTTPS] The url from the
#   proxy page where the hyperlink come from.
# @return [String] A relative path or an url.
# @raise PrettyProxy::ProxyError
def proxify_hyperlink(hyperlink, proxy_page_url)
  hyperlink = URI(hyperlink.clone)
  proxy_page_url = URI(proxy_page_url)

  if Utils.relative_path?(hyperlink)
    # recreate the original site url from the relative path
    absolute_link = unproxify_url(proxy_page_url)
    absolute_link.path = Pathname.new(absolute_link.path).join(hyperlink.path).to_s
    # absolute_link was built from the proxy page url, so it still carries
    # the query and the fragment of that page; the rebuilt link has to carry
    # the ones of the hyperlink (only a reference with empty path and no
    # query keeps the query of the page, as in RFC 3986 section 5.2.2)
    unless hyperlink.path.empty? && hyperlink.query.nil?
      absolute_link.query = hyperlink.query
    end
    absolute_link.fragment = hyperlink.fragment

    # inside the proxy control the relative path is kept as is, except in
    # the case of a relative path in the original page who points to a
    # proxy page, and the proxy page is inside the proxy control: there we
    # have to use the absolute_link or the page will be double proxified
    # example: ../proxy/content in http://example.com/proxy/content, with
    # original_path as '/' is http://example.com/proxy/proxy/content
    if !inside_proxy_control?(absolute_link) ||
       (same_domain_as_original?(proxy_page_url) &&
        valid_path_for_proxy?(absolute_link.path))
      hyperlink = absolute_link
    end
  elsif inside_proxy_control?(hyperlink) &&
        !point_to_a_proxy_page?(hyperlink, proxy_page_url)
    # absolute url of the original site: point it to the proxy version
    hyperlink.scheme = proxy_page_url.scheme
    hyperlink.host = proxy_page_url.host
    hyperlink.port = proxy_page_url.port
    hyperlink.path = @proxy_path + hyperlink.path[1..-1]
  end

  hyperlink.to_s
end
|
197
|
+
|
198
|
+
# Take a (X)HTML Document and apply proxify_hyperlink to the 'href'
# attribute of each 'a' element who has one ('a' elements without 'href'
# are left untouched).
# @param html [String] A (X)HTML document.
# @param proxy_url [String, URI::HTTP, URI::HTTPS] The url where the
#   proxified version of the page will be displayed.
# @return [String] A copy of the document with the changes applied.
# @raise PrettyProxy::ProxyError
def proxify_html(html, proxy_url)
  # If you parse XHTML as HTML with Nokogiri and use to_s after the markup can be messed up
  #
  # Example: <meta name="description" content="not important" />
  # becomes <meta name="description" content="not important" >
  # To avoid this we parse a document who is XML valid as XML, and, otherwise as HTML
  parsed_html =
    begin
      # The options were written as DEFAULT_XML & STRICT & DTDVALID; STRICT
      # is 0, so the bitwise AND always resulted in plain STRICT. The strict
      # parse raises for a document who isn't well formed XML.
      # this also isn't a great way to do this, any StandardError raised
      # by the XML parse will be silenced
      Nokogiri::XML::Document.parse(html, nil, nil, Nokogiri::XML::ParseOptions::STRICT)
    rescue
      Nokogiri::HTML(html)
    end

  # 'a[href]' skips the anchors without href (<a name="top">), they have
  # no hyperlink to rewrite and a nil can't be parsed as a URI
  parsed_html.css('a[href]').each do |anchor|
    anchor['href'] = proxify_hyperlink(anchor['href'], proxy_url)
  end

  parsed_html.to_s
end
|
230
|
+
|
231
|
+
# Turn the Rack environment hash of a request to the proxy version of a
# page into a request to the original page. As in Rack::Proxy, it is used
# by #call to request the original page before rewrite_response is called
# over the response. If you want to use your own rewrite rules maybe is
# more wise to subclass Rack::Proxy instead of this class. The purpose of
# this class is mainly implement and enforce these rules for you.
# @param env [Hash{String => String}] A Rack environment hash.
#   (see: {http://rack.rubyforge.org/doc/SPEC.html})
# @return [Hash{String => String}] A unproxified copy of the argument.
# @raise PrettyProxy::ProxyError
def rewrite_env(env)
  env = env.clone
  unproxified_url = unproxify_url(Rack::Request.new(env).url)
  original_path = unproxified_url.path

  env['HTTP_HOST'] = unproxified_url.host if env['HTTP_HOST']
  env['SERVER_NAME'] = unproxified_url.host
  env['SERVER_PORT'] = unproxified_url.port.to_s

  script_name_empty = env['SCRIPT_NAME'].empty?
  path_info_empty = env['PATH_INFO'].empty?
  if !script_name_empty && path_info_empty
    # the whole path was in SCRIPT_NAME, keep it there
    env['SCRIPT_NAME'] = original_path
  else
    # Seriously, i don't know how to split again the unproxified url, so
    # PATH_INFO gonna have the full path
    env['PATH_INFO'] = original_path
    env['SCRIPT_NAME'] = ''
  end

  env['REQUEST_PATH'] = original_path
  env['REQUEST_URI'] = original_path

  env
end
|
270
|
+
|
271
|
+
# Mainly apply the proxify_html to the body of the response if it is a html.
# Raise an error if the 'content-encoding' is other than deflate, gzip or
# identity (a response without the header is handled as identity). Change
# the 'content-length' header for the new body bytesize.
# Remove the 'transfer-encoding' if it is chunked, and act as not chunked.
# This method is inherited of Rack::Proxy, but in the original it have only
# the first parameter (the triplet). This version have the request Rack env
# to the proxy and the rewritten Rack env as second and third parameters,
# respectively.
# @param triplet [Array<(Integer, Hash{String => String}, #each)>] A Rack
#   response (see {http://rack.rubyforge.org/doc/SPEC.html}) for the request
#   to the original site.
# @param requested_to_proxy_env [Hash{String => String}] A Rack environment
#   hash. The requested to the proxy version.
# @param rewritten_env [Hash{String => String}] A Rack environment hash.
#   The rewritten by the proxy to point to the original version.
# @return [Array<(Integer, Hash{String => String}, #each)>] The triplet
#   itself if the response isn't (X)HTML, otherwise the rewritten response.
# @raise PrettyProxy::ProxyError
def rewrite_response(triplet, requested_to_proxy_env, rewritten_env)
  status, headers, body = triplet
  content_type = headers['content-type']
  return triplet unless %r{text/html} =~ content_type ||
                        %r{application/xhtml\+xml} =~ content_type

  # the #each method of body can't be called twice, but we need to call it here and it is called
  # after this method return, so we fake the body with a array of one string
  # we can't return a string (even it responds to #each) see: http://rack.rubyforge.org/doc/SPEC.html (section 'The Body')
  page = ''.dup
  body.each { |chunk| page << chunk }
  # the original body is replaced below and will never reach the server,
  # so it is closed here
  body.close if body.respond_to?(:close)

  case headers['content-encoding']
  when 'gzip' then page = Zlib::GzipReader.new(StringIO.new(page)).read
  when 'deflate' then page = Zlib::Inflate.inflate(page)
  when nil, 'identity'
    # nothing to decode, a response without the header isn't encoded
  else
    fail ProxyError, 'unknown content-encoding, only encodings known are gzip, deflate and identity'
  end

  page = proxify_html(page, Rack::Request.new(requested_to_proxy_env).url)
  status, headers, page = sugared_rewrite_response([status, headers, page],
                                                   requested_to_proxy_env,
                                                   rewritten_env)

  case headers['content-encoding']
  when 'gzip'
    # compress to a new empty buffer: writing over a copy of the page left
    # the end of the plain page after the (shorter) gzip stream
    compressed = StringIO.new(String.new)
    gzip_stream = Zlib::GzipWriter.new(compressed)
    gzip_stream.write(page)
    gzip_stream.close
    page = compressed.string
  when 'deflate' then page = Zlib::Deflate.deflate(page)
  end

  headers['content-length'] = page.bytesize.to_s if headers['content-length']

  # TODO: find a way to make the code work with chunked encoding
  if 'chunked' == headers['transfer-encoding']
    headers.delete('transfer-encoding')
    headers['content-length'] = page.bytesize.to_s
  end

  [status, headers, [page]]
end
|
336
|
+
|
337
|
+
# @abstract Hook for the subclasses. #rewrite_response calls it only over
#   (X)HTML responses, after the body is decompressed and the hyperlinks
#   proxified, and before the body is compressed again and the new
#   content-length calculated. The body of the triplet is a String and not
#   a object who respond to #each, the same has to be true in the return.
#   Return a modified clone of the response, don't change the argument.
#   This default implementation returns the triplet as received.
# @param triplet [Array<(Integer, Hash{String => String}, String)>] Not a
#   valid Rack response, the third element is a string with the response body.
# @param requested_to_proxy_env [Hash{String => String}] A Rack environment
#   hash. The requested to the proxy version.
# @param rewritten_env [Hash{String => String}] A Rack environment hash.
#   The rewritten by the proxy to point to the original version.
# @return [Array<(Integer, Hash{String => String}, String)>] The response
#   in the same format of the first argument.
def sugared_rewrite_response(triplet, requested_to_proxy_env, rewritten_env)
  triplet
end
|
354
|
+
|
355
|
+
# Make this class a Rack app. It is overridden to pass to rewrite_response
# the original Rack environment (request to the proxy) and the rewritten
# env (modified to point to the original page request).
# If you don't know the parameters and return of this method, please read
# {http://rack.rubyforge.org/doc/SPEC.html}.
def call(env)
  # in theory we only need to repass the rewritten_env, any original env info
  # needed can be passed as a environment application variable
  # example: (env['app_name.original_path'] = env['PATH_INFO'])
  # but to avoid this to be a common idiom we repass the original env too
  rewritten_env = rewrite_env(env)
  original_response = perform_request(rewritten_env)
  rewrite_response(original_response, env, rewritten_env)
end
|
368
|
+
|
369
|
+
# Tell if the #scheme, the #host and the #port of the argument are the
# same of the original_domain.
# @param uri [URI] The URI to be compared.
def same_domain_as_original?(uri)
  origin = @original_domain
  Utils.same_domain?(origin, uri)
end
|
374
|
+
|
375
|
+
# Check if the URI::HTTP(S) is a page who can be accessed through the proxy:
# it has to be in the original domain and its path, prefixed with the
# proxy_path, has to be a valid path for the proxy.
# @param uri [URI::HTTP, URI::HTTPS] The url of the page.
# @return [Boolean]
def inside_proxy_control?(uri)
  return false unless same_domain_as_original?(uri)

  # drop the leading slash, the proxy_path already ends with one; a url
  # without path ('http://site.net') has an empty path, and slicing it
  # with [1..-1] gives nil (what made the concatenation below fail)
  path_without_leading_slash = uri.path.sub(%r{\A/}, '')
  valid_path_for_proxy?(@proxy_path + path_without_leading_slash)
end
|
380
|
+
|
381
|
+
# Check if the absolute path begin with the proxy_path and is followed by
# one of the original_paths.
# NOTE(review): the original_paths are compared with a trailing slash, so a
# path equal to a original path without the slash ('/proxy/about' for the
# original path '/about') isn't valid here, while #unproxify_url accepts
# it - confirm if this is intended.
# @param absolute_path [String] The path to be checked.
# @return [Boolean]
def valid_path_for_proxy?(absolute_path)
  path_without_proxy_prefix = absolute_path[(@proxy_path.size - 1)..-1]
  return false unless absolute_path.start_with?(@proxy_path)

  @original_paths.each do |path|
    # if we don't add the trailing slash '/about' and '/about_us' match
    path_with_trailing_slash = path.end_with?('/') ? path : "#{path}/"
    return true if path_without_proxy_prefix.start_with?(path_with_trailing_slash)
  end

  false
end
|
396
|
+
|
397
|
+
# Tell if the hyperlink point to a valid proxy page: it has to share the
# scheme, the host and the port with the proxy domain and its path has to
# be a valid path for the proxy.
# @param hyperlink [URI] The url to be checked.
# @param proxy_domain [URI] A url of the proxy site.
def point_to_a_proxy_page?(hyperlink, proxy_domain)
  return false unless Utils.same_domain?(hyperlink, proxy_domain)

  valid_path_for_proxy?(hyperlink.path)
end
|
403
|
+
|
404
|
+
# api private Don't use the methods of this class. They are for internal use only.
class Utils
  class << self
    # A hyperlink without scheme is handled as a relative path.
    def relative_path?(hyperlink)
      hyperlink.scheme ? false : true
    end

    # Compare the scheme, the host and the port of two URIs (in this order).
    def same_domain?(u1, u2)
      [:scheme, :host, :port].all? do |component|
        u1.public_send(component) == u2.public_send(component)
      end
    end

    # Raise a ConfigError if the proxy_path don't end with a slash.
    def validate_proxy_path(proxy_path)
      return if proxy_path.end_with?('/')

      # NOTE: if the user want to proxify 'www.site.net', and not 'www.site.net/'?
      # Well, majority of the internet answers for this are 'the right way is to use the trailing slash'
      # See: http://tim-stanley.com/post/pretty-good-urls/
      # http://www.w3.org/Provider/Style/URI.html
      # http://stackoverflow.com/questions/7355305/preventing-trailing-slash-on-domain-name
      # http://alistapart.com/article/slashforward
      # http://www.searchenginejournal.com/linking-issues-why-a-trailing-slash-in-the-url-does-matter/13021/?ModPagespeed=noscript
      fail ConfigError, "proxy_path argument don't end with a '/'"
    end

    # Raise a ConfigError if the original_paths are empty, if the
    # original_domain isn't a valid URI or has query or fragment, or if
    # one of the paths can't be set as the path of the original_domain.
    def validate_original_domain_and_paths(original_domain, original_paths)
      fail ConfigError, 'original_paths is empty' if original_paths.empty?

      original_domain = URI(original_domain) # can raise URI:Error's
      if original_domain.query || original_domain.fragment
        fail ConfigError, 'the original_domain has to have no query or fragment'
      end

      # each path is set in a copy of the domain, only to let the URI
      # classes validate it (can raise URI:Error's)
      uri_for_validation = original_domain.clone
      if original_paths.respond_to?(:each)
        original_paths.each { |path| uri_for_validation.path = path }
      else
        uri_for_validation.path = original_paths
      end
    rescue URI::InvalidComponentError => e
      raise ConfigError, "the original_paths contain a invalid path, message of the URI exception: '#{e.message}'"
    rescue URI::InvalidURIError => e
      raise ConfigError, "the original_domain isn't a valid URI, message of the URI exception: '#{e.message}'"
    rescue URI::Error => e
      raise ConfigError, "a unexpected URI::Error exception was raised, message of the exception: '#{e.message}'"
    end
  end
end
|
449
|
+
|
450
|
+
private_constant :Utils
|
451
|
+
end
|
452
|
+
|
@@ -0,0 +1,357 @@
|
|
1
|
+
require 'pretty_proxy'
|
2
|
+
require 'equivalent-xml' # needed for be_equivalent_to xml rspec matcher
|
3
|
+
require 'zlib'
|
4
|
+
|
5
|
+
# Shared example for the readers: changing the object returned by a reader
# can't change what the next call of the same reader returns. Expects the
# including group to define new_args, reader_method_name and change_return
# (a callable, or the name of a method to be sent to the returned object).
shared_examples 'an reader method who encapsulate a mutable variable' do
  context 'when the return is changed' do
    it 'does not change the next return value' do
      instance = described_class.new(*new_args)
      first_return = instance.send(reader_method_name)

      mutation = change_return
      if mutation.respond_to?(:call)
        mutation.call(first_return)
      else
        first_return.send(mutation)
      end

      second_return = instance.send(reader_method_name)
      expect(second_return).to_not eq first_return
    end
  end
end
|
21
|
+
|
22
|
+
describe PrettyProxy do
|
23
|
+
|
24
|
+
def generate_html_for_test(hyperlinks)
|
25
|
+
doc = <<-END
|
26
|
+
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
27
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
28
|
+
<html xmlns="http://www.w3.org/1999/xhtml">
|
29
|
+
<head>
|
30
|
+
<title>A title</title>
|
31
|
+
<meta http-equiv="content-type" content="application/xhtml+xml; charset=UTF-8" />
|
32
|
+
</head>
|
33
|
+
<body>
|
34
|
+
<a href="ARG_0" >a link </a>
|
35
|
+
<p><a href="ARG_1" >other link</a></p>
|
36
|
+
<div>
|
37
|
+
<a href="ARG_2" >another link</a>
|
38
|
+
<p><a href="ARG_3" >yet another link</a></p>
|
39
|
+
</div>
|
40
|
+
<div>
|
41
|
+
<a href="ARG_4" >and yet another link</a>
|
42
|
+
<p><a href="ARG_5" >the last link</a></p>
|
43
|
+
</div>
|
44
|
+
</body>
|
45
|
+
</html>
|
46
|
+
END
|
47
|
+
doc.gsub!(/ARG_\d+/) { | match | hyperlinks[match[4..-1].to_i] }
|
48
|
+
|
49
|
+
doc
|
50
|
+
end
|
51
|
+
|
52
|
+
let(:original_html) { generate_html_for_test(['http://site.net/p2/p2_2/',
|
53
|
+
'http://othersite.net',
|
54
|
+
'../p3', '../p2/p2_2/',
|
55
|
+
'http://site.net/proxy/p1',
|
56
|
+
'../proxy/p1']) }
|
57
|
+
|
58
|
+
let(:proxified_html) { generate_html_for_test(['http://site.net/proxy/p2/p2_2/',
|
59
|
+
'http://othersite.net',
|
60
|
+
'http://site.net/p3', '../p2/p2_2/',
|
61
|
+
'http://site.net/proxy/p1',
|
62
|
+
'http://site.net/proxy/p1']) }
|
63
|
+
|
64
|
+
let (:correct_new_args_example) { ['/proxy/', 'http://myoriginalsite.com', '/content'] }
|
65
|
+
let (:pp) { described_class.new(*correct_new_args_example) }
|
66
|
+
|
67
|
+
describe '.new' do
  # The constructor as a Method object, so each example can call it with
  # its own argument list.
  subject(:new) { described_class.method(:new) }

  # Example description => argument list the constructor must accept.
  accepted_arguments = {
    'accept original_paths as a String' =>
      ['/proxy/', 'http://myoriginalsite.com', '/content'],
    'accept original_paths as an object who yelds strings with #each' =>
      ['/proxy/', 'http://myoriginalsite.com', ['/content', '/other_content']],
    'accept https in the original_domain' =>
      ['/proxy/', 'https://myoriginalsite.com', ['/content']],
    'accept port in the original_domain' =>
      ['/proxy/', 'https://myoriginalsite.com:8080', ['/content']]
  }

  accepted_arguments.each_pair do |description, constructor_args|
    it description do
      built = new.call(*constructor_args)
      expect(built).to be_a_instance_of described_class
    end
  end

  # TODO: Add specs for '/' in the start of the proxy_path

  # Known-good arguments; each failing example below replaces exactly one.
  let(:right_args) { correct_new_args_example }

  context "when proxy_path doesn't end with a '/'" do
    it do
      expect { new.call('/proxy', right_args[1], right_args[2]) }
        .to raise_error(PrettyProxy::ConfigError)
    end
  end

  context 'when the original_domain is invalid' do
    it do
      expect { new.call(right_args[0], 'http://myoriginalsite.com/%%%/', right_args[2]) }
        .to raise_error(PrettyProxy::ConfigError)
    end
  end

  context 'when the original_domain has a query' do
    it do
      expect { new.call(right_args[0], 'http://myoriginalsite.com/?q=error', right_args[2]) }
        .to raise_error(PrettyProxy::ConfigError)
    end
  end

  context 'when the original_domain has a fragment' do
    it do
      expect { new.call(right_args[0], 'http://myoriginalsite.com/#id', right_args[2]) }
        .to raise_error(PrettyProxy::ConfigError)
    end
  end

  context "when the original_paths don't begin with a '/'" do
    it do
      expect { new.call(right_args[0], right_args[1], ['content']) }
        .to raise_error(PrettyProxy::ConfigError)
    end
  end
end
|
106
|
+
|
107
|
+
# NOTE: generated per reader to avoid three copies of the same describe block.
# Maps each reader to the mutation that is applied to the value it returns:
# either a method name or a callable receiving the returned object.
{
  proxy_path: :chop!,
  original_domain: ->(uri) { uri.host = 'otherdomain.com' },
  original_paths: :shift
}.each_pair do |reader, mutation|
  describe "##{reader}" do
    it_behaves_like 'an reader method who encapsulate a mutable variable' do
      let(:reader_method_name) { reader }
      let(:new_args) { ['/proxy/', 'http://myoriginalsite.com', '/content'] }
      let(:change_return) { mutation }
    end
  end
end
|
121
|
+
|
122
|
+
# NOTE: table-driven, possibly overkill since it only saves a few lines.
# Each row: writer method, context description, value the writer must reject.
[
  [:proxy_path=, "when proxy_path doesn't end with a '/'", '/proxy'],
  [:original_domain=, 'when the original_domain is invalid', 'http://myoriginalsite.com/%%%/'],
  [:original_paths=, "when the original_paths don't begin with a '/'", 'content']
].each do |setter, scenario, rejected_value|
  describe "##{setter}" do
    context scenario do
      it do
        expect { pp.send(setter, rejected_value) }
          .to raise_error(PrettyProxy::ConfigError)
      end
    end
  end
end
|
134
|
+
|
135
|
+
describe '#unproxify_url' do
  # Proxy mounted at /proxys/sitez/ in front of http://site.net. '/p1' is
  # configured without a trailing slash, '/p2/p2_2/' with one.
  constructor_args = ['/proxys/sitez/', 'http://site.net', ['/p1', '/p2/p2_2/']]
  let(:pp) { described_class.new(*constructor_args) }

  context 'when the original_path has no trailing slash' do
    it 'allow no trailing slash in the url' do
      unproxified = pp.unproxify_url('http://myproxy.net/proxys/sitez/p1')
      expect(unproxified).to eq URI('http://site.net/p1')
    end

    it 'allow trailing slash in the url' do
      unproxified = pp.unproxify_url('http://myproxy.net/proxys/sitez/p1/')
      expect(unproxified).to eq URI('http://site.net/p1/')
    end
  end

  context 'when the original_path has a trailing slash' do
    it 'allow trailing slash in the url' do
      unproxified = pp.unproxify_url('http://myproxy.net/proxys/sitez/p2/p2_2/')
      expect(unproxified).to eq URI('http://site.net/p2/p2_2/')
    end

    it "don't allow no trailing slash" do
      expect do
        pp.unproxify_url('http://myproxy.net/proxys/sitez/p2/p2_2')
      end.to raise_error(PrettyProxy::ProxyError)
    end
  end

  it 'allow subdirectories inside that path' do
    unproxified = pp.unproxify_url('http://myproxy.net/proxys/sitez/p1/a/b/c/')
    expect(unproxified).to eq URI('http://site.net/p1/a/b/c/')
  end

  it 'preserve querys in the url' do
    unproxified = pp.unproxify_url('http://myproxy.net/proxys/sitez/p1/?q=error&l=pt')
    expect(unproxified).to eq URI('http://site.net/p1/?q=error&l=pt')
  end

  it 'preserve fragments in the url' do
    unproxified = pp.unproxify_url('http://myproxy.net/proxys/sitez/p1/#id')
    expect(unproxified).to eq URI('http://site.net/p1/#id')
  end

  it 'change the port to the original' do
    unproxified = pp.unproxify_url('http://myproxy.net:9292/proxys/sitez/p1/#id')
    expect(unproxified.port).to eq 80
  end

  context 'when the url redirect to the own proxy' do
    # Only this context swaps in a proxy mounted at the root path.
    let(:pp) { described_class.new('/', 'http://myoriginalsite.com/', '/content') }

    it do
      expect { pp.unproxify_url('http://myproxysite.com/proxy/proxy/') }
        .to raise_error(PrettyProxy::ProxyError)
    end
  end

  context "when the url don't begin with the proxy_path" do
    it do
      expect { pp.unproxify_url('http://myproxysite.com/no_proxy/content') }
        .to raise_error(PrettyProxy::ProxyError)
    end
  end

  context "when the proxy_path in the url isn't followed by a original_paths" do
    it do
      expect { pp.unproxify_url('http://myproxysite.com/proxy/other_content') }
        .to raise_error(PrettyProxy::ProxyError)
    end
  end
end
|
180
|
+
|
181
|
+
describe '#proxify_hyperlink' do
  let(:pp) do
    described_class.new('/proxy/', 'http://site.net', ['/p1', '/p2/p2_2/'])
  end

  # In every call the first argument is the hyperlink found in a page and the
  # second is the (proxy) URL of the page that contains it.

  it "proxify absolute hyperlinks to inside the proxy control" do
    result = pp.proxify_hyperlink('http://site.net/p2/p2_2/', 'http://theproxy.net/proxy/p1')
    expect(result).to eq 'http://theproxy.net/proxy/p2/p2_2/'
  end

  it "don't change absolute hyperlinks to ouside the proxy control" do
    result = pp.proxify_hyperlink('http://othersite.net', 'http://theproxy.net/proxy/p1')
    expect(result).to eq 'http://othersite.net'
  end

  it 'change to absolute hyperlinks the relative paths to outside the proxy control' do
    outside_path = pp.proxify_hyperlink('../p3', 'http://theproxy.net/proxy/p1')
    expect(outside_path).to eq 'http://site.net/p3'

    # '/p2/p2_2' without the trailing '/' is not one of the original_paths.
    no_trailing_slash = pp.proxify_hyperlink('../p2/p2_2', 'http://theproxy.net/proxy/p1')
    expect(no_trailing_slash).to eq 'http://site.net/p2/p2_2'
  end

  it "don't change relative paths to inside the proxy control" do
    result = pp.proxify_hyperlink('../p2/p2_2/', 'http://theproxy.net/proxy/p1')
    expect(result).to eq '../p2/p2_2/'
  end

  context 'when the proxy itself is inside the proxy control' do
    # original_paths is '/', so the proxy's own '/proxy/' path is covered too.
    let(:pp) { described_class.new('/proxy/', 'http://site.net', '/') }

    it "dont't change absolute hyperlinks to the proxy itself" do
      from_p1 = pp.proxify_hyperlink('http://site.net/proxy/p1', 'http://site.net/proxy/p1')
      expect(from_p1).to eq 'http://site.net/proxy/p1'

      from_p2 = pp.proxify_hyperlink('http://site.net/proxy/p1', 'http://site.net/proxy/p2/p2_2/')
      expect(from_p2).to eq 'http://site.net/proxy/p1'
    end

    it 'change to absolute hyperlinks the relative paths to the proxy itself' do
      from_p1 = pp.proxify_hyperlink('../proxy/p1', 'http://site.net/proxy/p1')
      expect(from_p1).to eq 'http://site.net/proxy/p1'

      from_p2 = pp.proxify_hyperlink('../../proxy/p1', 'http://site.net/proxy/p2/p2_2/')
      expect(from_p2).to eq 'http://site.net/proxy/p1'
    end
  end
end
|
211
|
+
|
212
|
+
describe '#proxify_html' do
  let(:pp) do
    described_class.new('/proxy/', 'http://site.net', ['/p1', '/p2/p2_2/'])
  end

  it 'apply #proxify_hyperlink in all hyperlinks in the page' do
    # The fixture is XHTML (which is XML), not arbitrary HTML: the only
    # matcher available here checks XML equivalence, not HTML equivalence.
    # The example is therefore not guaranteed to pass for non-XHTML input,
    # because parsing and re-serialising HTML may produce output that is not
    # XML-equivalent to the input. Rewriting the links with a regex instead
    # of Nokogiri might be a way around that.
    page_url = 'http://site.net/proxy/p1'
    proxified = pp.proxify_html(original_html, page_url)

    expect(proxified).to be_equivalent_to(proxified_html)
  end
end
|
224
|
+
|
225
|
+
describe '#rewrite_env' do
  # Baseline Rack env of a request that goes through the proxy.
  # See http://rack.rubyforge.org/doc/SPEC.html for the rack env hash fields spec
  #
  # Frozen on purpose: the hash is shared by every example in this group, so
  # each example must derive its own copy (merge/dup) instead of changing it.
  example_request = {
    'HTTP_HOST' => 'myproxysite.com',
    'SCRIPT_NAME' => '',
    'PATH_INFO' => '/proxy/content',
    'QUERY_STRING' => '',
    'SERVER_NAME' => 'myproxysite.com',
    'SERVER_PORT' => '9292',
    'rack.url_scheme' => 'http'
  }.freeze

  context "when the request is not prefixed by proxy_path" do
    let(:request_outside_proxy_path) do
      example_request.merge('PATH_INFO' => '/no_proxy/content')
    end

    it do
      expect { pp.rewrite_env(request_outside_proxy_path) }
        .to raise_error(PrettyProxy::ProxyError)
    end
  end

  context "when the request don't point to a original_path" do
    # Under the '/proxy/' prefix, but '/other_content' is not below the
    # '/content' original path the proxy under test was built with.
    let(:request_outside_original_paths) do
      example_request.merge('PATH_INFO' => '/proxy/other_content')
    end

    it do
      expect { pp.rewrite_env(request_outside_original_paths) }
        .to raise_error(PrettyProxy::ProxyError)
    end
  end

  # dup (not clone) so the copy handed to #rewrite_env is not frozen.
  let(:by_proxy_request) { example_request.dup }
  let(:rewritten_env) { pp.rewrite_env by_proxy_request }

  context 'when the HTTP_HOST is not empty' do
    it 'change the HTTP_HOST and SERVER_NAME to the unproxyfied version' do
      expect(rewritten_env['HTTP_HOST']).to eq 'myoriginalsite.com'
      expect(rewritten_env['SERVER_NAME']).to eq 'myoriginalsite.com'
    end
  end

  context 'when the HTTP_HOST is empty' do
    let(:by_proxy_request) do
      without_host = example_request.dup
      without_host.delete('HTTP_HOST')
      without_host
    end

    it 'change the SERVER_NAME to the unproxyfied version' do
      expect(rewritten_env.has_key? 'HTTP_HOST').to be_false
      expect(rewritten_env['SERVER_NAME']).to eq 'myoriginalsite.com'
    end
  end

  context 'when the SCRIPT_NAME is not empty and the PATH_INFO is empty' do
    let(:by_proxy_request) do
      example_request.merge('SCRIPT_NAME' => '/proxy/content', 'PATH_INFO' => '')
    end

    it 'changes only the SCRIPT_NAME' do
      expect(rewritten_env['SCRIPT_NAME']).to eq '/content'
      expect(rewritten_env['PATH_INFO']).to eq ''
    end
  end

  context 'when the PATH_INFO is not empty and the SCRIPT_NAME is empty' do
    it 'changes only the PATH_INFO' do
      expect(rewritten_env['PATH_INFO']).to eq '/content'
      expect(rewritten_env['SCRIPT_NAME']).to eq ''
    end
  end

  context 'when the SCRIPT_NAME and the PATH_INFO are not empty' do
    # NOTE: open question - in a real request, does the SCRIPT_NAME have a
    # trailing slash even when the PATH_INFO starts with a slash?
    #
    # merge returns a new hash; the previous in-place update leaked these two
    # values into the shared example_request seen by every other example.
    let(:by_proxy_request) do
      example_request.merge('SCRIPT_NAME' => '/proxy', 'PATH_INFO' => '/content')
    end

    it 'change the SCRIPT_NAME to empty and the PATH_INFO has the full path' do
      expect(rewritten_env['PATH_INFO']).to eq '/content'
      expect(rewritten_env['SCRIPT_NAME']).to eq ''
    end
  end
end
|
285
|
+
|
286
|
+
describe '#rewrite_response' do
  let(:pp) { described_class.new('/proxy/', 'http://site.net', ['/p1', '/p2/p2_2/']) }

  # Rack env of the request as it reaches the proxy.
  # See http://rack.rubyforge.org/doc/SPEC.html for the rack env hash fields spec
  let(:original_env) do
    {
      'HTTP_HOST' => 'site.net',
      'SCRIPT_NAME' => '',
      'PATH_INFO' => '/proxy/p1',
      'QUERY_STRING' => '',
      'SERVER_NAME' => 'site.net',
      'SERVER_PORT' => '80',
      'rack.url_scheme' => 'http'
    }
  end
  let(:rewritten_env) { pp.rewrite_env(original_env) }

  # Rack response triplet [status, headers, body] carrying the uncompressed
  # fixture page.
  let(:response_example) do
    [200,
     { 'content-type' => 'application/xhtml+xml',
       'content-encoding' => 'identity',
       'content-length' => original_html.bytesize.to_s },
     [original_html]]
  end

  context 'when the content-type is html or xhtml' do
    let(:original_response) { response_example }

    # Evaluated lazily, so an example may still adjust original_response
    # before the first reference to subject / rewritten_body.
    subject { pp.rewrite_response(original_response, original_env, rewritten_env) }

    let(:rewritten_headers) { subject[1] }
    let(:rewritten_body) { subject[2].join }
    let(:original_url) { Rack::Request.new(original_env).url }

    # NOTE: exercised with XHTML only, because the matcher in use compares
    # XML equivalence (same limitation as the #proxify_html example).
    it 'apply #proxify_html to the body' do
      expect(rewritten_body).to be_equivalent_to pp.proxify_html(original_html, original_url)
    end

    it 'change the content-length header to the new size of the body' do
      expect(rewritten_headers['content-length']).to eq rewritten_body.bytesize.to_s
    end

    context 'compressed with deflate' do
      it 'decompress, make the changes, and return it compressed again' do
        original_response[1].update({'content-encoding' => 'deflate'})
        deflate = Zlib::Deflate.method :deflate
        original_response[2] = [deflate.call(original_html)]
        inflate = Zlib::Inflate.method :inflate

        expect(inflate.call(rewritten_body)).to be_equivalent_to(proxified_html)
      end
    end

    context 'compressed with gzip' do
      it 'decompress, make the changes, and return it compressed again' do
        original_response[1].update({'content-encoding' => 'gzip'})

        # Gzip a string in memory; closing the writer flushes the gzip
        # trailer into the backing string.
        gzip = ->(str) do
          return_str = ''
          gzip_stream = Zlib::GzipWriter.new(StringIO.new(return_str))
          gzip_stream.write str
          gzip_stream.close
          return_str
        end
        ungzip = ->(str) do
          Zlib::GzipReader.new(StringIO.new(str)).read
        end
        original_response[2] = [gzip.call(original_html)]

        expect(ungzip.call(rewritten_body)).to be_equivalent_to proxified_html
      end
    end

    context 'compressed with another method' do
      let(:original_response) do
        response_example[1].update({'content-encoding' => 'unknown-encoding'})
        response_example
      end

      it { expect { subject }.to raise_error(PrettyProxy::ProxyError) }
    end
  end
end
|
356
|
+
end
|
357
|
+
|
metadata
ADDED
@@ -0,0 +1,179 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: pretty_proxy
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Henrique Becker
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2013-05-13 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: nokogiri
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ~>
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.5'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ~>
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.5'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rack
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ~>
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '1.5'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ~>
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '1.5'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rack-proxy
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ~>
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0.3'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ~>
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0.3'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: equivalent-xml
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ~>
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0.3'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ~>
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0.3'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: thin
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ~>
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '1.5'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ~>
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '1.5'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: json
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ~>
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '1.7'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ~>
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '1.7'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: rspec-core
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - ~>
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '2.13'
|
104
|
+
type: :development
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - ~>
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '2.13'
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: rspec-expectations
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - ~>
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '2.13'
|
118
|
+
type: :development
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - ~>
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '2.13'
|
125
|
+
- !ruby/object:Gem::Dependency
|
126
|
+
name: rake
|
127
|
+
requirement: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - ~>
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: '10.0'
|
132
|
+
type: :development
|
133
|
+
prerelease: false
|
134
|
+
version_requirements: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - ~>
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: '10.0'
|
139
|
+
description: If you want to replicate a site section with some change (like translation)
|
140
|
+
and maintain the url pretty maybe this is the right library.
|
141
|
+
email: henriquebecker91@gmail.com
|
142
|
+
executables: []
|
143
|
+
extensions: []
|
144
|
+
extra_rdoc_files: []
|
145
|
+
files:
|
146
|
+
- lib/pretty_proxy.rb
|
147
|
+
- example/example_spec.rb
|
148
|
+
- example/example_conf.json
|
149
|
+
- example/example.ru
|
150
|
+
- example/heresy.ru
|
151
|
+
- spec/pretty_proxy_spec.rb
|
152
|
+
- Rakefile
|
153
|
+
homepage: http://rubygems.org/gems/pretty_proxy
|
154
|
+
licenses:
|
155
|
+
- Public domain
|
156
|
+
metadata: {}
|
157
|
+
post_install_message:
|
158
|
+
rdoc_options: []
|
159
|
+
require_paths:
|
160
|
+
- lib
|
161
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
162
|
+
requirements:
|
163
|
+
- - '>='
|
164
|
+
- !ruby/object:Gem::Version
|
165
|
+
version: '0'
|
166
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
167
|
+
requirements:
|
168
|
+
- - '>='
|
169
|
+
- !ruby/object:Gem::Version
|
170
|
+
version: '0'
|
171
|
+
requirements: []
|
172
|
+
rubyforge_project:
|
173
|
+
rubygems_version: 2.0.0
|
174
|
+
signing_key:
|
175
|
+
specification_version: 4
|
176
|
+
summary: A Rack::Proxy child pretty url oriented
|
177
|
+
test_files:
|
178
|
+
- spec/pretty_proxy_spec.rb
|
179
|
+
has_rdoc:
|