phantom_proxy 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +2 -0
- data/README.rdoc +37 -0
- data/bin/phantom_proxy +37 -0
- data/bin/phantomjs +0 -0
- data/lib/phantom_proxy/config.ru +10 -0
- data/lib/phantom_proxy/phantomjs.rb +84 -0
- data/lib/phantom_proxy/phantomjsserver.rb +104 -0
- data/lib/phantom_proxy/scripts/proxy.js +169 -0
- data/lib/phantom_proxy.rb +10 -0
- metadata +71 -0
data/Gemfile
ADDED
data/README.rdoc
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
== Phantom Proxy - A webkit proxy
|
2
|
+
The phantom proxy acts as a http proxy server. It fetches the remote webpages
|
3
|
+
with the help of phantomjs (see http://www.phantomjs.org/).
|
4
|
+
|
5
|
+
You can use this to get a page after its JavaScript has been executed. By setting some HTTP
|
6
|
+
headers you can get the page with all iframes included or as an image.
|
7
|
+
|
8
|
+
== Installation
|
9
|
+
Install phantomjs (see: http://code.google.com/p/phantomjs/wiki/BuildInstructions)
|
10
|
+
|
11
|
+
On Debian:
|
12
|
+
|
13
|
+
sudo apt-get install libqt4-dev libqtwebkit-dev qt4-qmake
|
14
|
+
cd phantom
|
15
|
+
git clone https://github.com/ariya/phantomjs.git
|
16
|
+
git checkout 1.2
|
17
|
+
qmake-qt4 && make
|
18
|
+
|
19
|
+
checkout phantom_proxy
|
20
|
+
|
21
|
+
gem build phantom_proxy.gemspec
|
22
|
+
|
23
|
+
gem install phantom_proxy-*.gem
|
24
|
+
|
25
|
+
== Usage
|
26
|
+
Run
|
27
|
+
phantom_proxy
|
28
|
+
either with -self (ip, port) to not use the thin::runner framework
|
29
|
+
or
|
30
|
+
with any thin parameter you want (e.g. -p 8080).
|
31
|
+
|
32
|
+
Point your browser's proxy to http://localhost:8080 for testing.
|
33
|
+
|
34
|
+
You can use the Net::HTTP lib to fetch a page or use the phantom_client
|
35
|
+
(see: https://github.com/experteer/phantom_client).
|
36
|
+
|
37
|
+
== TODO
|
data/bin/phantom_proxy
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'thin'
|
4
|
+
|
5
|
+
require 'fileutils'
|
6
|
+
require 'timeout'
|
7
|
+
require 'stringio'
|
8
|
+
require 'time'
|
9
|
+
require 'forwardable'
|
10
|
+
require 'rack'
|
11
|
+
require 'daemons'
|
12
|
+
|
13
|
+
module PhantomJSProxy
  # Rackup file handed to thin when the proxy is started through Thin::Runner.
  CONFIG = File.expand_path(File.dirname(__FILE__))+"/../lib/phantom_proxy/config.ru"
end

require 'phantom_proxy'

# Options for Daemons.daemonize. They are currently unused because
# daemonizing is disabled (see the commented-out call below).
options = {
  :app_name => "phantom_proxy",
  :backtrace => true,
  :ontop => true,
  :log_output => true
}
#Daemons.daemonize(options)

# Any argument containing "-self" selects the standalone Thin::Server
# instead of the Thin::Runner command line front end.
self_switch = /-self/
standalone = ARGV.any? { |arg| self_switch.match(arg) }

if standalone
  # Hand only the remaining arguments (ip, port - see README) to the server.
  # Previously the raw ARGV[0..2] were forwarded, so the "-self" switch
  # itself was passed along as a positional host/port argument.
  # NOTE(review): assumes Thin::Server.start accepts (app, host, port) and
  # falls back to its own defaults when host/port are omitted - confirm.
  host, port = ARGV.reject { |arg| self_switch.match(arg) }
  server_args = [host, port].compact
  Thin::Server.start(PhantomJSProxy::PhantomJSServer.new, *server_args)
else
  # Regular mode: every command line argument is passed through to thin.
  startoptions = ["start", "-R", PhantomJSProxy::CONFIG, "-P", "/tmp/pids/phantom_proxy.pid", "--tag", "phantom_proxy"]+ARGV
  Thin::Runner.new(startoptions).run!
end
|
data/bin/phantomjs
ADDED
Binary file
|
@@ -0,0 +1,84 @@
|
|
1
|
+
require 'tempfile'
|
2
|
+
|
3
|
+
module PhantomJSProxy
  # Runs the phantomjs binary with the proxy script and keeps the result of
  # the last fetch: the rendered DOM (+dom+), the optional PNG screenshot
  # (+image+) and whether the fetch succeeded (+ready+).
  class PhantomJS
    # Directory shared with phantomjs: holds the cookie jar and the
    # temporary screenshot files.
    WORK_DIR = "/tmp/phantomjs_proxy"

    attr_accessor :dom
    attr_accessor :image
    attr_accessor :ready

    def initialize()
      @ready = false
    end

    # Fetches +url+ through phantomjs.
    #
    # url         - String URL; an optional query string is split off and
    #               handed to the script as separate arguments.
    # pictureOnly - when truthy a PNG of the page is rendered and stored in
    #               +image+.
    # loadIFrames - when truthy the script is asked to inline iframe content.
    #
    # Returns the DOM as a String, or "Failed to load page" (with +ready+
    # left false) when the phantomjs output does not contain a complete
    # result.
    def getUrl(url, pictureOnly=true, loadIFrames=true)
      puts("PhantomJS: "+url)
      @ready = false

      pictureFile = nil
      picture = "none"
      loadFrames = loadIFrames ? "true" : "false"

      begin
        if pictureOnly
          ensureWorkDir
          # Pass the directory explicitly instead of hiding it in the file
          # name prefix, so the file really ends up inside WORK_DIR.
          pictureFile = Tempfile.new(["page", ".png"], WORK_DIR)
          picture = pictureFile.path
        end

        url_args = ""
        url_args_ = []

        if /\?/.match(url)
          pieces = url.split('?')
          url_args = pieces[1]
          url = pieces[0]

          if url_args
            url_args_ = url_args.split('&')
            url_args = url_args_.join(' ')
          end
        end

        @dom = invokePhantomJS(SCRIPT, [picture, loadFrames, url, url_args_.length, url_args]).to_s

        # The script may die before printing this line; do not crash on it.
        opened = /Open page: (.*?) END/.match(@dom)
        puts("Opened page: "+opened[1]) if opened

        content = extractDom(@dom)
        if content
          @dom = content
          if pictureOnly && File.exist?(picture)
            puts("File is there")
            @image = File.open(picture, "rb") {|f| f.read }
          else
            puts("No file to load at: "+picture)
            @image = ""
          end
          @ready = true
        else
          @dom = "Failed to load page"
          puts("TOTAL FAIL")
        end
      ensure
        # Remove the temporary screenshot on every path, including failures.
        pictureFile.close! if pictureFile
      end

      puts("Return dom")
      return @dom
    end

    # Returns a raw HTTP/1.0 response carrying the last screenshot
    # (an empty body when no image has been loaded yet).
    def getAsImageResponse(type='png')
      return "HTTP/1.0 200 OK\r\nConnection: close\r\nContent-Type: image/"+type+"\r\n\r\n"+@image.to_s;
    end

    # Runs phantomjs with +script+ and +args+ and returns everything it
    # printed to stdout ("" when the binary could not be started).
    #
    # The command is passed to IO.popen as an argument vector, so no shell
    # is involved and shell metacharacters in a requested URL cannot run
    # commands. Arguments are split on whitespace to keep the word splitting
    # the former shell invocation provided (getUrl passes the query
    # parameters as one space separated string).
    def invokePhantomJS(script, args)
      argString = " "+args.join(" ")
      puts("Call phantomJS with: "+argString)

      # The cookie jar lives in WORK_DIR, also when no picture is requested.
      ensureWorkDir
      words = args.flat_map { |arg| arg.to_s.split }
      command = [PHANTOMJS_BIN, "--cookies-file=#{WORK_DIR}/cookies.txt", script.to_s] + words

      o = begin
        # Block form closes the pipe once the output has been read.
        IO.popen(command) { |out| out.read }.to_s
      rescue SystemCallError => e
        puts("PhantomJS could not be started: "+e.message)
        ""
      end
      puts("PHANTOMJS_OUT: "+o)
      return o
    end

    private

    # Creates WORK_DIR unless it already exists.
    def ensureWorkDir
      Dir.mkdir(WORK_DIR) unless File.directory?(WORK_DIR)
    rescue Errno::EEXIST
      # Another process created it in the meantime.
    end

    # Cuts the DOM out of the script output. Returns nil when the page was
    # not loaded or the DOM markers are missing.
    def extractDom(output)
      return nil unless /DONE_LOADING_URL/.match(output)
      tail = output.split('PHANTOMJS_DOMDATA_WRITE:')[1]
      return nil unless tail
      tail.split('PHANTOMJS_DOMDATA_END')[0] || ""
    end
  end
end
|
@@ -0,0 +1,104 @@
|
|
1
|
+
require 'net/http'
|
2
|
+
|
3
|
+
module PhantomJSProxy
  # Rack application implementing the proxy: static assets (scripts, style
  # sheets, images) are fetched directly from the origin server, everything
  # else is rendered through PhantomJS.
  class PhantomJSServer
    # File extensions that bypass PhantomJS and the content type used when
    # the origin server does not send one.
    ROUTED_TYPES = {
      '.js'   => 'application/javascript',
      '.css'  => 'text/css',
      '.png'  => 'image/png',
      '.jpg'  => 'image/jpeg',
      '.jpeg' => 'image/jpeg',
      '.gif'  => 'image/gif'
    }.freeze

    def initialize()
    end

    # Decides whether +url+ points to a static asset.
    #
    # Only the extension of the path component is inspected, so host names
    # ("example.js.org"), query strings ("?file=a.js") and longer extensions
    # (".json", ".jsp") no longer trigger a route.
    #
    # Returns the content type for routed requests, "none" otherwise.
    def check_for_route(url)
      path = url.to_s.sub(%r{\A[a-z][a-z0-9+.\-]*://[^/?#]*}i, '')
      path = path.split(/[?#]/, 2)[0].to_s
      ROUTED_TYPES.fetch(File.extname(path).downcase, "none")
    end

    # Fetches the requested resource directly from the origin server and
    # relays status, content type and body.
    #
    # env  - the Rack environment of the incoming request.
    # type - fallback content type (see check_for_route).
    #
    # Returns the finished Rack response triplet.
    def route(env, type)
      # HTTP_HOST may carry a ":port" suffix which is not part of the name.
      # NOTE(review): assumes SERVER_PORT holds the port of the requested
      # host, as the original code did - confirm for the server in use.
      host = env['HTTP_HOST'].to_s.sub(/:\d+\z/, '')

      # Forward the client's User-Agent; it used to be set on a request
      # object that was never sent.
      headers = {}
      headers['User-Agent'] = env['HTTP_USER_AGENT'] if env['HTTP_USER_AGENT']

      _res = Net::HTTP.start(host, env['SERVER_PORT']) {|http|
        http.get(env['REQUEST_URI'], headers)
      }

      # Bodies may be binary or absent; log a summary instead of the payload.
      body = _res.body.to_s
      env['rack.errors'].write("Response is: #{_res.code} (#{body.length} bytes)\n")

      content_type = _res['Content-Type'] || type
      resp = Rack::Response.new([], _res.code.to_i, {'Content-Type' => content_type}) { |r|
        r.write(body)
      }
      resp.finish
    end

    # Rack entry point.
    #
    # Request headers understood:
    #   Get-Page-As-Image     - answer with a PNG rendering of the page
    #   Get-Page-With-Iframes - inline the content of iframes
    # Both are off when the header is missing or false-like.
    #
    # Returns the finished Rack response triplet.
    def call(env)
      req = Rack::Request.new(env)

      env_dump = env.collect { |k, v| "#{k} : #{v}\n" }.join
      env['rack.errors'].write("The request: "+req.url()+"\nGET: "+env_dump+"\n")

      # Joined without line breaks: the string becomes part of the URL.
      params = req.params.collect { |k, v| "#{k}=#{v}" }.join('&')
      env['rack.errors'].write("Paramas: "+params+"\n")

      #this routes the request to the outgoing server incase its not html that we want to load
      type = check_for_route(env['REQUEST_URI'])
      if type != "none"
        return route(env, type)
      end

      #Fetch the Webpage with PhantomJS
      phJS = PhantomJS.new

      env['rack.errors'].write("Extract the uri\n")

      # `defined? env[...]` was always truthy, so its defaults never applied
      # and a header value of "false" switched the feature on.
      picture = header_enabled?(env, 'HTTP_GET_PAGE_AS_IMAGE')
      loadFrames = header_enabled?(env, 'HTTP_GET_PAGE_WITH_IFRAMES')

      url = env['REQUEST_URI'].to_s
      # Only append the parameters when the URI carries no query string yet.
      if params.length > 0 && !url.include?('?')
        url = url+'?'+params
      end

      phJS.getUrl(url, picture, loadFrames)

      #Create the response
      status, content_type, body =
        if !phJS.ready
          [503, 'text/html', phJS.dom]
        elsif picture
          [200, 'image/png', phJS.image]
        else
          [200, 'text/html', phJS.dom]
        end

      resp = Rack::Response.new([], status, {'Content-Type' => content_type}) { |r|
        r.write(body)
      }
      resp.finish
    end

    private

    # True when the request header +name+ is present and its value is not
    # one of "false", "0", "no" or "off".
    def header_enabled?(env, name)
      value = env[name]
      return false if value.nil?
      !(/\A\s*(false|0|no|off)\s*\z/i.match(value.to_s))
    end
  end
end
|
@@ -0,0 +1,169 @@
|
|
1
|
+
var fs = require('fs');
|
2
|
+
|
3
|
+
var framesWorked = 0;
|
4
|
+
var frameCount = 1;
|
5
|
+
var frameContent = [];
|
6
|
+
var masterURL = "";
|
7
|
+
|
8
|
+
/**
 * Evaluates `func` inside `page` after making every property of `vars`
 * available as a global (`window[name]`) in the page context.
 *
 * The source text of `func` is rewritten so that its body starts with the
 * variable set-up, and the resulting source string is passed to
 * page.evaluate().
 *
 * @param page  object offering evaluate(source)
 * @param func  function to run in the page
 * @param vars  JSON-serializable object of values to expose
 * @return whatever page.evaluate() returns
 */
var evaluateWithVars = function(page, func, vars)
{
    var fstr = func.toString();
    // JSON text is embedded directly as an object literal. Wrapping it in a
    // single-quoted string for JSON.parse broke on quotes and backslashes.
    var prologue =
        "var vars = " + JSON.stringify(vars) + "\n" +
        "for (var v in vars) window[v] = vars[v]\n" +
        "\n";
    // Captures the optional name plus the parameter list of the function
    // header. A replacer function is used so that `$` sequences inside the
    // serialized values are not treated as replacement patterns.
    var evalstr = fstr.replace(
        /function\s*([^(]*\(.*?\))\s*\{/,
        function (header, signature) {
            return "function " + signature + " {\n" + prologue;
        }
    );
    console.log(evalstr);
    return page.evaluate(evalstr);
};
|
22
|
+
|
23
|
+
/**
 * Final step of a proxy run: opens `url` once more, marks every iframe with
 * a placeholder, substitutes the placeholders with the frame documents
 * collected in `frameContent`, prints the merged DOM between the
 * PHANTOMJS_DOMDATA_WRITE / PHANTOMJS_DOMDATA_END markers and exits
 * phantomjs.
 *
 * @param url  address of the master page
 */
var insertFrames = function(url) {
    // Wraps text in a replacer function so String.replace inserts it
    // verbatim; a plain replacement string would interpret "$&", "$1", "$$"
    // etc. that occur in the frame's HTML/JavaScript.
    var verbatim = function (text) {
        return function () { return text; };
    };

    var page = require('webpage').create();
    page.onConsoleMessage = function (msg) { console.log(msg); };
    page.onAlert = function(msg) { console.log(msg);};
    page.onLoadStarted = function () {
        console.log('Start loading final Page...'+url);
    };
    page.open(url, function (status) {
        if (status !== 'success') {
            console.log('FAILED_LOADING_URL: '+url);
        } else {
            page.evaluate(function () {
                // copy the collection before the elements are modified
                var framestmp = document.getElementsByTagName('IFRAME');
                var frames = [];
                for (var i=0;i<framestmp.length;i++) {
                    frames.push(framestmp[i]);
                }
                //mark iframes
                for (var i in frames) {
                    frames[i].innerHTML = "PHANTOMJS_PROXY_IFRAME"+i;
                }
            });
            //replace iframes with their data
            var content = String(page.content);
            for (var i in frameContent) {
                content = content.replace(
                    "PHANTOMJS_PROXY_IFRAME"+i,
                    verbatim("<phantomjsframe>"+frameContent[i]+"</phantomjsframe>")
                );
            }
            console.log("PHANTOMJS_DOMDATA_WRITE:"+content);
            console.log('PHANTOMJS_DOMDATA_END');
        }
        console.log('WHATEVER');
        phantom.exit();
    });
};
|
57
|
+
|
58
|
+
/**
 * Records that one more page (the master page or one of its frames) has
 * finished. When the number of finished pages equals `frameCount`, the
 * final merge step is started for `masterURL`.
 */
function exit() {
    framesWorked += 1;
    if (framesWorked != frameCount) {
        return;
    }
    insertFrames(masterURL);
}
|
63
|
+
|
64
|
+
/**
 * Loads one frame document and stores its content in `frameContent`.
 *
 * @param url    address of the frame document
 * @param index  optional position of the frame in the master page. When it
 *               is given the content is stored at that position, so the
 *               order in which the loads complete cannot mix up the frames;
 *               without it the content is appended as before.
 */
var loadpage = function(url, index) {
    var page = require('webpage').create();
    page.onConsoleMessage = function (msg) { console.log(msg); };
    //page.onLoadFinished =
    page.onAlert = function(msg) { console.log(msg);};
    page.onLoadStarted = function () {
        console.log('Start loading...'+url);
    };
    page.open(url, function (status) {
        if (status !== 'success') {
            console.log('FAILED_LOADING_URL: '+url);
        } else {
            console.log('LOADED PAGE CONTENT['+url+']\n');
            if (typeof index === 'number') {
                frameContent[index] = page.content;
            } else {
                frameContent.push(page.content);
            }
        }
        console.log('WHATEVER');
        // counts this frame as done, whether or not it loaded
        exit();
    });
};
|
83
|
+
|
84
|
+
/**
 * Collects the addresses of all iframes in `page` and starts loading each
 * of them with loadpage().
 *
 * @param page  the already opened master page
 */
function loadIFrames(page) {
    var frames = page.evaluate(function () {
        var framestmp = document.getElementsByTagName('IFRAME');
        var frames = [];
        for (var i=0;i<framestmp.length;i++) {
            // The `src` property is the resolved (absolute) address; the raw
            // attribute may be relative to the master page and would not
            // load on its own.
            frames.push(framestmp[i].src || framestmp[i].getAttribute('src'));
        }
        return frames;
    });

    // Announce the expected number of pages (frames + master page) before
    // any load is started, so a load that finishes immediately cannot see a
    // stale count.
    frameCount = frames.length+1;

    for (var i=0;i<frames.length;i++) {
        console.log("Frame: "+i+" : "+frames[i]);
        // the index lets the content be matched to its frame later on
        loadpage(frames[i], i);
    }
}
|
101
|
+
|
102
|
+
/**
 * Script entry point.
 *
 * Arguments (phantom.args):
 *   0   picture file name, or "none" to skip rendering
 *   1   "true"/"false": load iframe contents
 *   2   URL without query string
 *   3   number of query parameters that follow
 *   4.. the query parameters ("name=value")
 *
 * Opens the page, optionally renders it to the picture file and starts the
 * iframe handling. The marker lines printed here are parsed by the caller.
 */
function main() {

    // picture file, iframe flag and URL are all required
    if (phantom.args.length < 3) {
        console.log('Usage: proxy.js <picture filename or none> <load iframe(true/false)> <URL> <url param count> <url params...>');
        phantom.exit();
        return;
    }

    var file_name = phantom.args[0];
    var loadIframes = /true/i.test(phantom.args[1]);
    var address = phantom.args[2];

    var argCount = parseInt(phantom.args[3], 10) || 0;

    // rebuild the query string from the individual parameters
    var params = [];
    for (var i=0;i<argCount;i++) {
        if (phantom.args[i+4] !== undefined)
            params.push(phantom.args[i+4]);
    }
    var args = params.join('&');
    if (args.length > 0)
        address += '?'+args;

    console.log("Open page: "+address+", "+args+" END");

    var page = require('webpage').create();

    page.onConsoleMessage = function (msg) { console.log(msg); };

    console.log('start openning page');

    masterURL = address;

    page.open(address, function (status) {
        if (status !== 'success') {
            console.log('FAILED_LOADING_URL');
        } else {
            console.log('DONE_LOADING_URL');

            //load iframes into page
            if (loadIframes) {
                loadIFrames(page);
            }
            //evaluateWithVars(page, function(){}, phantom.args);
            console.log('PHANTOMJS_MAINDOM_WRITE:'+page.content);
            console.log('PHANTOMJS_MAINDOM_END');
        }
        if (file_name != null && file_name != "none") {
            page.render(file_name);
        }
        // the master page counts as one finished page
        exit();
    });
}
|
168
|
+
|
169
|
+
main();
|
@@ -0,0 +1,10 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
|
3
|
+
module PhantomJSProxy
  # Absolute path of the directory containing this file.
  ROOT = File.dirname(File.expand_path(__FILE__))
  # PhantomJS script that loads a page and prints its DOM.
  SCRIPT = "#{ROOT}/phantom_proxy/scripts/proxy.js"
  # Location of the phantomjs binary, relative to ROOT.
  PHANTOMJS_BIN = "#{ROOT}/../bin/phantomjs"
end
|
8
|
+
|
9
|
+
require PhantomJSProxy::ROOT+'/phantom_proxy/phantomjs.rb'
|
10
|
+
require PhantomJSProxy::ROOT+'/phantom_proxy/phantomjsserver.rb'
|
metadata
ADDED
@@ -0,0 +1,71 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: phantom_proxy
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Daniel Sudmann
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-03-23 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: thin
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: 1.3.1
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: 1.3.1
|
30
|
+
description: This is a phantomjs Proxy; it allows you to fetch webpages and execute
|
31
|
+
javascript in them.
|
32
|
+
email: suddani@googlemail.com
|
33
|
+
executables:
|
34
|
+
- phantom_proxy
|
35
|
+
extensions: []
|
36
|
+
extra_rdoc_files: []
|
37
|
+
files:
|
38
|
+
- lib/phantom_proxy/phantomjsserver.rb
|
39
|
+
- lib/phantom_proxy/phantomjs.rb
|
40
|
+
- lib/phantom_proxy.rb
|
41
|
+
- lib/phantom_proxy/scripts/proxy.js
|
42
|
+
- lib/phantom_proxy/config.ru
|
43
|
+
- bin/phantom_proxy
|
44
|
+
- bin/phantomjs
|
45
|
+
- README.rdoc
|
46
|
+
- Gemfile
|
47
|
+
homepage: http://experteer.com
|
48
|
+
licenses: []
|
49
|
+
post_install_message:
|
50
|
+
rdoc_options: []
|
51
|
+
require_paths:
|
52
|
+
- lib
|
53
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
54
|
+
none: false
|
55
|
+
requirements:
|
56
|
+
- - ! '>='
|
57
|
+
- !ruby/object:Gem::Version
|
58
|
+
version: '0'
|
59
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
60
|
+
none: false
|
61
|
+
requirements:
|
62
|
+
- - ! '>='
|
63
|
+
- !ruby/object:Gem::Version
|
64
|
+
version: '0'
|
65
|
+
requirements: []
|
66
|
+
rubyforge_project:
|
67
|
+
rubygems_version: 1.8.19
|
68
|
+
signing_key:
|
69
|
+
specification_version: 3
|
70
|
+
summary: This is a phantomjs Proxy
|
71
|
+
test_files: []
|