phantom_proxy 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +2 -0
- data/README.rdoc +37 -0
- data/bin/phantom_proxy +37 -0
- data/bin/phantomjs +0 -0
- data/lib/phantom_proxy/config.ru +10 -0
- data/lib/phantom_proxy/phantomjs.rb +84 -0
- data/lib/phantom_proxy/phantomjsserver.rb +104 -0
- data/lib/phantom_proxy/scripts/proxy.js +169 -0
- data/lib/phantom_proxy.rb +10 -0
- metadata +71 -0
data/Gemfile
ADDED
data/README.rdoc
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
== Phantom Proxy - A webkit proxy
|
2
|
+
The phantom proxy acts as an HTTP proxy server. It fetches the remote webpages
|
3
|
+
with the help of phantomjs (see http://www.phantomjs.org/).
|
4
|
+
|
5
|
+
You can use this to get a page after the JavaScript execution. By setting some HTTP
|
6
|
+
headers you can get the page with all iframes included or as an image.
|
7
|
+
|
8
|
+
== Installation
|
9
|
+
Install phantomjs (see: http://code.google.com/p/phantomjs/wiki/BuildInstructions)
|
10
|
+
|
11
|
+
On Debian:
|
12
|
+
|
13
|
+
sudo apt-get install libqt4-dev libqtwebkit-dev qt4-qmake
|
14
|
+
cd phantom
|
15
|
+
git clone https://github.com/ariya/phantomjs.git
|
16
|
+
git checkout 1.2
|
17
|
+
qmake-qt4 && make
|
18
|
+
|
19
|
+
checkout phantom_proxy
|
20
|
+
|
21
|
+
gem build phantom_proxy.gemspec
|
22
|
+
|
23
|
+
gem install phantom_proxy-*.gem
|
24
|
+
|
25
|
+
== Usage
|
26
|
+
Run
|
27
|
+
phantom_proxy
|
28
|
+
either with -self (ip, port) to not use the thin::runner framework
|
29
|
+
or
|
30
|
+
with any thin parameter you want (e.g. -p 8080).
|
31
|
+
|
32
|
+
Point your browser's proxy to http://localhost:8080 for testing.
|
33
|
+
|
34
|
+
You can use the Net::HTTP lib to fetch pages or use the phantom_client
|
35
|
+
(see: https://github.com/experteer/phantom_client).
|
36
|
+
|
37
|
+
== TODO
|
data/bin/phantom_proxy
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'thin'
|
4
|
+
|
5
|
+
require 'fileutils'
|
6
|
+
require 'timeout'
|
7
|
+
require 'stringio'
|
8
|
+
require 'time'
|
9
|
+
require 'forwardable'
|
10
|
+
require 'rack'
|
11
|
+
require 'daemons'
|
12
|
+
|
13
|
+
module PhantomJSProxy
  # Rackup file handed to Thin::Runner when the proxy is started through thin.
  CONFIG = "#{File.expand_path(File.dirname(__FILE__))}/../lib/phantom_proxy/config.ru"
end

require 'phantom_proxy'

# Settings for running as a daemon. They are currently unused because the
# Daemons.daemonize call below is disabled.
options = {
  :app_name => "phantom_proxy",
  :backtrace => true,
  :ontop => true,
  :log_output => true
}
#Daemons.daemonize(options)

# Any argument containing "-self" selects the stand-alone server instead of
# the Thin::Runner command line front end.
standalone = ARGV.any? { |arg| arg =~ /-self/ }

if standalone
  # The first three command line arguments are passed straight to Thin::Server.
  Thin::Server.start(PhantomJSProxy::PhantomJSServer.new, ARGV[0], ARGV[1], ARGV[2])
else
  # Every command line argument is appended to the fixed thin options.
  runner_args = ["start", "-R", PhantomJSProxy::CONFIG, "-P", "/tmp/pids/phantom_proxy.pid", "--tag", "phantom_proxy"] + ARGV
  Thin::Runner.new(runner_args).run!
end
|
data/bin/phantomjs
ADDED
Binary file
|
@@ -0,0 +1,84 @@
|
|
1
|
+
require 'tempfile'
|
2
|
+
|
3
|
+
module PhantomJSProxy
  # Runs the phantomjs binary (PHANTOMJS_BIN) with the proxy script against a
  # URL and keeps the results: the rendered DOM in #dom, an optional PNG
  # screenshot in #image and a success flag in #ready.
  class PhantomJS
    # Scratch directory shared with phantomjs: it holds the cookie jar and the
    # temporary screenshot files.
    WORK_DIR = "/tmp/phantomjs_proxy"

    attr_accessor :dom
    attr_accessor :image
    attr_accessor :ready

    def initialize()
      @ready = false
    end

    # Loads +url+ through phantomjs.
    #
    # url         - String URL; an optional query string is split off and
    #               handed to the script as separate arguments.
    # pictureOnly - when truthy a PNG screenshot is rendered and stored in #image.
    # loadIFrames - when truthy the script inlines the content of iframes.
    #
    # Returns the rendered DOM, or "Failed to load page" when the script output
    # does not contain a complete result. #ready is true only on success.
    def getUrl(url, pictureOnly=true, loadIFrames=true)
      puts("PhantomJS: "+url.to_s)
      @ready = false

      # The cookie jar lives in WORK_DIR too, so the directory is needed even
      # when no screenshot is requested.
      ensure_work_dir

      pictureFile = nil
      picture = "none"
      if pictureOnly
        # Pass the directory separately: a path separator inside the Tempfile
        # prefix is not a reliable way to select the target directory.
        pictureFile = Tempfile.new(["page", ".png"], WORK_DIR)
        picture = pictureFile.path
      end

      loadFrames = loadIFrames ? "true" : "false"

      # Only the part between the first and second '?' is used as the query.
      # Empty segments are dropped so the argument count always matches the
      # number of arguments that reach the script.
      parts = url.to_s.split('?')
      url = parts[0].to_s
      url_args_ = parts[1].to_s.split('&').reject { |segment| segment.empty? }
      url_args = url_args_.join(' ')

      output = invokePhantomJS(SCRIPT, [picture, loadFrames, url, url_args_.length, url_args]).to_s

      opened = /Open page: (.*?) END/.match(output)
      puts("Opened page: "+opened[1]) if opened

      rendered = extract_dom(output)
      if rendered
        @dom = rendered
        if pictureOnly && File.exist?(picture)
          puts("File is there")
          @image = File.binread(picture)
        else
          puts("No file to load at: "+picture)
          @image = ""
        end
        @ready = true
      else
        @dom = "Failed to load page"
        puts("TOTAL FAIL")
      end
      puts("Return dom")
      @dom
    ensure
      # Remove the screenshot file on every path, including failures.
      pictureFile.close! if pictureFile
    end

    # Returns the stored image wrapped in a raw HTTP/1.0 response string.
    # A missing image yields a response with an empty body.
    def getAsImageResponse(type='png')
      "HTTP/1.0 200 OK\r\nConnection: close\r\nContent-Type: image/"+type+"\r\n\r\n"+@image.to_s
    end

    # Runs phantomjs with +script+ and +args+ and returns everything it wrote
    # to stdout.
    #
    # Each element of +args+ is split on whitespace into separate command line
    # arguments (matching the previous space-joined command string), but the
    # command is executed without a shell, so characters such as ';' or '&'
    # coming from a request URL are never interpreted.
    #
    # Returns "" when the binary cannot be started.
    def invokePhantomJS(script, args)
      argv = args.flat_map { |arg| arg.to_s.split }
      puts("Call phantomJS with:  "+argv.join(" "))
      command = [PHANTOMJS_BIN, "--cookies-file=#{WORK_DIR}/cookies.txt", script] + argv
      # Block form closes the pipe and reaps the child process.
      o = IO.popen(command) { |out| out.read }.to_s
      puts("PHANTOMJS_OUT: "+o)
      o
    rescue SystemCallError => e
      puts("PHANTOMJS_FAILED: "+e.message)
      ""
    end

    private

    # Creates WORK_DIR unless it is already there.
    def ensure_work_dir
      Dir.mkdir(WORK_DIR) unless File.directory?(WORK_DIR)
    rescue Errno::EEXIST
      # Another process created the directory in the meantime.
    end

    # Extracts the DOM printed between the PHANTOMJS_DOMDATA_WRITE: and
    # PHANTOMJS_DOMDATA_END markers. Returns nil when the script did not
    # report DONE_LOADING_URL or never wrote the DOM marker.
    def extract_dom(output)
      return nil unless /DONE_LOADING_URL/.match(output)
      payload = output.split('PHANTOMJS_DOMDATA_WRITE:', 2)[1]
      return nil unless payload
      payload.split('PHANTOMJS_DOMDATA_END', 2)[0].to_s
    end
  end
end
|
@@ -0,0 +1,104 @@
|
|
1
|
+
require 'net/http'
|
2
|
+
|
3
|
+
module PhantomJSProxy
  # Rack application implementing the proxy. Static assets (scripts, style
  # sheets, images) are fetched directly from the remote server; every other
  # request is rendered through PhantomJS.
  class PhantomJSServer
    # File extensions that bypass PhantomJS, with the fallback content type
    # used when the upstream response does not declare one.
    PASS_THROUGH_TYPES = {
      '.js'   => 'application/javascript',
      '.json' => 'application/json',
      '.css'  => 'text/css',
      '.png'  => 'image/png',
      '.jpg'  => 'image/jpeg',
      '.jpeg' => 'image/jpeg',
      '.gif'  => 'image/gif'
    }.freeze

    def initialize()
    end

    # Decides whether +url+ should be fetched directly instead of rendered.
    #
    # Only the extension of the URL path is inspected; the host name, query
    # string and fragment are ignored so that e.g. "index.jsp" or
    # "page?file=a.css" are still rendered through PhantomJS.
    #
    # Returns the content type for pass-through resources, "none" otherwise.
    def check_for_route(url)
      path = url.to_s.sub(%r{\A[a-z][a-z0-9+.-]*://[^/?#]*}i, '').split(/[?#]/, 2)[0].to_s
      PASS_THROUGH_TYPES.fetch(File.extname(path).downcase, "none")
    end

    # Fetches the requested resource from the remote server and relays it.
    #
    # env  - the Rack environment of the incoming request.
    # type - content type to use when the upstream response has none.
    #
    # The upstream status code is forwarded. Returns a finished Rack response.
    def route(env, type)
      host, port = upstream_address(env)

      _req = Net::HTTP::Get.new(env['REQUEST_URI'])
      _req['User-Agent'] = env['HTTP_USER_AGENT'] if env['HTTP_USER_AGENT']

      _res = Net::HTTP.start(host, port) { |http| http.request(_req) }

      # The body may be nil (e.g. 204/304) and may be binary, so only its size
      # is logged.
      body = _res.body.to_s
      env['rack.errors'].write("Response is: #{_res.code} (#{body.length} bytes)\n")

      respond(_res.code.to_i, _res['Content-Type'] || type, body)
    end

    # Rack entry point.
    #
    # Request headers (as Rack env keys):
    #   HTTP_GET_PAGE_AS_IMAGE     - answer with a PNG screenshot of the page.
    #   HTTP_GET_PAGE_WITH_IFRAMES - inline iframe content into the returned DOM.
    # Both are off when the header is absent or set to "", "false", "0", "no"
    # or "off"; any other value switches the feature on.
    #
    # Returns 503 with the error text when PhantomJS could not load the page.
    def call(env)
      req = Rack::Request.new(env)

      env_dump = env.collect { |k, v| "#{k} : #{v}\n" }.join
      env['rack.errors'].write("The request: "+req.url()+"\nGET: "+env_dump+"\n")

      param_dump = req.params.collect { |k, v| "#{k}=#{v}&\n" }.join
      env['rack.errors'].write("Paramas: "+param_dump+"\n")

      # Route the request to the remote server in case it is not HTML that we
      # want to load.
      type = check_for_route(env['REQUEST_URI'])
      return route(env, type) if type != "none"

      picture = header_flag(env, 'HTTP_GET_PAGE_AS_IMAGE')
      loadFrames = header_flag(env, 'HTTP_GET_PAGE_WITH_IFRAMES')

      # REQUEST_URI already carries the query string of the request. Form
      # parameters are only appended (properly escaped) when it has none.
      url = env['REQUEST_URI'].to_s
      unless url.include?('?') || req.params.empty?
        url += '?' + Rack::Utils.build_nested_query(req.params)
      end

      # Fetch the web page with PhantomJS.
      phJS = PhantomJS.new
      phJS.getUrl(url, picture, loadFrames)

      if !phJS.ready
        respond(503, 'text/html', phJS.dom)
      elsif picture
        respond(200, 'image/png', phJS.image)
      else
        respond(200, 'text/html', phJS.dom)
      end
    end

    private

    # Reads a boolean switch from a request header. Absent headers and the
    # values "", "false", "0", "no" and "off" are false; anything else is true.
    def header_flag(env, key)
      value = env[key]
      return false if value.nil?
      value.to_s.strip !~ /\A(|false|0|no|off)\z/i
    end

    # Splits the Host header into [host, port]. The port falls back to
    # SERVER_PORT and finally to 80 when the header carries none.
    def upstream_address(env)
      match = /\A(.*?)(?::(\d+))?\z/.match(env['HTTP_HOST'].to_s)
      port = match[2] || env['SERVER_PORT'] || 80
      [match[1], port.to_i]
    end

    # Builds a finished Rack response with a single body chunk.
    def respond(status, type, body)
      resp = Rack::Response.new([], status, {'Content-Type' => type}) { |r|
        r.write(body.to_s)
      }
      resp.finish
    end
  end
end
|
@@ -0,0 +1,169 @@
|
|
1
|
+
// Shared state of the proxy script.
var fs = require('fs'),
    // Number of page loads (main page + iframes) that have completed so far.
    framesWorked = 0,
    // Number of page loads that must complete before the final DOM is built.
    frameCount = 1,
    // Content of the loaded iframes.
    frameContent = [],
    // URL of the main page; it is re-opened to build the final DOM.
    masterURL = "";
|
7
|
+
|
8
|
+
evaluateWithVars = function(page, func, vars)
|
9
|
+
{
|
10
|
+
var fstr = func.toString()
|
11
|
+
//console.log(fstr.replace("function () {", "function () {\n"+vstr))
|
12
|
+
var evalstr = fstr.replace(
|
13
|
+
new RegExp("function \((.*?)\) {"),
|
14
|
+
"function $1 {\n" +
|
15
|
+
"var vars = JSON.parse('" + JSON.stringify(vars) + "')\n" +
|
16
|
+
"for (var v in vars) window[v] = vars[v]\n" +
|
17
|
+
"\n"
|
18
|
+
)
|
19
|
+
console.log(evalstr)
|
20
|
+
return page.evaluate(evalstr)
|
21
|
+
}
|
22
|
+
|
23
|
+
// Opens `url` once more, replaces every iframe with the content collected in
// frameContent and prints the resulting DOM between the
// PHANTOMJS_DOMDATA_WRITE: and PHANTOMJS_DOMDATA_END markers. Always
// terminates phantomjs when the load callback has run.
var insertFrames = function(url) {
    var page = require('webpage').create();
    page.onConsoleMessage = function (msg) { console.log(msg); };
    page.onAlert = function(msg) { console.log(msg);};
    page.onLoadStarted = function () {
        console.log('Start loading final Page...'+url);
    };
    page.open(url, function (status) {
        if (status !== 'success') {
            console.log('FAILED_LOADING_URL: '+url);
        } else {
            // Mark each iframe with a placeholder carrying its index.
            page.evaluate(function () {
                var framestmp = document.getElementsByTagName('IFRAME');
                var frames = [];
                for (var i=0;i<framestmp.length;i++) {
                    frames.push(framestmp[i]);
                }
                for (var j=0;j<frames.length;j++) {
                    frames[j].innerHTML = "PHANTOMJS_PROXY_IFRAME"+j;
                }
            });
            // Replace the placeholders with the loaded frame data. A replacer
            // function is used so that '$&', '$1' etc. inside the frame HTML
            // are inserted literally.
            var content = String(page.content);
            for (var i in frameContent) {
                content = content.replace("PHANTOMJS_PROXY_IFRAME"+i, function () {
                    return "<phantomjsframe>"+frameContent[i]+"</phantomjsframe>";
                });
            }
            console.log("PHANTOMJS_DOMDATA_WRITE:"+content);
            console.log('PHANTOMJS_DOMDATA_END');
        }
        console.log('WHATEVER');
        phantom.exit();
    });
};
|
57
|
+
|
58
|
+
// Records that one more page load has finished. Once the number of finished
// loads equals frameCount, the final page is built with insertFrames().
function exit() {
    framesWorked += 1;
    if (framesWorked != frameCount) {
        return;
    }
    insertFrames(masterURL);
}
|
63
|
+
|
64
|
+
// Loads `url` in a fresh page and stores its content in frameContent.
//
// url   - address of the frame to load.
// index - optional position of the iframe in the main document. When given,
//         the content is stored at frameContent[index] so it lines up with
//         the iframe's placeholder even if loads finish out of order or some
//         of them fail. Without it the content is appended. Callers should
//         use one mode consistently.
//
// exit() is called once the load has finished, whether it succeeded or not.
var loadpage = function(url, index) {
    var page = require('webpage').create();
    page.onConsoleMessage = function (msg) { console.log(msg); };
    page.onAlert = function(msg) { console.log(msg);};
    page.onLoadStarted = function () {
        console.log('Start loading...'+url);
    };
    page.open(url, function (status) {
        if (status !== 'success') {
            console.log('FAILED_LOADING_URL: '+url);
        } else {
            console.log('LOADED PAGE CONTENT['+url+']\n');
            if (typeof index === 'number') {
                frameContent[index] = page.content;
            } else {
                frameContent.push(page.content);
            }
        }
        console.log('WHATEVER');
        exit();
    });
};
|
83
|
+
|
84
|
+
// Starts loading the source of every iframe found in `page`.
//
// frameCount is set to the number of iframes plus one (the main page) before
// any load is started, so the completion counter in exit() can never see a
// stale total. Each load is given the index of its iframe so its content can
// be matched with the iframe's position in the document.
function loadIFrames(page) {
    var frames = page.evaluate(function () {
        var framestmp = document.getElementsByTagName('IFRAME');
        var frames = [];
        for (var i=0;i<framestmp.length;i++) {
            frames.push(framestmp[i].getAttribute('src'));
        }
        return frames;
    });

    frameCount = frames.length+1;

    for (var i=0;i<frames.length;i++) {
        console.log("Frame: "+i+" : "+frames[i]);
        loadpage(frames[i], i);
    }
}
|
101
|
+
|
102
|
+
// Entry point of the proxy script.
//
// Arguments (phantom.args):
//   0   - screenshot file name, or "none" for no screenshot
//   1   - "true" to inline iframe content
//   2   - URL to open (without query string)
//   3   - number of query parameters that follow (optional)
//   4.. - the query parameters, one "key=value" per argument
//
// Prints "Open page: <url>, <query> END" and then either FAILED_LOADING_URL
// or DONE_LOADING_URL followed by the main DOM between the
// PHANTOMJS_MAINDOM_WRITE: and PHANTOMJS_MAINDOM_END markers.
function main() {
    // The URL at index 2 is mandatory.
    if (phantom.args.length < 3) {
        console.log('Usage: proxy.js <picture filename or none> <load iframe(true/false)> <URL> <url param count> <url params...>');
        phantom.exit();
        return;
    }

    var file_name = phantom.args[0];
    var loadIframes = phantom.args[1].match(/true/i) ? true : false;
    var address = phantom.args[2];

    // Never read past the arguments that were actually passed.
    var argCount = parseInt(phantom.args[3], 10) || 0;
    var args = phantom.args.slice(4, 4 + argCount).join('&');
    if (args.length > 0)
        address += '?'+args;

    console.log("Open page: "+address+", "+args+" END");

    var page = require('webpage').create();

    page.onConsoleMessage = function (msg) { console.log(msg); };

    console.log('start openning page');

    masterURL = address;

    page.open(address, function (status) {
        if (status !== 'success') {
            console.log('FAILED_LOADING_URL');
            // Nothing more to do: opening the page a second time to build
            // the final DOM cannot produce a usable result.
            phantom.exit();
            return;
        }

        console.log('DONE_LOADING_URL');

        // Start loading the iframes that are inlined into the final DOM.
        if (loadIframes) {
            loadIFrames(page);
        }
        console.log('PHANTOMJS_MAINDOM_WRITE:'+page.content);
        console.log('PHANTOMJS_MAINDOM_END');

        if (file_name != null && file_name != "none") {
            page.render(file_name);
        }
        // Count the main page as loaded; the final DOM is written once all
        // pending iframe loads have finished as well.
        exit();
    });
}

main();
|
@@ -0,0 +1,10 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
|
3
|
+
module PhantomJSProxy
  # Absolute path of the directory that contains this file.
  ROOT = File.dirname(File.expand_path(__FILE__))
  # PhantomJS script that loads a page and prints its DOM.
  SCRIPT = File.join(ROOT, "phantom_proxy", "scripts", "proxy.js")
  # phantomjs executable in the bin directory next to ROOT.
  PHANTOMJS_BIN = File.join(ROOT, "..", "bin", "phantomjs")
end
|
8
|
+
|
9
|
+
require PhantomJSProxy::ROOT+'/phantom_proxy/phantomjs.rb'
|
10
|
+
require PhantomJSProxy::ROOT+'/phantom_proxy/phantomjsserver.rb'
|
metadata
ADDED
@@ -0,0 +1,71 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: phantom_proxy
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Daniel Sudmann
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-03-23 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: thin
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: 1.3.1
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: 1.3.1
|
30
|
+
description: This is a phantomjs proxy. It allows you to fetch webpages and execute
|
31
|
+
javascript in them.
|
32
|
+
email: suddani@googlemail.com
|
33
|
+
executables:
|
34
|
+
- phantom_proxy
|
35
|
+
extensions: []
|
36
|
+
extra_rdoc_files: []
|
37
|
+
files:
|
38
|
+
- lib/phantom_proxy/phantomjsserver.rb
|
39
|
+
- lib/phantom_proxy/phantomjs.rb
|
40
|
+
- lib/phantom_proxy.rb
|
41
|
+
- lib/phantom_proxy/scripts/proxy.js
|
42
|
+
- lib/phantom_proxy/config.ru
|
43
|
+
- bin/phantom_proxy
|
44
|
+
- bin/phantomjs
|
45
|
+
- README.rdoc
|
46
|
+
- Gemfile
|
47
|
+
homepage: http://experteer.com
|
48
|
+
licenses: []
|
49
|
+
post_install_message:
|
50
|
+
rdoc_options: []
|
51
|
+
require_paths:
|
52
|
+
- lib
|
53
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
54
|
+
none: false
|
55
|
+
requirements:
|
56
|
+
- - ! '>='
|
57
|
+
- !ruby/object:Gem::Version
|
58
|
+
version: '0'
|
59
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
60
|
+
none: false
|
61
|
+
requirements:
|
62
|
+
- - ! '>='
|
63
|
+
- !ruby/object:Gem::Version
|
64
|
+
version: '0'
|
65
|
+
requirements: []
|
66
|
+
rubyforge_project:
|
67
|
+
rubygems_version: 1.8.19
|
68
|
+
signing_key:
|
69
|
+
specification_version: 3
|
70
|
+
summary: This is a phantomjs Proxy
|
71
|
+
test_files: []
|