phantom_proxy 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
data/Gemfile ADDED
@@ -0,0 +1,2 @@
1
+ source 'http://rubygems.org'
2
+ gem 'rack'
data/README.rdoc ADDED
@@ -0,0 +1,37 @@
1
+ == Phantom Proxy - A webkit proxy
2
+ The phantom proxy acts as an HTTP proxy server. It fetches the remote webpages
3
+ with the help of phantomjs (see http://www.phantomjs.org/).
4
+
5
+ You can use this to get a page after JavaScript execution. By setting some HTTP
6
+ headers you can get the page with all iframes included or as an image.
7
+
8
+ == Installation
9
+ Install phantomjs (see: http://code.google.com/p/phantomjs/wiki/BuildInstructions)
10
+
11
+ On Debian:
12
+
13
+ sudo apt-get install libqt4-dev libqtwebkit-dev qt4-qmake
14
+ cd phantom
15
+ git clone https://github.com/ariya/phantomjs.git
16
+ git checkout 1.2
17
+ qmake-qt4 && make
18
+
19
+ checkout phantom_proxy
20
+
21
+ gem build phantom_proxy.gemspec
22
+
23
+ gem install phantom_proxy-*.gem
24
+
25
+ == Usage
26
+ Run
27
+ phantom_proxy
28
+ either with -self (ip, port) to not use the thin::runner framework
29
+ or
30
+ with any thin parameter you want (e.g. -p 8080).
31
+
32
+ Point your browser's proxy to http://localhost:8080 for testing.
33
+
34
+ You can use the Net::HTTP lib to fetch pages or use the phantom_client
35
+ (see: https://github.com/experteer/phantom_client).
36
+
37
+ == TODO
data/bin/phantom_proxy ADDED
@@ -0,0 +1,37 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'thin'
4
+
5
+ require 'fileutils'
6
+ require 'timeout'
7
+ require 'stringio'
8
+ require 'time'
9
+ require 'forwardable'
10
+ require 'rack'
11
+ require 'daemons'
12
+
13
module PhantomJSProxy
  # Path of the rackup file handed to Thin::Runner, expressed relative to the
  # directory that contains this executable.
  CONFIG = "#{File.expand_path(File.dirname(__FILE__))}/../lib/phantom_proxy/config.ru"
end
16
+
17
+ require 'phantom_proxy'
18
+
19
# Boot the proxy.
#
# Any argument containing "-self" selects the bare Thin::Server; otherwise the
# Thin::Runner command line is used and every argument is forwarded to it.
# (Daemonizing through the daemons gem is currently disabled, so no daemon
# options are built here.)
standalone_flag = /-self/
standalone = ARGV.any? { |arg| standalone_flag.match(arg) }

if standalone
  # Drop the flag itself before forwarding: only the remaining arguments
  # (ip, port) describe where to listen. Forwarding "-self" as well would hand
  # the server a string argument that is not an address.
  listen_args = ARGV.reject { |arg| standalone_flag.match(arg) }.first(2)
  Thin::Server.start(PhantomJSProxy::PhantomJSServer.new, *listen_args)
else
  startoptions = ["start", "-R", PhantomJSProxy::CONFIG, "-P", "/tmp/pids/phantom_proxy.pid", "--tag", "phantom_proxy"] + ARGV
  Thin::Runner.new(startoptions).run!
end
data/bin/phantomjs ADDED
Binary file
@@ -0,0 +1,10 @@
1
+ require 'rubygems'
2
+ require 'phantom_proxy'
3
+
4
# Rack middlewares can be installed here, e.g. for logging,
# filtering, authentication, or anything you build yourself.
use Rack::CommonLogger

# Every request is answered by the PhantomJS-backed proxy application.
run PhantomJSProxy::PhantomJSServer.new
10
+
@@ -0,0 +1,84 @@
1
+ require 'tempfile'
2
+
3
module PhantomJSProxy
  # Runs the phantomjs binary (PHANTOMJS_BIN) with the proxy script and keeps
  # the result of the last fetch:
  #
  # * +dom+   - the page markup printed by the script, or an error message
  # * +image+ - the rendered PNG bytes ("" when no picture was requested)
  # * +ready+ - true only when the last fetch produced a DOM
  class PhantomJS
    # Directory holding the cookie jar and the temporary screenshots.
    WORK_DIR = "/tmp/phantomjs_proxy"
    COOKIE_FILE = WORK_DIR+"/cookies.txt"

    attr_accessor :dom
    attr_accessor :image
    attr_accessor :ready

    def initialize()
      @ready = false
    end

    # Fetches +url+ through phantomjs.
    #
    # url         - String URL; a query string is split into single
    #               parameters that are handed to the script one by one.
    # pictureOnly - when truthy the page is also rendered into a PNG that is
    #               read into +image+.
    # loadIFrames - when truthy the script is asked to inline iframe content.
    #
    # Returns the DOM String, or "Failed to load page" when the script output
    # does not contain a complete DOM section (in which case +ready+ stays
    # false instead of an exception being raised).
    def getUrl(url, pictureOnly=true, loadIFrames=true)
      puts("PhantomJS: "+url)
      @ready = false

      # The cookie jar lives in WORK_DIR, so the directory is needed for
      # every call and not only when a picture is requested.
      ensure_work_dir

      pictureFile = nil
      picture = "none"
      if pictureOnly
        pictureFile = Tempfile.new(["page", ".png"], WORK_DIR)
        picture = pictureFile.path
      end

      base_url, query = url.split('?')
      params = query ? query.split('&') : []

      output = invokePhantomJS(SCRIPT, [picture, loadIFrames ? "true" : "false", base_url.to_s, params.length, *params])

      opened = /Open page: (.*?) END/.match(output)
      puts("Opened page: "+opened[1]) if opened

      # Both markers must be present; a failed second load prints neither and
      # must be reported as a failure rather than crash on nil.
      dom_section = /PHANTOMJS_DOMDATA_WRITE:(.*?)PHANTOMJS_DOMDATA_END/m.match(output)

      if dom_section && /DONE_LOADING_URL/.match(output)
        @dom = dom_section[1]
        if pictureOnly && File.exist?(picture)
          puts("File is there")
          @image = File.open(picture, "rb") {|f| f.read }
        else
          puts("No file to load at: "+picture)
          @image = ""
        end
        @ready = true
      else
        @dom = "Failed to load page"
        puts("TOTAL FAIL")
      end
      puts("Return dom")
      return @dom
    ensure
      # Remove the screenshot on every path, including failures.
      pictureFile.close! if pictureFile
    end

    # Builds a raw HTTP response that carries the last rendered image.
    #
    # type - image subtype used in the Content-Type header (default 'png').
    def getAsImageResponse(type='png')
      return "HTTP/1.0 200 OK\r\nConnection: close\r\nContent-Type: image/"+type+"\r\n\r\n"+@image.to_s
    end

    # Runs phantomjs with +script+ and returns everything it printed.
    #
    # script - path of the script to execute.
    # args   - Array of arguments; each element becomes exactly one argument
    #          of the child process.
    #
    # The command is started from an argument vector, not from a shell
    # string, so characters such as ";", "&" or spaces inside a requested URL
    # cannot start additional commands. Returns "" when the binary cannot be
    # started.
    def invokePhantomJS(script, args)
      argv = args.map { |arg| arg.to_s }
      puts("Call phantomJS with: "+" "+argv.join(" "))
      command = [PHANTOMJS_BIN, "--cookies-file="+COOKIE_FILE, script] + argv
      begin
        # The block form closes the pipe and reaps the child.
        o = IO.popen(command) { |out| out.read }.to_s
      rescue SystemCallError => e
        puts("PhantomJS could not be started: "+e.message)
        o = ""
      end
      puts("PHANTOMJS_OUT: "+o)
      return o
    end

    private

    # Creates WORK_DIR when it is missing; losing a creation race is fine.
    def ensure_work_dir
      Dir.mkdir(WORK_DIR) unless File.directory?(WORK_DIR)
    rescue Errno::EEXIST
      nil
    end
  end
end
@@ -0,0 +1,104 @@
1
+ require 'net/http'
2
+
3
module PhantomJSProxy
  # Rack application behind the proxy. Requests for static assets are fetched
  # directly with Net::HTTP; everything else is rendered through PhantomJS.
  class PhantomJSServer
    # Content types of the assets that bypass PhantomJS, keyed by the
    # lower-case extension of the request path.
    ASSET_TYPES = {
      '.js'   => 'application/javascript',
      '.css'  => 'text/css',
      '.png'  => 'image/png',
      '.jpg'  => 'image/jpeg',
      '.jpeg' => 'image/jpeg',
      '.gif'  => 'image/gif'
    }.freeze

    # Header values that switch an optional feature off.
    NEGATIVE_VALUES = %w[false 0 no off].freeze

    def initialize()
    end

    # Decides whether +url+ names a static asset.
    #
    # Only the extension of the path is inspected; scheme, host, query string
    # and fragment are ignored, so "http://www.jsfiddle.net/" or "data.json"
    # are not mistaken for scripts.
    #
    # Returns the content type String for an asset, "none" otherwise.
    def check_for_route(url)
      path = url.to_s.sub(%r{\A[a-z][a-z0-9+.\-]*://[^/?#]*}i, '').split(/[?#]/, 2).first.to_s
      ASSET_TYPES.fetch(File.extname(path).downcase, "none")
    end

    # Fetches the requested resource directly and relays it.
    #
    # env  - the Rack environment of the incoming request.
    # type - content type to answer with (see check_for_route).
    #
    # The upstream status code is passed on, and the client's User-Agent is
    # forwarded when present. Returns a finished Rack response triplet.
    def route(env, type)
      # HTTP_HOST may carry ":port"; Net::HTTP expects the bare host name.
      host = env['HTTP_HOST'].to_s.sub(/:\d+\z/, '')
      port = env['SERVER_PORT'].to_i
      port = nil if port.zero?

      _req = Net::HTTP::Get.new(env['REQUEST_URI'])
      _req['User-Agent'] = env['HTTP_USER_AGENT'] if env['HTTP_USER_AGENT']

      _res = Net::HTTP.start(host, port) { |http| http.request(_req) }

      # Responses without a body (e.g. 304) report nil.
      body = _res.body.to_s
      env['rack.errors'].write("Response is:"+body+"\n")

      respond(_res.code.to_i, type, body)
    end

    # Rack entry point.
    #
    # Request headers understood (names as they appear in the Rack env):
    # * HTTP_GET_PAGE_AS_IMAGE     - answer with a PNG rendering of the page
    # * HTTP_GET_PAGE_WITH_IFRAMES - inline iframe content into the DOM
    # Both are off when the header is absent or set to false/0/no/off.
    def call(env)
      req = Rack::Request.new(env)

      env_dump = env.collect { |k, v| "#{k} : #{v}\n" }.join
      env['rack.errors'].write("The request: "+req.url()+"\nGET: "+env_dump+"\n")

      params = req.params
      env['rack.errors'].write("Paramas: "+params.collect { |k, v| "#{k}=#{v}&\n" }.join+"\n")

      # Route the request to the outgoing server in case it is not HTML.
      type = check_for_route(env['REQUEST_URI'])
      return route(env, type) if type != "none"

      env['rack.errors'].write("Extract the uri\n")

      picture = header_flag(env['HTTP_GET_PAGE_AS_IMAGE'])
      loadFrames = header_flag(env['HTTP_GET_PAGE_WITH_IFRAMES'])

      # REQUEST_URI already contains the query string. Parameters are only
      # appended when they arrived some other way, and then as a properly
      # encoded query.
      url = env['REQUEST_URI'].to_s
      if !url.include?('?') && !params.empty?
        url += '?'+Rack::Utils.build_nested_query(params)
      end

      # Fetch the webpage with PhantomJS.
      phJS = PhantomJS.new
      phJS.getUrl(url, picture, loadFrames)

      # Create the response.
      if !phJS.ready
        respond(503, 'text/html', phJS.dom)
      elsif picture
        respond(200, 'image/png', phJS.image)
      else
        respond(200, 'text/html', phJS.dom)
      end
    end

    private

    # Interprets an optional request header as a boolean switch.
    def header_flag(value)
      return false if value.nil?
      !NEGATIVE_VALUES.include?(value.to_s.strip.downcase)
    end

    # Builds and finishes a Rack response with a single body chunk.
    def respond(status, content_type, body)
      Rack::Response.new([], status, {'Content-Type' => content_type}) { |r|
        r.write(body.to_s)
      }.finish
    end
  end
end
@@ -0,0 +1,169 @@
1
+ var fs = require('fs');
2
+
3
+ var framesWorked = 0;
4
+ var frameCount = 1;
5
+ var frameContent = [];
6
+ var masterURL = "";
7
+
8
// Evaluates `func` inside `page` after exposing every key of `vars` as a
// global (window[key]) of that page.
//
// page - a webpage object offering evaluate()
// func - function to run in the page context
// vars - JSON-serializable object whose keys become page globals
//
// Returns whatever page.evaluate() returns for the rewritten source.
var evaluateWithVars = function(page, func, vars)
{
    // The JSON text is emitted as an object literal, not wrapped in a quoted
    // string, so quotes and backslashes inside values cannot break it.
    var prelude =
        "var vars = " + JSON.stringify(vars) + ";\n" +
        "for (var v in vars) window[v] = vars[v];\n" +
        "\n";
    // A replacer function is used so that "$" sequences inside the prelude
    // are inserted literally instead of being treated as replacement patterns.
    var evalstr = func.toString().replace(
        /function\s*\(([^)]*)\)\s*\{/,
        function (header, params) {
            return "function (" + params + ") {\n" + prelude;
        }
    );
    console.log(evalstr);
    return page.evaluate(evalstr);
};
22
+
23
// Loads `url` once more, marks every iframe with a numbered placeholder and
// prints the page markup with each placeholder replaced by the matching entry
// of the global `frameContent`, wrapped in <phantomjsframe> tags. The result
// is printed between the PHANTOMJS_DOMDATA_WRITE: / PHANTOMJS_DOMDATA_END
// markers, after which phantom exits.
var insertFrames = function(url) {
    var page = require('webpage').create();
    page.onConsoleMessage = function (msg) { console.log(msg); };
    page.onAlert = function(msg) { console.log(msg); };
    page.onLoadStarted = function () {
        console.log('Start loading final Page...'+url);
    };

    // Returns a replacer that yields the wrapped frame markup verbatim. Passing
    // the markup as a plain replacement string would let "$&", "$1", ... that
    // occur inside the frame's HTML or scripts be interpreted.
    var wrapFrame = function (html) {
        return function () {
            return "<phantomjsframe>"+html+"</phantomjsframe>";
        };
    };

    page.open(url, function (status) {
        if (status !== 'success') {
            console.log('FAILED_LOADING_URL: '+url);
        } else {
            // mark iframes
            page.evaluate(function () {
                var iframes = document.getElementsByTagName('IFRAME');
                for (var i = 0; i < iframes.length; i++) {
                    iframes[i].innerHTML = "PHANTOMJS_PROXY_IFRAME"+i;
                }
            });
            // replace iframes with their data
            var content = String(page.content);
            for (var i = 0; i < frameContent.length; i++) {
                // entries may be missing when a frame produced no content
                if (frameContent[i] === undefined) {
                    continue;
                }
                content = content.replace("PHANTOMJS_PROXY_IFRAME"+i, wrapFrame(frameContent[i]));
            }
            console.log("PHANTOMJS_DOMDATA_WRITE:"+content);
            console.log('PHANTOMJS_DOMDATA_END');
        }
        console.log('WHATEVER');
        phantom.exit();
    });
};
57
+
58
// Records one finished load. Once the number of finished loads reaches
// `frameCount`, the final page is assembled via insertFrames(masterURL).
function exit() {
    framesWorked += 1;
    if (framesWorked != frameCount) {
        return;
    }
    insertFrames(masterURL);
}
63
+
64
// Loads `url` in a fresh page and stores its markup in `frameContent`.
//
// url   - address to open
// index - optional position of the iframe in the parent document. When given,
//         the markup is stored at frameContent[index] (an empty string when
//         the load fails) so that content always lines up with the matching
//         iframe, whatever order the loads finish in. Without an index the
//         markup of a successful load is appended.
//
// exit() is called once the load has finished, successfully or not.
var loadpage = function(url, index) {
    var page = require('webpage').create();
    page.onConsoleMessage = function (msg) { console.log(msg); };
    page.onAlert = function(msg) { console.log(msg); };
    page.onLoadStarted = function () {
        console.log('Start loading...'+url);
    };
    page.open(url, function (status) {
        var loaded = (status === 'success');
        if (!loaded) {
            console.log('FAILED_LOADING_URL: '+url);
        } else {
            console.log('LOADED PAGE CONTENT['+url+']\n');
        }
        if (typeof index === 'number') {
            frameContent[index] = loaded ? page.content : "";
        } else if (loaded) {
            frameContent.push(page.content);
        }
        console.log('WHATEVER');
        exit();
    });
};

// Starts loading the content of every iframe found in `page`.
//
// `frameCount` is set to the number of iframes plus one (the main page)
// before any load is started, so the completion counter in exit() always
// compares against the final total.
function loadIFrames(page) {
    var frames = page.evaluate(function () {
        var iframes = document.getElementsByTagName('IFRAME');
        var sources = [];
        for (var i = 0; i < iframes.length; i++) {
            // The src property is the resolved URL; fall back to the raw
            // attribute when the property is empty.
            sources.push(iframes[i].src || iframes[i].getAttribute('src'));
        }
        return sources;
    });

    // Treat a missing evaluation result like a page without iframes.
    frames = frames || [];

    frameCount = frames.length+1;

    for (var i = 0; i < frames.length; i++) {
        console.log("Frame: "+i+" : "+frames[i]);
        loadpage(frames[i], i);
    }
}
101
+
102
// Entry point of the proxy script.
//
// Arguments (phantom.args):
//   0   picture file name, or "none" for no rendering
//   1   "true"/"false": also load iframe content
//   2   URL to open
//   3   number of URL parameters that follow
//   4.. the URL parameters ("key=value"), re-joined with '&'
//
// Prints "Open page: ... END", then DONE_LOADING_URL or FAILED_LOADING_URL and,
// on success, the main DOM between the PHANTOMJS_MAINDOM_WRITE: and
// PHANTOMJS_MAINDOM_END markers. The final output and phantom.exit() are
// triggered through exit().
function main() {
    // picture, iframe flag and URL are all mandatory
    if (phantom.args.length < 3) {
        console.log('Usage: proxy.js <picture filename or none> <load iframe(true/false)> <URL> <url param count> <url params...>');
        phantom.exit();
        return;
    }

    var file_name = phantom.args[0];
    var loadIframes = /true/i.test(phantom.args[1]);
    var address = phantom.args[2];

    // Never read past the arguments that were actually supplied.
    var declared = parseInt(phantom.args[3], 10) || 0;
    var argCount = Math.min(declared, Math.max(phantom.args.length - 4, 0));

    var args = "";
    for (var i = 0; i < argCount; i++)
        args += phantom.args[i+4]+'&';
    if (args.length > 0)
        address += '?'+args;

    console.log("Open page: "+address+", "+args+" END");

    var page = require('webpage').create();

    page.onConsoleMessage = function (msg) { console.log(msg); };

    console.log('start openning page');

    // remembered for the final load done by insertFrames()
    masterURL = address;

    page.open(address, function (status) {
        if (status !== 'success') {
            console.log('FAILED_LOADING_URL');
        } else {
            console.log('DONE_LOADING_URL');

            // load iframes into page
            if (loadIframes) {
                loadIFrames(page);
            }
            console.log('PHANTOMJS_MAINDOM_WRITE:'+page.content);
            console.log('PHANTOMJS_MAINDOM_END');
        }
        if (file_name != null && file_name != "none") {
            page.render(file_name);
        }
        // counts the main page as finished
        exit();
    });
}
168
+
169
+ main();
@@ -0,0 +1,10 @@
1
+ require 'rubygems'
2
+
3
module PhantomJSProxy
  # Absolute path of the directory that contains this file.
  ROOT = File.expand_path(File.dirname(__FILE__))
  # PhantomJS script that performs the page load.
  SCRIPT = "#{ROOT}/phantom_proxy/scripts/proxy.js"
  # Location of the phantomjs executable, relative to ROOT.
  PHANTOMJS_BIN = "#{ROOT}/../bin/phantomjs"
end
8
+
9
+ require PhantomJSProxy::ROOT+'/phantom_proxy/phantomjs.rb'
10
+ require PhantomJSProxy::ROOT+'/phantom_proxy/phantomjsserver.rb'
metadata ADDED
@@ -0,0 +1,71 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: phantom_proxy
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Daniel Sudmann
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-03-23 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: thin
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: 1.3.1
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: 1.3.1
30
+ description: This is a phantomjs proxy. It allows you to fetch webpages and execute
31
+ javascript in them.
32
+ email: suddani@googlemail.com
33
+ executables:
34
+ - phantom_proxy
35
+ extensions: []
36
+ extra_rdoc_files: []
37
+ files:
38
+ - lib/phantom_proxy/phantomjsserver.rb
39
+ - lib/phantom_proxy/phantomjs.rb
40
+ - lib/phantom_proxy.rb
41
+ - lib/phantom_proxy/scripts/proxy.js
42
+ - lib/phantom_proxy/config.ru
43
+ - bin/phantom_proxy
44
+ - bin/phantomjs
45
+ - README.rdoc
46
+ - Gemfile
47
+ homepage: http://experteer.com
48
+ licenses: []
49
+ post_install_message:
50
+ rdoc_options: []
51
+ require_paths:
52
+ - lib
53
+ required_ruby_version: !ruby/object:Gem::Requirement
54
+ none: false
55
+ requirements:
56
+ - - ! '>='
57
+ - !ruby/object:Gem::Version
58
+ version: '0'
59
+ required_rubygems_version: !ruby/object:Gem::Requirement
60
+ none: false
61
+ requirements:
62
+ - - ! '>='
63
+ - !ruby/object:Gem::Version
64
+ version: '0'
65
+ requirements: []
66
+ rubyforge_project:
67
+ rubygems_version: 1.8.19
68
+ signing_key:
69
+ specification_version: 3
70
+ summary: This is a phantomjs Proxy
71
+ test_files: []