phantom_proxy 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Gemfile ADDED
@@ -0,0 +1,2 @@
1
+ source 'http://rubygems.org' # NOTE(review): plain-HTTP gem source; consider https://rubygems.org
2
+ gem 'rack' # only rack is listed here; the gem metadata names thin as the runtime dependency
data/README.rdoc ADDED
@@ -0,0 +1,37 @@
1
+ == Phantom Proxy - A webkit proxy
2
+ The phantom proxy acts as a http proxy server. It fetches the remote webpages
3
+ with the help of phantomjs (see http://www.phantomjs.org/ ).
4
+
5
+ You can use this to get a page after the JavaScript execution. By setting some HTTP
6
+ headers you can get the page with all iframes included or as an image.
7
+
8
+ == Installation
9
+ Install phantomjs (see: http://code.google.com/p/phantomjs/wiki/BuildInstructions)
10
+
11
+ On Debian:
12
+
13
+ sudo apt-get install libqt4-dev libqtwebkit-dev qt4-qmake
14
+ cd phantom
15
+ git clone https://github.com/ariya/phantomjs.git
16
+ git checkout 1.2
17
+ qmake-qt4 && make
18
+
19
+ checkout phantom_proxy
20
+
21
+ gem build phantom_proxy.gemspec
22
+
23
+ gem install phantom_proxy-*.gem
24
+
25
+ == Usage
26
+ Run
27
+ phantom_proxy
28
+ either with -self (ip, port) to not use the thin::runner framework
29
+ or
30
+ with any thin parameter you want (e.g. -p 8080).
31
+
32
+ Point your browser's proxy to http://localhost:8080 for testing.
33
+
34
+ You can use the Net::HTTP lib to fetch pages or use the phantom_client
35
+ (see: https://github.com/experteer/phantom_client).
36
+
37
+ == TODO
data/bin/phantom_proxy ADDED
@@ -0,0 +1,37 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'thin'
4
+
5
+ require 'fileutils'
6
+ require 'timeout'
7
+ require 'stringio'
8
+ require 'time'
9
+ require 'forwardable'
10
+ require 'rack'
11
+ require 'daemons'
12
+
13
+ module PhantomJSProxy # namespace shared with the library required below
14
+ CONFIG = File.expand_path(File.dirname(__FILE__))+"/../lib/phantom_proxy/config.ru" # rackup file handed to thin via -R below
15
+ end
16
+
17
+ require 'phantom_proxy'
18
+
19
+ # Become a daemon
20
+ options = { # NOTE(review): unused while the Daemons.daemonize call below stays commented out
21
+ :app_name => "phantom_proxy",
22
+ :backtrace => true,
23
+ :ontop => true,
24
+ :log_output => true
25
+ }
26
+ #Daemons.daemonize(options)
27
+ phantom = false # becomes true when an argument matches -self (start the server directly)
28
+ ARGV.each { |arg|
29
+ phantom = true if /-self/.match(arg) # unanchored: any argument containing "-self" matches
30
+ }
31
+
32
+ if !phantom
33
+ startoptions = ["start", "-R", PhantomJSProxy::CONFIG, "-P", "/tmp/pids/phantom_proxy.pid", "--tag", "phantom_proxy"]+ARGV # NOTE(review): pid path is hard-coded; presumably /tmp/pids must already exist - confirm
34
+ Thin::Runner.new(startoptions).run!
35
+ else
36
+ Thin::Server.start(PhantomJSProxy::PhantomJSServer.new, ARGV[0], ARGV[1], ARGV[2]) # NOTE(review): ARGV is unchanged, so the -self flag itself is one of these positional values - verify the expected order
37
+ end
data/bin/phantomjs ADDED
Binary file
@@ -0,0 +1,10 @@
1
+ require 'rubygems'
2
+ require 'phantom_proxy'
3
+
4
+ # You can install Rack middlewares
5
+ # to do some crazy stuff like logging,
6
+ # filtering, auth or build your own.
7
+ use Rack::CommonLogger # install Rack's CommonLogger middleware in front of the app
8
+
9
+ run PhantomJSProxy::PhantomJSServer.new() # the proxy server instance is the Rack application
10
+
@@ -0,0 +1,84 @@
1
+ require 'tempfile'
2
+
3
+ module PhantomJSProxy
4
+ class PhantomJS
5
+ attr_accessor :dom
6
+ attr_accessor :image
7
+ attr_accessor :ready
8
+
9
+ def initialize() # a new instance starts in the not-ready state
10
+ @ready = false # set to true by getUrl after a successful load
11
+ end
12
+
13
+ def getUrl(url, pictureOnly=true, loadIFrames=true) # runs the proxy script on url; fills @dom (and @image), sets @ready, returns @dom
14
+ puts("PhantomJS: "+url)
15
+ @ready = false # reset until this fetch succeeds
16
+
17
+ pictureFile = nil
18
+ picture = "none" # "none" is passed to the script when no screenshot file is wanted
19
+
20
+ loadFrames = "false" # flags are handed to the script as the strings "true"/"false"
21
+
22
+ if loadIFrames # NOTE(review): any truthy value counts here, including the string "false"
23
+ loadFrames = "true"
24
+ end
25
+
26
+ if pictureOnly
27
+ if !File.directory?("/tmp/phantomjs_proxy")
28
+ Dir.mkdir("/tmp/phantomjs_proxy")
29
+ end
30
+ pictureFile = Tempfile.new(["phantomjs_proxy/page", ".png"]) # NOTE(review): prefix contains a "/"; presumably meant to place the file in /tmp/phantomjs_proxy - confirm on the target Ruby version
31
+ picture = pictureFile.path
32
+ end
33
+
34
+ url_args = ""
35
+ url_args_ = []
36
+
37
+ if /\?/.match(url) # move the query string out of url; its parts become separate script arguments
38
+ url_args = url.split('?')[1] # text between the first and the second "?"
39
+ url = url.split('?')[0]
40
+
41
+ if url_args
42
+ url_args_ = url_args.split('&')
43
+ url_args = url_args_.join(' ') # space separated so each pair is its own command-line argument
44
+ end
45
+ end
46
+
47
+ @dom = invokePhantomJS(SCRIPT, [picture, loadFrames, url, url_args_.length, url_args]) # raw script output; parsed below
48
+
49
+ puts("Opened page: "+ /Open page: (.*?) END/.match(@dom)[1]) # NOTE(review): match returns nil when the marker is absent, so [1] would raise
50
+
51
+ if /DONE_LOADING_URL/.match(@dom)
52
+ @dom = @dom.split('PHANTOMJS_DOMDATA_WRITE:')[1]; # NOTE(review): nil if the marker is missing; the next line would then raise
53
+ @dom = @dom.split('PHANTOMJS_DOMDATA_END')[0] # keep only the text between the two markers
54
+ if pictureOnly && File.exist?(picture)
55
+ puts("File is there")
56
+ @image = IO::File.open(picture, "rb") {|f| f.read } # binary read of the rendered file
57
+ pictureFile.close! # NOTE(review): the temp file is only closed on this path
58
+ else
59
+ puts("No file to load at: "+picture)
60
+ @image = ""
61
+ end
62
+ @ready = true
63
+ else
64
+ @dom = "Failed to load page" # error text returned to the caller; @ready stays false
65
+ puts("TOTAL FAIL")
66
+ end
67
+ puts("Return dom")
68
+ return @dom
69
+ end
70
+
71
+ def getAsImageResponse(type='png') # builds a raw HTTP/1.0 response string with @image as the body
72
+ return "HTTP/1.0 200 OK\r\nConnection: close\r\nContent-Type: image/"+type+"\r\n\r\n"+@image; # NOTE(review): presumably requires getUrl to have set @image first - confirm
73
+ end
74
+
75
+ def invokePhantomJS(script, args) # runs the phantomjs binary on script with args and returns everything it printed
76
+ argString = " "+args.join(" ")
77
+ puts("Call phantomJS with: "+argString)
78
+ out = IO.popen(PHANTOMJS_BIN+" --cookies-file=/tmp/phantomjs_proxy/cookies.txt "+script+argString) # NOTE(review): one concatenated command string built from request data; arguments are not escaped - consider the array form of popen
79
+ o = out.readlines.join # NOTE(review): the pipe is never closed explicitly
80
+ puts("PHANTOMJS_OUT: "+o)
81
+ return o
82
+ end
83
+ end
84
+ end
@@ -0,0 +1,104 @@
1
+ require 'net/http'
2
+
3
+ module PhantomJSProxy
4
+ class PhantomJSServer
5
+ def initialize() # nothing to set up; the server keeps no instance state
6
+ end
7
+
8
+ def check_for_route(url) # returns a content type for URLs that call() passes to route(), or "none"
9
+ if /\.js/i.match(url) # NOTE(review): unanchored, so ".json", ".jsp" or ".js" anywhere in the URL also match
10
+ return 'text/html'; # NOTE(review): scripts are labelled text/html - confirm this is intended
11
+ end
12
+ if /\.css/i.match(url)
13
+ return 'text/css'
14
+ end
15
+ if /\.png/i.match(url) or /\.jpg/i.match(url) or /\.jpeg/i.match(url) or /\.gif/i.match(url)
16
+ return 'image/*';
17
+ end
18
+ "none" # no pattern matched: call() fetches the page with PhantomJS instead
19
+ end
20
+
21
+ def route(env, type) # fetches REQUEST_URI with Net::HTTP and returns the body with the given content type
22
+ _req = Net::HTTP::Get.new(env['REQUEST_URI']) # NOTE(review): built but never sent; the request call below is commented out
23
+
24
+ _req['User-Agent'] = env['HTTP_USER_AGENT'] # has no effect while _req is unused
25
+
26
+ _res = Net::HTTP.start(env['HTTP_HOST'], env['SERVER_PORT']) {|http|
27
+ #http.request(_req)
28
+ http.get(env['REQUEST_URI'])
29
+ }
30
+
31
+ env['rack.errors'].write("Response is:"+_res.body+"\n") # the whole upstream body is written to the error stream
32
+
33
+ resp = Rack::Response.new([], 200, {'Content-Type' => type}) { |r| # NOTE(review): status is always 200, whatever the upstream answered
34
+ r.write(_res.body)
35
+ }
36
+ resp.finish
37
+ end
38
+
39
+ def call(env) # Rack entry point: static-looking URLs go through route(), everything else is fetched with PhantomJS
40
+ req = Rack::Request.new(env)
41
+
42
+ haha = env.collect { |k, v| "#{k} : #{v}\n" }.join # dump of the whole Rack env for the log line below
43
+ env['rack.errors'].write("The request: "+req.url()+"\nGET: "+haha+"\n")
44
+
45
+ params = req.params.collect { |k, v| "#{k}=#{v}&\n" }.join # NOTE(review): every pair ends in "&\n", so the string appended to the URL below contains newlines
46
+ env['rack.errors'].write("Paramas: "+params+"\n")
47
+
48
+ #this routes the request to the outgoing server incase its not html that we want to load
49
+ type = check_for_route(env['REQUEST_URI'])
50
+ if type != "none"
51
+ return route(env, type)
52
+ end
53
+
54
+
55
+ #Fetch the Webpage with PhantomJS
56
+ phJS = PhantomJS.new
57
+
58
+ env['rack.errors'].write("Extract the uri\n")
59
+
60
+ if defined? env['HTTP_GET_PAGE_AS_IMAGE'] # NOTE(review): defined? on a method call is truthy whenever the method exists, so the else branch looks unreachable and a missing header yields nil
61
+ picture = env['HTTP_GET_PAGE_AS_IMAGE'] # header values are strings, so "false" would still be truthy
62
+ else
63
+ picture = true
64
+ end
65
+
66
+ if defined? env['HTTP_GET_PAGE_WITH_IFRAMES'] # NOTE(review): this defined? check is likewise always truthy
67
+ loadFrames = env['HTTP_GET_PAGE_WITH_IFRAMES']
68
+ else
69
+ loadFrames = false
70
+ end
71
+
72
+ url = env['REQUEST_URI'];
73
+ if params.length > 0
74
+ url += '?'+params; # NOTE(review): REQUEST_URI may already carry the query string - confirm this does not duplicate it
75
+ end
76
+
77
+ phJS.getUrl(url, picture, loadFrames)
78
+
79
+ #Create the response
80
+ if !phJS.ready # fetch failed: answer 503 with the text stored in dom
81
+ resp = Rack::Response.new([], 503, {
82
+ 'Content-Type' => 'text/html'
83
+ }) { |r|
84
+ r.write(phJS.dom)
85
+ }
86
+ resp.finish
87
+ elsif picture # screenshot requested: answer with the rendered image bytes
88
+ resp = Rack::Response.new([], 200, {
89
+ 'Content-Type' => 'image/png'
90
+ }) { |r|
91
+ r.write(phJS.image)
92
+ }
93
+ resp.finish
94
+ else # otherwise answer with the DOM text extracted from the script output
95
+ resp = Rack::Response.new([], 200, {
96
+ 'Content-Type' => 'text/html'
97
+ }) { |r|
98
+ r.write(phJS.dom)
99
+ }
100
+ resp.finish
101
+ end
102
+ end
103
+ end
104
+ end
@@ -0,0 +1,169 @@
1
+ var fs = require('fs');
2
+
3
+ var framesWorked = 0; // number of finished page loads, incremented in exit()
4
+ var frameCount = 1; // loads expected before insertFrames runs; raised by loadIFrames
5
+ var frameContent = []; // content of each successfully loaded iframe document
6
+ var masterURL = ""; // address of the main page, set in main()
7
+
8
+ evaluateWithVars = function(page, func, vars) // NOTE(review): assigned without var (implicit global); the only call in this script is commented out in main
9
+ {
10
+ var fstr = func.toString()
11
+ //console.log(fstr.replace("function () {", "function () {\n"+vstr))
12
+ var evalstr = fstr.replace(
13
+ new RegExp("function \((.*?)\) {"), // NOTE(review): inside a string literal "\(" is just "(", so these parentheses act as groups, not literal parens
14
+ "function $1 {\n" +
15
+ "var vars = JSON.parse('" + JSON.stringify(vars) + "')\n" + // NOTE(review): the JSON text is placed between single quotes without escaping
16
+ "for (var v in vars) window[v] = vars[v]\n" + // copies every entry of vars onto window inside the page
17
+ "\n"
18
+ )
19
+ console.log(evalstr)
20
+ return page.evaluate(evalstr)
21
+ }
22
+
23
+ var insertFrames = function(url) { // reloads url, swaps iframe markers for the collected frame content, prints the DOM between markers, then exits
24
+ var page = require('webpage').create();
25
+ page.onConsoleMessage = function (msg) { console.log(msg); };
26
+ page.onAlert = function(msg) { console.log(msg);};
27
+ page.onLoadStarted = function () {
28
+ console.log('Start loading final Page...'+url);
29
+ };
30
+ page.open(url, function (status) {
31
+ if (status !== 'success') {
32
+ console.log('FAILED_LOADING_URL: '+url);
33
+ } else {
34
+ page.evaluate(function () {
35
+ var framestmp = document.getElementsByTagName('IFRAME');
36
+ var frames = []
37
+ for (var i=0;i<framestmp.length;i++) {
38
+ frames.push(framestmp[i]);
39
+ }
40
+ //mark iframes
41
+ for (var i in frames) {
42
+ frames[i].innerHTML = "PHANTOMJS_PROXY_IFRAME"+i; // marker text is looked up in page.content below
43
+ }
44
+ });
45
+ //replace iframes with their data
46
+ var content = new String(page.content);
47
+ for (var i in frameContent) { // NOTE(review): frameContent is filled in load-completion order and skips failed loads, so index i may not match iframe i - confirm
48
+ content = content.replace("PHANTOMJS_PROXY_IFRAME"+i, "<phantomjsframe>"+frameContent[i]+"</phantomjsframe>"); // NOTE(review): string pattern replaces the first occurrence only; marker 1 is also a prefix of marker 10
49
+ }
50
+ console.log("PHANTOMJS_DOMDATA_WRITE:"+content); // these two markers are what the Ruby side splits the output on
51
+ console.log('PHANTOMJS_DOMDATA_END');
52
+ }
53
+ console.log('WHATEVER');
54
+ phantom.exit(); // ends the script whether or not the reload succeeded
55
+ });
56
+ };
57
+
58
+ function exit() { // called once per finished page load (main page and each iframe)
59
+ framesWorked++;
60
+ if (framesWorked == frameCount) // all expected loads are done: build the final output
61
+ insertFrames(masterURL);
62
+ }
63
+
64
+ var loadpage = function(url) { // loads one iframe URL in its own page and stores its content in frameContent
65
+ var page = require('webpage').create();
66
+ page.onConsoleMessage = function (msg) { console.log(msg); };
67
+ //page.onLoadFinished =
68
+ page.onAlert = function(msg) { console.log(msg);};
69
+ page.onLoadStarted = function () {
70
+ console.log('Start loading...'+url);
71
+ };
72
+ page.open(url, function (status) {
73
+ if (status !== 'success') {
74
+ console.log('FAILED_LOADING_URL: '+url); // nothing is pushed for a failed frame
75
+ } else {
76
+ console.log('LOADED PAGE CONTENT['+url+']\n');
77
+ frameContent.push(page.content);
78
+ }
79
+ console.log('WHATEVER');
80
+ exit(); // counts this frame as finished in both cases
81
+ });
82
+ };
83
+
84
+ function loadIFrames(page) { // starts a loadpage() for the src of every IFRAME element in page
85
+ var frames = page.evaluate(function () {
86
+ var framestmp = document.getElementsByTagName('IFRAME');
87
+ var frames = []
88
+ for (var i=0;i<framestmp.length;i++) {
89
+ frames.push(framestmp[i].getAttribute('src')); // NOTE(review): src is taken as written (may be null or relative) - confirm loadpage can open it
90
+ }
91
+ return frames;
92
+ });
93
+
94
+ for (var i=0;i<frames.length;i++) {
95
+ console.log("Frame: "+i+" : "+frames[i]);
96
+ loadpage(frames[i]);
97
+ }
98
+
99
+ frameCount = frames.length+1; // main page plus one load per iframe; NOTE(review): set after the loads were started - presumably safe because the open callbacks run later, confirm
100
+ }
101
+
102
+ function main() { // parses phantom.args, opens the page, optionally loads iframes and renders a screenshot
103
+
104
+ if (phantom.args.length < 2) { // NOTE(review): args[2] and args[3] are read below, so this check admits too few arguments
105
+ console.log('Usage: proxy.js <picture filename or none> <load iframe(true/false)> <URL> <url param count> <url params...>');
106
+ phantom.exit();
107
+ } else {
108
+ file_name = phantom.args[0]; // NOTE(review): file_name, address and args are assigned without var (implicit globals)
109
+ var loadIframes = phantom.args[1].match(/true/i) ? true : false;
110
+ address = phantom.args[2];
111
+
112
+ var argCount = phantom.args[3];
113
+
114
+ args = ""
115
+ for (var i=0;i<argCount;i++)
116
+ args += phantom.args[i+4]+'&'; // rebuilds the query string; every parameter is followed by "&", including the last
117
+ if (args.length > 0)
118
+ address += '?'+args;
119
+
120
+ console.log("Open page: "+address+", "+args+" END"); // the Ruby side extracts the text between "Open page: " and " END"
121
+
122
+ var page = require('webpage').create();
123
+
124
+ page.onConsoleMessage = function (msg) { console.log(msg); };
125
+
126
+ console.log('start openning page');
127
+
128
+ masterURL = address; // remembered so insertFrames can reload the same address
129
+
130
+ page.open(address, function (status) {
131
+ if (status !== 'success') {
132
+ console.log('FAILED_LOADING_URL');
133
+ } else {
134
+ console.log('DONE_LOADING_URL'); // success marker checked by the Ruby side
135
+
136
+ //load iframes into page
137
+ if (loadIframes) {
138
+ loadIFrames(page);
139
+ /*
140
+ var frames = page.evaluate(function () {
141
+ var framestmp = document.getElementsByTagName('IFRAME');
142
+ var frames = []
143
+ for (var i=0;i<framestmp.length;i++) {
144
+ frames.push(framestmp[i].getAttribute('src'));
145
+ }
146
+ return frames;
147
+ });
148
+
149
+ for (var i=0;i<frames.length;i++) {
150
+
151
+ console.log("Frame: "+i+" : "+frames[i]);
152
+ loadpage(frames[i]);
153
+ }
154
+ frameCount = frames.length+1;
155
+ */
156
+ }
157
+ //evaluateWithVars(page, function(){}, phantom.args);
158
+ console.log('PHANTOMJS_MAINDOM_WRITE:'+page.content);
159
+ console.log('PHANTOMJS_MAINDOM_END');
160
+ }
161
+ if (file_name != null && file_name != "none") { // "none" means no screenshot was requested
162
+ page.render(file_name);
163
+ }
164
+ exit(); // NOTE(review): also reached after a failed load, which can still trigger insertFrames on the same address
165
+ });
166
+ }
167
+ }
168
+
169
+ main();
@@ -0,0 +1,10 @@
1
+ require 'rubygems'
2
+
3
+ module PhantomJSProxy # path constants used by the classes required below
4
+ ROOT = File.expand_path(File.dirname(__FILE__)) # absolute directory of this file
5
+ SCRIPT = ROOT+"/phantom_proxy/scripts/proxy.js" # script passed to the phantomjs binary
6
+ PHANTOMJS_BIN = ROOT+'/../bin/phantomjs' # phantomjs executable expected in the gem's bin directory
7
+ end
8
+
9
+ require PhantomJSProxy::ROOT+'/phantom_proxy/phantomjs.rb'
10
+ require PhantomJSProxy::ROOT+'/phantom_proxy/phantomjsserver.rb'
metadata ADDED
@@ -0,0 +1,71 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: phantom_proxy
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Daniel Sudmann
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-03-23 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: thin
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: 1.3.1
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: 1.3.1
30
+ description: This is a phyntonjs Proxy it allows you to fetch webpages and execute
31
+ javascript in them.
32
+ email: suddani@googlemail.com
33
+ executables:
34
+ - phantom_proxy
35
+ extensions: []
36
+ extra_rdoc_files: []
37
+ files:
38
+ - lib/phantom_proxy/phantomjsserver.rb
39
+ - lib/phantom_proxy/phantomjs.rb
40
+ - lib/phantom_proxy.rb
41
+ - lib/phantom_proxy/scripts/proxy.js
42
+ - lib/phantom_proxy/config.ru
43
+ - bin/phantom_proxy
44
+ - bin/phantomjs
45
+ - README.rdoc
46
+ - Gemfile
47
+ homepage: http://experteer.com
48
+ licenses: []
49
+ post_install_message:
50
+ rdoc_options: []
51
+ require_paths:
52
+ - lib
53
+ required_ruby_version: !ruby/object:Gem::Requirement
54
+ none: false
55
+ requirements:
56
+ - - ! '>='
57
+ - !ruby/object:Gem::Version
58
+ version: '0'
59
+ required_rubygems_version: !ruby/object:Gem::Requirement
60
+ none: false
61
+ requirements:
62
+ - - ! '>='
63
+ - !ruby/object:Gem::Version
64
+ version: '0'
65
+ requirements: []
66
+ rubyforge_project:
67
+ rubygems_version: 1.8.19
68
+ signing_key:
69
+ specification_version: 3
70
+ summary: This is a phantomjs Proxy
71
+ test_files: []