phantom_proxy 1.2.14 → 1.2.15

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,12 @@
1
+ phantom_proxy-*.gem
2
+ .rvmrc
3
+ log/
4
+
5
+ #IDE
6
+ .redcar
7
+ .project
8
+ .rbenv-gemsets
9
+
10
+ doc
11
+ !.keep
12
+ Gemfile.lock
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ ree-1.8.7-2012.02
data/Gemfile CHANGED
@@ -1,2 +1,5 @@
1
1
  source 'http://rubygems.org'
2
+
3
+ gemspec
4
+
2
5
  gem 'rack'
@@ -68,7 +68,7 @@ module PhantomJSProxy
68
68
  if /URL_ERROR_CODE/.match(@dom)
69
69
  puts("LOAD_ERROR_CODE")
70
70
  @ready = getHTTPCode @dom
71
- puts("LOAD_ERROR_CODE_DONE")
71
+ puts("LOAD_ERROR_CODE_DONE: #{@ready}")
72
72
  end
73
73
  @dom = dom_text
74
74
  return @dom
@@ -3,6 +3,20 @@ require 'hmac-md5'
3
3
  require 'base64'
4
4
 
5
5
  module PhantomJSProxy
6
# Per-request load options, read from the Rack environment of the incoming
# request.
#
# Rack exposes request headers as strings under HTTP_* keys, so a header
# such as "Get-Page-With-Iframes: false" arrives as the (truthy) string
# "false". Both predicates therefore map explicit "off" values to +false+
# instead of relying on Ruby truthiness. Any other value is passed through
# unchanged so callers that forward the raw header value keep working.
class Options
  # Header values (compared case-insensitively, surrounding whitespace
  # ignored) that explicitly switch an option off.
  DISABLED_VALUES = ['', '0', 'false', 'no', 'off'].freeze

  # env:: the Rack environment hash of the current request.
  def initialize(env)
    @env = env
  end

  # Whether the page should be returned as an image.
  #
  # Returns +false+ when HTTP_GET_PAGE_AS_IMAGE is absent or explicitly
  # disabled, otherwise the raw header value.
  def picture?
    option('HTTP_GET_PAGE_AS_IMAGE', false)
  end

  # Whether iframe contents should be loaded into the page.
  #
  # Returns +true+ when HTTP_GET_PAGE_WITH_IFRAMES is absent (the default),
  # +false+ when it is explicitly disabled, otherwise the raw header value.
  def loadFrames?
    option('HTTP_GET_PAGE_WITH_IFRAMES', true)
  end

  private

  # Looks up +key+ in the environment, falling back to +default+ only when
  # the key is missing, and normalising explicit "off" values to +false+.
  def option(key, default)
    value = @env[key]
    return default if value.nil?
    return false if value == false
    return false if DISABLED_VALUES.include?(value.to_s.strip.downcase)
    value
  end
end
19
+
6
20
  class PhantomJSServer
7
21
  def initialize
8
22
  @control_panel = PhantomJSProxy::PhantomJSControlPanel.new
@@ -78,28 +92,13 @@ module PhantomJSProxy
78
92
  return true
79
93
  end
80
94
 
81
- def getOptions(env)
82
- if defined? env['HTTP_GET_PAGE_AS_IMAGE']
83
- picture = env['HTTP_GET_PAGE_AS_IMAGE']
84
- else
85
- picture = true
86
- end
87
-
88
- if defined? env['HTTP_GET_PAGE_WITH_IFRAMES']
89
- loadFrames = env['HTTP_GET_PAGE_WITH_IFRAMES']
90
- else
91
- loadFrames = false
92
- end
93
-
94
- return picture,loadFrames
95
- end
96
-
97
95
  def prepareUrl(env, params, req, https_request, type)
98
96
  if type == "none"
99
- url = env['REQUEST_URI'];
97
+ url = req.url#env['REQUEST_URI']
98
+ puts "URL is: #{url}"
100
99
  if https_request
101
- url['http'] = 'https'
102
- url[':443'] = ''
100
+ url['http'] = 'https' if url['http']
101
+ url[':443'] = '' if url[':443']
103
102
  end
104
103
 
105
104
  if params.length > 0
@@ -108,7 +107,7 @@ module PhantomJSProxy
108
107
  return url
109
108
  end
110
109
  url = Base64.decode64(req.params["address"])
111
- env['rack.errors'].write("After Base64 decoding: "+url)
110
+ env['rack.errors'].write("After Base64 decoding: "+url+"\n")
112
111
  return url
113
112
  end
114
113
 
@@ -138,7 +137,7 @@ module PhantomJSProxy
138
137
  env['rack.errors'].write("Paramas: "+params+"\n")
139
138
 
140
139
  #this routes the request to the outgoing server incase its not html that we want to load
141
- type = check_for_route(env['REQUEST_URI'])
140
+ type = check_for_route(req.url)#env['REQUEST_URI'])
142
141
  if type == "control_panel"
143
142
  return control_panel.show()
144
143
  elsif type != "none" and type != "base64"
@@ -149,15 +148,25 @@ module PhantomJSProxy
149
148
  phJS = PhantomJS.new
150
149
 
151
150
  env['rack.errors'].write("Extract the uri\n")
152
-
153
- picture,loadFrames = getOptions(env)
151
+
152
+ loadOptions = Options.new(env)
153
+
154
+ puts "Options: #{loadOptions.picture?}, #{loadOptions.loadFrames?}"
154
155
 
155
156
  url = prepareUrl(env, params, req, https_request, type)
156
157
 
157
- phJS.getUrl(url, picture, loadFrames)
158
+ phJS.getUrl(url, loadOptions.picture?, loadOptions.loadFrames?)
158
159
 
159
160
  #Create the response
160
- if phJS.ready != 200
161
+ if loadOptions.picture?
162
+ control_panel.add_special_request "@image_requests"
163
+ resp = Rack::Response.new([], 200, {
164
+ 'Content-Type' => 'image/png'
165
+ }) { |r|
166
+ r.write(phJS.image)
167
+ }
168
+ resp.finish
169
+ elsif phJS.ready != 200
161
170
  if !/favicon\.ico/.match(req.url())
162
171
  env['rack.errors'].write("Request FAILED\n")
163
172
  control_panel.add_special_request "@failed_requests"
@@ -170,14 +179,6 @@ module PhantomJSProxy
170
179
  r.write(phJS.dom)
171
180
  }
172
181
  resp.finish
173
- elsif picture
174
- control_panel.add_special_request "@image_requests"
175
- resp = Rack::Response.new([], 200, {
176
- 'Content-Type' => 'image/png'
177
- }) { |r|
178
- r.write(phJS.image)
179
- }
180
- resp.finish
181
182
  else
182
183
  control_panel.add_special_request "@html_requests"
183
184
  resp = Rack::Response.new([], 200, {
@@ -6,103 +6,72 @@ var frameContent = [];
6
6
  var masterURL = "";
7
7
  var masterPage = null;
8
8
 
9
- function evaluateWithVars(page, func, vars)
10
- {
11
- var fstr = func.toString()
12
- //console.log(fstr.replace("function () {", "function () {\n"+vstr))
13
- var evalstr = fstr.replace(
14
- new RegExp("function \((.*?)\) {"),
15
- "function $1 {\n" +
16
- "var vars = JSON.parse('" + JSON.stringify(vars) + "')\n" +
17
- "for (var v in vars) window[v] = vars[v]\n" +
18
- "\n"
19
- )
20
- console.log(evalstr)
21
- return page.evaluate(evalstr)
22
- }
23
-
24
- function insertFrames(url) {
25
-
26
- var page = masterPage;
27
- /*
28
- var page = require('webpage').create();
29
- page.onConsoleMessage = function (msg) { console.log(msg); };
30
- page.onAlert = function(msg) { console.log(msg);};
31
- page.onLoadStarted = function () {
32
- console.log('Start loading final Page...'+url);
33
- };
34
- page.open(url, function (status) {
35
- if (status !== 'success') {
36
- console.log('FAILED_LOADING_URL: '+url);
37
- } else {*/
38
- page.evaluate(function () {
39
- var framestmp = document.getElementsByTagName('IFRAME');
40
- var frames = []
41
- for (var i=0;i<framestmp.length;i++) {
42
- frames.push(framestmp[i]);
43
- }
44
- //mark iframes
45
- for (var i in frames) {
46
- frames[i].innerHTML = "PHANTOMJS_PROXY_IFRAME"+i;
47
- }
48
- });
49
-
50
- //replace iframes with their data
51
- var content = new String(page.content);
52
- for (var i in frameContent) {
53
- content = content.replace("PHANTOMJS_PROXY_IFRAME"+i, "<phantomjsframe>"+frameContent[i]+"</phantomjsframe>");
9
/**
 * Writes the DOM of `page` to stdout between the
 * "PHANTOMJS_DOMDATA_WRITE:" and "PHANTOMJS_DOMDATA_END" markers.
 *
 * When `load_iframes` is truthy, every <iframe> in the page is replaced by a
 * <phantomjsframe> element containing the inner HTML of that iframe's body,
 * so the emitted DOM carries the frame contents inline.
 *
 * @param page         a loaded PhantomJS WebPage object
 * @param load_iframes whether iframe contents should be inlined
 */
function newIFrameLoad(page, load_iframes) {
  var frame_data = [];
  if (load_iframes) {
    frame_data = page.evaluate(function () {
      // getElementsByTagName returns a LIVE collection: assigning outerHTML
      // removes the iframe from it, so iterating the collection directly
      // would skip every other frame. Snapshot it into a plain array first.
      var live = document.getElementsByTagName('IFRAME');
      var frames = [];
      for (var i = 0; i < live.length; i++) {
        frames.push(live[i]);
      }

      var contents = [];
      for (var j = 0; j < frames.length; j++) {
        var html = "";
        try {
          var doc = frames[j].contentWindow && frames[j].contentWindow.document;
          if (doc && doc.body) {
            html = doc.body.innerHTML;
          }
        } catch (e) {
          // Frame document not accessible; keep an empty body so the
          // remaining frames are still processed.
        }
        contents.push(html);
        frames[j].outerHTML = "<phantomjsframe>PHANTOM_JS_FRAME_" + j + "</phantomjsframe>";
      }
      return contents;
    }) || []; // evaluate yields null if the sandboxed function failed
  }

  var content = String(page.content);
  for (var k = 0; k < frame_data.length; k++) {
    // Use a replacer function: with a string replacement, "$&", "$1" etc.
    // inside the frame HTML would be interpreted as replacement patterns.
    content = content.replace("PHANTOM_JS_FRAME_" + k, (function (html) {
      return function () { return html; };
    })(frame_data[k]));
  }
  console.log("PHANTOMJS_DOMDATA_WRITE:" + content);
  console.log('PHANTOMJS_DOMDATA_END');
}
68
29
 
69
- var loadpage = function(url) {
30
/**
 * Opens `url` in a fresh PhantomJS page and reports the outcome through
 * callbacks.
 *
 * If the response for `url` itself carries a redirect URL, the redirect
 * target is loaded in a new page (with `url` as referer) and the same
 * callbacks are used for that load.
 *
 * @param url       address to open
 * @param referer   optional referer header value; skipped when undefined
 * @param success   called as success(page, url) when the page loaded
 * @param failure   called as failure(page, url) when loading failed
 * @param configure optional hook, called as configure(page) before opening
 */
var loadpage = function(url, referer, success, failure, configure) {
  var redirectURL = null;

  var page = require('webpage').create();

  page.settings.localToRemoteUrlAccessEnabled = true;

  page.settings.userAgent = "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/34.0.1847.116 Chrome/34.0.1847.116 Safari/537.36";

  // Forward everything the page prints to our own stdout.
  page.onConsoleMessage = function (msg) { console.log(msg); };

  page.onAlert = function(msg) { console.log(msg);};

  page.onLoadStarted = function () {
    console.log('Start loading...'+url);
  };

  // Remember a redirect issued for the requested URL itself; it is followed
  // manually once page.open has finished.
  page.onResourceReceived = function(resource) {
    if (url == resource.url && resource.redirectURL && resource.stage == "end") {
      redirectURL = resource.redirectURL;
      console.log('FRAME_URL_ERROR_CODE: '+resource.status+'FRAME_URL_ERROR_CODE_END');
    }
  };

  if (referer != undefined) {
    console.log("Set Referer: "+referer);
    page.customHeaders = {
      "REFERER": referer
    };
  }

  if (configure != undefined)
    configure(page);

  page.open(url, function (status) {
    console.log("Page Status: "+status);
    if (redirectURL) {
      // Pass `configure` along as well so the redirected page gets the same
      // setup as the original one.
      // NOTE(review): redirects are followed without an upper bound.
      loadpage(redirectURL, url, success, failure, configure);
    } else if (status !== 'success') {
      failure(page, url);
    } else {
      success(page, url);
    }
  });
};
106
75
 
107
76
  function main() {
108
77
 
@@ -118,49 +87,43 @@ function main() {
118
87
 
119
88
  args = ""
120
89
 
121
- for (var i=0;i<argCount;i++)
122
- args += phantom.args[i+4]+'&';
90
+ for (var i=0;i<argCount;i++) {
91
+ args += phantom.args[i+4];
92
+ if (i<argCount-1) args += "&"
93
+ }
123
94
  if (args.length > 0)
124
95
  address += '?'+args;
125
96
 
126
97
  console.log("Open page: "+address+", "+args+" END");
127
98
 
128
- var page = require('webpage').create();
129
-
130
- page.onConsoleMessage = function (msg) { console.log(msg); };
131
-
132
99
  console.log('start openning page');
133
100
 
134
101
  masterURL = address;
135
-
136
- masterPage = page;
137
-
138
- //catches status != 200 and throws error immidiatly
139
- page.onResourceReceived = function (response) {
140
- if (response.stage == "end" && response.url == address && response.status != 200)
141
- {
142
- console.log('URL_ERROR_CODE: '+response.status+'URL_ERROR_CODE_END');
143
- //phantom.exit();
144
- }
145
- };
146
102
 
147
- page.open(address, function (status) {
148
- if (status != 'success') {
149
- console.log('FAILED_LOADING_URL');
150
- } else {
151
- console.log('DONE_LOADING_URL');
152
- //load iframes into page
153
- if (loadIframes) {
154
- loadIFrames(page);
155
- }
156
- //evaluateWithVars(page, function(){}, phantom.args);
157
- console.log('PHANTOMJS_MAINDOM_WRITE:'+page.content);
158
- console.log('PHANTOMJS_MAINDOM_END');
103
+ loadpage(address, undefined, function(page) {
104
+ masterPage = page;
105
+ console.log('DONE_LOADING_URL');
106
+ //load iframes into page
107
+ newIFrameLoad(page, loadIframes);
108
+ if (file_name != null && file_name != "none") {
109
+ page.render(file_name);
159
110
  }
111
+ phantom.exit();
112
+ }, function(page) {
113
+ masterPage = page;
114
+ console.log('FAILED_LOADING_URL: '+status+", "+address);
160
115
  if (file_name != null && file_name != "none") {
161
116
  page.render(file_name);
162
117
  }
163
- exit();
118
+ phantom.exit();
119
+ }, function(page) {
120
+ //page.customHeaders = {"Referer": "http://uk-amazon.icims.com/jobs/240290/account-representative---amazon-web-services---iberia/job"}
121
+ // page.onResourceReceived = function (response) {
122
+ // if (response.stage == "end" && response.url == address && response.status != 200)
123
+ // {
124
+ // console.log('URL_ERROR_CODE: '+response.status+'URL_ERROR_CODE_END');
125
+ // }
126
+ // };
164
127
  });
165
128
  }
166
129
  }
@@ -0,0 +1,28 @@
1
require 'rubygems'

# Gem specification for phantom_proxy.
Gem::Specification.new do |s|
  s.name        = 'phantom_proxy'
  s.version     = '1.2.15'
  s.summary     = "This is a phantomjs Proxy"
  s.description = "This is a phantomjs Proxy it allows you to fetch webpages and execute javascript in them."
  s.authors     = ["Daniel Sudmann"]
  s.email       = 'suddani@googlemail.com'
  # Package every file tracked by git. Split on newlines only: splitting on
  # $\ (nil by default) splits on any whitespace and would break file names
  # that contain spaces.
  s.files       = `git ls-files`.split("\n")
  s.homepage    = 'http://experteer.com'
  s.executables = ['phantom_proxy']
  s.add_dependency('thin', '>= 1.3.1')
  s.add_dependency('ruby-hmac', '>= 0.4.0')
end
data/spec/test ADDED
@@ -0,0 +1,2 @@
1
+ #bash
2
+ bin/phantomjs lib/phantom_proxy/scripts/proxy.js tmp/picture.png true http://dl.dropbox.com/u/36978459/index.html > tmp/log.txt
data/tmp/.keep ADDED
File without changes
data/tmp/run ADDED
@@ -0,0 +1,5 @@
1
+ gem uninstall phantom_proxy -x
2
+ rm phantom_proxy-*.*.*.gem
3
+ gem build phantom_proxy.gemspec
4
+ gem install phantom_proxy-*.*.*.gem
5
+ phantom_proxy -p 3003
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: phantom_proxy
3
3
  version: !ruby/object:Gem::Version
4
- hash: 3
4
+ hash: 1
5
5
  prerelease:
6
6
  segments:
7
7
  - 1
8
8
  - 2
9
- - 14
10
- version: 1.2.14
9
+ - 15
10
+ version: 1.2.15
11
11
  platform: ruby
12
12
  authors:
13
13
  - Daniel Sudmann
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2014-01-14 00:00:00 +01:00
18
+ date: 2014-05-09 00:00:00 +02:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
@@ -59,18 +59,24 @@ extensions: []
59
59
  extra_rdoc_files: []
60
60
 
61
61
  files:
62
- - lib/phantom_proxy/phantomjsserver.rb
63
- - lib/phantom_proxy/phantomjs_control_panel.rb
64
- - lib/phantom_proxy/phantomjs.rb
62
+ - .gitignore
63
+ - .ruby-version
64
+ - Gemfile
65
+ - README.rdoc
66
+ - bin/phantom_proxy
65
67
  - lib/phantom_proxy.rb
66
- - lib/phantom_proxy/scripts/proxy.js
67
68
  - lib/phantom_proxy/config.ru
68
- - lib/phantom_proxy/web/control_panel.html
69
69
  - lib/phantom_proxy/install/etc/init.d/phproxy
70
70
  - lib/phantom_proxy/install/etc/phantom_proxy/config/example_config
71
- - bin/phantom_proxy
72
- - README.rdoc
73
- - Gemfile
71
+ - lib/phantom_proxy/phantomjs.rb
72
+ - lib/phantom_proxy/phantomjs_control_panel.rb
73
+ - lib/phantom_proxy/phantomjsserver.rb
74
+ - lib/phantom_proxy/scripts/proxy.js
75
+ - lib/phantom_proxy/web/control_panel.html
76
+ - phantom_proxy.gemspec
77
+ - spec/test
78
+ - tmp/.keep
79
+ - tmp/run
74
80
  has_rdoc: true
75
81
  homepage: http://experteer.com
76
82
  licenses: []
@@ -101,7 +107,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
101
107
  requirements: []
102
108
 
103
109
  rubyforge_project:
104
- rubygems_version: 1.5.2
110
+ rubygems_version: 1.5.3
105
111
  signing_key:
106
112
  specification_version: 3
107
113
  summary: This is a phantomjs Proxy