phantom_proxy 1.2.14 → 1.2.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,12 @@
1
+ phantom_proxy-*.gem
2
+ .rvmrc
3
+ log/
4
+
5
+ #IDE
6
+ .redcar
7
+ .project
8
+ .rbenv-gemsets
9
+
10
+ doc
11
+ !.keep
12
+ Gemfile.lock
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ ree-1.8.7-2012.02
data/Gemfile CHANGED
@@ -1,2 +1,5 @@
1
1
  source 'http://rubygems.org'
2
+
3
+ gemspec
4
+
2
5
  gem 'rack'
@@ -68,7 +68,7 @@ module PhantomJSProxy
68
68
  if /URL_ERROR_CODE/.match(@dom)
69
69
  puts("LOAD_ERROR_CODE")
70
70
  @ready = getHTTPCode @dom
71
- puts("LOAD_ERROR_CODE_DONE")
71
+ puts("LOAD_ERROR_CODE_DONE: #{@ready}")
72
72
  end
73
73
  @dom = dom_text
74
74
  return @dom
@@ -3,6 +3,20 @@ require 'hmac-md5'
3
3
  require 'base64'
4
4
 
5
5
  module PhantomJSProxy
6
# Per-request rendering options, read from the Rack environment.
#
# Both options arrive as HTTP request headers, so when present their values
# are strings. A string such as "false" is truthy in Ruby, which is why the
# predicates below parse the text instead of returning the raw value.
class Options
  # Header values (compared case-insensitively, surrounding whitespace
  # ignored) that switch an option off.
  DISABLED_VALUES = ['0', 'false', 'no', 'off'].freeze

  # env - the Rack environment of the current request.
  def initialize(env)
    @env = env
  end

  # Returns true when the client asked for the page as an image
  # (Get-Page-As-Image header). Defaults to false when the header is absent.
  def picture?
    flag('HTTP_GET_PAGE_AS_IMAGE', false)
  end

  # Returns true when iframe contents should be inlined
  # (Get-Page-With-Iframes header). Defaults to true when the header is absent.
  def loadFrames?
    flag('HTTP_GET_PAGE_WITH_IFRAMES', true)
  end

  private

  # Reads +key+ from the environment and converts it to a real boolean.
  # A missing or blank value yields +default+; any value other than the
  # DISABLED_VALUES counts as enabled.
  def flag(key, default)
    text = @env[key].to_s.strip.downcase
    return default if text.empty?
    !DISABLED_VALUES.include?(text)
  end
end
19
+
6
20
  class PhantomJSServer
7
21
  def initialize
8
22
  @control_panel = PhantomJSProxy::PhantomJSControlPanel.new
@@ -78,28 +92,13 @@ module PhantomJSProxy
78
92
  return true
79
93
  end
80
94
 
81
- def getOptions(env)
82
- if defined? env['HTTP_GET_PAGE_AS_IMAGE']
83
- picture = env['HTTP_GET_PAGE_AS_IMAGE']
84
- else
85
- picture = true
86
- end
87
-
88
- if defined? env['HTTP_GET_PAGE_WITH_IFRAMES']
89
- loadFrames = env['HTTP_GET_PAGE_WITH_IFRAMES']
90
- else
91
- loadFrames = false
92
- end
93
-
94
- return picture,loadFrames
95
- end
96
-
97
95
  def prepareUrl(env, params, req, https_request, type)
98
96
  if type == "none"
99
- url = env['REQUEST_URI'];
97
+ url = req.url#env['REQUEST_URI']
98
+ puts "URL is: #{url}"
100
99
  if https_request
101
- url['http'] = 'https'
102
- url[':443'] = ''
100
+ url['http'] = 'https' if url['http']
101
+ url[':443'] = '' if url[':443']
103
102
  end
104
103
 
105
104
  if params.length > 0
@@ -108,7 +107,7 @@ module PhantomJSProxy
108
107
  return url
109
108
  end
110
109
  url = Base64.decode64(req.params["address"])
111
- env['rack.errors'].write("After Base64 decoding: "+url)
110
+ env['rack.errors'].write("After Base64 decoding: "+url+"\n")
112
111
  return url
113
112
  end
114
113
 
@@ -138,7 +137,7 @@ module PhantomJSProxy
138
137
  env['rack.errors'].write("Paramas: "+params+"\n")
139
138
 
140
139
  #this routes the request to the outgoing server incase its not html that we want to load
141
- type = check_for_route(env['REQUEST_URI'])
140
+ type = check_for_route(req.url)#env['REQUEST_URI'])
142
141
  if type == "control_panel"
143
142
  return control_panel.show()
144
143
  elsif type != "none" and type != "base64"
@@ -149,15 +148,25 @@ module PhantomJSProxy
149
148
  phJS = PhantomJS.new
150
149
 
151
150
  env['rack.errors'].write("Extract the uri\n")
152
-
153
- picture,loadFrames = getOptions(env)
151
+
152
+ loadOptions = Options.new(env)
153
+
154
+ puts "Options: #{loadOptions.picture?}, #{loadOptions.loadFrames?}"
154
155
 
155
156
  url = prepareUrl(env, params, req, https_request, type)
156
157
 
157
- phJS.getUrl(url, picture, loadFrames)
158
+ phJS.getUrl(url, loadOptions.picture?, loadOptions.loadFrames?)
158
159
 
159
160
  #Create the response
160
- if phJS.ready != 200
161
+ if loadOptions.picture?
162
+ control_panel.add_special_request "@image_requests"
163
+ resp = Rack::Response.new([], 200, {
164
+ 'Content-Type' => 'image/png'
165
+ }) { |r|
166
+ r.write(phJS.image)
167
+ }
168
+ resp.finish
169
+ elsif phJS.ready != 200
161
170
  if !/favicon\.ico/.match(req.url())
162
171
  env['rack.errors'].write("Request FAILED\n")
163
172
  control_panel.add_special_request "@failed_requests"
@@ -170,14 +179,6 @@ module PhantomJSProxy
170
179
  r.write(phJS.dom)
171
180
  }
172
181
  resp.finish
173
- elsif picture
174
- control_panel.add_special_request "@image_requests"
175
- resp = Rack::Response.new([], 200, {
176
- 'Content-Type' => 'image/png'
177
- }) { |r|
178
- r.write(phJS.image)
179
- }
180
- resp.finish
181
182
  else
182
183
  control_panel.add_special_request "@html_requests"
183
184
  resp = Rack::Response.new([], 200, {
@@ -6,103 +6,72 @@ var frameContent = [];
6
6
  var masterURL = "";
7
7
  var masterPage = null;
8
8
 
9
- function evaluateWithVars(page, func, vars)
10
- {
11
- var fstr = func.toString()
12
- //console.log(fstr.replace("function () {", "function () {\n"+vstr))
13
- var evalstr = fstr.replace(
14
- new RegExp("function \((.*?)\) {"),
15
- "function $1 {\n" +
16
- "var vars = JSON.parse('" + JSON.stringify(vars) + "')\n" +
17
- "for (var v in vars) window[v] = vars[v]\n" +
18
- "\n"
19
- )
20
- console.log(evalstr)
21
- return page.evaluate(evalstr)
22
- }
23
-
24
- function insertFrames(url) {
25
-
26
- var page = masterPage;
27
- /*
28
- var page = require('webpage').create();
29
- page.onConsoleMessage = function (msg) { console.log(msg); };
30
- page.onAlert = function(msg) { console.log(msg);};
31
- page.onLoadStarted = function () {
32
- console.log('Start loading final Page...'+url);
33
- };
34
- page.open(url, function (status) {
35
- if (status !== 'success') {
36
- console.log('FAILED_LOADING_URL: '+url);
37
- } else {*/
38
- page.evaluate(function () {
39
- var framestmp = document.getElementsByTagName('IFRAME');
40
- var frames = []
41
- for (var i=0;i<framestmp.length;i++) {
42
- frames.push(framestmp[i]);
43
- }
44
- //mark iframes
45
- for (var i in frames) {
46
- frames[i].innerHTML = "PHANTOMJS_PROXY_IFRAME"+i;
47
- }
48
- });
49
-
50
- //replace iframes with their data
51
- var content = new String(page.content);
52
- for (var i in frameContent) {
53
- content = content.replace("PHANTOMJS_PROXY_IFRAME"+i, "<phantomjsframe>"+frameContent[i]+"</phantomjsframe>");
9
/**
 * Prints the DOM of `page` to stdout between the PHANTOMJS_DOMDATA_WRITE /
 * PHANTOMJS_DOMDATA_END markers. When `load_iframes` is truthy every IFRAME
 * element is first replaced by a <phantomjsframe> element that holds the
 * body HTML of the frame's document.
 *
 * @param page          PhantomJS webpage object that has finished loading.
 * @param load_iframes  truthy to inline iframe contents, falsy to print the
 *                      page content untouched.
 */
function newIFrameLoad(page, load_iframes) {
  var frame_data = [];
  if (load_iframes) {
    frame_data = page.evaluate(function () {
      var live = document.getElementsByTagName('IFRAME');
      var snapshot = [];
      var contents = [];
      var i;
      // getElementsByTagName returns a LIVE collection: assigning outerHTML
      // removes the iframe from it, so iterating it directly skips every
      // second frame. Copy the elements first.
      for (i = 0; i < live.length; i++) {
        snapshot.push(live[i]);
      }
      for (i = 0; i < snapshot.length; i++) {
        var doc = null;
        try {
          doc = snapshot[i].contentWindow && snapshot[i].contentWindow.document;
        } catch (e) {
          // Frame document not accessible; fall through to empty content.
          doc = null;
        }
        contents.push(doc && doc.body ? doc.body.innerHTML : "");
        snapshot[i].outerHTML = "<phantomjsframe>PHANTOM_JS_FRAME_" + i + "</phantomjsframe>";
      }
      return contents;
    }) || [];
  }

  // Swaps the placeholder of frame `index` for its HTML. The trailing "<"
  // keeps placeholder 1 from matching inside placeholder 10, and the
  // function form stops "$&", "$1", "$$" in the frame HTML from being
  // treated as replacement patterns.
  function fillPlaceholder(text, index, html) {
    return text.replace("PHANTOM_JS_FRAME_" + index + "<", function () {
      return html + "<";
    });
  }

  var content = String(page.content);
  for (var i = 0; i < frame_data.length; i++) {
    content = fillPlaceholder(content, i, frame_data[i]);
  }
  console.log("PHANTOMJS_DOMDATA_WRITE:"+content);
  console.log('PHANTOMJS_DOMDATA_END');
}
68
29
 
69
- var loadpage = function(url) {
30
/**
 * Opens `url` in a fresh PhantomJS page and reports the outcome through
 * callbacks. If the response for `url` itself carried a redirect, the
 * redirect target is loaded in a new page with `url` sent as the referer.
 *
 * @param url            address to open.
 * @param referer        value for the REFERER request header, or undefined.
 * @param success        called as success(page, url) when the load succeeded.
 * @param failure        called as failure(page, url) when the load failed or
 *                       the redirect budget is exhausted.
 * @param configure      optional hook called as configure(page) before the
 *                       page is opened; also applied to pages created for
 *                       redirect targets.
 * @param redirectsLeft  optional number of manual redirect hops still
 *                       allowed (default 10); prevents a redirect cycle from
 *                       re-loading forever without ever calling a callback.
 */
var loadpage = function(url, referer, success, failure, configure, redirectsLeft) {
  var redirectURL = null;

  if (redirectsLeft === undefined) {
    redirectsLeft = 10;
  }

  var page = require('webpage').create();

  page.settings.localToRemoteUrlAccessEnabled = true;

  page.settings.userAgent = "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/34.0.1847.116 Chrome/34.0.1847.116 Safari/537.36";

  // Forward everything the page prints or alerts to our own stdout.
  page.onConsoleMessage = function (msg) { console.log(msg); };

  page.onAlert = function(msg) { console.log(msg);};

  page.onLoadStarted = function () {
    console.log('Start loading...'+url);
  };

  // Remember a redirect issued for the requested url itself (not for
  // sub-resources) and report its status code on stdout.
  page.onResourceReceived = function(resource) {
    if (url == resource.url && resource.redirectURL && resource.stage == "end") {
      redirectURL = resource.redirectURL;
      console.log('FRAME_URL_ERROR_CODE: '+resource.status+'FRAME_URL_ERROR_CODE_END');
    }
  };

  if (referer!=undefined) {
    console.log("Set Referer: "+referer);
    page.customHeaders = {
      "REFERER": referer
    };
  }

  if (configure != undefined)
    configure(page);

  page.open(url, function (status) {
    console.log("Page Status: "+status);
    if (redirectURL && redirectsLeft > 0) {
      // Pass `configure` along so the redirect target is set up like the
      // original page, and count the hop against the redirect budget.
      loadpage(redirectURL, url, success, failure, configure, redirectsLeft - 1);
    } else if (redirectURL || status !== 'success') {
      failure(page, url);
    } else {
      success(page, url);
    }
  });
};
106
75
 
107
76
  function main() {
108
77
 
@@ -118,49 +87,43 @@ function main() {
118
87
 
119
88
  args = ""
120
89
 
121
- for (var i=0;i<argCount;i++)
122
- args += phantom.args[i+4]+'&';
90
+ for (var i=0;i<argCount;i++) {
91
+ args += phantom.args[i+4];
92
+ if (i<argCount-1) args += "&"
93
+ }
123
94
  if (args.length > 0)
124
95
  address += '?'+args;
125
96
 
126
97
  console.log("Open page: "+address+", "+args+" END");
127
98
 
128
- var page = require('webpage').create();
129
-
130
- page.onConsoleMessage = function (msg) { console.log(msg); };
131
-
132
99
  console.log('start openning page');
133
100
 
134
101
  masterURL = address;
135
-
136
- masterPage = page;
137
-
138
- //catches status != 200 and throws error immidiatly
139
- page.onResourceReceived = function (response) {
140
- if (response.stage == "end" && response.url == address && response.status != 200)
141
- {
142
- console.log('URL_ERROR_CODE: '+response.status+'URL_ERROR_CODE_END');
143
- //phantom.exit();
144
- }
145
- };
146
102
 
147
- page.open(address, function (status) {
148
- if (status != 'success') {
149
- console.log('FAILED_LOADING_URL');
150
- } else {
151
- console.log('DONE_LOADING_URL');
152
- //load iframes into page
153
- if (loadIframes) {
154
- loadIFrames(page);
155
- }
156
- //evaluateWithVars(page, function(){}, phantom.args);
157
- console.log('PHANTOMJS_MAINDOM_WRITE:'+page.content);
158
- console.log('PHANTOMJS_MAINDOM_END');
103
+ loadpage(address, undefined, function(page) {
104
+ masterPage = page;
105
+ console.log('DONE_LOADING_URL');
106
+ //load iframes into page
107
+ newIFrameLoad(page, loadIframes);
108
+ if (file_name != null && file_name != "none") {
109
+ page.render(file_name);
159
110
  }
111
+ phantom.exit();
112
+ }, function(page) {
113
+ masterPage = page;
114
+ console.log('FAILED_LOADING_URL: '+status+", "+address);
160
115
  if (file_name != null && file_name != "none") {
161
116
  page.render(file_name);
162
117
  }
163
- exit();
118
+ phantom.exit();
119
+ }, function(page) {
120
+ //page.customHeaders = {"Referer": "http://uk-amazon.icims.com/jobs/240290/account-representative---amazon-web-services---iberia/job"}
121
+ // page.onResourceReceived = function (response) {
122
+ // if (response.stage == "end" && response.url == address && response.status != 200)
123
+ // {
124
+ // console.log('URL_ERROR_CODE: '+response.status+'URL_ERROR_CODE_END');
125
+ // }
126
+ // };
164
127
  });
165
128
  }
166
129
  }
@@ -0,0 +1,28 @@
1
require 'rubygems'

# Gem specification for phantom_proxy.
Gem::Specification.new do |s|
  s.name        = 'phantom_proxy'
  s.version     = '1.2.15'
  s.summary     = "This is a phantomjs Proxy"
  s.description = "This is a phantomjs Proxy. It allows you to fetch webpages and execute javascript in them."
  s.authors     = ["Daniel Sudmann"]
  s.email       = 'suddani@googlemail.com'
  # `git ls-files` prints one path per line. Split on the newline explicitly:
  # `$\` (the OUTPUT record separator) is nil by default, and split(nil)
  # splits on any whitespace, which breaks paths that contain spaces.
  s.files       = `git ls-files`.split("\n")
  s.homepage    = 'http://experteer.com'
  s.executables = ['phantom_proxy']
  s.add_dependency('thin', '>= 1.3.1')
  s.add_dependency('ruby-hmac', '>= 0.4.0')
end
data/spec/test ADDED
@@ -0,0 +1,2 @@
1
#!/bin/bash
# Manual smoke test: runs the proxy script directly under PhantomJS against a
# sample page and captures everything it prints in tmp/log.txt.
# NOTE(review): the three arguments look like <image file> <load iframes> <url>;
# confirm against the argument parsing in proxy.js.
bin/phantomjs lib/phantom_proxy/scripts/proxy.js tmp/picture.png true http://dl.dropbox.com/u/36978459/index.html > tmp/log.txt
data/tmp/.keep ADDED
File without changes
data/tmp/run ADDED
@@ -0,0 +1,5 @@
1
#!/bin/sh
# Development helper: reinstall the gem from the working tree and start the
# proxy on port 3003. Run from the repository root.
gem uninstall phantom_proxy -x
# -f: do not complain when no previously built gem is lying around.
rm -f phantom_proxy-*.*.*.gem
gem build phantom_proxy.gemspec
gem install phantom_proxy-*.*.*.gem
phantom_proxy -p 3003
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: phantom_proxy
3
3
  version: !ruby/object:Gem::Version
4
- hash: 3
4
+ hash: 1
5
5
  prerelease:
6
6
  segments:
7
7
  - 1
8
8
  - 2
9
- - 14
10
- version: 1.2.14
9
+ - 15
10
+ version: 1.2.15
11
11
  platform: ruby
12
12
  authors:
13
13
  - Daniel Sudmann
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2014-01-14 00:00:00 +01:00
18
+ date: 2014-05-09 00:00:00 +02:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
@@ -59,18 +59,24 @@ extensions: []
59
59
  extra_rdoc_files: []
60
60
 
61
61
  files:
62
- - lib/phantom_proxy/phantomjsserver.rb
63
- - lib/phantom_proxy/phantomjs_control_panel.rb
64
- - lib/phantom_proxy/phantomjs.rb
62
+ - .gitignore
63
+ - .ruby-version
64
+ - Gemfile
65
+ - README.rdoc
66
+ - bin/phantom_proxy
65
67
  - lib/phantom_proxy.rb
66
- - lib/phantom_proxy/scripts/proxy.js
67
68
  - lib/phantom_proxy/config.ru
68
- - lib/phantom_proxy/web/control_panel.html
69
69
  - lib/phantom_proxy/install/etc/init.d/phproxy
70
70
  - lib/phantom_proxy/install/etc/phantom_proxy/config/example_config
71
- - bin/phantom_proxy
72
- - README.rdoc
73
- - Gemfile
71
+ - lib/phantom_proxy/phantomjs.rb
72
+ - lib/phantom_proxy/phantomjs_control_panel.rb
73
+ - lib/phantom_proxy/phantomjsserver.rb
74
+ - lib/phantom_proxy/scripts/proxy.js
75
+ - lib/phantom_proxy/web/control_panel.html
76
+ - phantom_proxy.gemspec
77
+ - spec/test
78
+ - tmp/.keep
79
+ - tmp/run
74
80
  has_rdoc: true
75
81
  homepage: http://experteer.com
76
82
  licenses: []
@@ -101,7 +107,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
101
107
  requirements: []
102
108
 
103
109
  rubyforge_project:
104
- rubygems_version: 1.5.2
110
+ rubygems_version: 1.5.3
105
111
  signing_key:
106
112
  specification_version: 3
107
113
  summary: This is a phantomjs Proxy