phantom_proxy 1.2.17 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,125 +7,125 @@ var masterURL = "";
7
7
  var masterPage = null;
8
8
 
9
9
  function newIFrameLoad(page, load_iframes) {
10
- var frame_data = [];
11
- if (load_iframes) {
12
- frame_data = page.evaluate(function () {
13
- var framestmp = document.getElementsByTagName('IFRAME');
14
- var frames = [];
15
- for (var i=0;i<framestmp.length;i++) {
16
- frames.push(framestmp[i].contentWindow.document.body.innerHTML);
17
- framestmp[i].outerHTML = "<phantomjsframe>PHANTOM_JS_FRAME_"+i+"</phantomjsframe>";
18
- }
19
- return frames;
20
- });
21
- }
22
- var content = new String(page.content);
23
- for (var i=0;i<frame_data.length;i++) {
24
- content = content.replace("PHANTOM_JS_FRAME_"+i, frame_data[i]);
25
- }
26
- console.log("PHANTOMJS_DOMDATA_WRITE:"+content);
27
- console.log('PHANTOMJS_DOMDATA_END');
10
+ var frame_data = [];
11
+ if (load_iframes) {
12
+ frame_data = page.evaluate(function () {
13
+ var framestmp = document.getElementsByTagName('IFRAME');
14
+ var frames = [];
15
+ for (var i=0;i<framestmp.length;i++) {
16
+ frames.push(framestmp[i].contentWindow.document.body.innerHTML);
17
+ framestmp[i].outerHTML = "<phantomjsframe>PHANTOM_JS_FRAME_"+i+"</phantomjsframe>";
18
+ }
19
+ return frames;
20
+ });
21
+ }
22
+ var content = new String(page.content);
23
+ for (var i=0;i<frame_data.length;i++) {
24
+ content = content.replace("PHANTOM_JS_FRAME_"+i, frame_data[i]);
25
+ }
26
+ console.log("PHANTOMJS_DOMDATA_WRITE:"+content);
27
+ console.log('PHANTOMJS_DOMDATA_END');
28
28
  }
29
-
29
+
30
30
  var loadpage = function(url, referer, success, failure, configure) {
31
- var redirectURL = null;
31
+ var redirectURL = null;
32
32
 
33
- var page = require('webpage').create();
33
+ var page = require('webpage').create();
34
34
 
35
- page.settings.localToRemoteUrlAccessEnabled = true;
35
+ page.settings.localToRemoteUrlAccessEnabled = true;
36
36
 
37
- page.settings.userAgent = "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/34.0.1847.116 Chrome/34.0.1847.116 Safari/537.36";
37
+ page.settings.userAgent = "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/34.0.1847.116 Chrome/34.0.1847.116 Safari/537.36";
38
38
 
39
- page.onConsoleMessage = function (msg) { console.log(msg); };
39
+ page.onConsoleMessage = function (msg) { console.log(msg); };
40
40
 
41
- page.onAlert = function(msg) { console.log(msg);};
41
+ page.onAlert = function(msg) { console.log(msg);};
42
42
 
43
- page.onLoadStarted = function () {
44
- console.log('Start loading...'+url);
45
- };
43
+ page.onLoadStarted = function () {
44
+ console.log('Start loading...'+url);
45
+ };
46
46
 
47
- page.onResourceReceived = function(resource) {
47
+ page.onResourceReceived = function(resource) {
48
48
  if (url == resource.url && resource.redirectURL && resource.stage == "end") {
49
49
  redirectURL = resource.redirectURL;
50
50
  console.log('FRAME_URL_ERROR_CODE: '+resource.status+'FRAME_URL_ERROR_CODE_END');
51
51
  }
52
52
  };
53
53
 
54
- if (referer!=undefined) {
55
- console.log("Set Referer: "+referer);
56
- page.customHeaders = {
57
- "REFERER": referer
58
- };
59
- }
54
+ if (referer!=undefined) {
55
+ console.log("Set Referer: "+referer);
56
+ page.customHeaders = {
57
+ "REFERER": referer
58
+ };
59
+ }
60
60
 
61
- if (configure != undefined)
62
- configure(page);
61
+ if (configure != undefined)
62
+ configure(page);
63
63
 
64
- page.open(url, function (status) {
65
- console.log("Page Status: "+status);
66
- if (redirectURL) {
64
+ page.open(url, function (status) {
65
+ console.log("Page Status: "+status);
66
+ if (redirectURL) {
67
67
  loadpage(redirectURL, url, success, failure);
68
68
  } else if (status !== 'success') {
69
- failure(page, url);
69
+ failure(page, url);
70
70
  } else {
71
- success(page, url);
71
+ success(page, url);
72
72
  }
73
73
  });
74
74
  };
75
75
 
76
76
  function main() {
77
77
 
78
- if (phantom.args.length < 2) {
79
- console.log('Usage: proxy.js <picture filename or none> <load iframe(true/false)> <URL> <url param count> <url params...>');
80
- phantom.exit();
81
- } else {
82
- file_name = phantom.args[0];
83
- var loadIframes = phantom.args[1].match(/true/i) ? true : false;
84
- address = phantom.args[2];
85
-
86
- var argCount = phantom.args[3];
87
-
88
- args = ""
89
-
90
- for (var i=0;i<argCount;i++) {
91
- args += phantom.args[i+4];
92
- if (i<argCount-1) args += "&"
93
- }
94
- if (args.length > 0)
95
- address += '?'+args;
96
-
97
- console.log("Open page: "+address+", "+args+" END");
98
-
99
- console.log('start openning page');
100
-
101
- masterURL = address;
102
-
103
- loadpage(address, undefined, function(page) {
104
- masterPage = page;
105
- console.log('DONE_LOADING_URL');
106
- //load iframes into page
107
- newIFrameLoad(page, loadIframes);
108
- if (file_name != null && file_name != "none") {
109
- page.render(file_name);
110
- }
111
- phantom.exit();
112
- }, function(page) {
113
- masterPage = page;
114
- console.log('FAILED_LOADING_URL: '+status+", "+address);
115
- if (file_name != null && file_name != "none") {
116
- page.render(file_name);
117
- }
118
- phantom.exit();
119
- }, function(page) {
120
- //page.customHeaders = {"Referer": "http://uk-amazon.icims.com/jobs/240290/account-representative---amazon-web-services---iberia/job"}
121
- // page.onResourceReceived = function (response) {
122
- // if (response.stage == "end" && response.url == address && response.status != 200)
123
- // {
124
- // console.log('URL_ERROR_CODE: '+response.status+'URL_ERROR_CODE_END');
125
- // }
126
- // };
127
- });
128
- }
78
+ if (phantom.args.length < 2) {
79
+ console.log('Usage: proxy.js <picture filename or none> <load iframe(true/false)> <URL> <url param count> <url params...>');
80
+ phantom.exit();
81
+ } else {
82
+ file_name = phantom.args[0];
83
+ var loadIframes = phantom.args[1].match(/true/i) ? true : false;
84
+ address = phantom.args[2];
85
+
86
+ var argCount = phantom.args[3];
87
+
88
+ args = ""
89
+
90
+ for (var i=0;i<argCount;i++) {
91
+ args += phantom.args[i+4];
92
+ if (i<argCount-1) args += "&"
93
+ }
94
+ if (args.length > 0)
95
+ address += '?'+args;
96
+
97
+ console.log("Open page: "+address+", "+args+" END");
98
+
99
+ console.log('start openning page');
100
+
101
+ masterURL = address;
102
+
103
+ loadpage(address, undefined, function(page) {
104
+ masterPage = page;
105
+ console.log('DONE_LOADING_URL');
106
+ //load iframes into page
107
+ newIFrameLoad(page, loadIframes);
108
+ if (file_name != null && file_name != "none") {
109
+ page.render(file_name);
110
+ }
111
+ phantom.exit();
112
+ }, function(page) {
113
+ masterPage = page;
114
+ console.log('FAILED_LOADING_URL: '+status+", "+address);
115
+ if (file_name != null && file_name != "none") {
116
+ page.render(file_name);
117
+ }
118
+ phantom.exit();
119
+ }, function(page) {
120
+ //page.customHeaders = {"Referer": "http://uk-amazon.icims.com/jobs/240290/account-representative---amazon-web-services---iberia/job"}
121
+ // page.onResourceReceived = function (response) {
122
+ // if (response.stage == "end" && response.url == address && response.status != 200)
123
+ // {
124
+ // console.log('URL_ERROR_CODE: '+response.status+'URL_ERROR_CODE_END');
125
+ // }
126
+ // };
127
+ });
128
+ }
129
129
  }
130
130
 
131
131
  main();
@@ -0,0 +1,21 @@
1
+ module PhantomProxy
2
+ class Service < Goliath::API
3
+ use Goliath::Rack::Params
4
+
5
+ def response(env)
6
+ env["params"] = params
7
+ call_stack(env, StatusApi, ProxyApi)
8
+ end
9
+
10
+ def call_stack(env, *apis)
11
+ last_answer = [404,{}, ""]
12
+ apis.each do |api|
13
+ last_answer = api.call(env)
14
+ if last_answer[0] != 600
15
+ return last_answer
16
+ end
17
+ end
18
+ last_answer[0] != 600 ? last_answer : [404,{}, ""]
19
+ end
20
+ end
21
+ end
@@ -0,0 +1,43 @@
1
+ module PhantomProxy
2
+ class StatusApi < AppRouterBase
3
+ get "/phantom_proxy_control_panel(.:format)", :status_page
4
+
5
+ get "*any", :next_api
6
+ put "*any", :next_api
7
+ delete "*any", :next_api
8
+ head "*any", :next_api
9
+ post "*any", :next_api
10
+
11
+ json_var :uptime
12
+
13
+ private
14
+ def status_page
15
+ case format
16
+ when :json
17
+ render_json
18
+ when :xml
19
+ render_xml
20
+ else
21
+ render "status_page"
22
+ end
23
+ end
24
+
25
+ def name
26
+ "Data"
27
+ end
28
+ def value
29
+ @value||="none"
30
+ end
31
+ def uptime
32
+ logger.info "Call Uptime"
33
+ @uptime||=StatusInfo.uptime
34
+ end
35
+ def format
36
+ if env[nil] && env[nil][:format]
37
+ env[nil][:format].to_sym
38
+ else
39
+ :html
40
+ end
41
+ end
42
+ end
43
+ end
@@ -0,0 +1,3 @@
1
+ module PhantomProxy
2
+ VERSION = "1.3.0"
3
+ end
data/lib/phantom_proxy.rb CHANGED
@@ -1,13 +1,96 @@
1
- require 'rubygems'
2
-
3
- module PhantomJSProxy
4
- ROOT = File.expand_path(File.dirname(__FILE__))
5
- SCRIPT = ROOT+"/phantom_proxy/scripts/proxy.js"
6
- CONTROL_PANEL = ROOT+"/phantom_proxy/web/control_panel.html"
7
- PHANTOMJS_BIN = 'phantomjs'#ROOT+'/phantom_proxy/vendor/bin/phantomjs'
8
- PHANTOMJS_SERVICE_SCRIPT = ROOT+"/phantom_proxy/install/phproxy"
9
- end
1
+ require "phantom_proxy/version"
2
+
3
+ #libs
4
+ require 'tempfile'
5
+ require 'scanf'
6
+ require 'nokogiri'
7
+ require 'journey'
8
+ require 'eventmachine'
9
+ require 'em-synchrony'
10
+ require 'json'
11
+ require 'logger'
12
+ require 'erb'
13
+ require 'openssl'
14
+ require 'base64'
15
+ require 'goliath/api'
16
+ require 'hmac-md5'
17
+
18
+ # Helper
19
+ require 'phantom_proxy/helper/logable'
20
+ require 'phantom_proxy/helper/jsonizer'
21
+ require 'phantom_proxy/helper/template_renderer'
22
+ require 'phantom_proxy/helper/status_info'
23
+ require 'phantom_proxy/helper/http'
24
+
25
+ # PhantomJS
26
+ require 'phantom_proxy/phantomjs/phantomjs'
27
+
28
+ # Router
29
+ require 'phantom_proxy/router/app_router'
30
+
31
+ # API
32
+ require 'phantom_proxy/status/status_api'
33
+ require 'phantom_proxy/proxy/proxy_api'
34
+
35
+ require 'phantom_proxy/service'
36
+
37
+ PHANTOMPROXY_ROOT=Dir.pwd
38
+ PHANTOMPROXY_GEM_DIR = File.join(File.dirname(__FILE__), "../")
39
+
40
+ module PhantomProxy
41
+ def self.script_path
42
+ @script_path||=root_gem.join("lib/phantom_proxy/scripts/proxy.js").to_s
43
+ end
44
+
45
+ def self.phantomjs_bin
46
+ "phantomjs"
47
+ end
48
+
49
+ def self.root
50
+ @root ||= Pathname.new(PHANTOMPROXY_ROOT)
51
+ end
10
52
 
11
- require PhantomJSProxy::ROOT+'/phantom_proxy/phantomjs.rb'
12
- require PhantomJSProxy::ROOT+'/phantom_proxy/phantomjsserver.rb'
13
- require PhantomJSProxy::ROOT+'/phantom_proxy/phantomjs_control_panel.rb'
53
+ def self.root_gem
54
+ @root_gem ||= Pathname.new(PHANTOMPROXY_GEM_DIR)
55
+ end
56
+
57
+ def self.logger=(obj)
58
+ @logger=obj
59
+ end
60
+
61
+ def self.logger
62
+ Thread.current[:in_fiber_logger] ||= PhantomProxyLogger.new((@logger||Logger.new(STDOUT)),Logable.next_id)
63
+ end
64
+
65
+ def self.hmac_key
66
+ @hmac_key
67
+ end
68
+
69
+ def self.hmac_key=(obj)
70
+ @hmac_key=::HMAC::MD5.new obj
71
+ end
72
+
73
+ def self.always_image?
74
+ @always_image
75
+ end
76
+
77
+ def self.always_image=(obj)
78
+ @always_image=obj
79
+ end
80
+
81
+ def self.always_iframe?
82
+ @always_iframe
83
+ end
84
+
85
+ def self.always_iframe=(obj)
86
+ @always_iframe=obj
87
+ end
88
+
89
+ def self.wait_for(op = nil)
90
+ fiber = Fiber.current
91
+ EM.defer(op, Proc.new {|result|
92
+ fiber.resume result
93
+ })
94
+ Fiber.yield
95
+ end
96
+ end
@@ -1,28 +1,29 @@
1
- require 'rubygems'
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'phantom_proxy/version'
2
5
 
3
- Gem::Specification.new do |s|
4
- s.name = 'phantom_proxy'
5
- s.version = '1.2.17'
6
- s.summary = "This is a phantomjs Proxy"
7
- s.description = "This is a phyntonjs Proxy it allows you to fetch webpages and execute javascript in them."
8
- s.authors = ["Daniel Sudmann"]
9
- s.email = 'suddani@googlemail.com'
10
- s.files = `git ls-files`.split($\)
11
- =begin
12
- FileList['lib/**/*.rb',
13
- 'lib/**/*.js',
14
- 'lib/**/**/*.ru',
15
- 'lib/**/**/*.html',
16
- 'lib/**/**/*',
17
- 'lib/**/**/**/*',
18
- 'lib/phantom_proxy/install/**/*',
19
- 'lib/phantom_proxy/install/**/**/*',
20
- 'bin/*',
21
- '[A-Z]*',
22
- 'test/**/*'].to_a
23
- =end
24
- s.homepage = 'http://experteer.com'
25
- s.executables = ['phantom_proxy']
26
- s.add_dependency('thin', '= 1.3.1')
27
- s.add_dependency('ruby-hmac', '>= 0.4.0')
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "phantom_proxy"
8
+ spec.version = PhantomProxy::VERSION
9
+ spec.authors = ["Suddani"]
10
+ spec.email = ["suddani@googlemail.com"]
11
+ spec.summary = "This is a phantomjs Proxy"
12
+ spec.description = "This is a phyntonjs Proxy it allows you to fetch webpages and execute javascript in them."
13
+ spec.homepage = ""
14
+ spec.license = "MIT"
15
+
16
+ spec.files = `git ls-files -z`.split("\x0")
17
+ spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
18
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
+ spec.require_paths = ["lib"]
20
+
21
+ spec.add_dependency "json", "~> 1.8.1"
22
+ spec.add_dependency "goliath", "~> 1.0.3"
23
+ spec.add_dependency "journey", "~> 1.0.4"
24
+ spec.add_dependency "nokogiri", "~> 1.6.1"
25
+ spec.add_dependency "ruby-hmac", ">= 0.4.0"
26
+
27
+ spec.add_development_dependency "bundler", "~> 1.6"
28
+ spec.add_development_dependency "rake", "~> 10.0"
28
29
  end
@@ -0,0 +1,18 @@
1
+ <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
2
+ <html>
3
+ <head>
4
+ <meta http-equiv="content-type" content="text/html; charset=windows-1250">
5
+ <title><%=title%></title>
6
+ <style type="text/css">
7
+ .c {
8
+ text-align: center;
9
+ }
10
+ </style>
11
+ </head>
12
+ <body>
13
+ <h1 class="c"><%=error_code%></h1>
14
+ <div class="c">
15
+ <%=content%>
16
+ </div>
17
+ </body>
18
+ </html>
@@ -0,0 +1,46 @@
1
+ <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
2
+ <html>
3
+ <head>
4
+ <title>Phantom Proxy Control</title>
5
+ <style>
6
+ h1 {
7
+ text-align: center;
8
+ }
9
+ h4 {
10
+ text-align: center;
11
+ position: relative;
12
+ top: -20px;
13
+ }
14
+ .infobox {
15
+ width: 800px;
16
+ margin-left: auto;
17
+ margin-right: auto;
18
+ }
19
+ .name {
20
+ float: left;
21
+ }
22
+ .value {
23
+ float: right;
24
+ margin-bottom: 10px;
25
+ }
26
+ .divider {
27
+ background-color: black;
28
+ height: 1px;
29
+ width: 700px;
30
+ margin-bottom: 10px;
31
+ margin-left: auto;
32
+ margin-right: auto;
33
+ clear: left;
34
+ clear: right;
35
+ }
36
+ </style>
37
+ </head>
38
+ <body>
39
+ <h1>Phantom Proxy</h1>
40
+ <h4>Control Panel</h4>
41
+ <div class="infobox" id="infobox">
42
+ <div class='name'>Uptime:</div><div class='value' id='uptime'><%=uptime%></div><div class='divider'></div>
43
+ <div class='name'><%=name%>:</div><div class='value' id='<%=name%>'><%=value%></div><div class='divider'></div>
44
+ </div>
45
+ </body>
46
+ </html>