phantom_mechanize 0.0.2 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- ZWJlNzExOGNkYmFjMjBhNjg5Yjg0MWI1NTNkZTMxM2ViZTkyY2MzZA==
4
+ ZDExZGVjMDRhZDFlZTQzMmFhMmY4Mzc4YmVmNzc3MGJkZTljMmI3Yw==
5
5
  data.tar.gz: !binary |-
6
- OTFlYjdlZWNkNjI4ZTg0YzMzNmE1ODZmOGE4YmZhNmZlZDQ5OWM3Mw==
6
+ MGFmY2VkY2Q0YTg4MDk5N2EyNzlmYWViMzc1NWJmYjcyNTZhMzE4Mg==
7
7
  SHA512:
8
8
  metadata.gz: !binary |-
9
- MTRmZWIzNDFkYTNiYjlmZTRmZTc1MmI5ZGU2OGU5ZGFkYjVkY2NkMzUyMTM0
10
- ZWQwMDRjNGU5OWQxNTkxYjA0MjlhYWFkZTBmY2FlMGI0MTcyZTk0YjMxNzZl
11
- NWYxZWU0MDg4NDU1MzIzZGI2NDBhZTkyNzMwMDc0ZDJiODc1YWE=
9
+ NmIyNjk1ZTZhYjc1MjkxOTJmMGU1MTRhOTYwMTYzOWExY2RjMGQ1OTRhMzEx
10
+ OGM2OTM5YjU0M2IyMGQ3NTliOWRjZjQ5ZDA3ZGQ1YzdiMTZhODZjODVkOGQ4
11
+ ODEzOGJlNjhiNWNkYmFkODI4ZTFmZDE3YWI3MjUxM2M5NjMyMTY=
12
12
  data.tar.gz: !binary |-
13
- ZTY3NWQzYjcxZTA1N2QzM2E3MzliNDE4ZGUwNjEyNGY1OGI3N2M2ZDg2Zjlk
14
- NjY3MTI1N2FmODU5Nzg4ZGQ0ODU2Y2M0ODBjODBhNmJmZWU1ODNlNDVmYjRi
15
- NTY3NTYwNmJkYjU4MjAyYTEzNTY5OTdiYTI1NWJlMmZkNGEwNjM=
13
+ ZDhhNTUzMjNjMjNmODk0YWQ3MjRlZWE2N2MyNTk1NzhjODk2YzQwM2RkYWMx
14
+ NjkxNjk5NDg4MjYwYjhmMWEzOTZlN2RmM2JiMWY3NzMxZDI0OTc0Y2RkZTJm
15
+ MDNhMmQyOTRjODZiZDY5NGIxYjZhNmRiZjhmOTEwODViNjM5MDI=
data/.gitignore CHANGED
@@ -1,14 +1 @@
1
- /.bundle/
2
- /.yardoc
3
- /Gemfile.lock
4
- /_yardoc/
5
- /coverage/
6
- /doc/
7
- /pkg/
8
- /spec/reports/
9
- /tmp/
10
- *.bundle
11
- *.so
12
- *.o
13
- *.a
14
- mkmf.log
1
+ pkg
data/js/phget.js CHANGED
@@ -23,27 +23,54 @@ for(i in cookies){
23
23
  });
24
24
  }
25
25
 
26
+ function output(html, page){
27
+ if(!html.match(/<html/i)) return;
28
+ console.log(JSON.stringify(phantom.cookies, null, 2) + '<<<phget_separator>>>' + html);
29
+ phantom.exit();
30
+ }
31
+
26
32
  var page = require('webpage').create();
27
33
 
28
34
  setInterval(function() {
29
- page.includeJs('//ajax.googleapis.com/ajax/libs/jquery/1.7.2/jquery.min.js', function() {
30
- page.evaluate(function() {
31
- console.log($(':root').html());
32
- });
35
+ var html = page.evaluate(function() {
36
+ return document.documentElement.outerHTML;
33
37
  });
38
+ output(html, page);
39
+ output(page.content, page);
34
40
  }, timeout);
35
41
 
42
+ setInterval(function() {
43
+ page.render('phantomjs.png');
44
+ }, 500);
45
+
46
+
36
47
  page.settings.userAgent = user_agent;
37
48
 
38
49
  page.onConsoleMessage = function(msg, lineNum, sourceId) {
39
- console.log(JSON.stringify(phantom.cookies, null, 2) + '<<<phget_separator>>>' + msg);
40
- phantom.exit();
50
+ console.log(msg);
51
+ };
52
+
53
+ page.onInitialized = function() {
54
+ page.evaluate(function() {
55
+ // spoof plugins
56
+ window.navigator = {plugins: {length: 5}};
57
+ // remove stuff that will give us away
58
+ delete window.callPhantom;
59
+ });
41
60
  };
42
61
 
43
62
  page.onResourceRequested = function(requestData, networkRequest) {
44
- if(requestData.url.match(/.(gif|jpe?g|png|css)/)) {
63
+ // skip loading images / styles
64
+ if(requestData.url.match(/\.(gif|jpe?g|png|css)\b/i)) {
45
65
  networkRequest.abort();
46
66
  }
67
+
68
+ host = networkRequest.setHeader('Host');
69
+ networkRequest.setHeader('Host', 0);
70
+ networkRequest.setHeader('Host', host);
71
+ networkRequest.setHeader('Connection', 'keep-alive');
72
+ //networkRequest.setHeader('Accept-Encoding', 'gzip, deflate');
73
+
47
74
  };
48
75
 
49
76
  page.onLoadFinished = function() {
@@ -54,27 +81,30 @@ page.onLoadFinished = function() {
54
81
  var js = jss.shift();
55
82
  var done = (selectors[0] == undefined && js == undefined);
56
83
 
84
+ if(selector !== undefined){
57
85
  page.includeJs('//ajax.googleapis.com/ajax/libs/jquery/1.7.2/jquery.min.js', function() {
58
- page.evaluate(function(selector, page, js, done) {
86
+ html = page.evaluate(function(selector, page, js, done) {
59
87
  setInterval(function() {
60
88
  if($(selector)[0]){
61
- if(js){
89
+ if(js !== undefined){
62
90
  eval(js);
63
91
  }
64
92
  if(done){
65
- console.log($(':root').html());
66
- }
67
-
93
+ return document.documentElement.outerHTML;
94
+ }
68
95
  }
69
96
  }, 500);
70
97
  }, selector, page, js, done);
71
-
98
+ output(html, page);
72
99
  });
100
+ // console.log(document.documentElement.outerHTML);
101
+ }
102
+
73
103
  }
74
104
  };
75
105
 
76
106
  page.onError = function(msg, trace) {
77
- /*
107
+ /*
78
108
  var msgStack = ['ERROR: ' + msg];
79
109
  if (trace && trace.length) {
80
110
  msgStack.push('TRACE:');
@@ -82,9 +112,9 @@ page.onError = function(msg, trace) {
82
112
  msgStack.push(' -> ' + t.file + ': ' + t.line + (t.function ? ' (in function "' + t.function + '")' : ''));
83
113
  });
84
114
  }
85
- */
86
- // uncomment to log into the console
87
- // console.error(msgStack.join('\n'));
115
+
116
+ console.error(msgStack.join('\n'));
117
+ */
88
118
  };
89
119
 
90
120
  page.open(url, function() {
@@ -1,5 +1,6 @@
1
1
  class Mechanize
2
2
  def phget url, *args
3
+
3
4
  args = args[0] || {}
4
5
  wait = args[:wait] || 10000
5
6
  selector = args[:selector] || ""
@@ -9,11 +10,13 @@ class Mechanize
9
10
 
10
11
  pc = cookies.map{|c| [c.name, c.value, c.domain, c.path, c.httponly, c.secure, c.expires.to_i]}.to_json
11
12
 
12
- ph_args = []
13
+ ph_args = ['--ssl-protocol=any', '--web-security=false']
13
14
  ph_args << "--proxy=#{proxy_addr}:#{proxy_port}" if proxy_port && proxy_addr
14
- # puts "phantomjs #{ph_args.join(' ')} \"#{PhantomMechanize::JS_FOLDER}/phget.js\" \"#{url}\" \"#{wait}\" \"#{selector.to_json.gsub('"', '\"')}\" \"#{pc.gsub('"', '\"')}\" \"#{user_agent.gsub('"', '\"')}\" \"#{js.to_json.gsub('"', '\"')}\""
15
+ puts "phantomjs #{ph_args.join(' ')} \"#{PhantomMechanize::JS_FOLDER}/phget.js\" \"#{url}\" \"#{wait}\" \"#{selector.to_json.gsub('"', '\"')}\" \"#{pc.gsub('"', '\"')}\" \"#{user_agent.gsub('"', '\"')}\" \"#{js.to_json.gsub('"', '\"')}\""
15
16
 
16
17
  response = `phantomjs #{ph_args.join(' ')} "#{PhantomMechanize::JS_FOLDER}/phget.js" "#{url}" "#{wait}" "#{selector.to_json.gsub('"', '\"')}" "#{pc.gsub('"', '\"')}" "#{user_agent.gsub('"', '\"')}" "#{js.to_json.gsub('"', '\"')}"`
18
+ raise 'bad response' if response == ''
19
+
17
20
  mcs, html = response.split '<<<phget_separator>>>'
18
21
  JSON.parse(mcs).each do |mc|
19
22
  cookie = Cookie.new Hash[mc.map{|k, v| [k.to_sym, v]}]
@@ -1,3 +1,3 @@
1
1
  module PhantomMechanize
2
- VERSION = "0.0.2"
2
+ VERSION = "0.0.3"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: phantom_mechanize
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - P Guardiario
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-09-14 00:00:00.000000000 Z
11
+ date: 2015-10-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler