phantom_mechanize 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- ZWJlNzExOGNkYmFjMjBhNjg5Yjg0MWI1NTNkZTMxM2ViZTkyY2MzZA==
4
+ ZDExZGVjMDRhZDFlZTQzMmFhMmY4Mzc4YmVmNzc3MGJkZTljMmI3Yw==
5
5
  data.tar.gz: !binary |-
6
- OTFlYjdlZWNkNjI4ZTg0YzMzNmE1ODZmOGE4YmZhNmZlZDQ5OWM3Mw==
6
+ MGFmY2VkY2Q0YTg4MDk5N2EyNzlmYWViMzc1NWJmYjcyNTZhMzE4Mg==
7
7
  SHA512:
8
8
  metadata.gz: !binary |-
9
- MTRmZWIzNDFkYTNiYjlmZTRmZTc1MmI5ZGU2OGU5ZGFkYjVkY2NkMzUyMTM0
10
- ZWQwMDRjNGU5OWQxNTkxYjA0MjlhYWFkZTBmY2FlMGI0MTcyZTk0YjMxNzZl
11
- NWYxZWU0MDg4NDU1MzIzZGI2NDBhZTkyNzMwMDc0ZDJiODc1YWE=
9
+ NmIyNjk1ZTZhYjc1MjkxOTJmMGU1MTRhOTYwMTYzOWExY2RjMGQ1OTRhMzEx
10
+ OGM2OTM5YjU0M2IyMGQ3NTliOWRjZjQ5ZDA3ZGQ1YzdiMTZhODZjODVkOGQ4
11
+ ODEzOGJlNjhiNWNkYmFkODI4ZTFmZDE3YWI3MjUxM2M5NjMyMTY=
12
12
  data.tar.gz: !binary |-
13
- ZTY3NWQzYjcxZTA1N2QzM2E3MzliNDE4ZGUwNjEyNGY1OGI3N2M2ZDg2Zjlk
14
- NjY3MTI1N2FmODU5Nzg4ZGQ0ODU2Y2M0ODBjODBhNmJmZWU1ODNlNDVmYjRi
15
- NTY3NTYwNmJkYjU4MjAyYTEzNTY5OTdiYTI1NWJlMmZkNGEwNjM=
13
+ ZDhhNTUzMjNjMjNmODk0YWQ3MjRlZWE2N2MyNTk1NzhjODk2YzQwM2RkYWMx
14
+ NjkxNjk5NDg4MjYwYjhmMWEzOTZlN2RmM2JiMWY3NzMxZDI0OTc0Y2RkZTJm
15
+ MDNhMmQyOTRjODZiZDY5NGIxYjZhNmRiZjhmOTEwODViNjM5MDI=
data/.gitignore CHANGED
@@ -1,14 +1 @@
1
- /.bundle/
2
- /.yardoc
3
- /Gemfile.lock
4
- /_yardoc/
5
- /coverage/
6
- /doc/
7
- /pkg/
8
- /spec/reports/
9
- /tmp/
10
- *.bundle
11
- *.so
12
- *.o
13
- *.a
14
- mkmf.log
1
+ pkg
data/js/phget.js CHANGED
@@ -23,27 +23,54 @@ for(i in cookies){
23
23
  });
24
24
  }
25
25
 
26
+ function output(html, page){
27
+ if(!html.match(/<html/i)) return;
28
+ console.log(JSON.stringify(phantom.cookies, null, 2) + '<<<phget_separator>>>' + html);
29
+ phantom.exit();
30
+ }
31
+
26
32
  var page = require('webpage').create();
27
33
 
28
34
  setInterval(function() {
29
- page.includeJs('//ajax.googleapis.com/ajax/libs/jquery/1.7.2/jquery.min.js', function() {
30
- page.evaluate(function() {
31
- console.log($(':root').html());
32
- });
35
+ var html = page.evaluate(function() {
36
+ return document.documentElement.outerHTML;
33
37
  });
38
+ output(html, page);
39
+ output(page.content, page);
34
40
  }, timeout);
35
41
 
42
+ setInterval(function() {
43
+ page.render('phantomjs.png');
44
+ }, 500);
45
+
46
+
36
47
  page.settings.userAgent = user_agent;
37
48
 
38
49
  page.onConsoleMessage = function(msg, lineNum, sourceId) {
39
- console.log(JSON.stringify(phantom.cookies, null, 2) + '<<<phget_separator>>>' + msg);
40
- phantom.exit();
50
+ console.log(msg);
51
+ };
52
+
53
+ page.onInitialized = function() {
54
+ page.evaluate(function() {
55
+ // spoof plugins
56
+ window.navigator = {plugins: {length: 5}};
57
+ // remove stuff that will give us away
58
+ delete window.callPhantom;
59
+ });
41
60
  };
42
61
 
43
62
  page.onResourceRequested = function(requestData, networkRequest) {
44
- if(requestData.url.match(/.(gif|jpe?g|png|css)/)) {
63
+ // skip loading images / styles
64
+ if(requestData.url.match(/\.(gif|jpe?g|png|css)\b/i)) {
45
65
  networkRequest.abort();
46
66
  }
67
+
68
+ host = networkRequest.setHeader('Host');
69
+ networkRequest.setHeader('Host', 0);
70
+ networkRequest.setHeader('Host', host);
71
+ networkRequest.setHeader('Connection', 'keep-alive');
72
+ //networkRequest.setHeader('Accept-Encoding', 'gzip, deflate');
73
+
47
74
  };
48
75
 
49
76
  page.onLoadFinished = function() {
@@ -54,27 +81,30 @@ page.onLoadFinished = function() {
54
81
  var js = jss.shift();
55
82
  var done = (selectors[0] == undefined && js == undefined);
56
83
 
84
+ if(selector !== undefined){
57
85
  page.includeJs('//ajax.googleapis.com/ajax/libs/jquery/1.7.2/jquery.min.js', function() {
58
- page.evaluate(function(selector, page, js, done) {
86
+ html = page.evaluate(function(selector, page, js, done) {
59
87
  setInterval(function() {
60
88
  if($(selector)[0]){
61
- if(js){
89
+ if(js !== undefined){
62
90
  eval(js);
63
91
  }
64
92
  if(done){
65
- console.log($(':root').html());
66
- }
67
-
93
+ return document.documentElement.outerHTML;
94
+ }
68
95
  }
69
96
  }, 500);
70
97
  }, selector, page, js, done);
71
-
98
+ output(html, page);
72
99
  });
100
+ // console.log(document.documentElement.outerHTML);
101
+ }
102
+
73
103
  }
74
104
  };
75
105
 
76
106
  page.onError = function(msg, trace) {
77
- /*
107
+ /*
78
108
  var msgStack = ['ERROR: ' + msg];
79
109
  if (trace && trace.length) {
80
110
  msgStack.push('TRACE:');
@@ -82,9 +112,9 @@ page.onError = function(msg, trace) {
82
112
  msgStack.push(' -> ' + t.file + ': ' + t.line + (t.function ? ' (in function "' + t.function + '")' : ''));
83
113
  });
84
114
  }
85
- */
86
- // uncomment to log into the console
87
- // console.error(msgStack.join('\n'));
115
+
116
+ console.error(msgStack.join('\n'));
117
+ */
88
118
  };
89
119
 
90
120
  page.open(url, function() {
@@ -1,5 +1,6 @@
1
1
  class Mechanize
2
2
  def phget url, *args
3
+
3
4
  args = args[0] || {}
4
5
  wait = args[:wait] || 10000
5
6
  selector = args[:selector] || ""
@@ -9,11 +10,13 @@ class Mechanize
9
10
 
10
11
  pc = cookies.map{|c| [c.name, c.value, c.domain, c.path, c.httponly, c.secure, c.expires.to_i]}.to_json
11
12
 
12
- ph_args = []
13
+ ph_args = ['--ssl-protocol=any', '--web-security=false']
13
14
  ph_args << "--proxy=#{proxy_addr}:#{proxy_port}" if proxy_port && proxy_addr
14
- # puts "phantomjs #{ph_args.join(' ')} \"#{PhantomMechanize::JS_FOLDER}/phget.js\" \"#{url}\" \"#{wait}\" \"#{selector.to_json.gsub('"', '\"')}\" \"#{pc.gsub('"', '\"')}\" \"#{user_agent.gsub('"', '\"')}\" \"#{js.to_json.gsub('"', '\"')}\""
15
+ puts "phantomjs #{ph_args.join(' ')} \"#{PhantomMechanize::JS_FOLDER}/phget.js\" \"#{url}\" \"#{wait}\" \"#{selector.to_json.gsub('"', '\"')}\" \"#{pc.gsub('"', '\"')}\" \"#{user_agent.gsub('"', '\"')}\" \"#{js.to_json.gsub('"', '\"')}\""
15
16
 
16
17
  response = `phantomjs #{ph_args.join(' ')} "#{PhantomMechanize::JS_FOLDER}/phget.js" "#{url}" "#{wait}" "#{selector.to_json.gsub('"', '\"')}" "#{pc.gsub('"', '\"')}" "#{user_agent.gsub('"', '\"')}" "#{js.to_json.gsub('"', '\"')}"`
18
+ raise 'bad response' if response == ''
19
+
17
20
  mcs, html = response.split '<<<phget_separator>>>'
18
21
  JSON.parse(mcs).each do |mc|
19
22
  cookie = Cookie.new Hash[mc.map{|k, v| [k.to_sym, v]}]
@@ -1,3 +1,3 @@
1
1
  module PhantomMechanize
2
- VERSION = "0.0.2"
2
+ VERSION = "0.0.3"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: phantom_mechanize
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - P Guardiario
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-09-14 00:00:00.000000000 Z
11
+ date: 2015-10-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler