phantom_mechanize 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +8 -8
- data/.gitignore +1 -14
- data/js/phget.js +47 -17
- data/lib/phantom_mechanize/ext/mechanize.rb +5 -2
- data/lib/phantom_mechanize/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
ZDExZGVjMDRhZDFlZTQzMmFhMmY4Mzc4YmVmNzc3MGJkZTljMmI3Yw==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
MGFmY2VkY2Q0YTg4MDk5N2EyNzlmYWViMzc1NWJmYjcyNTZhMzE4Mg==
|
7
7
|
SHA512:
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
NmIyNjk1ZTZhYjc1MjkxOTJmMGU1MTRhOTYwMTYzOWExY2RjMGQ1OTRhMzEx
|
10
|
+
OGM2OTM5YjU0M2IyMGQ3NTliOWRjZjQ5ZDA3ZGQ1YzdiMTZhODZjODVkOGQ4
|
11
|
+
ODEzOGJlNjhiNWNkYmFkODI4ZTFmZDE3YWI3MjUxM2M5NjMyMTY=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
ZDhhNTUzMjNjMjNmODk0YWQ3MjRlZWE2N2MyNTk1NzhjODk2YzQwM2RkYWMx
|
14
|
+
NjkxNjk5NDg4MjYwYjhmMWEzOTZlN2RmM2JiMWY3NzMxZDI0OTc0Y2RkZTJm
|
15
|
+
MDNhMmQyOTRjODZiZDY5NGIxYjZhNmRiZjhmOTEwODViNjM5MDI=
|
data/.gitignore
CHANGED
data/js/phget.js
CHANGED
@@ -23,27 +23,54 @@ for(i in cookies){
|
|
23
23
|
});
|
24
24
|
}
|
25
25
|
|
26
|
+
function output(html, page){
|
27
|
+
if(!html.match(/<html/i)) return;
|
28
|
+
console.log(JSON.stringify(phantom.cookies, null, 2) + '<<<phget_separator>>>' + html);
|
29
|
+
phantom.exit();
|
30
|
+
}
|
31
|
+
|
26
32
|
var page = require('webpage').create();
|
27
33
|
|
28
34
|
setInterval(function() {
|
29
|
-
page.
|
30
|
-
|
31
|
-
console.log($(':root').html());
|
32
|
-
});
|
35
|
+
var html = page.evaluate(function() {
|
36
|
+
return document.documentElement.outerHTML;
|
33
37
|
});
|
38
|
+
output(html, page);
|
39
|
+
output(page.content, page);
|
34
40
|
}, timeout);
|
35
41
|
|
42
|
+
setInterval(function() {
|
43
|
+
page.render('phantomjs.png');
|
44
|
+
}, 500);
|
45
|
+
|
46
|
+
|
36
47
|
page.settings.userAgent = user_agent;
|
37
48
|
|
38
49
|
page.onConsoleMessage = function(msg, lineNum, sourceId) {
|
39
|
-
console.log(
|
40
|
-
|
50
|
+
console.log(msg);
|
51
|
+
};
|
52
|
+
|
53
|
+
page.onInitialized = function() {
|
54
|
+
page.evaluate(function() {
|
55
|
+
// spoof plugins
|
56
|
+
window.navigator = {plugins: {length: 5}};
|
57
|
+
// remove stuff that will give us away
|
58
|
+
delete window.callPhantom;
|
59
|
+
});
|
41
60
|
};
|
42
61
|
|
43
62
|
page.onResourceRequested = function(requestData, networkRequest) {
|
44
|
-
|
63
|
+
// skip loading images / styles
|
64
|
+
if(requestData.url.match(/\.(gif|jpe?g|png|css)\b/i)) {
|
45
65
|
networkRequest.abort();
|
46
66
|
}
|
67
|
+
|
68
|
+
host = networkRequest.setHeader('Host');
|
69
|
+
networkRequest.setHeader('Host', 0);
|
70
|
+
networkRequest.setHeader('Host', host);
|
71
|
+
networkRequest.setHeader('Connection', 'keep-alive');
|
72
|
+
//networkRequest.setHeader('Accept-Encoding', 'gzip, deflate');
|
73
|
+
|
47
74
|
};
|
48
75
|
|
49
76
|
page.onLoadFinished = function() {
|
@@ -54,27 +81,30 @@ page.onLoadFinished = function() {
|
|
54
81
|
var js = jss.shift();
|
55
82
|
var done = (selectors[0] == undefined && js == undefined);
|
56
83
|
|
84
|
+
if(selector !== undefined){
|
57
85
|
page.includeJs('//ajax.googleapis.com/ajax/libs/jquery/1.7.2/jquery.min.js', function() {
|
58
|
-
page.evaluate(function(selector, page, js, done) {
|
86
|
+
html = page.evaluate(function(selector, page, js, done) {
|
59
87
|
setInterval(function() {
|
60
88
|
if($(selector)[0]){
|
61
|
-
if(js){
|
89
|
+
if(js !== undefined){
|
62
90
|
eval(js);
|
63
91
|
}
|
64
92
|
if(done){
|
65
|
-
|
66
|
-
}
|
67
|
-
|
93
|
+
return document.documentElement.outerHTML;
|
94
|
+
}
|
68
95
|
}
|
69
96
|
}, 500);
|
70
97
|
}, selector, page, js, done);
|
71
|
-
|
98
|
+
output(html, page);
|
72
99
|
});
|
100
|
+
// console.log(document.documentElement.outerHTML);
|
101
|
+
}
|
102
|
+
|
73
103
|
}
|
74
104
|
};
|
75
105
|
|
76
106
|
page.onError = function(msg, trace) {
|
77
|
-
|
107
|
+
/*
|
78
108
|
var msgStack = ['ERROR: ' + msg];
|
79
109
|
if (trace && trace.length) {
|
80
110
|
msgStack.push('TRACE:');
|
@@ -82,9 +112,9 @@ page.onError = function(msg, trace) {
|
|
82
112
|
msgStack.push(' -> ' + t.file + ': ' + t.line + (t.function ? ' (in function "' + t.function + '")' : ''));
|
83
113
|
});
|
84
114
|
}
|
85
|
-
|
86
|
-
|
87
|
-
|
115
|
+
|
116
|
+
console.error(msgStack.join('\n'));
|
117
|
+
*/
|
88
118
|
};
|
89
119
|
|
90
120
|
page.open(url, function() {
|
@@ -1,5 +1,6 @@
|
|
1
1
|
class Mechanize
|
2
2
|
def phget url, *args
|
3
|
+
|
3
4
|
args = args[0] || {}
|
4
5
|
wait = args[:wait] || 10000
|
5
6
|
selector = args[:selector] || ""
|
@@ -9,11 +10,13 @@ class Mechanize
|
|
9
10
|
|
10
11
|
pc = cookies.map{|c| [c.name, c.value, c.domain, c.path, c.httponly, c.secure, c.expires.to_i]}.to_json
|
11
12
|
|
12
|
-
ph_args = []
|
13
|
+
ph_args = ['--ssl-protocol=any', '--web-security=false']
|
13
14
|
ph_args << "--proxy=#{proxy_addr}:#{proxy_port}" if proxy_port && proxy_addr
|
14
|
-
|
15
|
+
puts "phantomjs #{ph_args.join(' ')} \"#{PhantomMechanize::JS_FOLDER}/phget.js\" \"#{url}\" \"#{wait}\" \"#{selector.to_json.gsub('"', '\"')}\" \"#{pc.gsub('"', '\"')}\" \"#{user_agent.gsub('"', '\"')}\" \"#{js.to_json.gsub('"', '\"')}\""
|
15
16
|
|
16
17
|
response = `phantomjs #{ph_args.join(' ')} "#{PhantomMechanize::JS_FOLDER}/phget.js" "#{url}" "#{wait}" "#{selector.to_json.gsub('"', '\"')}" "#{pc.gsub('"', '\"')}" "#{user_agent.gsub('"', '\"')}" "#{js.to_json.gsub('"', '\"')}"`
|
18
|
+
raise 'bad response' if response == ''
|
19
|
+
|
17
20
|
mcs, html = response.split '<<<phget_separator>>>'
|
18
21
|
JSON.parse(mcs).each do |mc|
|
19
22
|
cookie = Cookie.new Hash[mc.map{|k, v| [k.to_sym, v]}]
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: phantom_mechanize
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- P Guardiario
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-10-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|