phantom_mechanize 0.0.3 → 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +8 -8
- data/README.md +4 -0
- data/js/phget.js +29 -7
- data/lib/phantom_mechanize/ext/mechanize.rb +4 -2
- data/lib/phantom_mechanize/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
ODBjYTZjMzJkNGFiNTZiYWFiMzQ0NzA4Njk2MTRlNTFiN2FkZjRmOA==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
N2FiOWEyZWZjNDhiNjg0ZjQ1NzY0NGQ0MzJiNGM2MzJkZmJjNGNhNw==
|
7
7
|
SHA512:
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
ZWNlZDFiODljMTU3NWY0OTNkZmU3NTAzZjBkOGM4MTc1YjczMjE0NGRhYWE2
|
10
|
+
ZmMyZTc2YmJiYjBiYTI3ZWE5ZjVhZThhMjhmNDNhN2Q0NDhlOWIyZWM3ODhm
|
11
|
+
YTBjMjhjZmVjMDliMzQ0OTJkMDA3YWIyZTE3NDkxN2RlY2QwNjU=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
NGFlMmViZTZhZTczOTNjMjJmYWY3YWU2NzIxNTUwYjIxMmJjNTUwZjUzY2Yx
|
14
|
+
MDUzZmVkY2Y3Yjk3YWFjNDM4N2ZkYTA3Yjg1ODRiYmI4ZTYzZTI5NjhkYmM3
|
15
|
+
OTgyNDlkNWM4MjE4NzA2ZmM0Nzg3NjE5NGU5Y2RmZDI2YWI4NDQ=
|
data/README.md
CHANGED
@@ -27,11 +27,15 @@ page = agent.phget 'http://www.google.com', :wait => 10000, :selector => '[name=
|
|
27
27
|
# use some js to submit a form or interact with the dom:
|
28
28
|
page = agent.phget 'http://www.google.com', :selector => ['[name=q]', 'h3 a'], :js => "$('[name=q]').val('phantom_mechanize');$('form').submit()"
|
29
29
|
|
30
|
+
# return page after scrolling has completed
|
31
|
+
page = agent.phget 'http://www.somescrollingpage.com', :scroll => true
|
32
|
+
|
30
33
|
|
31
34
|
```
|
32
35
|
## Options
|
33
36
|
* :selector - return once this selector is located (jquery)
|
34
37
|
* :wait - wait this many milliseconds (default 10,000)
|
38
|
+
* :scroll - scroll to the bottom until no more results show up (infinite scrolling)
|
35
39
|
|
36
40
|
## Faq
|
37
41
|
> What about cookies?
|
data/js/phget.js
CHANGED
@@ -8,6 +8,7 @@ var selector = selectors[0];
|
|
8
8
|
var cookies = JSON.parse(args[4]);
|
9
9
|
var user_agent = args[5];
|
10
10
|
var jss = JSON.parse(args[6]);
|
11
|
+
var scroll = JSON.parse(args[7]);
|
11
12
|
|
12
13
|
// var date =
|
13
14
|
for(i in cookies){
|
@@ -31,13 +32,15 @@ function output(html, page){
|
|
31
32
|
|
32
33
|
var page = require('webpage').create();
|
33
34
|
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
35
|
+
if(!scroll){
|
36
|
+
setInterval(function() {
|
37
|
+
var html = page.evaluate(function() {
|
38
|
+
return document.documentElement.outerHTML;
|
39
|
+
});
|
40
|
+
output(html, page);
|
41
|
+
output(page.content, page);
|
42
|
+
}, timeout);
|
43
|
+
}
|
41
44
|
|
42
45
|
setInterval(function() {
|
43
46
|
page.render('phantomjs.png');
|
@@ -119,4 +122,23 @@ page.onError = function(msg, trace) {
|
|
119
122
|
|
120
123
|
page.open(url, function() {
|
121
124
|
|
125
|
+
if(scroll){
|
126
|
+
var num = 0;
|
127
|
+
window.setInterval(function() {
|
128
|
+
num2 = page.evaluate(function() {
|
129
|
+
// Scrolls to the bottom of page
|
130
|
+
window.document.body.scrollTop = document.body.scrollHeight;
|
131
|
+
return document.body.scrollHeight;
|
132
|
+
});
|
133
|
+
if (num2 == num){
|
134
|
+
var html = page.evaluate(function() {
|
135
|
+
return document.documentElement.outerHTML;
|
136
|
+
});
|
137
|
+
output(html, page);
|
138
|
+
}
|
139
|
+
num = num2;
|
140
|
+
// console.log(num);
|
141
|
+
}, timeout); // Number of milliseconds to wait between scrolls
|
142
|
+
}
|
143
|
+
|
122
144
|
});
|
data/lib/phantom_mechanize/ext/mechanize.rb
CHANGED
@@ -4,6 +4,7 @@ class Mechanize
|
|
4
4
|
args = args[0] || {}
|
5
5
|
wait = args[:wait] || 10000
|
6
6
|
selector = args[:selector] || ""
|
7
|
+
scroll = args[:scroll] ? 1 : 0
|
7
8
|
selector = [selector] if selector.is_a?(String)
|
8
9
|
js = args[:js] || ""
|
9
10
|
js = [js] if js.is_a?(String)
|
@@ -12,9 +13,10 @@ class Mechanize
|
|
12
13
|
|
13
14
|
ph_args = ['--ssl-protocol=any', '--web-security=false']
|
14
15
|
ph_args << "--proxy=#{proxy_addr}:#{proxy_port}" if proxy_port && proxy_addr
|
15
|
-
puts "phantomjs #{ph_args.join(' ')} \"#{PhantomMechanize::JS_FOLDER}/phget.js\" \"#{url}\" \"#{wait}\" \"#{selector.to_json.gsub('"', '\"')}\" \"#{pc.gsub('"', '\"')}\" \"#{user_agent.gsub('"', '\"')}\" \"#{js.to_json.gsub('"', '\"')}\""
|
16
16
|
|
17
|
-
|
17
|
+
cmd = "phantomjs #{ph_args.join(' ')} \"#{PhantomMechanize::JS_FOLDER}/phget.js\" \"#{url}\" \"#{wait}\" \"#{selector.to_json.gsub('\"', '\\"')}\" \"#{pc.gsub('\"', '\\"')}\" \"#{user_agent.gsub('\"', '\\"')}\" \"#{js.to_json.gsub('\"', '\\"')}\" \"#{scroll.to_json}\""
|
18
|
+
response = %x[#{cmd}]
|
19
|
+
|
18
20
|
raise 'bad response' if response == ''
|
19
21
|
|
20
22
|
mcs, html = response.split '<<<phget_separator>>>'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: phantom_mechanize
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.3
|
4
|
+
version: 0.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- P Guardiario
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-10-
|
11
|
+
date: 2015-10-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|