phantom_mechanize 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/README.md +4 -0
- data/js/phget.js +29 -7
- data/lib/phantom_mechanize/ext/mechanize.rb +4 -2
- data/lib/phantom_mechanize/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
ODBjYTZjMzJkNGFiNTZiYWFiMzQ0NzA4Njk2MTRlNTFiN2FkZjRmOA==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
N2FiOWEyZWZjNDhiNjg0ZjQ1NzY0NGQ0MzJiNGM2MzJkZmJjNGNhNw==
|
7
7
|
SHA512:
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
ZWNlZDFiODljMTU3NWY0OTNkZmU3NTAzZjBkOGM4MTc1YjczMjE0NGRhYWE2
|
10
|
+
ZmMyZTc2YmJiYjBiYTI3ZWE5ZjVhZThhMjhmNDNhN2Q0NDhlOWIyZWM3ODhm
|
11
|
+
YTBjMjhjZmVjMDliMzQ0OTJkMDA3YWIyZTE3NDkxN2RlY2QwNjU=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
NGFlMmViZTZhZTczOTNjMjJmYWY3YWU2NzIxNTUwYjIxMmJjNTUwZjUzY2Yx
|
14
|
+
MDUzZmVkY2Y3Yjk3YWFjNDM4N2ZkYTA3Yjg1ODRiYmI4ZTYzZTI5NjhkYmM3
|
15
|
+
OTgyNDlkNWM4MjE4NzA2ZmM0Nzg3NjE5NGU5Y2RmZDI2YWI4NDQ=
|
data/README.md
CHANGED
@@ -27,11 +27,15 @@ page = agent.phget 'http://www.google.com', :wait => 10000, :selector => '[name=
|
|
27
27
|
# use some js to submit a form or interact with the dom:
|
28
28
|
page = agent.phget 'http://www.google.com', :selector => ['[name=q]', 'h3 a'], :js => "$('[name=q]').val('phantom_mechanize');$('form').submit()"
|
29
29
|
|
30
|
+
# return page after scrolling has completed
|
31
|
+
page = agent.phget 'http://www.somescrollingpage.com', :scroll => true
|
32
|
+
|
30
33
|
|
31
34
|
```
|
32
35
|
## Options
|
33
36
|
* :selector - return once this selector is located (jquery)
|
34
37
|
* :wait - wait this many milliseconds (default 10,000)
|
38
|
+
* :scroll - scroll to the bottom until no more results show up (infinite scrolling)
|
35
39
|
|
36
40
|
## Faq
|
37
41
|
> What about cookies?
|
data/js/phget.js
CHANGED
@@ -8,6 +8,7 @@ var selector = selectors[0];
|
|
8
8
|
var cookies = JSON.parse(args[4]);
|
9
9
|
var user_agent = args[5];
|
10
10
|
var jss = JSON.parse(args[6]);
|
11
|
+
var scroll = JSON.parse(args[7]);
|
11
12
|
|
12
13
|
// var date =
|
13
14
|
for(i in cookies){
|
@@ -31,13 +32,15 @@ function output(html, page){
|
|
31
32
|
|
32
33
|
var page = require('webpage').create();
|
33
34
|
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
35
|
+
if(!scroll){
|
36
|
+
setInterval(function() {
|
37
|
+
var html = page.evaluate(function() {
|
38
|
+
return document.documentElement.outerHTML;
|
39
|
+
});
|
40
|
+
output(html, page);
|
41
|
+
output(page.content, page);
|
42
|
+
}, timeout);
|
43
|
+
}
|
41
44
|
|
42
45
|
setInterval(function() {
|
43
46
|
page.render('phantomjs.png');
|
@@ -119,4 +122,23 @@ page.onError = function(msg, trace) {
|
|
119
122
|
|
120
123
|
page.open(url, function() {
|
121
124
|
|
125
|
+
if(scroll){
|
126
|
+
var num = 0;
|
127
|
+
window.setInterval(function() {
|
128
|
+
num2 = page.evaluate(function() {
|
129
|
+
// Scrolls to the bottom of page
|
130
|
+
window.document.body.scrollTop = document.body.scrollHeight;
|
131
|
+
return document.body.scrollHeight;
|
132
|
+
});
|
133
|
+
if (num2 == num){
|
134
|
+
var html = page.evaluate(function() {
|
135
|
+
return document.documentElement.outerHTML;
|
136
|
+
});
|
137
|
+
output(html, page);
|
138
|
+
}
|
139
|
+
num = num2;
|
140
|
+
// console.log(num);
|
141
|
+
}, timeout); // Number of milliseconds to wait between scrolls
|
142
|
+
}
|
143
|
+
|
122
144
|
});
|
@@ -4,6 +4,7 @@ class Mechanize
|
|
4
4
|
args = args[0] || {}
|
5
5
|
wait = args[:wait] || 10000
|
6
6
|
selector = args[:selector] || ""
|
7
|
+
scroll = args[:scroll] ? 1 : 0
|
7
8
|
selector = [selector] if selector.is_a?(String)
|
8
9
|
js = args[:js] || ""
|
9
10
|
js = [js] if js.is_a?(String)
|
@@ -12,9 +13,10 @@ class Mechanize
|
|
12
13
|
|
13
14
|
ph_args = ['--ssl-protocol=any', '--web-security=false']
|
14
15
|
ph_args << "--proxy=#{proxy_addr}:#{proxy_port}" if proxy_port && proxy_addr
|
15
|
-
puts "phantomjs #{ph_args.join(' ')} \"#{PhantomMechanize::JS_FOLDER}/phget.js\" \"#{url}\" \"#{wait}\" \"#{selector.to_json.gsub('"', '\"')}\" \"#{pc.gsub('"', '\"')}\" \"#{user_agent.gsub('"', '\"')}\" \"#{js.to_json.gsub('"', '\"')}\""
|
16
16
|
|
17
|
-
|
17
|
+
cmd = "phantomjs #{ph_args.join(' ')} \"#{PhantomMechanize::JS_FOLDER}/phget.js\" \"#{url}\" \"#{wait}\" \"#{selector.to_json.gsub('\"', '\\"')}\" \"#{pc.gsub('\"', '\\"')}\" \"#{user_agent.gsub('\"', '\\"')}\" \"#{js.to_json.gsub('\"', '\\"')}\" \"#{scroll.to_json}\""
|
18
|
+
response = %x[#{cmd}]
|
19
|
+
|
18
20
|
raise 'bad response' if response == ''
|
19
21
|
|
20
22
|
mcs, html = response.split '<<<phget_separator>>>'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: phantom_mechanize
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.3
|
4
|
+
version: 0.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- P Guardiario
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-10-
|
11
|
+
date: 2015-10-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|