phantom_mechanize 0.0.3 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- ZDExZGVjMDRhZDFlZTQzMmFhMmY4Mzc4YmVmNzc3MGJkZTljMmI3Yw==
4
+ ODBjYTZjMzJkNGFiNTZiYWFiMzQ0NzA4Njk2MTRlNTFiN2FkZjRmOA==
5
5
  data.tar.gz: !binary |-
6
- MGFmY2VkY2Q0YTg4MDk5N2EyNzlmYWViMzc1NWJmYjcyNTZhMzE4Mg==
6
+ N2FiOWEyZWZjNDhiNjg0ZjQ1NzY0NGQ0MzJiNGM2MzJkZmJjNGNhNw==
7
7
  SHA512:
8
8
  metadata.gz: !binary |-
9
- NmIyNjk1ZTZhYjc1MjkxOTJmMGU1MTRhOTYwMTYzOWExY2RjMGQ1OTRhMzEx
10
- OGM2OTM5YjU0M2IyMGQ3NTliOWRjZjQ5ZDA3ZGQ1YzdiMTZhODZjODVkOGQ4
11
- ODEzOGJlNjhiNWNkYmFkODI4ZTFmZDE3YWI3MjUxM2M5NjMyMTY=
9
+ ZWNlZDFiODljMTU3NWY0OTNkZmU3NTAzZjBkOGM4MTc1YjczMjE0NGRhYWE2
10
+ ZmMyZTc2YmJiYjBiYTI3ZWE5ZjVhZThhMjhmNDNhN2Q0NDhlOWIyZWM3ODhm
11
+ YTBjMjhjZmVjMDliMzQ0OTJkMDA3YWIyZTE3NDkxN2RlY2QwNjU=
12
12
  data.tar.gz: !binary |-
13
- ZDhhNTUzMjNjMjNmODk0YWQ3MjRlZWE2N2MyNTk1NzhjODk2YzQwM2RkYWMx
14
- NjkxNjk5NDg4MjYwYjhmMWEzOTZlN2RmM2JiMWY3NzMxZDI0OTc0Y2RkZTJm
15
- MDNhMmQyOTRjODZiZDY5NGIxYjZhNmRiZjhmOTEwODViNjM5MDI=
13
+ NGFlMmViZTZhZTczOTNjMjJmYWY3YWU2NzIxNTUwYjIxMmJjNTUwZjUzY2Yx
14
+ MDUzZmVkY2Y3Yjk3YWFjNDM4N2ZkYTA3Yjg1ODRiYmI4ZTYzZTI5NjhkYmM3
15
+ OTgyNDlkNWM4MjE4NzA2ZmM0Nzg3NjE5NGU5Y2RmZDI2YWI4NDQ=
data/README.md CHANGED
@@ -27,11 +27,15 @@ page = agent.phget 'http://www.google.com', :wait => 10000, :selector => '[name=
27
27
  # use some js to submit a form or interact with the dom:
28
28
  page = agent.phget 'http://www.google.com', :selector => ['[name=q]', 'h3 a'], :js => "$('[name=q]').val('phantom_mechanize');$('form').submit()"
29
29
 
30
+ # return page after scrolling has completed
31
+ page = agent.phget 'http://www.somescrollingpage.com', :scroll => true
32
+
30
33
 
31
34
  ```
32
35
  ## Options
33
36
  * :selector - return once this selector is located (jquery)
34
37
  * :wait - wait this many milliseconds (default 10,000)
38
+ * :scroll - scroll to the bottom until no more results show up (infinite scrolling)
35
39
 
36
40
  ## Faq
37
41
  > What about cookies?
@@ -8,6 +8,7 @@ var selector = selectors[0];
8
8
  var cookies = JSON.parse(args[4]);
9
9
  var user_agent = args[5];
10
10
  var jss = JSON.parse(args[6]);
11
+ var scroll = JSON.parse(args[7]);
11
12
 
12
13
  // var date =
13
14
  for(i in cookies){
@@ -31,13 +32,15 @@ function output(html, page){
31
32
 
32
33
  var page = require('webpage').create();
33
34
 
34
- setInterval(function() {
35
- var html = page.evaluate(function() {
36
- return document.documentElement.outerHTML;
37
- });
38
- output(html, page);
39
- output(page.content, page);
40
- }, timeout);
35
+ if(!scroll){
36
+ setInterval(function() {
37
+ var html = page.evaluate(function() {
38
+ return document.documentElement.outerHTML;
39
+ });
40
+ output(html, page);
41
+ output(page.content, page);
42
+ }, timeout);
43
+ }
41
44
 
42
45
  setInterval(function() {
43
46
  page.render('phantomjs.png');
@@ -119,4 +122,23 @@ page.onError = function(msg, trace) {
119
122
 
120
123
  page.open(url, function() {
121
124
 
125
+ if(scroll){
126
+ var num = 0;
127
+ window.setInterval(function() {
128
+ num2 = page.evaluate(function() {
129
+ // Scrolls to the bottom of page
130
+ window.document.body.scrollTop = document.body.scrollHeight;
131
+ return document.body.scrollHeight;
132
+ });
133
+ if (num2 == num){
134
+ var html = page.evaluate(function() {
135
+ return document.documentElement.outerHTML;
136
+ });
137
+ output(html, page);
138
+ }
139
+ num = num2;
140
+ // console.log(num);
141
+ }, timeout); // Number of milliseconds to wait between scrolls
142
+ }
143
+
122
144
  });
@@ -4,6 +4,7 @@ class Mechanize
4
4
  args = args[0] || {}
5
5
  wait = args[:wait] || 10000
6
6
  selector = args[:selector] || ""
7
+ scroll = args[:scroll] ? 1 : 0
7
8
  selector = [selector] if selector.is_a?(String)
8
9
  js = args[:js] || ""
9
10
  js = [js] if js.is_a?(String)
@@ -12,9 +13,10 @@ class Mechanize
12
13
 
13
14
  ph_args = ['--ssl-protocol=any', '--web-security=false']
14
15
  ph_args << "--proxy=#{proxy_addr}:#{proxy_port}" if proxy_port && proxy_addr
15
- puts "phantomjs #{ph_args.join(' ')} \"#{PhantomMechanize::JS_FOLDER}/phget.js\" \"#{url}\" \"#{wait}\" \"#{selector.to_json.gsub('"', '\"')}\" \"#{pc.gsub('"', '\"')}\" \"#{user_agent.gsub('"', '\"')}\" \"#{js.to_json.gsub('"', '\"')}\""
16
16
 
17
- response = `phantomjs #{ph_args.join(' ')} "#{PhantomMechanize::JS_FOLDER}/phget.js" "#{url}" "#{wait}" "#{selector.to_json.gsub('"', '\"')}" "#{pc.gsub('"', '\"')}" "#{user_agent.gsub('"', '\"')}" "#{js.to_json.gsub('"', '\"')}"`
17
+ cmd = "phantomjs #{ph_args.join(' ')} \"#{PhantomMechanize::JS_FOLDER}/phget.js\" \"#{url}\" \"#{wait}\" \"#{selector.to_json.gsub('\"', '\\"')}\" \"#{pc.gsub('\"', '\\"')}\" \"#{user_agent.gsub('\"', '\\"')}\" \"#{js.to_json.gsub('\"', '\\"')}\" \"#{scroll.to_json}\""
18
+ response = %x[#{cmd}]
19
+
18
20
  raise 'bad response' if response == ''
19
21
 
20
22
  mcs, html = response.split '<<<phget_separator>>>'
@@ -1,3 +1,3 @@
1
1
  module PhantomMechanize
2
- VERSION = "0.0.3"
2
+ VERSION = "0.0.4"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: phantom_mechanize
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - P Guardiario
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-10-08 00:00:00.000000000 Z
11
+ date: 2015-10-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler