phantom_mechanize 0.0.3 → 0.0.4

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries and as they appear in that public registry. It is provided for informational purposes only.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- ZDExZGVjMDRhZDFlZTQzMmFhMmY4Mzc4YmVmNzc3MGJkZTljMmI3Yw==
4
+ ODBjYTZjMzJkNGFiNTZiYWFiMzQ0NzA4Njk2MTRlNTFiN2FkZjRmOA==
5
5
  data.tar.gz: !binary |-
6
- MGFmY2VkY2Q0YTg4MDk5N2EyNzlmYWViMzc1NWJmYjcyNTZhMzE4Mg==
6
+ N2FiOWEyZWZjNDhiNjg0ZjQ1NzY0NGQ0MzJiNGM2MzJkZmJjNGNhNw==
7
7
  SHA512:
8
8
  metadata.gz: !binary |-
9
- NmIyNjk1ZTZhYjc1MjkxOTJmMGU1MTRhOTYwMTYzOWExY2RjMGQ1OTRhMzEx
10
- OGM2OTM5YjU0M2IyMGQ3NTliOWRjZjQ5ZDA3ZGQ1YzdiMTZhODZjODVkOGQ4
11
- ODEzOGJlNjhiNWNkYmFkODI4ZTFmZDE3YWI3MjUxM2M5NjMyMTY=
9
+ ZWNlZDFiODljMTU3NWY0OTNkZmU3NTAzZjBkOGM4MTc1YjczMjE0NGRhYWE2
10
+ ZmMyZTc2YmJiYjBiYTI3ZWE5ZjVhZThhMjhmNDNhN2Q0NDhlOWIyZWM3ODhm
11
+ YTBjMjhjZmVjMDliMzQ0OTJkMDA3YWIyZTE3NDkxN2RlY2QwNjU=
12
12
  data.tar.gz: !binary |-
13
- ZDhhNTUzMjNjMjNmODk0YWQ3MjRlZWE2N2MyNTk1NzhjODk2YzQwM2RkYWMx
14
- NjkxNjk5NDg4MjYwYjhmMWEzOTZlN2RmM2JiMWY3NzMxZDI0OTc0Y2RkZTJm
15
- MDNhMmQyOTRjODZiZDY5NGIxYjZhNmRiZjhmOTEwODViNjM5MDI=
13
+ NGFlMmViZTZhZTczOTNjMjJmYWY3YWU2NzIxNTUwYjIxMmJjNTUwZjUzY2Yx
14
+ MDUzZmVkY2Y3Yjk3YWFjNDM4N2ZkYTA3Yjg1ODRiYmI4ZTYzZTI5NjhkYmM3
15
+ OTgyNDlkNWM4MjE4NzA2ZmM0Nzg3NjE5NGU5Y2RmZDI2YWI4NDQ=
data/README.md CHANGED
@@ -27,11 +27,15 @@ page = agent.phget 'http://www.google.com', :wait => 10000, :selector => '[name=
27
27
  # use some js to submit a form or interact with the dom:
28
28
  page = agent.phget 'http://www.google.com', :selector => ['[name=q]', 'h3 a'], :js => "$('[name=q]').val('phantom_mechanize');$('form').submit()"
29
29
 
30
+ # keep scrolling to the bottom and return the page once its height stops growing
31
+ page = agent.phget 'http://www.somescrollingpage.com', :scroll => true
32
+
30
33
 
31
34
  ```
32
35
  ## Options
33
36
  * :selector - return once this selector is located (jquery)
34
37
  * :wait - wait this many milliseconds (default 10,000)
38
+ * :scroll - when set, repeatedly scroll to the bottom and return the page once its height stops growing, i.e. no more results load (for infinite-scrolling pages)
35
39
 
36
40
  ## Faq
37
41
  > What about cookies?
@@ -8,6 +8,7 @@ var selector = selectors[0];
8
8
  var cookies = JSON.parse(args[4]);
9
9
  var user_agent = args[5];
10
10
  var jss = JSON.parse(args[6]);
11
+ var scroll = JSON.parse(args[7]);
11
12
 
12
13
  // var date =
13
14
  for(i in cookies){
@@ -31,13 +32,15 @@ function output(html, page){
31
32
 
32
33
  var page = require('webpage').create();
33
34
 
34
- setInterval(function() {
35
- var html = page.evaluate(function() {
36
- return document.documentElement.outerHTML;
37
- });
38
- output(html, page);
39
- output(page.content, page);
40
- }, timeout);
35
+ if(!scroll){
36
+ setInterval(function() {
37
+ var html = page.evaluate(function() {
38
+ return document.documentElement.outerHTML;
39
+ });
40
+ output(html, page);
41
+ output(page.content, page);
42
+ }, timeout);
43
+ }
41
44
 
42
45
  setInterval(function() {
43
46
  page.render('phantomjs.png');
@@ -119,4 +122,23 @@ page.onError = function(msg, trace) {
119
122
 
120
123
  page.open(url, function() {
121
124
 
125
+ if(scroll){
126
+ var num = 0;
127
+ window.setInterval(function() {
128
+ num2 = page.evaluate(function() {
129
+ // Scrolls to the bottom of page
130
+ window.document.body.scrollTop = document.body.scrollHeight;
131
+ return document.body.scrollHeight;
132
+ });
133
+ if (num2 == num){
134
+ var html = page.evaluate(function() {
135
+ return document.documentElement.outerHTML;
136
+ });
137
+ output(html, page);
138
+ }
139
+ num = num2;
140
+ // console.log(num);
141
+ }, timeout); // Number of milliseconds to wait between scrolls
142
+ }
143
+
122
144
  });
@@ -4,6 +4,7 @@ class Mechanize
4
4
  args = args[0] || {}
5
5
  wait = args[:wait] || 10000
6
6
  selector = args[:selector] || ""
7
+ scroll = args[:scroll] ? 1 : 0
7
8
  selector = [selector] if selector.is_a?(String)
8
9
  js = args[:js] || ""
9
10
  js = [js] if js.is_a?(String)
@@ -12,9 +13,10 @@ class Mechanize
12
13
 
13
14
  ph_args = ['--ssl-protocol=any', '--web-security=false']
14
15
  ph_args << "--proxy=#{proxy_addr}:#{proxy_port}" if proxy_port && proxy_addr
15
- puts "phantomjs #{ph_args.join(' ')} \"#{PhantomMechanize::JS_FOLDER}/phget.js\" \"#{url}\" \"#{wait}\" \"#{selector.to_json.gsub('"', '\"')}\" \"#{pc.gsub('"', '\"')}\" \"#{user_agent.gsub('"', '\"')}\" \"#{js.to_json.gsub('"', '\"')}\""
16
16
 
17
- response = `phantomjs #{ph_args.join(' ')} "#{PhantomMechanize::JS_FOLDER}/phget.js" "#{url}" "#{wait}" "#{selector.to_json.gsub('"', '\"')}" "#{pc.gsub('"', '\"')}" "#{user_agent.gsub('"', '\"')}" "#{js.to_json.gsub('"', '\"')}"`
17
+ cmd = "phantomjs #{ph_args.join(' ')} \"#{PhantomMechanize::JS_FOLDER}/phget.js\" \"#{url}\" \"#{wait}\" \"#{selector.to_json.gsub('\"', '\\"')}\" \"#{pc.gsub('\"', '\\"')}\" \"#{user_agent.gsub('\"', '\\"')}\" \"#{js.to_json.gsub('\"', '\\"')}\" \"#{scroll.to_json}\""
18
+ response = %x[#{cmd}]
19
+
18
20
  raise 'bad response' if response == ''
19
21
 
20
22
  mcs, html = response.split '<<<phget_separator>>>'
@@ -1,3 +1,3 @@
1
1
  module PhantomMechanize
2
- VERSION = "0.0.3"
2
+ VERSION = "0.0.4"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: phantom_mechanize
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - P Guardiario
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-10-08 00:00:00.000000000 Z
11
+ date: 2015-10-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler