generalscraper 0.0.26 → 0.0.27

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 85a128c0ae855a52ee3842ba40807b7adc84a43a
4
- data.tar.gz: c7b8aa3648f3735c0f69ac7f91884bbd04a2450a
3
+ metadata.gz: 0fe81408deb27c96a9a67231be5953c8ddf11b23
4
+ data.tar.gz: 1297c834ccdae5daf1ef1b8664f555f6481995f4
5
5
  SHA512:
6
- metadata.gz: 54c19ac3c90c3b99b9be0115945c1f59ad71951285a5d779090f4ec91c3e4fd2fbf995c4393a27cc0d73d0efee515d62182b2851cb6205f7f4dfe2a0f17fb84d
7
- data.tar.gz: 0ea35567e7c33f9b46ab678f2bf3231d8a7b79a883e749728de0eaa9d46e772e91cc4346bc4ce3c8ff4f9a4855b0acd06cc338e63920c1228a7e5b2ef5f36837
6
+ metadata.gz: b5ae3df4ea3ea229dc7e89e55bb4e56f86e6d2bc93b38e064d38bb772b1a02fad6fe1ec58698e88c97714c269ca4c9bef5a95d8522c2edf4443141d71f6c9fae
7
+ data.tar.gz: e63d78332f8decb130f1968939752d8464a5eb6be4d5da151403b3b7687e426128e3670f0ee37c64472b5371b1d5d971bd01b9dfaeb1b86409b0d994734d5be0
@@ -6,6 +6,7 @@ require 'pry'
6
6
 
7
7
  load 'parse_page.rb'
8
8
  load 'captcha.rb'
9
+ load 'translate_page.rb'
9
10
 
10
11
  class GeneralScraper
11
12
  include ParsePage
@@ -0,0 +1,75 @@
1
+ require 'nokogiri'
2
+ require 'requestmanager'
3
+ require 'json'
4
+
5
+ class TranslatePage
6
+ def initialize(urls, requests)
7
+ @urls = urls
8
+ @requests = requests
9
+ @output = Array.new
10
+ end
11
+
12
+ # Setup browser for translate
13
+ def setup_browser
14
+ @requests.get_page("https://translate.google.com")
15
+ return @requests.get_most_recent_browser[1][0]
16
+ end
17
+
18
+ # First request
19
+ def first_request(url, browser)
20
+ # Enter URL into translate form
21
+ translate_form = browser.find_element(id: "source")
22
+ translate_form.send_keys(url)
23
+
24
+ # Click the button to translate to a particular language
25
+ click_button = browser.find_elements(:xpath, "//*[@value='es']").last
26
+ click_button.click
27
+
28
+ # Press Translate button, then switch back to original
29
+ browser.find_element(id: "gt-submit").click
30
+ end
31
+
32
+ # Next request
33
+ def nth_request(url, browser)
34
+ browser.switch_to.default_content
35
+ form_element = browser.find_element(name: "q")
36
+ form_element.clear
37
+ form_element.send_keys(url)
38
+ form_element.submit
39
+ end
40
+
41
+ # Translate the pages
42
+ def translate
43
+ browser = setup_browser
44
+
45
+ # Go through each link
46
+ counter = 0
47
+ @urls.each do |url|
48
+ # Run translate on each page
49
+ if counter == 0
50
+ first_request(url, browser)
51
+ counter+=1
52
+ else
53
+ nth_request(url, browser)
54
+ end
55
+
56
+ # Get html
57
+ @output.push({url: url, html: get_iframe_html(browser)})
58
+ end
59
+
60
+ # Clean up
61
+ @requests.close_all_browsers
62
+ return @output
63
+ end
64
+
65
+ # Get iframe
66
+ def get_iframe_html(browser)
67
+ sleep(3)
68
+ browser.find_element(id: "anno2").click
69
+
70
+ # Get HTML inside the iframe
71
+ browser.switch_to.frame(0)
72
+ iframe_html = browser.find_element(class: "os-linux").attribute("innerHTML")
73
+ return iframe_html
74
+ end
75
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: generalscraper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.26
4
+ version: 0.0.27
5
5
  platform: ruby
6
6
  authors:
7
7
  - M. C. McGrath
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-10-30 00:00:00.000000000 Z
11
+ date: 2017-02-18 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Scrapes Google
14
14
  email: shidash@shidash.com
@@ -19,6 +19,7 @@ files:
19
19
  - lib/captcha.rb
20
20
  - lib/generalscraper.rb
21
21
  - lib/parse_page.rb
22
+ - lib/translate_page.rb
22
23
  homepage: https://github.com/TransparencyToolkit/generalscraper
23
24
  licenses:
24
25
  - GPL