generalscraper 0.0.26 → 0.0.27

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 85a128c0ae855a52ee3842ba40807b7adc84a43a
4
- data.tar.gz: c7b8aa3648f3735c0f69ac7f91884bbd04a2450a
3
+ metadata.gz: 0fe81408deb27c96a9a67231be5953c8ddf11b23
4
+ data.tar.gz: 1297c834ccdae5daf1ef1b8664f555f6481995f4
5
5
  SHA512:
6
- metadata.gz: 54c19ac3c90c3b99b9be0115945c1f59ad71951285a5d779090f4ec91c3e4fd2fbf995c4393a27cc0d73d0efee515d62182b2851cb6205f7f4dfe2a0f17fb84d
7
- data.tar.gz: 0ea35567e7c33f9b46ab678f2bf3231d8a7b79a883e749728de0eaa9d46e772e91cc4346bc4ce3c8ff4f9a4855b0acd06cc338e63920c1228a7e5b2ef5f36837
6
+ metadata.gz: b5ae3df4ea3ea229dc7e89e55bb4e56f86e6d2bc93b38e064d38bb772b1a02fad6fe1ec58698e88c97714c269ca4c9bef5a95d8522c2edf4443141d71f6c9fae
7
+ data.tar.gz: e63d78332f8decb130f1968939752d8464a5eb6be4d5da151403b3b7687e426128e3670f0ee37c64472b5371b1d5d971bd01b9dfaeb1b86409b0d994734d5be0
@@ -6,6 +6,7 @@ require 'pry'
6
6
 
7
7
  load 'parse_page.rb'
8
8
  load 'captcha.rb'
9
+ load 'translate_page.rb'
9
10
 
10
11
  class GeneralScraper
11
12
  include ParsePage
@@ -0,0 +1,75 @@
1
+ require 'nokogiri'
2
+ require 'requestmanager'
3
+ require 'json'
4
+
5
+ class TranslatePage # Drives a browser through translate.google.com for each URL in urls and collects the resulting HTML
6
+ def initialize(urls, requests)
7
+ @urls = urls
8
+ @requests = requests
9
+ @output = Array.new
10
+ end
11
+
12
+ # Load https://translate.google.com through @requests and return element [1][0] of get_most_recent_browser (presumably the browser/driver object; confirm against requestmanager)
13
+ def setup_browser
14
+ @requests.get_page("https://translate.google.com")
15
+ return @requests.get_most_recent_browser[1][0]
16
+ end
17
+
18
+ # First request: fill in the landing-page form with the URL, pick the target language, and submit
19
+ def first_request(url, browser)
20
+ # Type the URL into the translate form (element with id source)
21
+ translate_form = browser.find_element(id: "source")
22
+ translate_form.send_keys(url)
23
+
24
+ # Click the last element whose value attribute is es to choose the target language (presumably Spanish; confirm)
25
+ click_button = browser.find_elements(:xpath, "//*[@value='es']").last
26
+ click_button.click
27
+
28
+ # Press the Translate button (id gt-submit); the switch back to the original is not performed in this method
29
+ browser.find_element(id: "gt-submit").click
30
+ end
31
+
32
+ # Subsequent requests: return to the top-level document, then replace the contents of the field named q with the next URL and submit it
33
+ def nth_request(url, browser)
34
+ browser.switch_to.default_content
35
+ form_element = browser.find_element(name: "q")
36
+ form_element.clear
37
+ form_element.send_keys(url)
38
+ form_element.submit
39
+ end
40
+
41
+ # Translate every URL in @urls and return @output, an array of hashes with url and html keys; all browsers are closed before returning
42
+ def translate
43
+ browser = setup_browser
44
+
45
+ # Go through each link; counter only distinguishes the first URL from the rest
46
+ counter = 0
47
+ @urls.each do |url|
48
+ # The first URL goes through the landing-page form; every later URL reuses the q field via nth_request
49
+ if counter == 0
50
+ first_request(url, browser)
51
+ counter+=1
52
+ else
53
+ nth_request(url, browser)
54
+ end
55
+
56
+ # Record the HTML read from the result frame alongside the URL that produced it
57
+ @output.push({url: url, html: get_iframe_html(browser)})
58
+ end
59
+
60
+ # Clean up (NOTE(review): not reached if any request above raises)
61
+ @requests.close_all_browsers
62
+ return @output
63
+ end
64
+
65
+ # Wait 3 seconds, click the element with id anno2, then return the innerHTML read from inside frame 0
66
+ def get_iframe_html(browser)
67
+ sleep(3)
68
+ browser.find_element(id: "anno2").click
69
+
70
+ # Switch into frame index 0 and read the innerHTML of the element with class os-linux (NOTE(review): class name looks platform-specific; confirm it exists on other hosts)
71
+ browser.switch_to.frame(0)
72
+ iframe_html = browser.find_element(class: "os-linux").attribute("innerHTML")
73
+ return iframe_html
74
+ end
75
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: generalscraper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.26
4
+ version: 0.0.27
5
5
  platform: ruby
6
6
  authors:
7
7
  - M. C. McGrath
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-10-30 00:00:00.000000000 Z
11
+ date: 2017-02-18 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Scrapes Google
14
14
  email: shidash@shidash.com
@@ -19,6 +19,7 @@ files:
19
19
  - lib/captcha.rb
20
20
  - lib/generalscraper.rb
21
21
  - lib/parse_page.rb
22
+ - lib/translate_page.rb
22
23
  homepage: https://github.com/TransparencyToolkit/generalscraper
23
24
  licenses:
24
25
  - GPL