webmaster_tools 0.1.0.rc1 → 0.1.0.rc2

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
data/Rakefile CHANGED
@@ -1 +1,6 @@
1
1
  require "bundler/gem_tasks"
2
+
3
+ require 'rspec/core/rake_task'
4
+ RSpec::Core::RakeTask.new(:spec)
5
+
6
+ task :default => :spec
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.0.rc1
1
+ 0.1.0.rc2
@@ -13,11 +13,17 @@ require 'mechanize'
13
13
  # :security_token -
14
14
  class WebmasterTools
15
15
  LOGIN = "https://accounts.google.com/ServiceLogin?service=sitemaps"
16
+ AUTH = "https://accounts.google.com/ServiceLoginAuth"
16
17
  REMOVAL = "https://www.google.com/webmasters/tools/removals-request?hl=en&siteUrl=%s&urlt=%s"
17
18
  INFO = "https://www.google.com/webmasters/tools/sitemaps-dl?hl=en&siteUrl=%s&security_token=%s"
18
19
  DASHBOARD = "https://www.google.com/webmasters/tools/dashboard?hl=en&siteUrl=%s"
19
20
  ERRORS = "https://www.google.com/webmasters/tools/crawl-errors?hl=en&siteUrl=%s"
20
21
  STATS = "https://www.google.com/webmasters/tools/crawl-stats?hl=en&siteUrl=%s"
22
+ TOKEN = "https://www.google.com/webmasters/tools/gwt/SITEMAPS_READ"
23
+ GWT = "https://www.google.com/webmasters/tools/gwt/"
24
+ GWT_PERM = "E3DA43109D05B1A5067480CE25494CC2"
25
+
26
+ PAYLOAD = "7|0|11|%s|3EA173CEE6992CFDEAB5C18469B06594|com.google.crawl.wmconsole.fe.feature.gwt.sitemaps.shared.SitemapsService|getDataForMainPage|com.google.crawl.wmconsole.fe.feature.gwt.common.shared.FeatureContext/2156265033|Z|/webmasters/tools|com.google.crawl.wmconsole.fe.feature.gwt.config.FeatureKey/497977451|en|%s|com.google.crawl.wmconsole.fe.base.PermissionLevel/2330262508|1|2|3|4|3|5|6|6|5|7|8|5|9|10|11|5|1|0|"
21
27
 
22
28
  def initialize(username, password)
23
29
  login(username, password)
@@ -29,18 +35,32 @@ class WebmasterTools
29
35
  form.Email = username
30
36
  form.Passwd = password
31
37
  end)
38
+ raise "Wrong username + password combination" if page.content.include?(AUTH)
32
39
  end
33
40
 
34
41
  def dashboard(url)
35
- url = norm_url(url)
42
+ url = CGI::escape norm_url(url)
36
43
  page = agent.get(DASHBOARD % url)
37
- {
38
- :indexed => page.search("#sitemap tbody .rightmost").text.gsub(/\D/, '').to_i
39
- }
44
+ page.search("#sitemap tbody .rightmost").map do |node|
45
+ { :indexed_web => node.text.gsub(/\D/, '').to_i }
46
+ end
40
47
  end
41
48
 
42
- def crawl_info(url, token)
43
- url = norm_url(url)
49
+ def security_token(url)
50
+ # looks like `crawl_error_counts(url)` contains the security_token as well (if data available)...
51
+ dashboard(url) # to trigger referer
52
+ url = norm_url(url)
53
+ page = agent.post(TOKEN, PAYLOAD % [GWT, url], {
54
+ "X-GWT-Module-Base" => GWT,
55
+ "X-GWT-Permutation" => GWT_PERM,
56
+ "Content-Type" => "text/x-gwt-rpc; charset=utf-8",
57
+ })
58
+ page.content.scan(/security_token=([^"]+)/).flatten.first
59
+ end
60
+
61
+ def crawl_info(url)
62
+ token = security_token(url)
63
+ url = CGI::escape norm_url(url)
44
64
  page = agent.get(INFO % [url, token])
45
65
 
46
66
  lines = page.content.split("\n").map do |line|
@@ -54,7 +74,7 @@ class WebmasterTools
54
74
  end
55
75
 
56
76
  def crawl_stats(url)
57
- url = norm_url(url)
77
+ url = CGI::escape norm_url(url)
58
78
  types = %w(pages kilobytes milliseconds).map(&:to_sym)
59
79
  head = %w(high avg low).map(&:to_sym)
60
80
 
@@ -68,7 +88,7 @@ class WebmasterTools
68
88
  end
69
89
 
70
90
  def crawl_error_counts(url)
71
- url = norm_url(url)
91
+ url = CGI::escape norm_url(url)
72
92
  page = agent.get(ERRORS % url)
73
93
 
74
94
  page.search(".categories a").inject({}) do |hash, n|
@@ -79,7 +99,7 @@ class WebmasterTools
79
99
  end
80
100
 
81
101
  def remove_url(url, file)
82
- url = norm_url(url)
102
+ url = CGI::escape norm_url(url)
83
103
  page = agent.get(REMOVAL % [url, url + file])
84
104
  page = agent.submit page.form
85
105
  end
@@ -91,6 +111,6 @@ class WebmasterTools
91
111
 
92
112
  def norm_url(url)
93
113
  schema, host = url.scan(/^(https?:\/\/)?(.+?)\/?$/).flatten
94
- CGI::escape "#{schema || 'http://'}#{host}/"
114
+ "#{schema || 'http://'}#{host}/"
95
115
  end
96
116
  end