webmaster_tools 0.1.0.rc1 → 0.1.0.rc2

Sign up to get free protection for your applications and to get access to all the features.
data/Rakefile CHANGED
@@ -1 +1,6 @@
1
1
  require "bundler/gem_tasks"
2
+
3
+ require 'rspec/core/rake_task'
4
+ RSpec::Core::RakeTask.new(:spec)
5
+
6
+ task :default => :spec
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.0.rc1
1
+ 0.1.0.rc2
@@ -13,11 +13,17 @@ require 'mechanize'
13
13
  # :security_token -
14
14
  class WebmasterTools
15
15
  LOGIN = "https://accounts.google.com/ServiceLogin?service=sitemaps"
16
+ AUTH = "https://accounts.google.com/ServiceLoginAuth"
16
17
  REMOVAL = "https://www.google.com/webmasters/tools/removals-request?hl=en&siteUrl=%s&urlt=%s"
17
18
  INFO = "https://www.google.com/webmasters/tools/sitemaps-dl?hl=en&siteUrl=%s&security_token=%s"
18
19
  DASHBOARD = "https://www.google.com/webmasters/tools/dashboard?hl=en&siteUrl=%s"
19
20
  ERRORS = "https://www.google.com/webmasters/tools/crawl-errors?hl=en&siteUrl=%s"
20
21
  STATS = "https://www.google.com/webmasters/tools/crawl-stats?hl=en&siteUrl=%s"
22
+ TOKEN = "https://www.google.com/webmasters/tools/gwt/SITEMAPS_READ"
23
+ GWT = "https://www.google.com/webmasters/tools/gwt/"
24
+ GWT_PERM = "E3DA43109D05B1A5067480CE25494CC2"
25
+
26
+ PAYLOAD = "7|0|11|%s|3EA173CEE6992CFDEAB5C18469B06594|com.google.crawl.wmconsole.fe.feature.gwt.sitemaps.shared.SitemapsService|getDataForMainPage|com.google.crawl.wmconsole.fe.feature.gwt.common.shared.FeatureContext/2156265033|Z|/webmasters/tools|com.google.crawl.wmconsole.fe.feature.gwt.config.FeatureKey/497977451|en|%s|com.google.crawl.wmconsole.fe.base.PermissionLevel/2330262508|1|2|3|4|3|5|6|6|5|7|8|5|9|10|11|5|1|0|"
21
27
 
22
28
  def initialize(username, password)
23
29
  login(username, password)
@@ -29,18 +35,32 @@ class WebmasterTools
29
35
  form.Email = username
30
36
  form.Passwd = password
31
37
  end)
38
+ raise "Wrong username + password combination" if page.content.include?(AUTH)
32
39
  end
33
40
 
34
41
  def dashboard(url)
35
- url = norm_url(url)
42
+ url = CGI::escape norm_url(url)
36
43
  page = agent.get(DASHBOARD % url)
37
- {
38
- :indexed => page.search("#sitemap tbody .rightmost").text.gsub(/\D/, '').to_i
39
- }
44
+ page.search("#sitemap tbody .rightmost").map do |node|
45
+ { :indexed_web => node.text.gsub(/\D/, '').to_i }
46
+ end
40
47
  end
41
48
 
42
- def crawl_info(url, token)
43
- url = norm_url(url)
49
+ def security_token(url)
50
+ # looks like `crawl_error_counts(url)` contains the security_token as well (if data available)...
51
+ dashboard(url) # to trigger referer
52
+ url = norm_url(url)
53
+ page = agent.post(TOKEN, PAYLOAD % [GWT, url], {
54
+ "X-GWT-Module-Base" => GWT,
55
+ "X-GWT-Permutation" => GWT_PERM,
56
+ "Content-Type" => "text/x-gwt-rpc; charset=utf-8",
57
+ })
58
+ page.content.scan(/security_token=([^"]+)/).flatten.first
59
+ end
60
+
61
+ def crawl_info(url)
62
+ token = security_token(url)
63
+ url = CGI::escape norm_url(url)
44
64
  page = agent.get(INFO % [url, token])
45
65
 
46
66
  lines = page.content.split("\n").map do |line|
@@ -54,7 +74,7 @@ class WebmasterTools
54
74
  end
55
75
 
56
76
  def crawl_stats(url)
57
- url = norm_url(url)
77
+ url = CGI::escape norm_url(url)
58
78
  types = %w(pages kilobytes milliseconds).map(&:to_sym)
59
79
  head = %w(high avg low).map(&:to_sym)
60
80
 
@@ -68,7 +88,7 @@ class WebmasterTools
68
88
  end
69
89
 
70
90
  def crawl_error_counts(url)
71
- url = norm_url(url)
91
+ url = CGI::escape norm_url(url)
72
92
  page = agent.get(ERRORS % url)
73
93
 
74
94
  page.search(".categories a").inject({}) do |hash, n|
@@ -79,7 +99,7 @@ class WebmasterTools
79
99
  end
80
100
 
81
101
  def remove_url(url, file)
82
- url = norm_url(url)
102
+ url = CGI::escape norm_url(url)
83
103
  page = agent.get(REMOVAL % [url, url + file])
84
104
  page = agent.submit page.form
85
105
  end
@@ -91,6 +111,6 @@ class WebmasterTools
91
111
 
92
112
  def norm_url(url)
93
113
  schema, host = url.scan(/^(https?:\/\/)?(.+?)\/?$/).flatten
94
- CGI::escape "#{schema || 'http://'}#{host}/"
114
+ "#{schema || 'http://'}#{host}/"
95
115
  end
96
116
  end