webmaster_tools 0.1.0.rc1 → 0.1.0.rc2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +5 -0
- data/VERSION +1 -1
- data/lib/webmaster_tools.rb +30 -10
- data/spec/fixtures/cassettes/WebmasterTools/_crawl_error_counts/gets_crawl_error_counts.yml +2463 -0
- data/spec/fixtures/cassettes/WebmasterTools/_crawl_info/gets_crawl_info.yml +2794 -0
- data/spec/fixtures/cassettes/WebmasterTools/_crawl_stats/gets_crawl_stats.yml +2301 -0
- data/spec/fixtures/cassettes/WebmasterTools/_dashboard/gets_dashboard.yml +2659 -0
- data/spec/fixtures/cassettes/WebmasterTools/_login/passes_with_correct_username_password.yml +1751 -0
- data/spec/fixtures/cassettes/WebmasterTools/_security_token/gets_security_token.yml +2736 -0
- data/spec/spec_helper.rb +21 -0
- data/spec/webmaster_tools_spec.rb +71 -0
- data/webmaster_tools.gemspec +1 -1
- metadata +47 -9
data/Rakefile
CHANGED
data/VERSION
CHANGED
@@ -1 +1 @@
-0.1.0.rc1
+0.1.0.rc2
data/lib/webmaster_tools.rb
CHANGED
@@ -13,11 +13,17 @@ require 'mechanize'
 # :security_token -
 class WebmasterTools
   LOGIN = "https://accounts.google.com/ServiceLogin?service=sitemaps"
+  AUTH = "https://accounts.google.com/ServiceLoginAuth"
   REMOVAL = "https://www.google.com/webmasters/tools/removals-request?hl=en&siteUrl=%s&urlt=%s"
   INFO = "https://www.google.com/webmasters/tools/sitemaps-dl?hl=en&siteUrl=%s&security_token=%s"
   DASHBOARD = "https://www.google.com/webmasters/tools/dashboard?hl=en&siteUrl=%s"
   ERRORS = "https://www.google.com/webmasters/tools/crawl-errors?hl=en&siteUrl=%s"
   STATS = "https://www.google.com/webmasters/tools/crawl-stats?hl=en&siteUrl=%s"
+  TOKEN = "https://www.google.com/webmasters/tools/gwt/SITEMAPS_READ"
+  GWT = "https://www.google.com/webmasters/tools/gwt/"
+  GWT_PERM = "E3DA43109D05B1A5067480CE25494CC2"
+
+  PAYLOAD = "7|0|11|%s|3EA173CEE6992CFDEAB5C18469B06594|com.google.crawl.wmconsole.fe.feature.gwt.sitemaps.shared.SitemapsService|getDataForMainPage|com.google.crawl.wmconsole.fe.feature.gwt.common.shared.FeatureContext/2156265033|Z|/webmasters/tools|com.google.crawl.wmconsole.fe.feature.gwt.config.FeatureKey/497977451|en|%s|com.google.crawl.wmconsole.fe.base.PermissionLevel/2330262508|1|2|3|4|3|5|6|6|5|7|8|5|9|10|11|5|1|0|"

   def initialize(username, password)
     login(username, password)
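The two %s slots in the new PAYLOAD constant are filled positionally; later in this diff, security_token interpolates the GWT module base and the normalized site URL into them. A rough sketch of what that produces from inside the class, using a placeholder site URL (the long output is abridged):

    PAYLOAD % [GWT, "http://example.com/"]
    # => "7|0|11|https://www.google.com/webmasters/tools/gwt/|3EA173CEE6992CFDEAB5C18469B06594|...|en|http://example.com/|...|1|0|"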
@@ -29,18 +35,32 @@ class WebmasterTools
       form.Email = username
       form.Passwd = password
     end)
+    raise "Wrong username + password combination" if page.content.include?(AUTH)
   end

   def dashboard(url)
-    url = norm_url(url)
+    url = CGI::escape norm_url(url)
     page = agent.get(DASHBOARD % url)
-
-    :
-
+    page.search("#sitemap tbody .rightmost").map do |node|
+      { :indexed_web => node.text.gsub(/\D/, '').to_i }
+    end
   end

-  def
-
+  def security_token(url)
+    # looks like `crawl_error_counts(url)` contains the security_token as well (if data available)...
+    dashboard(url) # to trigger referer
+    url = norm_url(url)
+    page = agent.post(TOKEN, PAYLOAD % [GWT, url], {
+      "X-GWT-Module-Base" => GWT,
+      "X-GWT-Permutation" => GWT_PERM,
+      "Content-Type" => "text/x-gwt-rpc; charset=utf-8",
+    })
+    page.content.scan(/security_token=([^"]+)/).flatten.first
+  end
+
+  def crawl_info(url)
+    token = security_token(url)
+    url = CGI::escape norm_url(url)
     page = agent.get(INFO % [url, token])

     lines = page.content.split("\n").map do |line|
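A minimal usage sketch for the login check added above, assuming a failed login leaves the ServiceLoginAuth URL in the returned page (which is what the new raise tests for); the credentials are placeholders, and the error is the RuntimeError that a bare raise with a string produces:

    begin
      WebmasterTools.new("user@example.com", "not-the-password")
    rescue RuntimeError => e
      e.message  # => "Wrong username + password combination"
    end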
@@ -54,7 +74,7 @@ class WebmasterTools
   end

   def crawl_stats(url)
-    url = norm_url(url)
+    url = CGI::escape norm_url(url)
     types = %w(pages kilobytes milliseconds).map(&:to_sym)
     head = %w(high avg low).map(&:to_sym)

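The same url = CGI::escape norm_url(url) change recurs in the next two hunks. A short illustration of what it does to the normalized URL before it lands in a %s slot of the endpoint templates, using a placeholder site:

    require 'cgi'

    CGI::escape("http://example.com/")
    # => "http%3A%2F%2Fexample.com%2F"
    WebmasterTools::STATS % CGI::escape("http://example.com/")
    # => "https://www.google.com/webmasters/tools/crawl-stats?hl=en&siteUrl=http%3A%2F%2Fexample.com%2F"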
@@ -68,7 +88,7 @@ class WebmasterTools
   end

   def crawl_error_counts(url)
-    url = norm_url(url)
+    url = CGI::escape norm_url(url)
     page = agent.get(ERRORS % url)

     page.search(".categories a").inject({}) do |hash, n|
@@ -79,7 +99,7 @@ class WebmasterTools
   end

   def remove_url(url, file)
-    url = norm_url(url)
+    url = CGI::escape norm_url(url)
     page = agent.get(REMOVAL % [url, url + file])
     page = agent.submit page.form
   end
@@ -91,6 +111,6 @@ class WebmasterTools

   def norm_url(url)
     schema, host = url.scan(/^(https?:\/\/)?(.+?)\/?$/).flatten
-
+    "#{schema || 'http://'}#{host}/"
   end
 end
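Expected behaviour of the patched norm_url (called internally by the methods above), following its regexp and the new 'http://' fallback; the inputs are illustrative:

    norm_url("example.com")           # => "http://example.com/"
    norm_url("https://example.com")   # => "https://example.com/"
    norm_url("http://example.com/")   # => "http://example.com/"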