webmaster_tools 0.1.0.rc1 → 0.1.0.rc2
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +5 -0
- data/VERSION +1 -1
- data/lib/webmaster_tools.rb +30 -10
- data/spec/fixtures/cassettes/WebmasterTools/_crawl_error_counts/gets_crawl_error_counts.yml +2463 -0
- data/spec/fixtures/cassettes/WebmasterTools/_crawl_info/gets_crawl_info.yml +2794 -0
- data/spec/fixtures/cassettes/WebmasterTools/_crawl_stats/gets_crawl_stats.yml +2301 -0
- data/spec/fixtures/cassettes/WebmasterTools/_dashboard/gets_dashboard.yml +2659 -0
- data/spec/fixtures/cassettes/WebmasterTools/_login/passes_with_correct_username_password.yml +1751 -0
- data/spec/fixtures/cassettes/WebmasterTools/_security_token/gets_security_token.yml +2736 -0
- data/spec/spec_helper.rb +21 -0
- data/spec/webmaster_tools_spec.rb +71 -0
- data/webmaster_tools.gemspec +1 -1
- metadata +47 -9
data/Rakefile
CHANGED
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.1.0.rc1
|
1
|
+
0.1.0.rc2
|
data/lib/webmaster_tools.rb
CHANGED
@@ -13,11 +13,17 @@ require 'mechanize'
|
|
13
13
|
# :security_token -
|
14
14
|
class WebmasterTools
|
15
15
|
LOGIN = "https://accounts.google.com/ServiceLogin?service=sitemaps"
|
16
|
+
AUTH = "https://accounts.google.com/ServiceLoginAuth"
|
16
17
|
REMOVAL = "https://www.google.com/webmasters/tools/removals-request?hl=en&siteUrl=%s&urlt=%s"
|
17
18
|
INFO = "https://www.google.com/webmasters/tools/sitemaps-dl?hl=en&siteUrl=%s&security_token=%s"
|
18
19
|
DASHBOARD = "https://www.google.com/webmasters/tools/dashboard?hl=en&siteUrl=%s"
|
19
20
|
ERRORS = "https://www.google.com/webmasters/tools/crawl-errors?hl=en&siteUrl=%s"
|
20
21
|
STATS = "https://www.google.com/webmasters/tools/crawl-stats?hl=en&siteUrl=%s"
|
22
|
+
TOKEN = "https://www.google.com/webmasters/tools/gwt/SITEMAPS_READ"
|
23
|
+
GWT = "https://www.google.com/webmasters/tools/gwt/"
|
24
|
+
GWT_PERM = "E3DA43109D05B1A5067480CE25494CC2"
|
25
|
+
|
26
|
+
PAYLOAD = "7|0|11|%s|3EA173CEE6992CFDEAB5C18469B06594|com.google.crawl.wmconsole.fe.feature.gwt.sitemaps.shared.SitemapsService|getDataForMainPage|com.google.crawl.wmconsole.fe.feature.gwt.common.shared.FeatureContext/2156265033|Z|/webmasters/tools|com.google.crawl.wmconsole.fe.feature.gwt.config.FeatureKey/497977451|en|%s|com.google.crawl.wmconsole.fe.base.PermissionLevel/2330262508|1|2|3|4|3|5|6|6|5|7|8|5|9|10|11|5|1|0|"
|
21
27
|
|
22
28
|
def initialize(username, password)
|
23
29
|
login(username, password)
|
@@ -29,18 +35,32 @@ class WebmasterTools
|
|
29
35
|
form.Email = username
|
30
36
|
form.Passwd = password
|
31
37
|
end)
|
38
|
+
raise "Wrong username + password combination" if page.content.include?(AUTH)
|
32
39
|
end
|
33
40
|
|
34
41
|
def dashboard(url)
|
35
|
-
url = norm_url(url)
|
42
|
+
url = CGI::escape norm_url(url)
|
36
43
|
page = agent.get(DASHBOARD % url)
|
37
|
-
|
38
|
-
:
|
39
|
-
|
44
|
+
page.search("#sitemap tbody .rightmost").map do |node|
|
45
|
+
{ :indexed_web => node.text.gsub(/\D/, '').to_i }
|
46
|
+
end
|
40
47
|
end
|
41
48
|
|
42
|
-
def
|
43
|
-
|
49
|
+
def security_token(url)
|
50
|
+
# looks like `crawl_error_counts(url)` contains the security_token as well (if data available)...
|
51
|
+
dashboard(url) # to trigger referer
|
52
|
+
url = norm_url(url)
|
53
|
+
page = agent.post(TOKEN, PAYLOAD % [GWT, url], {
|
54
|
+
"X-GWT-Module-Base" => GWT,
|
55
|
+
"X-GWT-Permutation" => GWT_PERM,
|
56
|
+
"Content-Type" => "text/x-gwt-rpc; charset=utf-8",
|
57
|
+
})
|
58
|
+
page.content.scan(/security_token=([^"]+)/).flatten.first
|
59
|
+
end
|
60
|
+
|
61
|
+
def crawl_info(url)
|
62
|
+
token = security_token(url)
|
63
|
+
url = CGI::escape norm_url(url)
|
44
64
|
page = agent.get(INFO % [url, token])
|
45
65
|
|
46
66
|
lines = page.content.split("\n").map do |line|
|
@@ -54,7 +74,7 @@ class WebmasterTools
|
|
54
74
|
end
|
55
75
|
|
56
76
|
def crawl_stats(url)
|
57
|
-
url = norm_url(url)
|
77
|
+
url = CGI::escape norm_url(url)
|
58
78
|
types = %w(pages kilobytes milliseconds).map(&:to_sym)
|
59
79
|
head = %w(high avg low).map(&:to_sym)
|
60
80
|
|
@@ -68,7 +88,7 @@ class WebmasterTools
|
|
68
88
|
end
|
69
89
|
|
70
90
|
def crawl_error_counts(url)
|
71
|
-
url = norm_url(url)
|
91
|
+
url = CGI::escape norm_url(url)
|
72
92
|
page = agent.get(ERRORS % url)
|
73
93
|
|
74
94
|
page.search(".categories a").inject({}) do |hash, n|
|
@@ -79,7 +99,7 @@ class WebmasterTools
|
|
79
99
|
end
|
80
100
|
|
81
101
|
def remove_url(url, file)
|
82
|
-
url = norm_url(url)
|
102
|
+
url = CGI::escape norm_url(url)
|
83
103
|
page = agent.get(REMOVAL % [url, url + file])
|
84
104
|
page = agent.submit page.form
|
85
105
|
end
|
@@ -91,6 +111,6 @@ class WebmasterTools
|
|
91
111
|
|
92
112
|
def norm_url(url)
|
93
113
|
schema, host = url.scan(/^(https?:\/\/)?(.+?)\/?$/).flatten
|
94
|
-
|
114
|
+
"#{schema || 'http://'}#{host}/"
|
95
115
|
end
|
96
116
|
end
|