webmaster_tools 0.1.0.rc1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +4 -0
- data/CHANGELOG.md +11 -0
- data/Gemfile +4 -0
- data/LICENCE +26 -0
- data/README.md +52 -0
- data/Rakefile +1 -0
- data/VERSION +1 -0
- data/lib/webmaster_tools.rb +96 -0
- data/webmaster_tools.gemspec +25 -0
- metadata +89 -0
data/.gitignore
ADDED
data/CHANGELOG.md
ADDED
data/Gemfile
ADDED
data/LICENCE
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
Copyright (c) 2012, SoundCloud, Tobias Bielohlawek
|
2
|
+
|
3
|
+
All rights reserved.
|
4
|
+
|
5
|
+
Redistribution and use in source and binary forms, with or without
|
6
|
+
modification, are permitted provided that the following conditions are met:
|
7
|
+
|
8
|
+
- Redistributions of source code must retain the above copyright notice, this
|
9
|
+
list of conditions and the following disclaimer.
|
10
|
+
- Redistributions in binary form must reproduce the above copyright notice,
|
11
|
+
this list of conditions and the following disclaimer in the documentation
|
12
|
+
and/or other materials provided with the distribution.
|
13
|
+
- Neither the name of the SoundCloud nor the names of its contributors may be
|
14
|
+
used to endorse or promote products derived from this software without
|
15
|
+
specific prior written permission.
|
16
|
+
|
17
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
18
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
19
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
20
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
21
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
22
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
23
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
24
|
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
25
|
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
26
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
data/README.md
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
# Webmaster Tools
|
2
|
+
|
3
|
+
* API very limited
|
4
|
+
* subset of tools
|
5
|
+
* based on mechanize
|
6
|
+
|
7
|
+
## Usage
|
8
|
+
|
9
|
+
Simple usage case to get error counts
|
10
|
+
|
11
|
+
```ruby
|
12
|
+
require 'webmaster_tools'
|
13
|
+
|
14
|
+
client = WebmasterTools.new(<username>, <password>)
|
15
|
+
|
16
|
+
pp client.crawl_error_counts(<url>)
|
17
|
+
|
18
|
+
```
|
19
|
+
|
20
|
+
## Interface
|
21
|
+
|
22
|
+
### crawl
|
23
|
+
|
24
|
+
#### info
|
25
|
+
#### errors
|
26
|
+
#### stats
|
27
|
+
|
28
|
+
### other
|
29
|
+
|
30
|
+
#### submit removal request
|
31
|
+
|
32
|
+
|
33
|
+
## Dependencies
|
34
|
+
|
35
|
+
Depends on [mechanize](http://mechanize.rubyforge.org/) to access the web interface.
|
36
|
+
|
37
|
+
|
38
|
+
## Contributing
|
39
|
+
|
40
|
+
We'll check out your contribution if you:
|
41
|
+
|
42
|
+
- Provide a comprehensive suite of tests for your fork.
|
43
|
+
- Have a clear and documented rationale for your changes.
|
44
|
+
- Package these up in a pull request.
|
45
|
+
|
46
|
+
We'll do our best to help you out with any contribution issues you may have.
|
47
|
+
|
48
|
+
|
49
|
+
## License
|
50
|
+
|
51
|
+
The license is included as LICENCE in this directory.
|
52
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require "bundler/gem_tasks"
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.1.0.rc1
|
@@ -0,0 +1,96 @@
|
|
1
|
+
# Copyright (c) 2012, SoundCloud Ltd., Tobias Bielohlawek
|
2
|
+
|
3
|
+
require 'cgi'
require 'mechanize'
|
4
|
+
|
5
|
+
# WebmasterTools
#
# Small client for the Google Webmaster Tools web interface. All requests go
# through a single memoized Mechanize agent, and constructing an instance
# logs in immediately.
#
# Constructor parameters:
#   username - value written to the login form's Email field
#   password - value written to the login form's Passwd field
#
# The query methods take the site url (scheme and trailing slash optional);
# crawl_info additionally takes a security token.
class WebmasterTools
  LOGIN     = "https://accounts.google.com/ServiceLogin?service=sitemaps".freeze
  REMOVAL   = "https://www.google.com/webmasters/tools/removals-request?hl=en&siteUrl=%s&urlt=%s".freeze
  INFO      = "https://www.google.com/webmasters/tools/sitemaps-dl?hl=en&siteUrl=%s&security_token=%s".freeze
  DASHBOARD = "https://www.google.com/webmasters/tools/dashboard?hl=en&siteUrl=%s".freeze
  ERRORS    = "https://www.google.com/webmasters/tools/crawl-errors?hl=en&siteUrl=%s".freeze
  STATS     = "https://www.google.com/webmasters/tools/crawl-stats?hl=en&siteUrl=%s".freeze

  # Creates a client and logs in with the given credentials.
  def initialize(username, password)
    login(username, password)
  end

  # Fetches the login page, fills in Email/Passwd on its form and submits it.
  # Returns the page produced by the form submission.
  def login(username, password)
    page = agent.get(LOGIN)
    agent.submit(page.form.tap do |form|
      form.Email  = username
      form.Passwd = password
    end)
  end

  # Reads the dashboard page for +url+.
  # Returns a Hash with :indexed => Integer, built from the digits found in
  # the "#sitemap tbody .rightmost" element(s).
  def dashboard(url)
    url  = norm_url(url)
    page = agent.get(DASHBOARD % url)
    {
      :indexed => page.search("#sitemap tbody .rightmost").text.gsub(/\D/, '').to_i
    }
  end

  # Downloads the comma-separated sitemap listing for +url+ using +token+.
  # The first line is used as the header: each column name is downcased,
  # spaces become underscores, and the result is used as a Symbol key.
  # Returns an Array of Hashes (one per remaining line, values are Strings),
  # or an empty Array when the download has no lines at all.
  def crawl_info(url, token)
    url  = norm_url(url)
    page = agent.get(INFO % [url, token])

    rows = page.content.split("\n").map { |line| line.split(",") }
    return [] if rows.empty?

    head = rows.shift.map { |key| key.downcase.gsub(' ', '_').to_sym }
    rows.map { |row| Hash[head.zip(row)] }
  end

  # Reads the crawl-stats page for +url+.
  # The cells matched by ".hostload-activity tr td" are reduced to their
  # digits and grouped in threes as :high/:avg/:low; the groups are keyed,
  # in order, by :pages, :kilobytes and :milliseconds.
  def crawl_stats(url)
    url   = norm_url(url)
    types = %w(pages kilobytes milliseconds).map(&:to_sym)
    head  = %w(high avg low).map(&:to_sym)

    page = agent.get(STATS % url)

    values = page.search(".hostload-activity tr td").map do |node|
      node.text.gsub(/\D/, '').to_i
    end
    groups = values.each_slice(3).map { |slice| Hash[head.zip(slice)] }

    Hash[types.zip(groups)]
  end

  # Reads the crawl-errors page for +url+.
  # Each ".categories a" link is expected to carry "<label>\n<count>" as its
  # text. Returns a Hash of label Symbol => Integer count; a link without a
  # count line is reported as 0.
  def crawl_error_counts(url)
    url  = norm_url(url)
    page = agent.get(ERRORS % url)

    page.search(".categories a").inject({}) do |hash, n|
      key, value = n.text.split("\n")
      # value is nil when the link text has no second line.
      hash[key.downcase.gsub(' ', '_').to_sym] = value.to_s.gsub(/\D/, '').to_i
      hash
    end
  end

  # Opens the removal-request page for +file+ under the site +url+ and
  # submits the form found on it. Returns the page produced by the submission.
  def remove_url(url, file)
    url = norm_url(url)
    # NOTE(review): +file+ is appended as-is to the already-escaped site URL,
    # so characters such as '&' or '#' in it end up unescaped in the query.
    page = agent.get(REMOVAL % [url, url + file])
    agent.submit page.form
  end

  private

  # Lazily created Mechanize agent, reused for every request.
  def agent
    @agent ||= Mechanize.new
  end

  # Normalizes +url+ to "<scheme><host>/" (scheme defaults to "http://", one
  # trailing slash is dropped before re-adding it) and CGI-escapes the result
  # so it can be placed in a query string.
  def norm_url(url)
    schema, host = url.scan(/^(https?:\/\/)?(.+?)\/?$/).flatten
    CGI.escape("#{schema || 'http://'}#{host}/")
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)
|
3
|
+
|
4
|
+
# Gem specification for webmaster_tools.
Gem::Specification.new do |s|
  s.name        = "webmaster_tools"
  # Resolve VERSION relative to this gemspec so that loading the spec does
  # not depend on the caller's current working directory.
  s.version     = File.read(File.expand_path("../VERSION", __FILE__)).strip
  s.authors     = ["Tobias Bielohlawek"]
  s.email       = ["tobi@soundcloud.com"]
  s.homepage    = "http://github.com/rngtng/webmaster_tools"
  s.summary     = %q{Gives access to Webmaster Tools Interface data programmatically which is not provided by the official API}
  s.description = %q{Webmaster Tools extends the official API to give programmatically access to various crawl information and functions which are available via the Interface}

  # File lists are taken from git, so the spec must be evaluated inside the
  # repository checkout.
  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]

  # Each entry is "name [version constraint]"; it is split on spaces and
  # passed on as separate arguments.
  %w(mechanize).each do |gem|
    s.add_runtime_dependency(*gem.split(' '))
  end

  %w(rake rspec).each do |gem|
    s.add_development_dependency(*gem.split(' '))
  end
end
|
metadata
ADDED
@@ -0,0 +1,89 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: webmaster_tools
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0.rc1
|
5
|
+
prerelease: 6
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Tobias Bielohlawek
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-03-05 00:00:00.000000000Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: mechanize
|
16
|
+
requirement: &70232871533880 !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: *70232871533880
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: rake
|
27
|
+
requirement: &70232871533360 !ruby/object:Gem::Requirement
|
28
|
+
none: false
|
29
|
+
requirements:
|
30
|
+
- - ! '>='
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '0'
|
33
|
+
type: :development
|
34
|
+
prerelease: false
|
35
|
+
version_requirements: *70232871533360
|
36
|
+
- !ruby/object:Gem::Dependency
|
37
|
+
name: rspec
|
38
|
+
requirement: &70232871532900 !ruby/object:Gem::Requirement
|
39
|
+
none: false
|
40
|
+
requirements:
|
41
|
+
- - ! '>='
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: '0'
|
44
|
+
type: :development
|
45
|
+
prerelease: false
|
46
|
+
version_requirements: *70232871532900
|
47
|
+
description: Webmaster Tools extends the official API to give programmatically access
|
48
|
+
to various crawl information and functions which are available via the Interface
|
49
|
+
email:
|
50
|
+
- tobi@soundcloud.com
|
51
|
+
executables: []
|
52
|
+
extensions: []
|
53
|
+
extra_rdoc_files: []
|
54
|
+
files:
|
55
|
+
- .gitignore
|
56
|
+
- CHANGELOG.md
|
57
|
+
- Gemfile
|
58
|
+
- LICENCE
|
59
|
+
- README.md
|
60
|
+
- Rakefile
|
61
|
+
- VERSION
|
62
|
+
- lib/webmaster_tools.rb
|
63
|
+
- webmaster_tools.gemspec
|
64
|
+
homepage: http://github.com/rngtng/webmaster_tools
|
65
|
+
licenses: []
|
66
|
+
post_install_message:
|
67
|
+
rdoc_options: []
|
68
|
+
require_paths:
|
69
|
+
- lib
|
70
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
71
|
+
none: false
|
72
|
+
requirements:
|
73
|
+
- - ! '>='
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
77
|
+
none: false
|
78
|
+
requirements:
|
79
|
+
- - ! '>'
|
80
|
+
- !ruby/object:Gem::Version
|
81
|
+
version: 1.3.1
|
82
|
+
requirements: []
|
83
|
+
rubyforge_project:
|
84
|
+
rubygems_version: 1.8.15
|
85
|
+
signing_key:
|
86
|
+
specification_version: 3
|
87
|
+
summary: Gives access to Webmaster Tools Interface data programmatically which is
|
88
|
+
not provided by the official API
|
89
|
+
test_files: []
|