webmaster_tools 0.1.0.rc1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +4 -0
- data/CHANGELOG.md +11 -0
- data/Gemfile +4 -0
- data/LICENCE +26 -0
- data/README.md +52 -0
- data/Rakefile +1 -0
- data/VERSION +1 -0
- data/lib/webmaster_tools.rb +96 -0
- data/webmaster_tools.gemspec +25 -0
- metadata +89 -0
data/.gitignore
ADDED
data/CHANGELOG.md
ADDED
data/Gemfile
ADDED
data/LICENCE
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
Copyright (c) 2012, SoundCloud, Tobias Bielohlawek
|
2
|
+
|
3
|
+
All rights reserved.
|
4
|
+
|
5
|
+
Redistribution and use in source and binary forms, with or without
|
6
|
+
modification, are permitted provided that the following conditions are met:
|
7
|
+
|
8
|
+
- Redistributions of source code must retain the above copyright notice, this
|
9
|
+
list of conditions and the following disclaimer.
|
10
|
+
- Redistributions in binary form must reproduce the above copyright notice,
|
11
|
+
this list of conditions and the following disclaimer in the documentation
|
12
|
+
and/or other materials provided with the distribution.
|
13
|
+
- Neither the name of the SoundCloud nor the names of its contributors may be
|
14
|
+
used to endorse or promote products derived from this software without
|
15
|
+
specific prior written permission.
|
16
|
+
|
17
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
18
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
19
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
20
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
21
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
22
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
23
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
24
|
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
25
|
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
26
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
data/README.md
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
# Webmaster Tools
|
2
|
+
|
3
|
+
* API very limited
|
4
|
+
* subset of tools
|
5
|
+
* based on mechanize
|
6
|
+
|
7
|
+
## Usage
|
8
|
+
|
9
|
+
Simple usage case to get error counts
|
10
|
+
|
11
|
+
```ruby
|
12
|
+
require 'webmaster_tools'
|
13
|
+
|
14
|
+
client = WebmasterTools.new(<username>, <password>)
|
15
|
+
|
16
|
+
pp client.crawl_error_counts(<url>)
|
17
|
+
|
18
|
+
```
|
19
|
+
|
20
|
+
## Interface
|
21
|
+
|
22
|
+
### crawl
|
23
|
+
|
24
|
+
#### info
|
25
|
+
#### errors
|
26
|
+
#### stats
|
27
|
+
|
28
|
+
### other
|
29
|
+
|
30
|
+
#### submit removal request
|
31
|
+
|
32
|
+
|
33
|
+
## Dependencies
|
34
|
+
|
35
|
+
Depends on [mechanize](http://mechanize.rubyforge.org/) to access the Webmaster Tools web interface.
|
36
|
+
|
37
|
+
|
38
|
+
## Contributing
|
39
|
+
|
40
|
+
We'll check out your contribution if you:
|
41
|
+
|
42
|
+
- Provide a comprehensive suite of tests for your fork.
|
43
|
+
- Have a clear and documented rationale for your changes.
|
44
|
+
- Package these up in a pull request.
|
45
|
+
|
46
|
+
We'll do our best to help you out with any contribution issues you may have.
|
47
|
+
|
48
|
+
|
49
|
+
## License
|
50
|
+
|
51
|
+
The license is included as LICENCE in this directory.
|
52
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require "bundler/gem_tasks"
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.1.0.rc1
|
@@ -0,0 +1,96 @@
|
|
1
|
+
# Copyright (c) 2012, SoundCloud Ltd., Tobias Bielohlawek
|
2
|
+
|
3
|
+
require 'cgi'
require 'mechanize'
|
4
|
+
|
5
|
+
# WebmasterTools
#
# Reads data from the Google Webmaster Tools web interface by driving a
# Mechanize session. Creating an instance submits the Google login form with
# the given credentials; every query method then fetches one page through the
# same session and extracts values from it.
#
# Constructor parameters:
#   username - value written to the login form's Email field
#   password - value written to the login form's Passwd field
#
# Query methods take the site URL (the scheme is optional and defaults to
# http://). crawl_info additionally needs a security token.
class WebmasterTools
  LOGIN     = "https://accounts.google.com/ServiceLogin?service=sitemaps".freeze
  REMOVAL   = "https://www.google.com/webmasters/tools/removals-request?hl=en&siteUrl=%s&urlt=%s".freeze
  INFO      = "https://www.google.com/webmasters/tools/sitemaps-dl?hl=en&siteUrl=%s&security_token=%s".freeze
  DASHBOARD = "https://www.google.com/webmasters/tools/dashboard?hl=en&siteUrl=%s".freeze
  ERRORS    = "https://www.google.com/webmasters/tools/crawl-errors?hl=en&siteUrl=%s".freeze
  STATS     = "https://www.google.com/webmasters/tools/crawl-stats?hl=en&siteUrl=%s".freeze

  # Logs in immediately; see #login.
  def initialize(username, password)
    login(username, password)
  end

  # Fetches the login page, fills in the Email and Passwd fields of its form
  # and submits it. Returns whatever the agent returns for the submission.
  # NOTE(review): the response is not inspected, so a rejected login is not
  # detected here.
  def login(username, password)
    form = agent.get(LOGIN).form
    form.Email  = username
    form.Passwd = password
    agent.submit(form)
  end

  # Fetches the dashboard page for +url+.
  #
  # Returns a Hash with a single key, :indexed, holding the digits found in
  # the "#sitemap tbody .rightmost" element(s) as an Integer (0 if none).
  def dashboard(url)
    url  = norm_url(url)
    page = agent.get(DASHBOARD % url)
    {
      :indexed => page.search("#sitemap tbody .rightmost").text.gsub(/\D/, '').to_i
    }
  end

  # Downloads the comma-separated sitemap listing for +url+ using +token+.
  #
  # The first line is used as the header: each column name is downcased,
  # spaces become underscores, and the result is used as a Symbol key.
  # Returns an Array with one Hash per remaining line (values are Strings),
  # or an empty Array when the download has no lines at all.
  # Fields are split on every comma; quoted fields are not interpreted.
  def crawl_info(url, token)
    url  = norm_url(url)
    page = agent.get(INFO % [url, token])

    rows   = page.content.split("\n").map { |line| line.split(",") }
    header = rows.shift
    return [] if header.nil?

    keys = header.map { |key| key.downcase.gsub(' ', '_').to_sym }
    rows.map { |row| Hash[keys.zip(row)] }
  end

  # Fetches the crawl statistics page for +url+.
  #
  # The digits of every ".hostload-activity tr td" cell are read as Integers,
  # grouped three at a time as :high, :avg, :low, and the groups are assigned
  # in order to :pages, :kilobytes and :milliseconds.
  def crawl_stats(url)
    url   = norm_url(url)
    types = %w(pages kilobytes milliseconds).map(&:to_sym)
    head  = %w(high avg low).map(&:to_sym)

    page = agent.get(STATS % url)

    numbers = page.search(".hostload-activity tr td").map do |node|
      node.text.gsub(/\D/, '').to_i
    end
    groups = numbers.each_slice(3).map { |slice| Hash[head.zip(slice)] }

    Hash[types.zip(groups)]
  end

  # Fetches the crawl errors page for +url+.
  #
  # Each ".categories a" link is expected to contain "label\ncount". The
  # label is downcased, spaces become underscores, and it is used as a Symbol
  # key; the count's digits become the Integer value. Links whose text does
  # not split into both parts are skipped.
  def crawl_error_counts(url)
    url  = norm_url(url)
    page = agent.get(ERRORS % url)

    page.search(".categories a").inject({}) do |counts, node|
      key, value = node.text.split("\n")
      next counts if key.nil? || value.nil?

      counts[key.downcase.gsub(' ', '_').to_sym] = value.gsub(/\D/, '').to_i
      counts
    end
  end

  # Opens the removal request page for +file+ on site +url+ and submits the
  # form found there. Returns whatever the agent returns for the submission.
  #
  # +file+ is the raw (unescaped) path appended to the normalized site URL.
  # It is escaped here so characters such as "?", "&" or "#" cannot break the
  # request's query string.
  def remove_url(url, file)
    url  = norm_url(url)
    page = agent.get(REMOVAL % [url, url + CGI.escape(file.to_s)])
    agent.submit(page.form)
  end

  private

  # Lazily created Mechanize session shared by all requests of this instance.
  def agent
    @agent ||= Mechanize.new
  end

  # Normalizes a site URL for use as a query parameter: adds "http://" when
  # no http/https scheme is present, ensures a trailing slash (one existing
  # trailing slash is dropped before re-adding it), and escapes the result.
  def norm_url(url)
    schema, host = url.scan(%r{^(https?://)?(.+?)/?$}).flatten
    CGI.escape("#{schema || 'http://'}#{host}/")
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)
|
3
|
+
|
4
|
+
# Gem specification for webmaster_tools.
Gem::Specification.new do |s|
  s.name        = "webmaster_tools"
  # Resolve VERSION relative to this gemspec rather than the current working
  # directory, so the spec can be loaded from anywhere.
  s.version     = File.read(File.expand_path("../VERSION", __FILE__)).strip
  s.authors     = ["Tobias Bielohlawek"]
  s.email       = ["tobi@soundcloud.com"]
  s.homepage    = "http://github.com/rngtng/webmaster_tools"
  s.summary     = %q{Gives access to Webmaster Tools Interface data programmatically which is not provided by the official API}
  s.description = %q{Webmaster Tools extends the official API to give programmatically access to various crawl information and functions which are available via the Interface}

  # File lists come from git, so they reflect the repository the command is
  # run in.
  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
  s.require_paths = ["lib"]

  # Runtime dependencies (no version constraints).
  %w(mechanize).each do |gem|
    s.add_runtime_dependency(gem)
  end

  # Development-only dependencies (no version constraints).
  %w(rake rspec).each do |gem|
    s.add_development_dependency(gem)
  end
end
|
metadata
ADDED
@@ -0,0 +1,89 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: webmaster_tools
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0.rc1
|
5
|
+
prerelease: 6
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Tobias Bielohlawek
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-03-05 00:00:00.000000000Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: mechanize
|
16
|
+
requirement: &70232871533880 !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: *70232871533880
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: rake
|
27
|
+
requirement: &70232871533360 !ruby/object:Gem::Requirement
|
28
|
+
none: false
|
29
|
+
requirements:
|
30
|
+
- - ! '>='
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '0'
|
33
|
+
type: :development
|
34
|
+
prerelease: false
|
35
|
+
version_requirements: *70232871533360
|
36
|
+
- !ruby/object:Gem::Dependency
|
37
|
+
name: rspec
|
38
|
+
requirement: &70232871532900 !ruby/object:Gem::Requirement
|
39
|
+
none: false
|
40
|
+
requirements:
|
41
|
+
- - ! '>='
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: '0'
|
44
|
+
type: :development
|
45
|
+
prerelease: false
|
46
|
+
version_requirements: *70232871532900
|
47
|
+
description: Webmaster Tools extends the official API to give programmatically access
|
48
|
+
to various crawl information and functions which are available via the Interface
|
49
|
+
email:
|
50
|
+
- tobi@soundcloud.com
|
51
|
+
executables: []
|
52
|
+
extensions: []
|
53
|
+
extra_rdoc_files: []
|
54
|
+
files:
|
55
|
+
- .gitignore
|
56
|
+
- CHANGELOG.md
|
57
|
+
- Gemfile
|
58
|
+
- LICENCE
|
59
|
+
- README.md
|
60
|
+
- Rakefile
|
61
|
+
- VERSION
|
62
|
+
- lib/webmaster_tools.rb
|
63
|
+
- webmaster_tools.gemspec
|
64
|
+
homepage: http://github.com/rngtng/webmaster_tools
|
65
|
+
licenses: []
|
66
|
+
post_install_message:
|
67
|
+
rdoc_options: []
|
68
|
+
require_paths:
|
69
|
+
- lib
|
70
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
71
|
+
none: false
|
72
|
+
requirements:
|
73
|
+
- - ! '>='
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
77
|
+
none: false
|
78
|
+
requirements:
|
79
|
+
- - ! '>'
|
80
|
+
- !ruby/object:Gem::Version
|
81
|
+
version: 1.3.1
|
82
|
+
requirements: []
|
83
|
+
rubyforge_project:
|
84
|
+
rubygems_version: 1.8.15
|
85
|
+
signing_key:
|
86
|
+
specification_version: 3
|
87
|
+
summary: Gives access to Webmaster Tools Interface data programmatically which is
|
88
|
+
not provided by the official API
|
89
|
+
test_files: []
|