webmaster_tools 0.1.0.rc1

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,4 @@
1
+ *.gem
2
+ .bundle
3
+ Gemfile.lock
4
+ pkg/*
data/CHANGELOG.md ADDED
@@ -0,0 +1,11 @@
1
+ # Changes
2
+
3
+ ## v0.1.x - ???
4
+ * add tests
5
+ * add docu
6
+ * check different auth options
7
+ * integrate official API
8
+
9
+ ## v0.1.0 - 05-03-2012
10
+
11
+ _initial release_
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
# Fetch gems over TLS; the plain-http source can be tampered with in transit.
source "https://rubygems.org"

# Specify your gem's dependencies in webmaster_tools.gemspec
gemspec
data/LICENCE ADDED
@@ -0,0 +1,26 @@
1
+ Copyright (c) 2012, SoundCloud, Tobias Bielohlawek
2
+
3
+ All rights reserved.
4
+
5
+ Redistribution and use in source and binary forms, with or without
6
+ modification, are permitted provided that the following conditions are met:
7
+
8
+ - Redistributions of source code must retain the above copyright notice, this
9
+ list of conditions and the following disclaimer.
10
+ - Redistributions in binary form must reproduce the above copyright notice,
11
+ this list of conditions and the following disclaimer in the documentation
12
+ and/or other materials provided with the distribution.
13
+ - Neither the name of the SoundCloud nor the names of its contributors may be
14
+ used to endorse or promote products derived from this software without
15
+ specific prior written permission.
16
+
17
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
21
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
23
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
24
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
25
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
data/README.md ADDED
@@ -0,0 +1,52 @@
1
+ # Webmaster Tools
2
+
3
+ * API very limited
4
+ * subset of tools
5
+ * based on mechanize
6
+
7
+ ## Usage
8
+
9
+ Simple usage case to get error counts
10
+
11
+ ```ruby
12
+ require 'webmaster_tools'
13
+
14
+ client = WebmasterTools.new(<username>, <password>)
15
+
16
+ pp client.crawl_error_counts(<url>)
17
+
18
+ ```
19
+
20
+ ## Interface
21
+
22
+ ### crawl
23
+
24
+ #### info
25
+ #### errors
26
+ #### stats
27
+
28
+ ### other
29
+
30
+ #### submit removal request
31
+
32
+
33
+ ## Dependencies
34
+
35
+ Depends on [mechanize](http://mechanize.rubyforge.org/) to access the web interface
36
+
37
+
38
+ ## Contributing
39
+
40
+ We'll check out your contribution if you:
41
+
42
+ - Provide a comprehensive suite of tests for your fork.
43
+ - Have a clear and documented rationale for your changes.
44
+ - Package these up in a pull request.
45
+
46
+ We'll do our best to help you out with any contribution issues you may have.
47
+
48
+
49
+ ## License
50
+
51
+ The license is included as LICENCE in this directory.
52
+
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.0.rc1
@@ -0,0 +1,96 @@
1
+ # Copyright (c) 2012, SoundCloud Ltd., Tobias Bielohlawek
2
+
3
+ require 'mechanize'
4
+
5
# WebmasterTools
#
# Client that drives the Google Webmaster Tools web pages through Mechanize
# and extracts values from the returned pages.
#
# The constructor logs in immediately:
#
#   client = WebmasterTools.new(username, password)
#   client.crawl_error_counts("example.com")
#
# Every query method takes the site URL as its first argument; the URL is
# normalised (scheme defaulted to http://, exactly one trailing slash) and
# escaped before it is placed in the request. #crawl_info additionally takes
# a security token that is inserted into the request URL unchanged.
class WebmasterTools
  LOGIN     = "https://accounts.google.com/ServiceLogin?service=sitemaps".freeze
  REMOVAL   = "https://www.google.com/webmasters/tools/removals-request?hl=en&siteUrl=%s&urlt=%s".freeze
  INFO      = "https://www.google.com/webmasters/tools/sitemaps-dl?hl=en&siteUrl=%s&security_token=%s".freeze
  DASHBOARD = "https://www.google.com/webmasters/tools/dashboard?hl=en&siteUrl=%s".freeze
  ERRORS    = "https://www.google.com/webmasters/tools/crawl-errors?hl=en&siteUrl=%s".freeze
  STATS     = "https://www.google.com/webmasters/tools/crawl-stats?hl=en&siteUrl=%s".freeze

  # Outer keys of the #crawl_stats result, in the order the cells are read.
  STAT_TYPES   = %w(pages kilobytes milliseconds).map(&:to_sym).freeze
  # Inner keys of the #crawl_stats result: one per cell of a three-cell group.
  STAT_COLUMNS = %w(high avg low).map(&:to_sym).freeze

  # Creates a client and logs in right away.
  #
  # @param username [String] value for the login form's Email field
  # @param password [String] value for the login form's Passwd field
  def initialize(username, password)
    login(username, password)
  end

  # Fetches the login page, fills in the credentials and submits the form.
  #
  # @return the page returned by the form submission
  def login(username, password)
    form = agent.get(LOGIN).form
    form.Email  = username
    form.Passwd = password
    agent.submit(form)
  end

  # Reads the dashboard page of a site.
  #
  # @param url [String] site URL, with or without scheme / trailing slash
  # @return [Hash] +{ :indexed => Integer }+, built from the digits found in
  #   the text of "#sitemap tbody .rightmost"
  def dashboard(url)
    page = agent.get(DASHBOARD % norm_url(url))
    { :indexed => digits(page.search("#sitemap tbody .rightmost").text) }
  end

  # Downloads the sitemap info export and turns it into one hash per row.
  #
  # The body is split on newlines and commas only; quoted fields that contain
  # a comma are not recognised. Header labels become the keys (downcased,
  # spaces replaced by underscores, symbolized).
  #
  # @param url [String] site URL
  # @param token [String] security token placed in the request URL
  # @return [Array<Hash>] one hash per data row; empty when the body is empty
  def crawl_info(url, token)
    page = agent.get(INFO % [norm_url(url), token])

    rows   = page.content.split("\n").map { |line| line.split(",") }
    header = rows.shift
    return [] unless header # empty body: nothing to map

    keys = header.map { |label| symbolize(label) }
    rows.map { |row| Hash[keys.zip(row)] }
  end

  # Reads the crawl statistics table of a site.
  #
  # The cells matched by ".hostload-activity tr td" are reduced to their
  # digits, grouped in threes (high, avg, low) and the groups are assigned to
  # pages, kilobytes and milliseconds in that order.
  #
  # @param url [String] site URL
  # @return [Hash{Symbol => Hash{Symbol => Integer}}]
  def crawl_stats(url)
    page = agent.get(STATS % norm_url(url))

    values   = page.search(".hostload-activity tr td").map { |cell| digits(cell.text) }
    per_type = values.each_slice(3).map { |triple| Hash[STAT_COLUMNS.zip(triple)] }
    Hash[STAT_TYPES.zip(per_type)]
  end

  # Reads the per-category crawl error counts of a site.
  #
  # Each link matched by ".categories a" is expected to hold the category
  # label on its first text line and the count on the second. A link without
  # a count line is reported as 0; a link without any text is skipped.
  #
  # @param url [String] site URL
  # @return [Hash{Symbol => Integer}] count per category
  def crawl_error_counts(url)
    page = agent.get(ERRORS % norm_url(url))

    page.search(".categories a").each_with_object({}) do |link, counts|
      label, count = link.text.split("\n")
      next if label.nil?

      counts[symbolize(label)] = digits(count)
    end
  end

  # Opens the removal request page for a file below the site and submits the
  # form found on that page.
  #
  # NOTE(review): +file+ is appended to the already escaped site URL without
  # being escaped itself, so characters such as "&" or "?" in +file+ end up
  # raw in the query string. Left unchanged in case callers pre-escape.
  #
  # @param url [String] site URL
  # @param file [String] path appended to the normalised site URL
  # @return the page returned by the form submission
  def remove_url(url, file)
    site = norm_url(url)
    page = agent.get(REMOVAL % [site, site + file])
    agent.submit(page.form)
  end

  private

  # Memoized Mechanize agent shared by all requests of this client.
  def agent
    @agent ||= Mechanize.new
  end

  # Normalises a site URL and escapes it for use as a query parameter:
  # surrounding whitespace is removed, a missing scheme defaults to http://
  # and the result always ends with exactly one trailing slash added here.
  #
  # @raise [ArgumentError] when the URL is blank or contains a line break
  def norm_url(url)
    match = url.to_s.strip.match(%r{\A(https?://)?(.+?)/?\z})
    raise ArgumentError, "url must not be empty" unless match

    CGI.escape("#{match[1] || 'http://'}#{match[2]}/")
  end

  # Integer made of the digits in +text+ (0 when there are none or text is nil).
  def digits(text)
    text.to_s.gsub(/\D/, '').to_i
  end

  # "Some Label" -> :some_label
  def symbolize(label)
    label.downcase.tr(' ', '_').to_sym
  end
end
@@ -0,0 +1,25 @@
1
# -*- encoding: utf-8 -*-
# Gem specification for webmaster_tools.
$:.push File.expand_path("../lib", __FILE__)

Gem::Specification.new do |s|
  s.name        = "webmaster_tools"
  # Read VERSION relative to this file so the spec also loads when the
  # current working directory is not the project root.
  s.version     = File.read(File.expand_path("../VERSION", __FILE__)).strip
  s.authors     = ["Tobias Bielohlawek"]
  s.email       = ["tobi@soundcloud.com"]
  s.homepage    = "http://github.com/rngtng/webmaster_tools"
  s.summary     = %q{Gives access to Webmaster Tools Interface data programmatically which is not provided by the official API}
  s.description = %q{Webmaster Tools extends the official API to give programmatically access to various crawl information and functions which are available via the Interface}

  # File lists come from git, so they reflect the repository the command is
  # run in (the current working directory).
  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
  s.require_paths = ["lib"]

  # Runtime dependencies
  s.add_runtime_dependency "mechanize"

  # Development dependencies
  s.add_development_dependency "rake"
  s.add_development_dependency "rspec"
end
metadata ADDED
@@ -0,0 +1,89 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: webmaster_tools
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0.rc1
5
+ prerelease: 6
6
+ platform: ruby
7
+ authors:
8
+ - Tobias Bielohlawek
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-03-05 00:00:00.000000000Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: mechanize
16
+ requirement: &70232871533880 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: *70232871533880
25
+ - !ruby/object:Gem::Dependency
26
+ name: rake
27
+ requirement: &70232871533360 !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ type: :development
34
+ prerelease: false
35
+ version_requirements: *70232871533360
36
+ - !ruby/object:Gem::Dependency
37
+ name: rspec
38
+ requirement: &70232871532900 !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ! '>='
42
+ - !ruby/object:Gem::Version
43
+ version: '0'
44
+ type: :development
45
+ prerelease: false
46
+ version_requirements: *70232871532900
47
+ description: Webmaster Tools extends the official API to give programmatically access
48
+ to various crawl information and functions which are available via the Interface
49
+ email:
50
+ - tobi@soundcloud.com
51
+ executables: []
52
+ extensions: []
53
+ extra_rdoc_files: []
54
+ files:
55
+ - .gitignore
56
+ - CHANGELOG.md
57
+ - Gemfile
58
+ - LICENCE
59
+ - README.md
60
+ - Rakefile
61
+ - VERSION
62
+ - lib/webmaster_tools.rb
63
+ - webmaster_tools.gemspec
64
+ homepage: http://github.com/rngtng/webmaster_tools
65
+ licenses: []
66
+ post_install_message:
67
+ rdoc_options: []
68
+ require_paths:
69
+ - lib
70
+ required_ruby_version: !ruby/object:Gem::Requirement
71
+ none: false
72
+ requirements:
73
+ - - ! '>='
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ required_rubygems_version: !ruby/object:Gem::Requirement
77
+ none: false
78
+ requirements:
79
+ - - ! '>'
80
+ - !ruby/object:Gem::Version
81
+ version: 1.3.1
82
+ requirements: []
83
+ rubyforge_project:
84
+ rubygems_version: 1.8.15
85
+ signing_key:
86
+ specification_version: 3
87
+ summary: Gives access to Webmaster Tools Interface data programmatically which is
88
+ not provided by the official API
89
+ test_files: []