webmaster_tools 0.1.0.rc1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,4 @@
1
+ *.gem
2
+ .bundle
3
+ Gemfile.lock
4
+ pkg/*
data/CHANGELOG.md ADDED
@@ -0,0 +1,11 @@
1
+ # Changes
2
+
3
+ ## v0.1.x - ???
4
+ * add tests
5
+ * add documentation
6
+ * check different auth options
7
+ * integrate official API
8
+
9
+ ## v0.1.0 - 05-03-2012
10
+
11
+ _initial release_
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
# Fetch gems over HTTPS so dependency downloads cannot be tampered with in transit.
source "https://rubygems.org"

# Specify your gem's dependencies in webmaster_tools.gemspec
gemspec
data/LICENCE ADDED
@@ -0,0 +1,26 @@
1
+ Copyright (c) 2012, SoundCloud, Tobias Bielohlawek
2
+
3
+ All rights reserved.
4
+
5
+ Redistribution and use in source and binary forms, with or without
6
+ modification, are permitted provided that the following conditions are met:
7
+
8
+ - Redistributions of source code must retain the above copyright notice, this
9
+ list of conditions and the following disclaimer.
10
+ - Redistributions in binary form must reproduce the above copyright notice,
11
+ this list of conditions and the following disclaimer in the documentation
12
+ and/or other materials provided with the distribution.
13
+ - Neither the name of the SoundCloud nor the names of its contributors may be
14
+ used to endorse or promote products derived from this software without
15
+ specific prior written permission.
16
+
17
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
21
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
23
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
24
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
25
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
data/README.md ADDED
@@ -0,0 +1,52 @@
1
+ # Webmaster Tools
2
+
3
+ * API very limited
4
+ * subset of tools
5
+ * based on mechanize
6
+
7
+ ## Usage
8
+
9
+ Simple usage case to get error counts
10
+
11
+ ```ruby
12
+ require 'webmaster_tools'
13
+
14
+ client = WebmasterTools.new(<username>, <password>)
15
+
16
+ pp client.crawl_error_counts(<url>)
17
+
18
+ ```
19
+
20
+ ## Interface
21
+
22
+ ### crawl
23
+
24
+ #### info
25
+ #### errors
26
+ #### stats
27
+
28
+ ### other
29
+
30
+ #### submit removal request
31
+
32
+
33
+ ## Dependencies
34
+
35
+ Depends on [mechanize](http://mechanize.rubyforge.org/) to access the web interface
36
+
37
+
38
+ ## Contributing
39
+
40
+ We'll check out your contribution if you:
41
+
42
+ - Provide a comprehensive suite of tests for your fork.
43
+ - Have a clear and documented rationale for your changes.
44
+ - Package these up in a pull request.
45
+
46
+ We'll do our best to help you out with any contribution issues you may have.
47
+
48
+
49
+ ## License
50
+
51
+ The license is included as LICENCE in this directory.
52
+
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.0.rc1
@@ -0,0 +1,96 @@
1
+ # Copyright (c) 2012, SoundCloud Ltd., Tobias Bielohlawek
2
+
3
require 'cgi'
require 'mechanize'
4
+
5
# WebmasterTools
#
# Drives the Google Webmaster Tools web interface through a Mechanize agent
# and extracts values from the returned pages.
#
# Constructor parameters (both required):
#   username - value written to the login form's Email field
#   password - value written to the login form's Passwd field
#
# Every query method takes the site +url+ (with or without scheme and
# trailing slash); #crawl_info additionally takes a security token.
class WebmasterTools
  LOGIN     = "https://accounts.google.com/ServiceLogin?service=sitemaps".freeze
  REMOVAL   = "https://www.google.com/webmasters/tools/removals-request?hl=en&siteUrl=%s&urlt=%s".freeze
  INFO      = "https://www.google.com/webmasters/tools/sitemaps-dl?hl=en&siteUrl=%s&security_token=%s".freeze
  DASHBOARD = "https://www.google.com/webmasters/tools/dashboard?hl=en&siteUrl=%s".freeze
  ERRORS    = "https://www.google.com/webmasters/tools/crawl-errors?hl=en&siteUrl=%s".freeze
  STATS     = "https://www.google.com/webmasters/tools/crawl-stats?hl=en&siteUrl=%s".freeze

  # Creates a client and immediately submits the login form.
  def initialize(username, password)
    login(username, password)
  end

  # Fetches the login page, fills in Email/Passwd and submits the form.
  # Returns whatever the agent returns for the submission.
  # NOTE(review): the response is not inspected, so a rejected login is not
  # detected here.
  def login(username, password)
    login_page = agent.get(LOGIN)
    agent.submit(login_page.form.tap do |form|
      form.Email  = username
      form.Passwd = password
    end)
  end

  # Returns { :indexed => Integer } built from the digits found in the
  # "#sitemap tbody .rightmost" nodes of the dashboard page.
  # NOTE(review): digits of all matching nodes are concatenated - presumably
  # only one node is expected; confirm against the live page.
  def dashboard(url)
    page = agent.get(DASHBOARD % norm_url(url))
    { :indexed => to_count(page.search("#sitemap tbody .rightmost").text) }
  end

  # Downloads the comma-separated sitemap info for +url+ and returns an Array
  # of Hashes, one per data line, keyed by the symbolized header names.
  # Returns an empty Array when the response has no lines at all.
  # Fields are split on every comma; quoted fields are not interpreted.
  def crawl_info(url, token)
    page = agent.get(INFO % [norm_url(url), token])

    # Accept both LF and CRLF line endings so values carry no stray "\r".
    rows   = page.content.split(/\r?\n/).map { |line| line.split(",") }
    header = rows.shift
    return [] if header.nil?

    keys = header.map { |name| to_key(name) }
    rows.map { |row| Hash[keys.zip(row)] }
  end

  # Returns a Hash keyed by :pages, :kilobytes and :milliseconds, each mapping
  # to { :high, :avg, :low } Integers read from consecutive groups of three
  # ".hostload-activity tr td" cells.
  def crawl_stats(url)
    types = [:pages, :kilobytes, :milliseconds]
    head  = [:high, :avg, :low]

    page   = agent.get(STATS % norm_url(url))
    values = page.search(".hostload-activity tr td").map { |node| to_count(node.text) }
    groups = values.each_slice(3).map { |slice| Hash[head.zip(slice)] }

    Hash[types.zip(groups)]
  end

  # Returns a Hash mapping each symbolized category label to its Integer
  # count. Each ".categories a" node text is expected to hold the label on the
  # first line and the count on the second.
  def crawl_error_counts(url)
    page = agent.get(ERRORS % norm_url(url))

    page.search(".categories a").inject({}) do |counts, link|
      label, count = link.text.split("\n")
      counts[to_key(label)] = to_count(count)
      counts
    end
  end

  # Opens the removal request page for +file+ on the site +url+ and submits
  # its form. Returns whatever the agent returns for the submission.
  # NOTE(review): +file+ is appended unescaped to the already escaped site
  # URL (which ends in an escaped "/"); confirm whether callers are expected
  # to pass an escaped, slash-less path.
  def remove_url(url, file)
    site = norm_url(url)
    page = agent.get(REMOVAL % [site, site + file])
    agent.submit(page.form)
  end

  private

  # Lazily created Mechanize agent shared by all requests of this client.
  def agent
    @agent ||= Mechanize.new
  end

  # Normalizes +url+ to "<scheme>host/" (scheme defaults to http://) and
  # returns it CGI-escaped for use as a query parameter value.
  # Raises ArgumentError when +url+ is blank or cannot be parsed.
  def norm_url(url)
    cleaned = url.to_s.strip
    raise ArgumentError, "url must not be empty" if cleaned.empty?

    # \A/\z anchor the whole string; ^/$ would match per line.
    match = cleaned.match(%r{\A(https?://)?(.+?)/?\z})
    raise ArgumentError, "invalid url: #{url.inspect}" if match.nil?

    schema, host = match.captures
    CGI.escape("#{schema || 'http://'}#{host}/")
  end

  # Turns a human readable label ("Not found") into a Symbol (:not_found).
  def to_key(label)
    label.downcase.gsub(' ', '_').to_sym
  end

  # Drops every non-digit character and converts the remainder to an Integer.
  def to_count(text)
    text.gsub(/\D/, '').to_i
  end
end
@@ -0,0 +1,25 @@
1
# -*- encoding: utf-8 -*-
# Gem specification for webmaster_tools. The file lists are taken from git,
# so the spec has to be evaluated inside a git checkout of the project.
$:.push File.expand_path("../lib", __FILE__)

Gem::Specification.new do |s|
  s.name        = "webmaster_tools"
  # Resolve VERSION relative to this file so the spec also loads when the
  # current working directory is not the project root.
  s.version     = File.read(File.expand_path("../VERSION", __FILE__)).strip
  s.authors     = ["Tobias Bielohlawek"]
  s.email       = ["tobi@soundcloud.com"]
  s.homepage    = "http://github.com/rngtng/webmaster_tools"
  s.summary     = %q{Gives access to Webmaster Tools Interface data programmatically which is not provided by the official API}
  s.description = %q{Webmaster Tools extends the official API to give programmatically access to various crawl information and functions which are available via the Interface}

  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map { |path| File.basename(path) }
  s.require_paths = ["lib"]

  # Runtime dependency: the library drives the web interface via Mechanize.
  s.add_runtime_dependency "mechanize"

  # Development-only dependencies.
  %w(rake rspec).each do |gem_name|
    s.add_development_dependency gem_name
  end
end
metadata ADDED
@@ -0,0 +1,89 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: webmaster_tools
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0.rc1
5
+ prerelease: 6
6
+ platform: ruby
7
+ authors:
8
+ - Tobias Bielohlawek
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-03-05 00:00:00.000000000Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: mechanize
16
+ requirement: &70232871533880 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: *70232871533880
25
+ - !ruby/object:Gem::Dependency
26
+ name: rake
27
+ requirement: &70232871533360 !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ type: :development
34
+ prerelease: false
35
+ version_requirements: *70232871533360
36
+ - !ruby/object:Gem::Dependency
37
+ name: rspec
38
+ requirement: &70232871532900 !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ! '>='
42
+ - !ruby/object:Gem::Version
43
+ version: '0'
44
+ type: :development
45
+ prerelease: false
46
+ version_requirements: *70232871532900
47
+ description: Webmaster Tools extends the official API to give programmatically access
48
+ to various crawl information and functions which are available via the Interface
49
+ email:
50
+ - tobi@soundcloud.com
51
+ executables: []
52
+ extensions: []
53
+ extra_rdoc_files: []
54
+ files:
55
+ - .gitignore
56
+ - CHANGELOG.md
57
+ - Gemfile
58
+ - LICENCE
59
+ - README.md
60
+ - Rakefile
61
+ - VERSION
62
+ - lib/webmaster_tools.rb
63
+ - webmaster_tools.gemspec
64
+ homepage: http://github.com/rngtng/webmaster_tools
65
+ licenses: []
66
+ post_install_message:
67
+ rdoc_options: []
68
+ require_paths:
69
+ - lib
70
+ required_ruby_version: !ruby/object:Gem::Requirement
71
+ none: false
72
+ requirements:
73
+ - - ! '>='
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ required_rubygems_version: !ruby/object:Gem::Requirement
77
+ none: false
78
+ requirements:
79
+ - - ! '>'
80
+ - !ruby/object:Gem::Version
81
+ version: 1.3.1
82
+ requirements: []
83
+ rubyforge_project:
84
+ rubygems_version: 1.8.15
85
+ signing_key:
86
+ specification_version: 3
87
+ summary: Gives access to Webmaster Tools Interface data programmatically which is
88
+ not provided by the official API
89
+ test_files: []