better_robots 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +1 -0
- data/Gemfile +19 -0
- data/MIT-LICENSE +22 -0
- data/README.md +80 -0
- data/Rakefile +13 -0
- data/better_robots.gemspec +19 -0
- data/lib/better_robots/version.rb +3 -0
- data/lib/better_robots.rb +52 -0
- data/test/integration/better_robots_spec.rb +61 -0
- data/test/integration/robots/test.site.org.robots.txt +5 -0
- data/test/routes.rb +6 -0
- data/test/test_helper.rb +10 -0
- metadata +61 -0
data/.gitignore
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
Gemfile.lock
|
data/Gemfile
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
source 'https://rubygems.org'
|
2
|
+
|
3
|
+
# Specify your gem's dependencies in better_robots.gemspec
|
4
|
+
gemspec
|
5
|
+
|
6
|
+
group :development, :test do
|
7
|
+
gem 'rake'
|
8
|
+
gem 'sinatra'
|
9
|
+
gem 'minitest'
|
10
|
+
gem 'test-unit'
|
11
|
+
gem 'test-spec'
|
12
|
+
gem 'rack-test'
|
13
|
+
gem 'simplecov'
|
14
|
+
gem 'guard'
|
15
|
+
gem 'guard-markdown'
|
16
|
+
gem 'guard-minitest'
|
17
|
+
gem "rb-fsevent"
|
18
|
+
gem "growl"
|
19
|
+
end
|
data/MIT-LICENSE
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2012 Gerry Power <gerry@thepowerhouse.com>
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person
|
4
|
+
obtaining a copy of this software and associated documentation
|
5
|
+
files (the "Software"), to deal in the Software without
|
6
|
+
restriction, including without limitation the rights to use,
|
7
|
+
copy, modify, merge, publish, distribute, sublicense, and/or sell
|
8
|
+
copies of the Software, and to permit persons to whom the
|
9
|
+
Software is furnished to do so, subject to the following
|
10
|
+
conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be
|
13
|
+
included in all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
16
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
17
|
+
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
18
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
19
|
+
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
20
|
+
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
21
|
+
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
22
|
+
OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,80 @@
|
|
1
|
+
# BetterRobots - Better SEO with robots.txt
|
2
|
+
|
3
|
+
BetterRobots is designed to SEO enhance your robots.txt serving, in particular for situations where
|
4
|
+
you have multiple domains or subdomains being served from one application. e.g. www.yoursite.com,
|
5
|
+
assets.yoursite.com, beta.yoursite.com, etc.
|
6
|
+
|
7
|
+
## SEO and Your robots.txt
|
8
|
+
|
9
|
+
A web application commonly has several subdomains that refer to the same application. For example,
|
10
|
+
a standard practice to speed browser page load time, is to have multiple asset hosts aliased to your
|
11
|
+
application, allowing a browser to simultaneously stream multiple assets. In an SEO context, each of
|
12
|
+
these aliased hosts is considered to be duplicate content. To avoid this, you should have different
|
13
|
+
robots.txt files that exclude search engines for all but your canonical domain.
|
14
|
+
|
15
|
+
BetterRobots provides a robots.txt configuration for your canonical domain, optional robots.txt for others,
|
16
|
+
and defaults to disallow for everything that does not have a matching robots.txt.
|
17
|
+
|
18
|
+
As a quick example, assume your canonical domain is www.yoursite.com with subdomains 'assets0' and
|
19
|
+
'api'. If you rename your robots.txt to www.yoursite.com.robots.txt, the following get requests for
|
20
|
+
robots.txt will return the following:
|
21
|
+
|
22
|
+
www.yoursite.com/robots.txt -> User-agent: *
|
23
|
+
Crawl-Delay: 3
|
24
|
+
|
25
|
+
assets0.yoursite.com/robots.txt -> User-agent: *
|
26
|
+
Disallow: /
|
27
|
+
|
28
|
+
api.yoursite.com/robots.txt -> User-agent: *
|
29
|
+
Disallow: /
|
30
|
+
|
31
|
+
|
32
|
+
## Installation
|
33
|
+
|
34
|
+
Add this line to your application's Gemfile:
|
35
|
+
|
36
|
+
gem 'better_robots'
|
37
|
+
|
38
|
+
And then execute:
|
39
|
+
|
40
|
+
$ bundle
|
41
|
+
|
42
|
+
Or install it yourself as:
|
43
|
+
|
44
|
+
$ gem install better_robots
|
45
|
+
|
46
|
+
## Usage
|
47
|
+
|
48
|
+
For Rails 3, add a route to config/routes.rb
|
49
|
+
```ruby
|
50
|
+
match "/robots.txt" => BetterRobots::Generator
|
51
|
+
```
|
52
|
+
|
53
|
+
For each domain name that you want a robots.txt file served, rename your public/robots.txt to
|
54
|
+
the *\<fully qualified domain name\>*.robots.txt. e.g. Domain: www.example.org; public/www.example.org.robots.txt
|
55
|
+
All other domain names will default to:
|
56
|
+
User-agent: *
|
57
|
+
Disallow: /
|
58
|
+
|
59
|
+
## <a name="works_with"></a>Works with:
|
60
|
+
|
61
|
+
BetterRobots is a Rack based app, and should work with any Rack compatible framework. It has been tested with
|
62
|
+
Rails 3.2 and Sinatra 1.3, and on the following Ruby implementations:
|
63
|
+
|
64
|
+
* JRuby 1.7.1
|
65
|
+
* MRI 1.8.7
|
66
|
+
* MRI 1.9.2
|
67
|
+
* MRI 1.9.3
|
68
|
+
* Rubinius 1.2.4
|
69
|
+
* Ruby EE 1.8.7
|
70
|
+
|
71
|
+
### License
|
72
|
+
|
73
|
+
BetterRobots is released under the [MIT license](http://www.opensource.org/licenses/MIT).
|
74
|
+
|
75
|
+
## Author
|
76
|
+
|
77
|
+
* [Gerry Power](https://github.com/gerrypower)
|
78
|
+
|
79
|
+
|
80
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
require "bundler/gem_tasks"
|
2
|
+
require 'rake/testtask'
|
3
|
+
|
4
|
+
$:.push File.expand_path(File.dirname(__FILE__), 'lib')
|
5
|
+
|
6
|
+
desc 'Test better_robots'
|
7
|
+
Rake::TestTask.new(:test) do |t|
|
8
|
+
t.test_files = FileList['test/**/*_*spec.rb']
|
9
|
+
t.verbose = !!ENV['VERBOSE_TESTS']
|
10
|
+
t.warning = !!ENV['WARNINGS']
|
11
|
+
end
|
12
|
+
|
13
|
+
task :default => :test
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'better_robots/version'
|
5
|
+
|
6
|
+
Gem::Specification.new do |gem|
|
7
|
+
gem.name = "better_robots"
|
8
|
+
gem.version = BetterRobots::VERSION
|
9
|
+
gem.authors = ["Gerry Power"]
|
10
|
+
gem.email = ["gerry@thepowerhouse.com"]
|
11
|
+
gem.description = "Better SEO with robots.txt"
|
12
|
+
gem.summary = gem.description
|
13
|
+
gem.homepage = "https://github.com/gerrypower/better_robots"
|
14
|
+
|
15
|
+
gem.files = `git ls-files`.split($/)
|
16
|
+
gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
|
17
|
+
gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
|
18
|
+
gem.require_paths = ["lib"]
|
19
|
+
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
require "better_robots/version"
|
2
|
+
require 'ostruct'
|
3
|
+
|
4
|
+
module BetterRobots
|
5
|
+
class Generator
|
6
|
+
DEFAULT_DISALLOW_TEXT = "User-agent: *\nDisallow: /"
|
7
|
+
ROBOTS_CACHE = {}
|
8
|
+
|
9
|
+
class << self
|
10
|
+
|
11
|
+
def config
|
12
|
+
@config ||= {
|
13
|
+
:robots_txt_path => ((Rails.root.join("public") if defined? Rails) || ".")
|
14
|
+
}
|
15
|
+
end
|
16
|
+
|
17
|
+
def call(env)
|
18
|
+
res = cached_robots_txt_for(env['SERVER_NAME'])
|
19
|
+
[ 200, headers_for(res), [ res.txt ] ]
|
20
|
+
end
|
21
|
+
|
22
|
+
def cached_robots_txt_for(server_name)
|
23
|
+
ROBOTS_CACHE[server_name] ||= robots_txt_for(server_name)
|
24
|
+
end
|
25
|
+
|
26
|
+
def robots_txt_for(server_name)
|
27
|
+
begin
|
28
|
+
txt = read_robots_file(server_name)
|
29
|
+
OpenStruct.new(:txt => txt, :length => txt.length)
|
30
|
+
rescue Errno::ENOENT
|
31
|
+
OpenStruct.new(:txt => DEFAULT_DISALLOW_TEXT, :length => DEFAULT_DISALLOW_TEXT.length)
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
def read_robots_file(server_name)
|
36
|
+
File.read(File.join(config[:robots_txt_path], "#{server_name}.robots.txt"))
|
37
|
+
end
|
38
|
+
|
39
|
+
def headers_for(res)
|
40
|
+
{
|
41
|
+
"Content-Type" => "text/plain",
|
42
|
+
"Content-Length" => res.length.to_s
|
43
|
+
}
|
44
|
+
end
|
45
|
+
|
46
|
+
end
|
47
|
+
|
48
|
+
private_class_method :cached_robots_txt_for, :robots_txt_for,
|
49
|
+
:read_robots_file, :headers_for
|
50
|
+
|
51
|
+
end
|
52
|
+
end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
$: << File.join(File.dirname(__FILE__), "..")
|
2
|
+
|
3
|
+
require 'routes'
|
4
|
+
require 'test_helper'
|
5
|
+
require 'rack/test'
|
6
|
+
|
7
|
+
describe BetterRobots::Generator do
|
8
|
+
|
9
|
+
def get_robots(host = "site.org")
|
10
|
+
BetterRobots::Generator.config[:robots_txt_path] = File.join(File.dirname(__FILE__), "robots")
|
11
|
+
browser = Rack::Test::Session.new(Rack::MockSession.new(Sinatra::Application, host))
|
12
|
+
browser.get '/robots.txt'
|
13
|
+
browser
|
14
|
+
end
|
15
|
+
|
16
|
+
def robot_file_contents(robot_file)
|
17
|
+
File.read(File.join(BetterRobots::Generator.config[:robots_txt_path], robot_file))
|
18
|
+
end
|
19
|
+
|
20
|
+
def reload_better_robots
|
21
|
+
Object.send(:remove_const, :BetterRobots).send(:remove_const, :Generator)
|
22
|
+
load 'better_robots.rb'
|
23
|
+
end
|
24
|
+
|
25
|
+
it "should process /robots.txt" do
|
26
|
+
get_robots.last_response.should.be.ok
|
27
|
+
end
|
28
|
+
|
29
|
+
it "should display the contents of the test.site.org.robots.txt" do
|
30
|
+
get_robots("test.site.org").last_response.body.should.equal \
|
31
|
+
robot_file_contents("test.site.org.robots.txt")
|
32
|
+
end
|
33
|
+
|
34
|
+
it "should have a content type of text" do
|
35
|
+
get_robots.last_response.headers["Content-Type"].should.equal "text/plain"
|
36
|
+
end
|
37
|
+
|
38
|
+
it "should have a content length matching the robots file length" do
|
39
|
+
get_robots("test.site.org").last_response.headers["Content-Length"].should.equal \
|
40
|
+
robot_file_contents("test.site.org.robots.txt").length.to_s
|
41
|
+
end
|
42
|
+
|
43
|
+
it "should default to disallow if there is no robots file" do
|
44
|
+
get_robots("test.nosite.org").last_response.body.should.equal \
|
45
|
+
BetterRobots::Generator::DEFAULT_DISALLOW_TEXT
|
46
|
+
end
|
47
|
+
|
48
|
+
it "should cache the results" do
|
49
|
+
get_robots("test.site.org")
|
50
|
+
BetterRobots::Generator::ROBOTS_CACHE["test.site.org"].txt.should.equal \
|
51
|
+
robot_file_contents("test.site.org.robots.txt")
|
52
|
+
end
|
53
|
+
|
54
|
+
it "should use Rails.root + public for robot files, if Rails is defined" do
|
55
|
+
Rails = OpenStruct.new({:root => Pathname.new(File.dirname(__FILE__))})
|
56
|
+
reload_better_robots
|
57
|
+
BetterRobots::Generator.config[:robots_txt_path].to_s.should.equal \
|
58
|
+
File.join(File.dirname(__FILE__), "public")
|
59
|
+
end
|
60
|
+
|
61
|
+
end
|
data/test/routes.rb
ADDED
data/test/test_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,61 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: better_robots
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease:
|
5
|
+
version: 0.0.1
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Gerry Power
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-12-24 00:00:00.000000000 Z
|
13
|
+
dependencies: []
|
14
|
+
description: Better SEO with robots.txt
|
15
|
+
email:
|
16
|
+
- gerry@thepowerhouse.com
|
17
|
+
executables: []
|
18
|
+
extensions: []
|
19
|
+
extra_rdoc_files: []
|
20
|
+
files:
|
21
|
+
- .gitignore
|
22
|
+
- Gemfile
|
23
|
+
- MIT-LICENSE
|
24
|
+
- README.md
|
25
|
+
- Rakefile
|
26
|
+
- better_robots.gemspec
|
27
|
+
- lib/better_robots.rb
|
28
|
+
- lib/better_robots/version.rb
|
29
|
+
- test/integration/better_robots_spec.rb
|
30
|
+
- test/integration/robots/test.site.org.robots.txt
|
31
|
+
- test/routes.rb
|
32
|
+
- test/test_helper.rb
|
33
|
+
homepage: https://github.com/gerrypower/better_robots
|
34
|
+
licenses: []
|
35
|
+
post_install_message:
|
36
|
+
rdoc_options: []
|
37
|
+
require_paths:
|
38
|
+
- lib
|
39
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
40
|
+
requirements:
|
41
|
+
- - ! '>='
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: '0'
|
44
|
+
none: false
|
45
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
46
|
+
requirements:
|
47
|
+
- - ! '>='
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
version: '0'
|
50
|
+
none: false
|
51
|
+
requirements: []
|
52
|
+
rubyforge_project:
|
53
|
+
rubygems_version: 1.8.23
|
54
|
+
signing_key:
|
55
|
+
specification_version: 3
|
56
|
+
summary: Better SEO with robots.txt
|
57
|
+
test_files:
|
58
|
+
- test/integration/better_robots_spec.rb
|
59
|
+
- test/integration/robots/test.site.org.robots.txt
|
60
|
+
- test/routes.rb
|
61
|
+
- test/test_helper.rb
|