better_robots 0.0.1

data/.gitignore ADDED
@@ -0,0 +1 @@
+ Gemfile.lock
data/Gemfile ADDED
@@ -0,0 +1,19 @@
+ source 'https://rubygems.org'
+
+ # Specify your gem's dependencies in better_robots.gemspec
+ gemspec
+
+ group :development, :test do
+   gem 'rake'
+   gem 'sinatra'
+   gem 'minitest'
+   gem 'test-unit'
+   gem 'test-spec'
+   gem 'rack-test'
+   gem 'simplecov'
+   gem 'guard'
+   gem 'guard-markdown'
+   gem 'guard-minitest'
+   gem "rb-fsevent"
+   gem "growl"
+ end
data/MIT-LICENSE ADDED
@@ -0,0 +1,22 @@
+ Copyright (c) 2012 Gerry Power <gerry@thepowerhouse.com>
+
+ Permission is hereby granted, free of charge, to any person
+ obtaining a copy of this software and associated documentation
+ files (the "Software"), to deal in the Software without
+ restriction, including without limitation the rights to use,
+ copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the
+ Software is furnished to do so, subject to the following
+ conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,80 @@
+ # BetterRobots - Better SEO with robots.txt
+
+ BetterRobots improves the SEO of your robots.txt serving, in particular when multiple domains or
+ subdomains are served from one application, e.g. www.yoursite.com, assets.yoursite.com,
+ beta.yoursite.com, etc.
+
+ ## SEO and Your robots.txt
+
+ A web application commonly has several subdomains that refer to the same application. For example,
+ a standard practice to speed up browser page-load time is to alias multiple asset hosts to your
+ application, allowing a browser to stream several assets simultaneously. In an SEO context, each of
+ these aliased hosts is considered duplicate content. To avoid this, you should serve a different
+ robots.txt that excludes search engines on every host except your canonical domain.
+
+ BetterRobots provides a robots.txt configuration for your canonical domain, optional robots.txt files
+ for other domains, and defaults to disallowing everything for any domain without a matching robots.txt.
+
+ As a quick example, assume your canonical domain is www.yoursite.com with subdomains 'assets0' and
+ 'api'. If you rename your robots.txt to www.yoursite.com.robots.txt, GET requests for
+ robots.txt will return:
+
+     www.yoursite.com/robots.txt     -> User-agent: *
+                                        Crawl-Delay: 3
+
+     assets0.yoursite.com/robots.txt -> User-agent: *
+                                        Disallow: /
+
+     api.yoursite.com/robots.txt     -> User-agent: *
+                                        Disallow: /
+
+ ## Installation
+
+ Add this line to your application's Gemfile:
+
+     gem 'better_robots'
+
+ And then execute:
+
+     $ bundle
+
+ Or install it yourself as:
+
+     $ gem install better_robots
+
+ ## Usage
+
+ For Rails 3, add a route to config/routes.rb:
+ ```ruby
+ match "/robots.txt" => BetterRobots::Generator
+ ```
+
+ For each domain for which you want a specific robots.txt served, rename your public/robots.txt
+ to *\<fully qualified domain name\>*.robots.txt, e.g. for the domain www.example.org:
+ public/www.example.org.robots.txt. All other domain names default to:
+
+     User-agent: *
+     Disallow: /
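+
+ Under the hood, the generator keys its lookup on the request's SERVER_NAME. A simplified
+ sketch of that resolution (mirroring lib/better_robots.rb; the standalone helper below is
+ illustrative only, not part of the gem's public API):
+
+ ```ruby
+ # Illustrative sketch: BetterRobots reads "<path>/<FQDN>.robots.txt" and
+ # falls back to a blanket disallow when no file exists for the host.
+ def robots_txt_for(server_name, path = "public")
+   File.read(File.join(path, "#{server_name}.robots.txt"))
+ rescue Errno::ENOENT
+   "User-agent: *\nDisallow: /"
+ end
+
+ robots_txt_for("www.example.org") # => contents of public/www.example.org.robots.txt
+ ```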
+
+ ## <a name="works_with"></a>Works with:
+
+ BetterRobots is a Rack-based app and should work with any Rack-compatible framework (see the
+ rackup sketch after the list below). It has been tested with Rails 3.2 and Sinatra 1.3, and on
+ the following Ruby implementations:
+
+ * JRuby 1.7.1
+ * MRI 1.8.7
+ * MRI 1.9.2
+ * MRI 1.9.3
+ * Rubinius 1.2.4
+ * Ruby EE 1.8.7
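+
+ Outside Rails, the generator can be mounted directly in a rackup file. A minimal config.ru
+ sketch (assuming only that the gem is installed; the catch-all app is a stand-in for yours):
+
+ ```ruby
+ # config.ru -- illustrative, not shipped with the gem
+ require 'better_robots'
+
+ map "/robots.txt" do
+   run BetterRobots::Generator  # any object responding to .call(env) works as a Rack app
+ end
+
+ map "/" do
+   run lambda { |env| [200, { "Content-Type" => "text/plain" }, ["your app"]] }
+ end
+ ```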
+
+ ### License
+
+ BetterRobots is released under the [MIT license](http://www.opensource.org/licenses/MIT).
+
+ ## Author
+
+ * [Gerry Power](https://github.com/gerrypower)
data/Rakefile ADDED
@@ -0,0 +1,13 @@
+ require "bundler/gem_tasks"
+ require 'rake/testtask'
+
+ $:.push File.expand_path('lib', File.dirname(__FILE__))
+
+ desc 'Test better_robots'
+ Rake::TestTask.new(:test) do |t|
+   t.test_files = FileList['test/**/*_*spec.rb']
+   t.verbose = !!ENV['VERBOSE_TESTS']
+   t.warning = !!ENV['WARNINGS']
+ end
+
+ task :default => :test
data/better_robots.gemspec ADDED
@@ -0,0 +1,19 @@
+ # -*- encoding: utf-8 -*-
+ lib = File.expand_path('../lib', __FILE__)
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
+ require 'better_robots/version'
+
+ Gem::Specification.new do |gem|
+   gem.name          = "better_robots"
+   gem.version       = BetterRobots::VERSION
+   gem.authors       = ["Gerry Power"]
+   gem.email         = ["gerry@thepowerhouse.com"]
+   gem.description   = "Better SEO with robots.txt"
+   gem.summary       = gem.description
+   gem.homepage      = "https://github.com/gerrypower/better_robots"
+
+   gem.files         = `git ls-files`.split($/)
+   gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
+   gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
+   gem.require_paths = ["lib"]
+ end
data/lib/better_robots/version.rb ADDED
@@ -0,0 +1,3 @@
+ module BetterRobots
+   VERSION = "0.0.1"
+ end
data/lib/better_robots.rb ADDED
@@ -0,0 +1,52 @@
+ require "better_robots/version"
+ require 'ostruct'
+
+ module BetterRobots
+   class Generator
+     DEFAULT_DISALLOW_TEXT = "User-agent: *\nDisallow: /"
+     ROBOTS_CACHE = {}
+
+     class << self
+
+       def config
+         @config ||= {
+           :robots_txt_path => ((Rails.root.join("public") if defined? Rails) || ".")
+         }
+       end
+
+       # Rack entry point: look up (and cache) the robots.txt for the
+       # requested host and serve it as plain text.
+       def call(env)
+         res = cached_robots_txt_for(env['SERVER_NAME'])
+         [ 200, headers_for(res), [ res.txt ] ]
+       end
+
+       def cached_robots_txt_for(server_name)
+         ROBOTS_CACHE[server_name] ||= robots_txt_for(server_name)
+       end
+
+       # Read <FQDN>.robots.txt; fall back to a blanket disallow when
+       # no file exists for this host.
+       def robots_txt_for(server_name)
+         begin
+           txt = read_robots_file(server_name)
+           OpenStruct.new(:txt => txt, :length => txt.length)
+         rescue Errno::ENOENT
+           OpenStruct.new(:txt => DEFAULT_DISALLOW_TEXT, :length => DEFAULT_DISALLOW_TEXT.length)
+         end
+       end
+
+       def read_robots_file(server_name)
+         File.read(File.join(config[:robots_txt_path], "#{server_name}.robots.txt"))
+       end
+
+       def headers_for(res)
+         {
+           "Content-Type"   => "text/plain",
+           "Content-Length" => res.length.to_s
+         }
+       end
+
+     end
+
+     private_class_method :cached_robots_txt_for, :robots_txt_for,
+                          :read_robots_file, :headers_for
+
+   end
+ end
data/test/integration/better_robots_spec.rb ADDED
@@ -0,0 +1,61 @@
+ $: << File.join(File.dirname(__FILE__), "..")
+
+ require 'routes'
+ require 'test_helper'
+ require 'rack/test'
+
+ describe BetterRobots::Generator do
+
+   def get_robots(host = "site.org")
+     BetterRobots::Generator.config[:robots_txt_path] = File.join(File.dirname(__FILE__), "robots")
+     browser = Rack::Test::Session.new(Rack::MockSession.new(Sinatra::Application, host))
+     browser.get '/robots.txt'
+     browser
+   end
+
+   def robot_file_contents(robot_file)
+     File.read(File.join(BetterRobots::Generator.config[:robots_txt_path], robot_file))
+   end
+
+   def reload_better_robots
+     Object.send(:remove_const, :BetterRobots).send(:remove_const, :Generator)
+     load 'better_robots.rb'
+   end
+
+   it "should process /robots.txt" do
+     get_robots.last_response.should.be.ok
+   end
+
+   it "should display the contents of the test.site.org.robots.txt" do
+     get_robots("test.site.org").last_response.body.should.equal \
+       robot_file_contents("test.site.org.robots.txt")
+   end
+
+   it "should have a content type of text" do
+     get_robots.last_response.headers["Content-Type"].should.equal "text/plain"
+   end
+
+   it "should have a content length matching the robots file length" do
+     get_robots("test.site.org").last_response.headers["Content-Length"].should.equal \
+       robot_file_contents("test.site.org.robots.txt").length.to_s
+   end
+
+   it "should default to disallow if there is no robots file" do
+     get_robots("test.nosite.org").last_response.body.should.equal \
+       BetterRobots::Generator::DEFAULT_DISALLOW_TEXT
+   end
+
+   it "should cache the results" do
+     get_robots("test.site.org")
+     BetterRobots::Generator::ROBOTS_CACHE["test.site.org"].txt.should.equal \
+       robot_file_contents("test.site.org.robots.txt")
+   end
+
+   it "should use Rails.root + public for robot files, if Rails is defined" do
+     Rails = OpenStruct.new({:root => Pathname.new(File.dirname(__FILE__))})
+     reload_better_robots
+     BetterRobots::Generator.config[:robots_txt_path].to_s.should.equal \
+       File.join(File.dirname(__FILE__), "public")
+   end
+
+ end
data/test/integration/robots/test.site.org.robots.txt ADDED
@@ -0,0 +1,5 @@
+ User-agent: Google
+ Disallow:
+
+ User-agent: *
+ Disallow: /
data/test/routes.rb ADDED
@@ -0,0 +1,6 @@
+ require 'sinatra'
+ require 'better_robots'
+
+ get '/robots.txt' do
+   BetterRobots::Generator.call(env)
+ end
data/test/test_helper.rb ADDED
@@ -0,0 +1,10 @@
+ require 'simplecov'
+ SimpleCov.start
+
+ require 'minitest/spec'
+ require 'minitest/autorun'
+ require 'better_robots'
+
+ require 'test/spec'
+
+ set :environment, :test
metadata ADDED
@@ -0,0 +1,61 @@
+ --- !ruby/object:Gem::Specification
+ name: better_robots
+ version: !ruby/object:Gem::Version
+   prerelease:
+   version: 0.0.1
+ platform: ruby
+ authors:
+ - Gerry Power
+ autorequire:
+ bindir: bin
+ cert_chain: []
+ date: 2012-12-24 00:00:00.000000000 Z
+ dependencies: []
+ description: Better SEO with robots.txt
+ email:
+ - gerry@thepowerhouse.com
+ executables: []
+ extensions: []
+ extra_rdoc_files: []
+ files:
+ - .gitignore
+ - Gemfile
+ - MIT-LICENSE
+ - README.md
+ - Rakefile
+ - better_robots.gemspec
+ - lib/better_robots.rb
+ - lib/better_robots/version.rb
+ - test/integration/better_robots_spec.rb
+ - test/integration/robots/test.site.org.robots.txt
+ - test/routes.rb
+ - test/test_helper.rb
+ homepage: https://github.com/gerrypower/better_robots
+ licenses: []
+ post_install_message:
+ rdoc_options: []
+ require_paths:
+ - lib
+ required_ruby_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ! '>='
+     - !ruby/object:Gem::Version
+       version: '0'
+   none: false
+ required_rubygems_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ! '>='
+     - !ruby/object:Gem::Version
+       version: '0'
+   none: false
+ requirements: []
+ rubyforge_project:
+ rubygems_version: 1.8.23
+ signing_key:
+ specification_version: 3
+ summary: Better SEO with robots.txt
+ test_files:
+ - test/integration/better_robots_spec.rb
+ - test/integration/robots/test.site.org.robots.txt
+ - test/routes.rb
+ - test/test_helper.rb