better_robots 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1 @@
1
+ Gemfile.lock
data/Gemfile ADDED
@@ -0,0 +1,19 @@
1
source 'https://rubygems.org'

# Specify your gem's dependencies in better_robots.gemspec
gemspec

# Everything below is only needed to develop and test the gem itself.
group :development, :test do
  gem 'rake'
  gem 'sinatra'        # host app used by the integration specs (test/routes.rb)
  gem 'minitest'
  gem 'test-unit'
  gem 'test-spec'      # provides the `.should` expectation syntax used in the specs
  gem 'rack-test'      # Rack::Test::Session / Rack::MockSession in the specs
  gem 'simplecov'      # coverage; started first in test/test_helper.rb
  gem 'guard'
  gem 'guard-markdown'
  gem 'guard-minitest'
  gem "rb-fsevent"     # OS X file-watching backend for guard
  gem "growl"          # OS X notifications for guard
end
data/MIT-LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2012 Gerry Power <gerry@thepowerhouse.com>
2
+
3
+ Permission is hereby granted, free of charge, to any person
4
+ obtaining a copy of this software and associated documentation
5
+ files (the "Software"), to deal in the Software without
6
+ restriction, including without limitation the rights to use,
7
+ copy, modify, merge, publish, distribute, sublicense, and/or sell
8
+ copies of the Software, and to permit persons to whom the
9
+ Software is furnished to do so, subject to the following
10
+ conditions:
11
+
12
+ The above copyright notice and this permission notice shall be
13
+ included in all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
17
+ OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
19
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
20
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22
+ OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,80 @@
1
+ # BetterRobots - Better SEO with robots.txt
2
+
3
+ BetterRobots is designed to SEO enhance your robots.txt serving, in particular for situations where
4
+ you have multiple domains or subdomains being served from one application. e.g. www.yoursite.com,
5
+ assets.yoursite.com, beta.yoursite.com, etc.
6
+
7
+ ## SEO and Your robots.txt
8
+
9
+ A web application commonly has several subdomains that refer to the same application. For example,
10
+ a standard practice to speed browser page load time, is to have multiple asset hosts aliased to your
11
+ application, allowing a browser to simultaneously stream multiple assets. In an SEO context, each of
12
+ these aliased hosts are considered to be duplicate content. To avoid this, you should have different
13
+ robots.txt that exclude search engines for all but your canonical domain.
14
+
15
+ BetterRobots provides a robots.txt configuration for your canonical domain, optional robots.txt for others,
16
+ and defaults to disallow for everything that does not have a matching robots.txt.
17
+
18
+ As a quick example, assume your canonical domain is www.yoursite.com with a subdomain 'assets0' and
19
+ 'api'. If you rename your robots.txt to www.yoursite.com.robots.txt, the following get requests for
20
+ robots.txt will return the following:
21
+
22
+ www.yoursite.com/robots.txt -> User-agent: *
23
+ Crawl-Delay: 3
24
+
25
+ assets0.yoursite.com/robots.txt -> User-agent: *
26
+ Disallow: /
27
+
28
+ api.yoursite.com/robots.txt -> User-agent: *
29
+ Disallow: /
30
+
31
+
32
+ ## Installation
33
+
34
+ Add this line to your application's Gemfile:
35
+
36
+ gem 'better_robots'
37
+
38
+ And then execute:
39
+
40
+ $ bundle
41
+
42
+ Or install it yourself as:
43
+
44
+ $ gem install better_robots
45
+
46
+ ## Usage
47
+
48
+ For Rails 3, add a route to config/routes.rb
49
+ ```ruby
50
+ match "/robots.txt" => BetterRobots::Generator
51
+ ```
52
+
53
+ For each domain name that you want a robots.txt file served, rename your public/robots.txt to
54
+ the *\<fully qualified domain name\>*.robots.txt. e.g. Domain: www.example.org; public/www.example.org.robots.txt
55
+ All other domain names will default to:
56
+ User-agent: *
57
+ Disallow: /
58
+
59
+ ## <a name="works_with"></a>Works with:
60
+
61
+ BetterRobots is a Rack based app, and should work with any Rack compatible framework. It has been tested with
62
+ Rails 3.2 and Sinatra 1.3, and on the following Ruby implementations:
63
+
64
+ * JRuby 1.7.1
65
+ * MRI 1.8.7
66
+ * MRI 1.9.2
67
+ * MRI 1.9.3
68
+ * Rubinius 1.2.4
69
+ * Ruby EE 1.8.7
70
+
71
+ ### License
72
+
73
+ BetterRobots is released under the [MIT license](http://www.opensource.org/licenses/MIT).
74
+
75
+ ## Author
76
+
77
+ * [Gerry Power](https://github.com/gerrypower)
78
+
79
+
80
+
data/Rakefile ADDED
@@ -0,0 +1,13 @@
1
require "bundler/gem_tasks"
require 'rake/testtask'

# Make lib/ requirable from the test task.
# FIX: the original called File.expand_path(File.dirname(__FILE__), 'lib'),
# but expand_path takes (file_name, dir_string) — the relative path FIRST —
# so it expanded the project root and never added lib/ to the load path.
$:.push File.expand_path('lib', File.dirname(__FILE__))

desc 'Test better_robots'
Rake::TestTask.new(:test) do |t|
  # Picks up test/**/*_spec.rb style files (e.g. better_robots_spec.rb).
  t.test_files = FileList['test/**/*_*spec.rb']
  # Opt-in verbosity / ruby warnings via environment variables.
  t.verbose = !!ENV['VERBOSE_TESTS']
  t.warning = !!ENV['WARNINGS']
end

task :default => :test
@@ -0,0 +1,19 @@
1
# -*- encoding: utf-8 -*-

# Put lib/ on the load path so the version constant can be read.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'better_robots/version'

# Gem packaging manifest. The file list is taken straight from git, so
# only tracked files are shipped.
Gem::Specification.new do |spec|
  spec.name          = "better_robots"
  spec.version       = BetterRobots::VERSION
  spec.authors       = ["Gerry Power"]
  spec.email         = ["gerry@thepowerhouse.com"]
  spec.description   = "Better SEO with robots.txt"
  spec.summary       = spec.description
  spec.homepage      = "https://github.com/gerrypower/better_robots"

  spec.files         = `git ls-files`.split($/)
  spec.executables   = spec.files.grep(%r{^bin/}).map { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]
end
@@ -0,0 +1,3 @@
1
module BetterRobots
  # Current gem release; read by better_robots.gemspec at build time.
  VERSION = "0.0.1"
end
@@ -0,0 +1,52 @@
1
+ require "better_robots/version"
2
+ require 'ostruct'
3
+
4
module BetterRobots
  # Rack endpoint that serves a per-host robots.txt: a request for
  # /robots.txt on host H returns the contents of "#{H}.robots.txt" from
  # the configured directory, or a blanket "Disallow: /" when no such
  # file exists. Results are memoized per host in ROBOTS_CACHE.
  class Generator
    # Served for any host without a matching <host>.robots.txt file.
    DEFAULT_DISALLOW_TEXT = "User-agent: *\nDisallow: /"
    # Process-wide cache: server name => OpenStruct(:txt, :length).
    # NOTE(review): grows per distinct SERVER_NAME; consider bounding it
    # if the app is exposed to arbitrary Host headers.
    ROBOTS_CACHE = {}

    class << self

      # Lazily-built configuration hash. :robots_txt_path is the directory
      # searched for <host>.robots.txt files; defaults to Rails' public/
      # directory when Rails is loaded, otherwise the current directory.
      def config
        @config ||= {
          :robots_txt_path => ((Rails.root.join("public") if defined? Rails) || ".")
        }
      end

      # Rack entry point. Returns a [status, headers, body] triple; always
      # responds 200 with text/plain.
      def call(env)
        res = cached_robots_txt_for(env['SERVER_NAME'])
        [ 200, headers_for(res), [ res.txt ] ]
      end

      def cached_robots_txt_for(server_name)
        ROBOTS_CACHE[server_name] ||= robots_txt_for(server_name)
      end

      # Builds the response payload for one host: the matching robots file,
      # or the default disallow text when the file is missing.
      def robots_txt_for(server_name)
        begin
          txt = read_robots_file(server_name)
          # FIX: use bytesize, not length — Content-Length is a byte count,
          # and the two differ for robots files with multi-byte characters.
          OpenStruct.new(:txt => txt, :length => txt.bytesize)
        rescue Errno::ENOENT
          OpenStruct.new(:txt => DEFAULT_DISALLOW_TEXT, :length => DEFAULT_DISALLOW_TEXT.bytesize)
        end
      end

      def read_robots_file(server_name)
        File.read(File.join(config[:robots_txt_path], "#{server_name}.robots.txt"))
      end

      def headers_for(res)
        {
          "Content-Type" => "text/plain",
          "Content-Length" => res.length.to_s
        }
      end

    end

    private_class_method :cached_robots_txt_for, :robots_txt_for,
      :read_robots_file, :headers_for

  end
end
@@ -0,0 +1,61 @@
1
# Integration specs: drive the Sinatra app in test/routes.rb through
# Rack::Test and assert on the robots.txt responses. Uses the test/spec
# `.should` expectation syntax loaded by test_helper.
$: << File.join(File.dirname(__FILE__), "..")

require 'routes'
require 'test_helper'
require 'rack/test'

describe BetterRobots::Generator do

  # Issues GET /robots.txt for the given host, serving robots files from
  # the test/integration/robots fixture directory. Returns the browser
  # session so callers can inspect last_response.
  def get_robots(host = "site.org")
    BetterRobots::Generator.config[:robots_txt_path] = File.join(File.dirname(__FILE__), "robots")
    browser = Rack::Test::Session.new(Rack::MockSession.new(Sinatra::Application, host))
    browser.get '/robots.txt'
    browser
  end

  # Raw contents of a fixture robots file under the configured path.
  def robot_file_contents(robot_file)
    File.read(File.join(BetterRobots::Generator.config[:robots_txt_path], robot_file))
  end

  # Drops the Generator constant and re-loads the library so class-level
  # state (@config, in particular) is rebuilt from scratch — needed by the
  # Rails-detection spec below, which must re-run the config default.
  def reload_better_robots
    Object.send(:remove_const, :BetterRobots).send(:remove_const, :Generator)
    load 'better_robots.rb'
  end

  it "should process /robots.txt" do
    get_robots.last_response.should.be.ok
  end

  it "should display the contents of the test.site.org.robots.txt" do
    get_robots("test.site.org").last_response.body.should.equal \
      robot_file_contents("test.site.org.robots.txt")
  end

  it "should have a content type of text" do
    get_robots.last_response.headers["Content-Type"].should.equal "text/plain"
  end

  it "should have a content length matching the robots file length" do
    get_robots("test.site.org").last_response.headers["Content-Length"].should.equal \
      robot_file_contents("test.site.org.robots.txt").length.to_s
  end

  it "should default to disallow if there is no robots file" do
    get_robots("test.nosite.org").last_response.body.should.equal \
      BetterRobots::Generator::DEFAULT_DISALLOW_TEXT
  end

  it "should cache the results" do
    get_robots("test.site.org")
    BetterRobots::Generator::ROBOTS_CACHE["test.site.org"].txt.should.equal \
      robot_file_contents("test.site.org.robots.txt")
  end

  # NOTE(review): defines a top-level Rails constant that persists for the
  # rest of the process — specs running after this one see Rails as defined.
  it "should use Rails.root + public for robot files, if Rails is defined" do
    Rails = OpenStruct.new({:root => Pathname.new(File.dirname(__FILE__))})
    reload_better_robots
    BetterRobots::Generator.config[:robots_txt_path].to_s.should.equal \
      File.join(File.dirname(__FILE__), "public")
  end

end
@@ -0,0 +1,5 @@
1
+ User-agent: Google
2
+ Disallow:
3
+
4
+ User-agent: *
5
+ Disallow: /
data/test/routes.rb ADDED
@@ -0,0 +1,6 @@
1
require 'sinatra'
require 'better_robots'

# Minimal Sinatra app for the integration specs: delegates /robots.txt to
# the generator. Generator.call returns a Rack [status, headers, body]
# triple, which Sinatra accepts as a route's return value.
get '/robots.txt' do
  BetterRobots::Generator.call(env)
end
@@ -0,0 +1,10 @@
1
# Shared test bootstrap. SimpleCov must start before the code under test
# is required, so coverage is recorded for better_robots itself.
require 'simplecov'
SimpleCov.start

require 'minitest/spec'
require 'minitest/autorun'
require 'better_robots'

# Provides the `.should` expectation syntax used by the specs.
require 'test/spec'

# Sinatra setting: run the app under test in the :test environment.
set :environment, :test
metadata ADDED
@@ -0,0 +1,61 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: better_robots
3
+ version: !ruby/object:Gem::Version
4
+ prerelease:
5
+ version: 0.0.1
6
+ platform: ruby
7
+ authors:
8
+ - Gerry Power
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-12-24 00:00:00.000000000 Z
13
+ dependencies: []
14
+ description: Better SEO with robots.txt
15
+ email:
16
+ - gerry@thepowerhouse.com
17
+ executables: []
18
+ extensions: []
19
+ extra_rdoc_files: []
20
+ files:
21
+ - .gitignore
22
+ - Gemfile
23
+ - MIT-LICENSE
24
+ - README.md
25
+ - Rakefile
26
+ - better_robots.gemspec
27
+ - lib/better_robots.rb
28
+ - lib/better_robots/version.rb
29
+ - test/integration/better_robots_spec.rb
30
+ - test/integration/robots/test.site.org.robots.txt
31
+ - test/routes.rb
32
+ - test/test_helper.rb
33
+ homepage: https://github.com/gerrypower/better_robots
34
+ licenses: []
35
+ post_install_message:
36
+ rdoc_options: []
37
+ require_paths:
38
+ - lib
39
+ required_ruby_version: !ruby/object:Gem::Requirement
40
+ requirements:
41
+ - - ! '>='
42
+ - !ruby/object:Gem::Version
43
+ version: '0'
44
+ none: false
45
+ required_rubygems_version: !ruby/object:Gem::Requirement
46
+ requirements:
47
+ - - ! '>='
48
+ - !ruby/object:Gem::Version
49
+ version: '0'
50
+ none: false
51
+ requirements: []
52
+ rubyforge_project:
53
+ rubygems_version: 1.8.23
54
+ signing_key:
55
+ specification_version: 3
56
+ summary: Better SEO with robots.txt
57
+ test_files:
58
+ - test/integration/better_robots_spec.rb
59
+ - test/integration/robots/test.site.org.robots.txt
60
+ - test/routes.rb
61
+ - test/test_helper.rb