robotstxt 0.5.1 → 0.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE.rdoc +25 -0
- data/Manifest +8 -0
- data/Rakefile +57 -0
- data/lib/robotstxt.rb +9 -9
- data/lib/robotstxt/parser.rb +24 -31
- data/robotstxt.gemspec +38 -0
- metadata +45 -12
data/LICENSE.rdoc
ADDED
@@ -0,0 +1,25 @@
+= License
+
+(The MIT License)
+
+Copyright (c) 2009 Simone Rinzivillo <srinzivillo@gmail.com>
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
data/Manifest
ADDED
data/Rakefile
ADDED
@@ -0,0 +1,57 @@
+$:.unshift(File.dirname(__FILE__) + "/lib")
+
+require 'rubygems'
+require 'rake'
+require 'echoe'
+require 'robotstxt'
+
+
+# Common package properties
+PKG_NAME = 'robotstxt'
+PKG_VERSION = Robotstxt::VERSION
+RUBYFORGE_PROJECT = 'robotstxt'
+
+if ENV['SNAPSHOT'].to_i == 1
+  PKG_VERSION << "." << Time.now.utc.strftime("%Y%m%d%H%M%S")
+end
+
+
+Echoe.new(PKG_NAME, PKG_VERSION) do |p|
+  p.author = "Simone Rinzivillo"
+  p.email = "srinzivillo@gmail.com"
+  p.summary = "Robotstxt is an Ruby robots.txt file parser"
+  p.url = "http://www.simonerinzivillo.it"
+  p.project = RUBYFORGE_PROJECT
+  p.description = <<-EOD
+    Robotstxt Parser allows you to the check the accessibility of URLs and get other data. \
+    Full support for the robots.txt RFC, wildcards and Sitemap: rules.
+  EOD
+
+  p.need_zip = true
+
+  p.development_dependencies += ["rake ~>0.8",
+                                 "echoe ~>3.1"]
+
+  p.rcov_options = ["-Itest -x mocha,rcov,Rakefile"]
+end
+
+
+desc "Open an irb session preloaded with this library"
+task :console do
+  sh "irb -rubygems -I lib -r robotstxt.rb"
+end
+
+begin
+  require 'code_statistics'
+  desc "Show library's code statistics"
+  task :stats do
+    CodeStatistics.new(["Robotstxt", "lib"],
+                       ["Tests", "test"]).to_s
+  end
+rescue LoadError
+  puts "CodeStatistics (Rails) is not available"
+end
+
+Dir["tasks/**/*.rake"].each do |file|
+  load(file)
+end
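The SNAPSHOT branch in this Rakefile mutates PKG_VERSION in place before Echoe packages the gem. A minimal sketch of the effect (the timestamp shown is illustrative):

    # Packaging with SNAPSHOT=1 appends a UTC timestamp to the version,
    # producing a unique pre-release build number.
    version = '0.5.2'
    version << "." << Time.now.utc.strftime("%Y%m%d%H%M%S") if ENV['SNAPSHOT'].to_i == 1
    puts version   # e.g. "0.5.2.20091219120000" when SNAPSHOT=1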
data/lib/robotstxt.rb
CHANGED
@@ -24,31 +24,31 @@ module Robotstxt
   NAME = 'Robotstxt'
   GEM = 'robotstxt'
   AUTHORS = ['Simone Rinzivillo <srinzivillo@gmail.com>']
-  VERSION = '0.5.1'
+  VERSION = '0.5.2'
 
 
   # Check if the <tt>URL</tt> is allowed to be crawled from the current <tt>Robot_id</tt>.
   # Robots:Allowed? returns <tt>true</tt> if the robots.txt file does not block the access to the URL.
   #
-  #
+  #   Robotstxt.allowed?('http://www.simonerinzivillo.it/', 'rubytest')
   #
   def self.allowed?(url, robot_id)
 
-
-
-
+    u = URI.parse(url)
+    r = Robotstxt::Parser.new(robot_id)
+    r.allowed?(url) if r.get(u.scheme + '://' + u.host)
 
   end
 
   # Analyze the robots.txt file to return an <tt>Array</tt> containing the list of XML Sitemaps URLs.
   #
-  #
+  #   Robotstxt.sitemaps('http://www.simonerinzivillo.it/', 'rubytest')
   #
   def self.sitemaps(url, robot_id)
 
-
-
-
+    u = URI.parse(url)
+    r = Robotstxt::Parser.new(robot_id)
+    r.sitemaps if r.get(u.scheme + '://' + u.host)
 
   end
 
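The restored bodies make the two module-level helpers one-shot wrappers around Robotstxt::Parser: each parses the URL, fetches robots.txt for its host, and queries the result. A usage sketch based on the rdoc examples above (the host and robot id are illustrative):

    require 'robotstxt'

    # true if robots.txt does not block 'rubytest' from the URL
    Robotstxt.allowed?('http://www.simonerinzivillo.it/', 'rubytest')

    # Array of Sitemap: entries from the site's robots.txt
    Robotstxt.sitemaps('http://www.simonerinzivillo.it/', 'rubytest')

Because both bodies end in `... if r.get(...)`, each helper returns nil rather than raising when the robots.txt fetch fails.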
data/lib/robotstxt/parser.rb
CHANGED
@@ -27,6 +27,7 @@ module Robotstxt
   # <tt>client = Robotstxt::Robotstxtistance.new('my_robot_id')</tt>
   #
   def initialize(robot_id = nil)
+
     @robot_id = '*'
     @rules = []
     @sitemaps = []
@@ -37,14 +38,14 @@ module Robotstxt
 
   # Requires and parses the Robots.txt file for the <tt>hostname</tt>.
   #
-  #
-  #
-  # <tt>client.get('http:// www.simonerinzivillo.it')</tt>
+  #   client = Robotstxt::Robotstxtistance.new('my_robot_id')
+  #   client.get('http://www.simonerinzivillo.it')
   #
   #
   # This method returns <tt>true</tt> if the parsing is gone.
   #
   def get(hostname)
+
     @ehttp = true
     url = URI.parse(hostname)
 
@@ -59,12 +60,12 @@ module Robotstxt
 
       case response
       when Net::HTTPSuccess then
-
-
-
+        @found = true
+        @body = response.body
+        parse()
 
       else
-
+        @found = false
       end
 
       return @found
@@ -73,22 +74,20 @@ module Robotstxt
       if @ehttp
        @ettp = false
        retry
-
+      else
        return nil
      end
    end
+
  end
 
 
   # Check if the <tt>URL</tt> is allowed to be crawled from the current Robot_id.
   #
-  #
-  #
-  #
-  #
-  # <tt> client.allowed?('http:// www.simonerinzivillo.it/no-dir/')</tt>
-  #
-  # <tt>end</tt>
+  #   client = Robotstxt::Robotstxtistance.new('my_robot_id')
+  #   if client.get('http://www.simonerinzivillo.it')
+  #     client.allowed?('http://www.simonerinzivillo.it/no-dir/')
+  #   end
   #
   # This method returns <tt>true</tt> if the robots.txt file does not block the access to the URL.
   #
@@ -101,9 +100,9 @@ module Robotstxt
     @rules.each {|ua|
 
       if @robot_id == ua[0] || ua[0] == '*'
-
-        ua[1].each {|d|
 
+        ua[1].each {|d|
+
           is_allow = false if url_path.match('^' + d ) || d == '/'
 
         }
@@ -111,23 +110,17 @@ module Robotstxt
       end
 
     }
-
+    is_allow
   end
 
   # Analyze the robots.txt file to return an <tt>Array</tt> containing the list of XML Sitemaps URLs.
   #
-  #
-  #
-  #
-  #
-  #
-  #
-  # <tt> puts url</tt>
-  #
-  #
-  # <tt> }</tt>
-  #
-  # <tt>end</tt>
+  #   client = Robotstxt::Robotstxtistance.new('my_robot_id')
+  #   if client.get('http://www.simonerinzivillo.it')
+  #     client.sitemaps.each{ |url|
+  #       puts url
+  #     }
+  #   end
   #
   def sitemaps
     @sitemaps
@@ -146,7 +139,7 @@ module Robotstxt
     @body = @body.downcase
 
     @body.each_line {|r|
-
+
       case r
       when /^#.+$/
 
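Taken together with the repaired rdoc, the lower-level flow is: build a parser with a robot id, call get to fetch and parse a host's robots.txt, then query it. A sketch (host and robot id illustrative; note the rdoc calls the class Robotstxtistance, but lib/robotstxt.rb instantiates Robotstxt::Parser):

    require 'robotstxt'

    client = Robotstxt::Parser.new('my_robot_id')
    if client.get('http://www.simonerinzivillo.it')
      client.allowed?('http://www.simonerinzivillo.it/no-dir/')  # => true or false
      client.sitemaps.each { |url| puts url }
    end

One caveat visible in the rescue branch above: `@ettp = false` looks like a typo for `@ehttp = false`, so as released the single-retry guard may not terminate the retry loop on repeated network errors.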
data/robotstxt.gemspec
ADDED
@@ -0,0 +1,38 @@
+# -*- encoding: utf-8 -*-
+
+Gem::Specification.new do |s|
+  s.name = %q{robotstxt}
+  s.version = "0.5.2"
+
+  s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
+  s.authors = ["Simone Rinzivillo"]
+  s.date = %q{2009-12-19}
+  s.description = %q{ Robotstxt Parser allows you to the check the accessibility of URLs and get other data. Full support for the robots.txt RFC, wildcards and Sitemap: rules.
+}
+  s.email = %q{srinzivillo@gmail.com}
+  s.extra_rdoc_files = ["LICENSE.rdoc", "README.rdoc", "lib/robotstxt.rb", "lib/robotstxt/parser.rb"]
+  s.files = ["LICENSE.rdoc", "Manifest", "README.rdoc", "Rakefile", "lib/robotstxt.rb", "lib/robotstxt/parser.rb", "test/parser_test.rb", "test/robotstxt_test.rb", "robotstxt.gemspec"]
+  s.homepage = %q{http://www.simonerinzivillo.it}
+  s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Robotstxt", "--main", "README.rdoc"]
+  s.require_paths = ["lib"]
+  s.rubyforge_project = %q{robotstxt}
+  s.rubygems_version = %q{1.3.5}
+  s.summary = %q{Robotstxt is an Ruby robots.txt file parser}
+  s.test_files = ["test/parser_test.rb", "test/robotstxt_test.rb"]
+
+  if s.respond_to? :specification_version then
+    current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
+    s.specification_version = 3
+
+    if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
+      s.add_development_dependency(%q<rake>, ["~> 0.8"])
+      s.add_development_dependency(%q<echoe>, ["~> 3.1"])
+    else
+      s.add_dependency(%q<rake>, ["~> 0.8"])
+      s.add_dependency(%q<echoe>, ["~> 3.1"])
+    end
+  else
+    s.add_dependency(%q<rake>, ["~> 0.8"])
+    s.add_dependency(%q<echoe>, ["~> 3.1"])
+  end
+end
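A sketch of how this gemspec resolves when loaded (assuming a RubyGems installation; the conditional registers rake and echoe as development dependencies on RubyGems >= 1.2, and as runtime dependencies otherwise):

    require 'rubygems'

    spec = Gem::Specification.load('robotstxt.gemspec')
    puts spec.full_name                 # "robotstxt-0.5.2"
    spec.dependencies.each do |dep|
      puts "#{dep.name} (#{dep.type})"  # "rake (development)", "echoe (development)"
    end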
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: robotstxt
 version: !ruby/object:Gem::Version
-  version: 0.5.1
+  version: 0.5.2
 platform: ruby
 authors:
 - Simone Rinzivillo
@@ -9,46 +9,79 @@ autorequire:
 bindir: bin
 cert_chain: []
 
-date: 2009-12-
+date: 2009-12-19 00:00:00 +01:00
 default_executable:
-dependencies:
-
-
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: rake
+  type: :development
+  version_requirement:
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        version: "0.8"
+    version:
+- !ruby/object:Gem::Dependency
+  name: echoe
+  type: :development
+  version_requirement:
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        version: "3.1"
+    version:
+description: " Robotstxt Parser allows you to the check the accessibility of URLs and get other data. Full support for the robots.txt RFC, wildcards and Sitemap: rules.\n"
 email: srinzivillo@gmail.com
 executables: []
 
 extensions: []
 
 extra_rdoc_files:
+- LICENSE.rdoc
 - README.rdoc
-files:
-- lib/robotstxt/parser.rb
 - lib/robotstxt.rb
+- lib/robotstxt/parser.rb
+files:
+- LICENSE.rdoc
+- Manifest
 - README.rdoc
+- Rakefile
+- lib/robotstxt.rb
+- lib/robotstxt/parser.rb
+- test/parser_test.rb
+- test/robotstxt_test.rb
+- robotstxt.gemspec
 has_rdoc: true
 homepage: http://www.simonerinzivillo.it
 licenses: []
 
 post_install_message:
-rdoc_options:
-
+rdoc_options:
+- --line-numbers
+- --inline-source
+- --title
+- Robotstxt
+- --main
+- README.rdoc
 require_paths:
 - lib
 required_ruby_version: !ruby/object:Gem::Requirement
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
-      version:
+      version: "0"
   version:
 required_rubygems_version: !ruby/object:Gem::Requirement
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
-      version: "
+      version: "1.2"
   version:
 requirements: []
 
-rubyforge_project:
+rubyforge_project: robotstxt
 rubygems_version: 1.3.5
 signing_key:
 specification_version: 3
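The `~>` operators recorded for rake and echoe are RubyGems pessimistic version constraints: `~> 0.8` permits any version >= 0.8 and < 1.0. A quick check with the standard Gem::Requirement API:

    require 'rubygems'

    req = Gem::Requirement.new('~> 0.8')
    req.satisfied_by?(Gem::Version.new('0.8.7'))  # => true
    req.satisfied_by?(Gem::Version.new('0.9.9'))  # => true
    req.satisfied_by?(Gem::Version.new('1.0.0'))  # => false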
|