fizx-robots 0.3.2 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (2)
  1. data/lib/robots.rb +23 -23
  2. metadata +4 -2
@@ -1,14 +1,13 @@
1
1
  require "open-uri"
2
2
  require "uri"
3
3
  require "rubygems"
4
- require "loggable"
4
+
5
5
  class Robots
6
- include Loggable
7
6
 
8
7
  class ParsedRobots
9
- include Loggable
10
8
 
11
9
  def initialize(uri)
10
+ @last_accessed = Time.at(1)
12
11
  io = open(URI.join(uri.to_s, "/robots.txt")) rescue nil
13
12
  if !io || io.content_type != "text/plain" || io.status != ["200", "OK"]
14
13
  io = StringIO.new("User-agent: *\nAllow: /\n")
@@ -17,10 +16,13 @@ class Robots
17
16
  @other = {}
18
17
  @disallows = {}
19
18
  @allows = {}
19
+ @delays = {} # added delays to make it work
20
20
  agent = /.*/
21
21
  io.each do |line|
22
22
  next if line =~ /^\s*(#.*|$)/
23
- key, value = line.split(":")
23
+ arr = line.split(":")
24
+ key = arr.shift
25
+ value = arr.join(":").strip
24
26
  value.strip!
25
27
  case key
26
28
  when "User-agent":
@@ -31,9 +33,10 @@ class Robots
31
33
  when "Disallow":
32
34
  @disallows[agent] ||= []
33
35
  @disallows[agent] << to_regex(value)
36
+ when "Crawl-delay"
37
+ @delays[agent] = value.to_i
34
38
  else
35
- @disallows[agent] ||= []
36
- @disallows[agent] << to_regex(value)
39
+ @other[key] = value
37
40
  end
38
41
  end
39
42
 
@@ -44,38 +47,35 @@ class Robots
44
47
  return true unless @parsed
45
48
  allowed = true
46
49
  path = uri.request_uri
47
- debug "path: #{path}"
48
- puts "--------"
49
- puts @disallows.inspect
50
- puts "--------"
50
+
51
51
  @disallows.each do |key, value|
52
- puts ">>>>>>>#{key.inspect}<<<<<<<<"
53
52
  if user_agent =~ key
54
- debug "matched #{key.inspect}"
55
53
  value.each do |rule|
56
54
  if path =~ rule
57
- debug "matched Disallow: #{rule.inspect}"
58
55
  allowed = false
59
56
  end
60
57
  end
61
58
  end
62
59
  end
63
60
 
64
- return true if allowed
65
-
66
61
  @allows.each do |key, value|
67
- if user_agent =~ key
68
- debug "matched #{key.inspect}"
69
- value.each do |rule|
70
- if path =~ rule
71
- debug "matched Allow: #{rule.inspect}"
72
- return true
62
+ unless allowed
63
+ if user_agent =~ key
64
+ value.each do |rule|
65
+ if path =~ rule
66
+ allowed = true
67
+ end
73
68
  end
74
69
  end
75
70
  end
76
71
  end
77
72
 
78
- return false
73
+ if allowed && @delays[user_agent]
74
+ sleep @delays[user_agent] - (Time.now - @last_accessed)
75
+ @last_accessed = Time.now
76
+ end
77
+
78
+ return allowed
79
79
  end
80
80
 
81
81
  def other_values
@@ -109,4 +109,4 @@ class Robots
109
109
  @parsed[host] ||= ParsedRobots.new(uri)
110
110
  @parsed[host].other_values
111
111
  end
112
- end
112
+ end
metadata CHANGED
@@ -1,19 +1,21 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fizx-robots
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.2
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kyle Maxwell
8
+ - Sausheong Chang
8
9
  autorequire:
9
10
  bindir: bin
10
11
  cert_chain: []
11
12
 
12
- date: 2008-08-10 00:00:00 -07:00
13
+ date: 2008-12-10 00:00:00 -08:00
13
14
  default_executable:
14
15
  dependencies:
15
16
  - !ruby/object:Gem::Dependency
16
17
  name: fizx-loggable
18
+ type: :runtime
17
19
  version_requirement:
18
20
  version_requirements: !ruby/object:Gem::Requirement
19
21
  requirements: