fizx-robots 0.6.0 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. data/lib/robots.rb +25 -4
  2. metadata +1 -1
@@ -1,14 +1,27 @@
1
1
  require "open-uri"
2
2
  require "uri"
3
3
  require "rubygems"
4
+ require "timeout"
4
5
 
5
6
  class Robots
6
7
 
8
+ DEFAULT_TIMEOUT = 3
9
+
7
10
  class ParsedRobots
8
11
 
9
- def initialize(uri)
12
+ def initialize(uri, user_agent)
10
13
  @last_accessed = Time.at(1)
11
- io = open(URI.join(uri.to_s, "/robots.txt")) rescue nil
14
+
15
+ io = nil
16
+ begin
17
+ Timeout::timeout(Robots.timeout) do
18
+ io = URI.join(uri.to_s, "/robots.txt").open("User-Agent" => user_agent) rescue nil
19
+ end
20
+ rescue Timeout::Error
21
+ STDERR.puts "robots.txt request timed out"
22
+ end
23
+
24
+
12
25
  if !io || io.content_type != "text/plain" || io.status != ["200", "OK"]
13
26
  io = StringIO.new("User-agent: *\nAllow: /\n")
14
27
  end
@@ -91,6 +104,14 @@ class Robots
91
104
  end
92
105
  end
93
106
 
107
+ def self.timeout=(t)
108
+ @timeout = t
109
+ end
110
+
111
+ def self.timeout
112
+ @timeout || DEFAULT_TIMEOUT
113
+ end
114
+
94
115
  def initialize(user_agent)
95
116
  @user_agent = user_agent
96
117
  @parsed = {}
@@ -99,14 +120,14 @@ class Robots
99
120
  def allowed?(uri)
100
121
  uri = URI.parse(uri.to_s) unless uri.is_a?(URI)
101
122
  host = uri.host
102
- @parsed[host] ||= ParsedRobots.new(uri)
123
+ @parsed[host] ||= ParsedRobots.new(uri, @user_agent)
103
124
  @parsed[host].allowed?(uri, @user_agent)
104
125
  end
105
126
 
106
127
  def other_values(uri)
107
128
  uri = URI.parse(uri.to_s) unless uri.is_a?(URI)
108
129
  host = uri.host
109
- @parsed[host] ||= ParsedRobots.new(uri)
130
+ @parsed[host] ||= ParsedRobots.new(uri, @user_agent)
110
131
  @parsed[host].other_values
111
132
  end
112
133
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fizx-robots
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.0
4
+ version: 0.7.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kyle Maxwell