probot 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +3 -0
- data/lib/probot/version.rb +1 -1
- data/lib/probot.rb +10 -7
- data/probot.gemspec +0 -6
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1cfcaee58c1bb05d06aa5e1b2b4b8db895965cf8e1f73b31b2f52fca48019d5f
|
4
|
+
data.tar.gz: '04854df032709876daa4c541d43e526b687d9dcd6a7158887bc82bb1544df307'
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 78f2a36baa7966eb7ff4f6c4dd95f30f0a4428a0885de08e84689d93f33c70ff3c8f449596fe1b5c6bdf206fcc6858bfc9a4360c49395a64c2f7686be4d146f3
|
7
|
+
data.tar.gz: 8e78bc0ee42a3b9611177273703f49c4c4bd847a551e615ef8f9085c286e891c014983d888b08e8f5b0f0a2f531cb50919a651b1dacd2e566d578fa72a556773
|
data/CHANGELOG.md
CHANGED
data/lib/probot/version.rb
CHANGED
data/lib/probot.rb
CHANGED
@@ -19,8 +19,8 @@ require "net/http"
|
|
19
19
|
# Find the most specific rule for a given URL. We use the length of the regexp as a proxy for specificity.
|
20
20
|
|
21
21
|
class Probot
|
22
|
-
attr_reader :rules, :
|
23
|
-
attr_accessor :agent
|
22
|
+
attr_reader :rules, :doc
|
23
|
+
attr_accessor :agent, :sitemaps, :site
|
24
24
|
|
25
25
|
def initialize(data, agent: "*")
|
26
26
|
raise ArgumentError, "The first argument must be a string" unless data.is_a?(String)
|
@@ -30,8 +30,8 @@ class Probot
|
|
30
30
|
@current_agents = ["*"]
|
31
31
|
@current_agents.each { |agent| @rules[agent] ||= {"disallow" => [], "allow" => [], "crawl_delay" => 0} }
|
32
32
|
@sitemaps = []
|
33
|
-
|
34
|
-
@doc =
|
33
|
+
@site = URI(data) if data.start_with?("http")
|
34
|
+
@doc = @site.nil? ? data : fetch_robots_txt(@site)
|
35
35
|
parse(@doc)
|
36
36
|
end
|
37
37
|
|
@@ -90,11 +90,11 @@ class Probot
|
|
90
90
|
end
|
91
91
|
|
92
92
|
# All Regex characters are escaped, then we unescape * and $ as they may be used in robots.txt
|
93
|
-
|
94
93
|
if data.allow? || data.disallow?
|
95
94
|
@current_agents.each { |agent| rules[agent][data.key] << Regexp.new(Regexp.escape(data.value).gsub('\*', ".*").gsub('\$', "$")) }
|
96
95
|
|
97
|
-
|
96
|
+
# When user-agent strings are found on consecutive lines, they are considered to be part of the same record. Google ignores crawl_delay.
|
97
|
+
subsequent_agent = false
|
98
98
|
next
|
99
99
|
end
|
100
100
|
|
@@ -103,8 +103,11 @@ class Probot
|
|
103
103
|
next
|
104
104
|
end
|
105
105
|
|
106
|
+
# Ensure we have an absolute URL
|
106
107
|
if data.sitemap?
|
107
|
-
|
108
|
+
sitemap_uri = URI(data.value)
|
109
|
+
sitemap_uri = sitemap_uri.host.nil? ? URI.join(*[site, sitemap_uri].compact) : sitemap_uri
|
110
|
+
@sitemaps << sitemap_uri.to_s
|
108
111
|
next
|
109
112
|
end
|
110
113
|
|
data/probot.gemspec
CHANGED
@@ -29,10 +29,4 @@ Gem::Specification.new do |spec|
|
|
29
29
|
spec.bindir = "exe"
|
30
30
|
spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
|
31
31
|
spec.require_paths = ["lib"]
|
32
|
-
|
33
|
-
# Uncomment to register a new dependency of your gem
|
34
|
-
# spec.add_dependency "example-gem", "~> 1.0"
|
35
|
-
|
36
|
-
# For more information and examples about making a new gem, check out our
|
37
|
-
# guide at: https://bundler.io/guides/creating_gem.html
|
38
32
|
end
|