probot 0.1.0 → 0.3.0
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/lib/probot/version.rb +1 -1
- data/lib/probot.rb +11 -7
- data/probot.gemspec +0 -6
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 54e371737751fd5162dbcb07fbb7f24906f3d9684c17cec9c54f48ab5c99c35f
+  data.tar.gz: 5babb13f06601f07e58613649ddbc1396236c685e3858c4816f45fba95e95038
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: c1250d28b604d70abedc9465c755f04e20c0318169fef2beab5c220692abadad073467130fd8f9d3c81ca443e3d659dded8eeec6af12403b594d9f3ad9a610af
+  data.tar.gz: 53f3d3fadba21111fd84c9a90501d22ea3456002966ce77ecff9757bff1d1544fce07d0971b3a30f0bf3683be21162771313ee6d9c1ed1b676fe11f63ccdbf03
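The `+` digests above are what RubyGems records in `checksums.yaml` for the inner archives of the rebuilt gem (the old values were elided by the diff viewer). As an illustrative check of a downloaded copy, assuming the file name from the versions in this diff:

```ruby
require "digest"

# A .gem file is a plain tar archive containing metadata.gz, data.tar.gz
# and checksums.yaml.gz; unpack it first, e.g. `tar -xf probot-0.3.0.gem`.
# The digest of the inner data.tar.gz should then match checksums.yaml.
puts Digest::SHA256.file("data.tar.gz").hexdigest
# expected: 5babb13f06601f07e58613649ddbc1396236c685e3858c4816f45fba95e95038
```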
data/CHANGELOG.md
CHANGED
data/lib/probot/version.rb
CHANGED
data/lib/probot.rb
CHANGED
@@ -19,8 +19,8 @@ require "net/http"
 # Find the most specific rule for a given URL. We use the length of the regexp as a proxy for specificity.
 
 class Probot
-  attr_reader :rules, :
-  attr_accessor :agent
+  attr_reader :rules, :doc
+  attr_accessor :agent, :sitemaps, :site
 
   def initialize(data, agent: "*")
     raise ArgumentError, "The first argument must be a string" unless data.is_a?(String)
@@ -30,8 +30,8 @@ class Probot
     @current_agents = ["*"]
     @current_agents.each { |agent| @rules[agent] ||= {"disallow" => [], "allow" => [], "crawl_delay" => 0} }
     @sitemaps = []
-
-    @doc =
+    @site = URI(data) if data.start_with?("http")
+    @doc = @site.nil? ? data : fetch_robots_txt(@site)
     parse(@doc)
   end
 
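With this change the constructor accepts either raw robots.txt content or a site URL: `@site` is only set for the latter, and `fetch_robots_txt` retrieves the document. A minimal usage sketch, using only the API visible in this diff (the URL and agent name are illustrative):

```ruby
require "probot"

# Raw robots.txt content: @site stays nil and the string is parsed directly.
local = Probot.new(<<~ROBOTS)
  User-agent: *
  Disallow: /admin
ROBOTS
local.rules["*"]["disallow"] # => [/\/admin/]

# A URL instead: @site is set and the document is fetched over HTTP.
remote = Probot.new("https://example.com", agent: "MyCrawler")
remote.site     # => #<URI::HTTPS https://example.com>
remote.sitemaps # => whatever Sitemap: entries the fetched file declares
```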
@@ -90,11 +90,11 @@ class Probot
      end
 
      # All Regex characters are escaped, then we unescape * and $ as they may used in robots.txt
-
      if data.allow? || data.disallow?
        @current_agents.each { |agent| rules[agent][data.key] << Regexp.new(Regexp.escape(data.value).gsub('\*', ".*").gsub('\$', "$")) }
 
-
+        # When user-agent strings are found on consecutive lines, they are considered to be part of the same record. Google ignores crawl_delay.
+        subsequent_agent = false
        next
      end
 
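That one-liner is the core of rule matching: each pattern is fully `Regexp.escape`d, then the only two metacharacters robots.txt supports, `*` and `$`, are unescaped back into their regex forms. The same transformation in isolation (the helper name is hypothetical):

```ruby
# Escape everything, then restore * (any sequence) and $ (end anchor),
# exactly as the diff does inline.
def robots_pattern(value)
  Regexp.new(Regexp.escape(value).gsub('\*', ".*").gsub('\$', "$"))
end

robots_pattern("/private/*")  # => /\/private\/.*/
robots_pattern("/*.pdf$")     # => /\/.*\.pdf$/
robots_pattern("/exact-path") # => /\/exact\-path/
```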
@@ -103,8 +103,12 @@ class Probot
        next
      end
 
+      # Ensure we have an absolute URL
      if data.sitemap?
-
+        sitemap_uri = URI(data.value)
+        sitemap_uri = sitemap_uri.host.nil? ? URI.join(*[site, sitemap_uri].compact) : sitemap_uri
+        @sitemaps << sitemap_uri.to_s
+        @sitemaps.uniq!
        next
      end
 
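The new branch resolves relative `Sitemap:` values against the site the robots.txt was fetched from and deduplicates the list. A standalone sketch of the same resolution, with illustrative URLs:

```ruby
require "uri"

site = URI("https://example.com")
sitemaps = []

# Relative entries (no host) are joined onto the site; absolute ones pass through.
["https://example.com/sitemap.xml", "/sitemap.xml"].each do |value|
  sitemap_uri = URI(value)
  sitemap_uri = sitemap_uri.host.nil? ? URI.join(*[site, sitemap_uri].compact) : sitemap_uri
  sitemaps << sitemap_uri.to_s
  sitemaps.uniq!
end

sitemaps # => ["https://example.com/sitemap.xml"]
```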
data/probot.gemspec
CHANGED
@@ -29,10 +29,4 @@ Gem::Specification.new do |spec|
   spec.bindir = "exe"
   spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
   spec.require_paths = ["lib"]
-
-  # Uncomment to register a new dependency of your gem
-  # spec.add_dependency "example-gem", "~> 1.0"
-
-  # For more information and examples about making a new gem, check out our
-  # guide at: https://bundler.io/guides/creating_gem.html
 end
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: probot
 version: !ruby/object:Gem::Version
-  version: 0.1.0
+  version: 0.3.0
 platform: ruby
 authors:
 - Dan Milne
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2023-09-
+date: 2023-09-18 00:00:00.000000000 Z
 dependencies: []
 description: A fully featured robots.txt parser.
 email: