probot 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/probot/version.rb +1 -1
- data/lib/probot.rb +9 -2
- data/probot.gemspec +2 -0
- metadata +18 -7
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 5a82d5e6f6baf600f7f6c22cf5358cefaed6ce9bf1119298c98245e440296c2e
|
|
4
|
+
data.tar.gz: c791777dfc59a614f7797b63765cb9d63d39ec732feab6df754025782e023825
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: ada3169c4a078fe19a526ee07563ba355a6032d8674d70487db3ab6c14aabf65d518d989f3e3d5f6e7d4fbc55b1920b27d7b7828129412ae4d81f76b8f90df67
|
|
7
|
+
data.tar.gz: 9487fc153c81dc99ba73ec8a1330688f46b829e2e62db115e0a04651c1c41ca4418afe8cd22abc272272ef0e7dec70d1a81c0bcb4a36c0c7d3666eb7f32e43de
|
data/lib/probot/version.rb
CHANGED
data/lib/probot.rb
CHANGED
|
@@ -31,6 +31,7 @@ class Probot
|
|
|
31
31
|
@current_agents = ["*"]
|
|
32
32
|
@current_agents.each { |agent| @rules[agent] ||= {"disallow" => [], "allow" => [], "crawl_delay" => 0} }
|
|
33
33
|
@sitemaps = []
|
|
34
|
+
|
|
34
35
|
@site = URI(data) if data.start_with?("http")
|
|
35
36
|
@doc = @site.nil? ? data : fetch_robots_txt(@site)
|
|
36
37
|
parse(@doc)
|
|
@@ -92,7 +93,9 @@ class Probot
|
|
|
92
93
|
|
|
93
94
|
# All Regex characters are escaped, then we unescape * and $ as they may used in robots.txt
|
|
94
95
|
if data.allow? || data.disallow?
|
|
95
|
-
@current_agents.each
|
|
96
|
+
@current_agents.each do |agent|
|
|
97
|
+
rules[agent][data.key] << Regexp.new(Regexp.escape(data.value).gsub('\*', ".*").gsub('\$', "$")) unless data.value.nil?
|
|
98
|
+
end
|
|
96
99
|
|
|
97
100
|
# When user-agent strings are found on consecutive lines, they are considered to be part of the same record. Google ignores crawl_delay.
|
|
98
101
|
subsequent_agent = false
|
|
@@ -128,6 +131,8 @@ class Probot
|
|
|
128
131
|
|
|
129
132
|
def clean_value = raw_value.split("#").first&.strip
|
|
130
133
|
|
|
134
|
+
def clean_url = clean_value&.then { URI(_1).to_s }
|
|
135
|
+
|
|
131
136
|
def agent? = key == "user-agent"
|
|
132
137
|
|
|
133
138
|
def disallow? = key == "disallow"
|
|
@@ -140,11 +145,13 @@ class Probot
|
|
|
140
145
|
|
|
141
146
|
def value
|
|
142
147
|
return clean_value.to_f if crawl_delay?
|
|
143
|
-
return
|
|
148
|
+
return clean_url if disallow? || allow?
|
|
144
149
|
|
|
145
150
|
raw_value
|
|
146
151
|
rescue URI::InvalidURIError
|
|
147
152
|
raw_value
|
|
153
|
+
rescue ArgumentError
|
|
154
|
+
raw_value
|
|
148
155
|
end
|
|
149
156
|
end
|
|
150
157
|
|
data/probot.gemspec
CHANGED
|
@@ -13,6 +13,7 @@ Gem::Specification.new do |spec|
|
|
|
13
13
|
spec.homepage = "http://github.com/dkam/probot"
|
|
14
14
|
spec.license = "MIT"
|
|
15
15
|
spec.required_ruby_version = ">= 3.0"
|
|
16
|
+
spec.platform = Gem::Platform::RUBY
|
|
16
17
|
|
|
17
18
|
spec.metadata["homepage_uri"] = spec.homepage
|
|
18
19
|
spec.metadata["source_code_uri"] = "http://github.com/dkam/probot"
|
|
@@ -29,4 +30,5 @@ Gem::Specification.new do |spec|
|
|
|
29
30
|
spec.bindir = "exe"
|
|
30
31
|
spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
|
|
31
32
|
spec.require_paths = ["lib"]
|
|
33
|
+
spec.add_development_dependency "debug"
|
|
32
34
|
end
|
metadata
CHANGED
|
@@ -1,15 +1,28 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: probot
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.4.0
|
|
4
|
+
version: 0.5.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Dan Milne
|
|
8
|
-
autorequire:
|
|
9
8
|
bindir: exe
|
|
10
9
|
cert_chain: []
|
|
11
|
-
date: 2024-
|
|
12
|
-
dependencies:
|
|
10
|
+
date: 2024-12-23 00:00:00.000000000 Z
|
|
11
|
+
dependencies:
|
|
12
|
+
- !ruby/object:Gem::Dependency
|
|
13
|
+
name: debug
|
|
14
|
+
requirement: !ruby/object:Gem::Requirement
|
|
15
|
+
requirements:
|
|
16
|
+
- - ">="
|
|
17
|
+
- !ruby/object:Gem::Version
|
|
18
|
+
version: '0'
|
|
19
|
+
type: :development
|
|
20
|
+
prerelease: false
|
|
21
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
22
|
+
requirements:
|
|
23
|
+
- - ">="
|
|
24
|
+
- !ruby/object:Gem::Version
|
|
25
|
+
version: '0'
|
|
13
26
|
description: A fully featured robots.txt parser.
|
|
14
27
|
email:
|
|
15
28
|
- d@nmilne.com
|
|
@@ -33,7 +46,6 @@ metadata:
|
|
|
33
46
|
homepage_uri: http://github.com/dkam/probot
|
|
34
47
|
source_code_uri: http://github.com/dkam/probot
|
|
35
48
|
changelog_uri: http://github.com/dkam/probot/CHANGELOG.md
|
|
36
|
-
post_install_message:
|
|
37
49
|
rdoc_options: []
|
|
38
50
|
require_paths:
|
|
39
51
|
- lib
|
|
@@ -48,8 +60,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
48
60
|
- !ruby/object:Gem::Version
|
|
49
61
|
version: '0'
|
|
50
62
|
requirements: []
|
|
51
|
-
rubygems_version: 3.
|
|
52
|
-
signing_key:
|
|
63
|
+
rubygems_version: 3.6.0
|
|
53
64
|
specification_version: 4
|
|
54
65
|
summary: A robots.txt parser.
|
|
55
66
|
test_files: []
|