probot 0.3.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/lib/probot/version.rb +1 -1
- data/lib/probot.rb +10 -2
- data/probot.gemspec +2 -0
- metadata +18 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5a82d5e6f6baf600f7f6c22cf5358cefaed6ce9bf1119298c98245e440296c2e
|
4
|
+
data.tar.gz: c791777dfc59a614f7797b63765cb9d63d39ec732feab6df754025782e023825
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ada3169c4a078fe19a526ee07563ba355a6032d8674d70487db3ab6c14aabf65d518d989f3e3d5f6e7d4fbc55b1920b27d7b7828129412ae4d81f76b8f90df67
|
7
|
+
data.tar.gz: 9487fc153c81dc99ba73ec8a1330688f46b829e2e62db115e0a04651c1c41ca4418afe8cd22abc272272ef0e7dec70d1a81c0bcb4a36c0c7d3666eb7f32e43de
|
data/CHANGELOG.md
CHANGED
data/lib/probot/version.rb
CHANGED
data/lib/probot.rb
CHANGED
@@ -2,6 +2,7 @@
|
|
2
2
|
|
3
3
|
require "uri"
|
4
4
|
require "net/http"
|
5
|
+
require_relative "probot/version"
|
5
6
|
|
6
7
|
# https://moz.com/learn/seo/robotstxt
|
7
8
|
# https://stackoverflow.com/questions/45293419/order-of-directives-in-robots-txt-do-they-overwrite-each-other-or-complement-ea
|
@@ -30,6 +31,7 @@ class Probot
|
|
30
31
|
@current_agents = ["*"]
|
31
32
|
@current_agents.each { |agent| @rules[agent] ||= {"disallow" => [], "allow" => [], "crawl_delay" => 0} }
|
32
33
|
@sitemaps = []
|
34
|
+
|
33
35
|
@site = URI(data) if data.start_with?("http")
|
34
36
|
@doc = @site.nil? ? data : fetch_robots_txt(@site)
|
35
37
|
parse(@doc)
|
@@ -91,7 +93,9 @@ class Probot
|
|
91
93
|
|
92
94
|
# All Regex characters are escaped, then we unescape * and $ as they may used in robots.txt
|
93
95
|
if data.allow? || data.disallow?
|
94
|
-
@current_agents.each
|
96
|
+
@current_agents.each do |agent|
|
97
|
+
rules[agent][data.key] << Regexp.new(Regexp.escape(data.value).gsub('\*', ".*").gsub('\$', "$")) unless data.value.nil?
|
98
|
+
end
|
95
99
|
|
96
100
|
# When user-agent strings are found on consecutive lines, they are considered to be part of the same record. Google ignores crawl_delay.
|
97
101
|
subsequent_agent = false
|
@@ -127,6 +131,8 @@ class Probot
|
|
127
131
|
|
128
132
|
def clean_value = raw_value.split("#").first&.strip
|
129
133
|
|
134
|
+
def clean_url = clean_value&.then { URI(_1).to_s }
|
135
|
+
|
130
136
|
def agent? = key == "user-agent"
|
131
137
|
|
132
138
|
def disallow? = key == "disallow"
|
@@ -139,11 +145,13 @@ class Probot
|
|
139
145
|
|
140
146
|
def value
|
141
147
|
return clean_value.to_f if crawl_delay?
|
142
|
-
return
|
148
|
+
return clean_url if disallow? || allow?
|
143
149
|
|
144
150
|
raw_value
|
145
151
|
rescue URI::InvalidURIError
|
146
152
|
raw_value
|
153
|
+
rescue ArgumentError
|
154
|
+
raw_value
|
147
155
|
end
|
148
156
|
end
|
149
157
|
|
data/probot.gemspec
CHANGED
@@ -13,6 +13,7 @@ Gem::Specification.new do |spec|
|
|
13
13
|
spec.homepage = "http://github.com/dkam/probot"
|
14
14
|
spec.license = "MIT"
|
15
15
|
spec.required_ruby_version = ">= 3.0"
|
16
|
+
spec.platform = Gem::Platform::RUBY
|
16
17
|
|
17
18
|
spec.metadata["homepage_uri"] = spec.homepage
|
18
19
|
spec.metadata["source_code_uri"] = "http://github.com/dkam/probot"
|
@@ -29,4 +30,5 @@ Gem::Specification.new do |spec|
|
|
29
30
|
spec.bindir = "exe"
|
30
31
|
spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
|
31
32
|
spec.require_paths = ["lib"]
|
33
|
+
spec.add_development_dependency "debug"
|
32
34
|
end
|
metadata
CHANGED
@@ -1,15 +1,28 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: probot
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dan Milne
|
8
|
-
autorequire:
|
9
8
|
bindir: exe
|
10
9
|
cert_chain: []
|
11
|
-
date:
|
12
|
-
dependencies:
|
10
|
+
date: 2024-12-23 00:00:00.000000000 Z
|
11
|
+
dependencies:
|
12
|
+
- !ruby/object:Gem::Dependency
|
13
|
+
name: debug
|
14
|
+
requirement: !ruby/object:Gem::Requirement
|
15
|
+
requirements:
|
16
|
+
- - ">="
|
17
|
+
- !ruby/object:Gem::Version
|
18
|
+
version: '0'
|
19
|
+
type: :development
|
20
|
+
prerelease: false
|
21
|
+
version_requirements: !ruby/object:Gem::Requirement
|
22
|
+
requirements:
|
23
|
+
- - ">="
|
24
|
+
- !ruby/object:Gem::Version
|
25
|
+
version: '0'
|
13
26
|
description: A fully featured robots.txt parser.
|
14
27
|
email:
|
15
28
|
- d@nmilne.com
|
@@ -33,7 +46,6 @@ metadata:
|
|
33
46
|
homepage_uri: http://github.com/dkam/probot
|
34
47
|
source_code_uri: http://github.com/dkam/probot
|
35
48
|
changelog_uri: http://github.com/dkam/probot/CHANGELOG.md
|
36
|
-
post_install_message:
|
37
49
|
rdoc_options: []
|
38
50
|
require_paths:
|
39
51
|
- lib
|
@@ -48,8 +60,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
48
60
|
- !ruby/object:Gem::Version
|
49
61
|
version: '0'
|
50
62
|
requirements: []
|
51
|
-
rubygems_version: 3.
|
52
|
-
signing_key:
|
63
|
+
rubygems_version: 3.6.0
|
53
64
|
specification_version: 4
|
54
65
|
summary: A robots.txt parser.
|
55
66
|
test_files: []
|