probot 0.3.0 → 0.5.0
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/lib/probot/version.rb +1 -1
- data/lib/probot.rb +10 -2
- data/probot.gemspec +2 -0
- metadata +18 -7
checksums.yaml CHANGED
```diff
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 5a82d5e6f6baf600f7f6c22cf5358cefaed6ce9bf1119298c98245e440296c2e
+  data.tar.gz: c791777dfc59a614f7797b63765cb9d63d39ec732feab6df754025782e023825
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: ada3169c4a078fe19a526ee07563ba355a6032d8674d70487db3ab6c14aabf65d518d989f3e3d5f6e7d4fbc55b1920b27d7b7828129412ae4d81f76b8f90df67
+  data.tar.gz: 9487fc153c81dc99ba73ec8a1330688f46b829e2e62db115e0a04651c1c41ca4418afe8cd22abc272272ef0e7dec70d1a81c0bcb4a36c0c7d3666eb7f32e43de
```
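These digests cover the two archives packed inside the `.gem` file itself. For reference, a minimal sketch that recomputes the SHA256 values from a locally downloaded copy (the filename `probot-0.5.0.gem` and its local path are assumptions):

```ruby
require "digest"
require "rubygems/package"

# A .gem file is a tar archive whose entries include metadata.gz and
# data.tar.gz; checksums.yaml records their SHA256/SHA512 digests.
File.open("probot-0.5.0.gem", "rb") do |io|
  Gem::Package::TarReader.new(io).each do |entry|
    next unless %w[metadata.gz data.tar.gz].include?(entry.full_name)
    puts "#{entry.full_name}: #{Digest::SHA256.hexdigest(entry.read)}"
  end
end
```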
data/CHANGELOG.md CHANGED
data/lib/probot/version.rb CHANGED
data/lib/probot.rb CHANGED
```diff
@@ -2,6 +2,7 @@
 
 require "uri"
 require "net/http"
+require_relative "probot/version"
 
 # https://moz.com/learn/seo/robotstxt
 # https://stackoverflow.com/questions/45293419/order-of-directives-in-robots-txt-do-they-overwrite-each-other-or-complement-ea
@@ -30,6 +31,7 @@ class Probot
     @current_agents = ["*"]
     @current_agents.each { |agent| @rules[agent] ||= {"disallow" => [], "allow" => [], "crawl_delay" => 0} }
     @sitemaps = []
+
     @site = URI(data) if data.start_with?("http")
     @doc = @site.nil? ? data : fetch_robots_txt(@site)
     parse(@doc)
@@ -91,7 +93,9 @@ class Probot
 
       # All Regex characters are escaped, then we unescape * and $ as they may used in robots.txt
       if data.allow? || data.disallow?
-        @current_agents.each
+        @current_agents.each do |agent|
+          rules[agent][data.key] << Regexp.new(Regexp.escape(data.value).gsub('\*', ".*").gsub('\$', "$")) unless data.value.nil?
+        end
 
        # When user-agent strings are found on consecutive lines, they are considered to be part of the same record. Google ignores crawl_delay.
        subsequent_agent = false
```
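The expanded block is the heart of the parser: each allow/disallow pattern is compiled to a regex by escaping everything, then restoring the two wildcards robots.txt supports. A standalone sketch of that translation (the sample pattern is invented):

```ruby
# * matches any run of characters, $ anchors the end of the URL;
# everything else is treated literally after Regexp.escape.
pattern = "/private/*.php$"
regex = Regexp.new(Regexp.escape(pattern).gsub('\*', ".*").gsub('\$', "$"))

regex.match?("/private/old/index.php") # => true  ("*" spans "old/index")
regex.match?("/private/index.php.bak") # => false ("$" requires .php at the end)
```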
```diff
@@ -127,6 +131,8 @@ class Probot
 
     def clean_value = raw_value.split("#").first&.strip
 
+    def clean_url = clean_value&.then { URI(_1).to_s }
+
     def agent? = key == "user-agent"
 
     def disallow? = key == "disallow"
@@ -139,11 +145,13 @@ class Probot
 
     def value
       return clean_value.to_f if crawl_delay?
-      return
+      return clean_url if disallow? || allow?
 
       raw_value
     rescue URI::InvalidURIError
       raw_value
+    rescue ArgumentError
+      raw_value
     end
   end
 
```
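With `clean_url` in place, `value` returns a float for crawl-delay lines, a URI-normalized path for allow/disallow lines, and falls back to `raw_value` when parsing fails; the added `rescue ArgumentError` presumably covers inputs where `URI()` raises ArgumentError rather than `URI::InvalidURIError`. A sketch of the underlying behavior (sample inputs invented):

```ruby
require "uri"

URI("/tmp/").to_s  # => "/tmp/" (a clean Disallow path passes through unchanged)
begin
  URI("not a uri") # spaces are invalid, so URI() raises
rescue URI::InvalidURIError
  "not a uri"      # value falls back to raw_value, as in the rescue above
end
```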
data/probot.gemspec CHANGED
```diff
@@ -13,6 +13,7 @@ Gem::Specification.new do |spec|
   spec.homepage = "http://github.com/dkam/probot"
   spec.license = "MIT"
   spec.required_ruby_version = ">= 3.0"
+  spec.platform = Gem::Platform::RUBY
 
   spec.metadata["homepage_uri"] = spec.homepage
   spec.metadata["source_code_uri"] = "http://github.com/dkam/probot"
@@ -29,4 +30,5 @@ Gem::Specification.new do |spec|
   spec.bindir = "exe"
   spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
   spec.require_paths = ["lib"]
+  spec.add_development_dependency "debug"
 end
```
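Both additions surface in the built gem's metadata, diffed below. A quick way to confirm the new development dependency against an installed copy (assumes probot 0.5.0 is installed locally):

```ruby
require "rubygems"

spec = Gem::Specification.find_by_name("probot")
spec.development_dependencies.map(&:name) # => ["debug"]
```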
metadata CHANGED
```diff
@@ -1,15 +1,28 @@
 --- !ruby/object:Gem::Specification
 name: probot
 version: !ruby/object:Gem::Version
-  version: 0.3.0
+  version: 0.5.0
 platform: ruby
 authors:
 - Dan Milne
-autorequire:
 bindir: exe
 cert_chain: []
-date:
-dependencies:
+date: 2024-12-23 00:00:00.000000000 Z
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: debug
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
 description: A fully featured robots.txt parser.
 email:
 - d@nmilne.com
@@ -33,7 +46,6 @@ metadata:
   homepage_uri: http://github.com/dkam/probot
   source_code_uri: http://github.com/dkam/probot
   changelog_uri: http://github.com/dkam/probot/CHANGELOG.md
-post_install_message:
 rdoc_options: []
 require_paths:
 - lib
@@ -48,8 +60,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
   - !ruby/object:Gem::Version
     version: '0'
 requirements: []
-rubygems_version: 3.
-signing_key:
+rubygems_version: 3.6.0
 specification_version: 4
 summary: A robots.txt parser.
 test_files: []
```
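Everything in this file is generated: `metadata` is the gem's `Gem::Specification` serialized at build time, which is why the date, the `rubygems_version`, and the dependency entries changed without any hand edits. A sketch that reads it back out of the package (local `.gem` path assumed, as before):

```ruby
require "rubygems/package"

spec = Gem::Package.new("probot-0.5.0.gem").spec
spec.version.to_s             # => "0.5.0"
spec.dependencies.map(&:name) # => ["debug"]
```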