probot 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4f0fd52daf4e0752bc505a323bc25c87f2e80aa06c09a88652af1c0a1cb14281
4
- data.tar.gz: 1f26a54f6a78deb956b0aaf120b8484ad926b1e7427f1a0bccb4e01b3a2e92cf
3
+ metadata.gz: 5a82d5e6f6baf600f7f6c22cf5358cefaed6ce9bf1119298c98245e440296c2e
4
+ data.tar.gz: c791777dfc59a614f7797b63765cb9d63d39ec732feab6df754025782e023825
5
5
  SHA512:
6
- metadata.gz: 15076c129a6988ff8edb08dfdb5378de07daf9d81fb942bc4b6ffd4b0b752d3434ca357e5287e40d8242c0401fd1e85c72926d6b65f8df5f69dd0c5569bf5b9d
7
- data.tar.gz: 462bd66a4030edac2b06c3ab95cdfe36e6bfc917e9ccd2ccbe8be8d9fd6c7c08867ce9bff20fc1e9daa123498159bf921023f0b020a36fd470274e129b63dfe5
6
+ metadata.gz: ada3169c4a078fe19a526ee07563ba355a6032d8674d70487db3ab6c14aabf65d518d989f3e3d5f6e7d4fbc55b1920b27d7b7828129412ae4d81f76b8f90df67
7
+ data.tar.gz: 9487fc153c81dc99ba73ec8a1330688f46b829e2e62db115e0a04651c1c41ca4418afe8cd22abc272272ef0e7dec70d1a81c0bcb4a36c0c7d3666eb7f32e43de
@@ -1,3 +1,3 @@
1
1
  class Probot
2
- VERSION = "0.4.0"
2
+ VERSION = "0.5.0"
3
3
  end
data/lib/probot.rb CHANGED
@@ -31,6 +31,7 @@ class Probot
31
31
  @current_agents = ["*"]
32
32
  @current_agents.each { |agent| @rules[agent] ||= {"disallow" => [], "allow" => [], "crawl_delay" => 0} }
33
33
  @sitemaps = []
34
+
34
35
  @site = URI(data) if data.start_with?("http")
35
36
  @doc = @site.nil? ? data : fetch_robots_txt(@site)
36
37
  parse(@doc)
@@ -92,7 +93,9 @@ class Probot
92
93
 
93
94
  # All Regex characters are escaped, then we unescape * and $ as they may be used in robots.txt
94
95
  if data.allow? || data.disallow?
95
- @current_agents.each { |agent| rules[agent][data.key] << Regexp.new(Regexp.escape(data.value).gsub('\*', ".*").gsub('\$', "$")) }
96
+ @current_agents.each do |agent|
97
+ rules[agent][data.key] << Regexp.new(Regexp.escape(data.value).gsub('\*', ".*").gsub('\$', "$")) unless data.value.nil?
98
+ end
96
99
 
97
100
  # When user-agent strings are found on consecutive lines, they are considered to be part of the same record. Google ignores crawl_delay.
98
101
  subsequent_agent = false
@@ -128,6 +131,8 @@ class Probot
128
131
 
129
132
  def clean_value = raw_value.split("#").first&.strip
130
133
 
134
+ def clean_url = clean_value&.then { URI(_1).to_s }
135
+
131
136
  def agent? = key == "user-agent"
132
137
 
133
138
  def disallow? = key == "disallow"
@@ -140,11 +145,13 @@ class Probot
140
145
 
141
146
  def value
142
147
  return clean_value.to_f if crawl_delay?
143
- return URI(clean_value).to_s if disallow? || allow?
148
+ return clean_url if disallow? || allow?
144
149
 
145
150
  raw_value
146
151
  rescue URI::InvalidURIError
147
152
  raw_value
153
+ rescue ArgumentError
154
+ raw_value
148
155
  end
149
156
  end
150
157
 
data/probot.gemspec CHANGED
@@ -13,6 +13,7 @@ Gem::Specification.new do |spec|
13
13
  spec.homepage = "http://github.com/dkam/probot"
14
14
  spec.license = "MIT"
15
15
  spec.required_ruby_version = ">= 3.0"
16
+ spec.platform = Gem::Platform::RUBY
16
17
 
17
18
  spec.metadata["homepage_uri"] = spec.homepage
18
19
  spec.metadata["source_code_uri"] = "http://github.com/dkam/probot"
@@ -29,4 +30,5 @@ Gem::Specification.new do |spec|
29
30
  spec.bindir = "exe"
30
31
  spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
31
32
  spec.require_paths = ["lib"]
33
+ spec.add_development_dependency "debug"
32
34
  end
metadata CHANGED
@@ -1,15 +1,28 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: probot
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dan Milne
8
- autorequire:
9
8
  bindir: exe
10
9
  cert_chain: []
11
- date: 2024-10-30 00:00:00.000000000 Z
12
- dependencies: []
10
+ date: 2024-12-23 00:00:00.000000000 Z
11
+ dependencies:
12
+ - !ruby/object:Gem::Dependency
13
+ name: debug
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - ">="
17
+ - !ruby/object:Gem::Version
18
+ version: '0'
19
+ type: :development
20
+ prerelease: false
21
+ version_requirements: !ruby/object:Gem::Requirement
22
+ requirements:
23
+ - - ">="
24
+ - !ruby/object:Gem::Version
25
+ version: '0'
13
26
  description: A fully featured robots.txt parser.
14
27
  email:
15
28
  - d@nmilne.com
@@ -33,7 +46,6 @@ metadata:
33
46
  homepage_uri: http://github.com/dkam/probot
34
47
  source_code_uri: http://github.com/dkam/probot
35
48
  changelog_uri: http://github.com/dkam/probot/CHANGELOG.md
36
- post_install_message:
37
49
  rdoc_options: []
38
50
  require_paths:
39
51
  - lib
@@ -48,8 +60,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
48
60
  - !ruby/object:Gem::Version
49
61
  version: '0'
50
62
  requirements: []
51
- rubygems_version: 3.5.22
52
- signing_key:
63
+ rubygems_version: 3.6.0
53
64
  specification_version: 4
54
65
  summary: A robots.txt parser.
55
66
  test_files: []