probot 0.3.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 54e371737751fd5162dbcb07fbb7f24906f3d9684c17cec9c54f48ab5c99c35f
- data.tar.gz: 5babb13f06601f07e58613649ddbc1396236c685e3858c4816f45fba95e95038
+ metadata.gz: 5a82d5e6f6baf600f7f6c22cf5358cefaed6ce9bf1119298c98245e440296c2e
+ data.tar.gz: c791777dfc59a614f7797b63765cb9d63d39ec732feab6df754025782e023825
  SHA512:
- metadata.gz: c1250d28b604d70abedc9465c755f04e20c0318169fef2beab5c220692abadad073467130fd8f9d3c81ca443e3d659dded8eeec6af12403b594d9f3ad9a610af
- data.tar.gz: 53f3d3fadba21111fd84c9a90501d22ea3456002966ce77ecff9757bff1d1544fce07d0971b3a30f0bf3683be21162771313ee6d9c1ed1b676fe11f63ccdbf03
+ metadata.gz: ada3169c4a078fe19a526ee07563ba355a6032d8674d70487db3ab6c14aabf65d518d989f3e3d5f6e7d4fbc55b1920b27d7b7828129412ae4d81f76b8f90df67
+ data.tar.gz: 9487fc153c81dc99ba73ec8a1330688f46b829e2e62db115e0a04651c1c41ca4418afe8cd22abc272272ef0e7dec70d1a81c0bcb4a36c0c7d3666eb7f32e43de
data/CHANGELOG.md CHANGED
@@ -1,5 +1,9 @@
  ## [Unreleased]
 
+ ## [0.4.0] - 2024-10-31
+
+ - Ensure VERISON is available
+
  ## [0.3.0] - 2023-09-18
 
  - Only return unique sitemaps.
data/lib/probot/version.rb CHANGED
@@ -1,3 +1,3 @@
  class Probot
- VERSION = "0.3.0"
+ VERSION = "0.5.0"
  end
data/lib/probot.rb CHANGED
@@ -2,6 +2,7 @@
 
  require "uri"
  require "net/http"
+ require_relative "probot/version"
 
  # https://moz.com/learn/seo/robotstxt
  # https://stackoverflow.com/questions/45293419/order-of-directives-in-robots-txt-do-they-overwrite-each-other-or-complement-ea
@@ -30,6 +31,7 @@ class Probot
  @current_agents = ["*"]
  @current_agents.each { |agent| @rules[agent] ||= {"disallow" => [], "allow" => [], "crawl_delay" => 0} }
  @sitemaps = []
+
  @site = URI(data) if data.start_with?("http")
  @doc = @site.nil? ? data : fetch_robots_txt(@site)
  parse(@doc)
@@ -91,7 +93,9 @@ class Probot
 
  # All Regex characters are escaped, then we unescape * and $ as they may used in robots.txt
  if data.allow? || data.disallow?
- @current_agents.each { |agent| rules[agent][data.key] << Regexp.new(Regexp.escape(data.value).gsub('\*', ".*").gsub('\$', "$")) }
+ @current_agents.each do |agent|
+ rules[agent][data.key] << Regexp.new(Regexp.escape(data.value).gsub('\*', ".*").gsub('\$', "$")) unless data.value.nil?
+ end
 
  # When user-agent strings are found on consecutive lines, they are considered to be part of the same record. Google ignores crawl_delay.
  subsequent_agent = false
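
The rule-building change above wraps the regexp construction in a guard so directives with no value are skipped. A minimal sketch of the conversion the parser performs, using a hypothetical Disallow value:

    # Hypothetical directive: "Disallow: /private/*.html$"
    value = "/private/*.html$"

    # Escape all regexp metacharacters, then restore the two wildcards
    # robots.txt supports: * (any run of characters) and $ (end of URL).
    pattern = Regexp.new(Regexp.escape(value).gsub('\*', ".*").gsub('\$', "$"))

    pattern.match?("/private/report.html")  # => true
    pattern.match?("/private/report.pdf")   # => false

    # A bare "Disallow:" line has a nil value; the new `unless data.value.nil?`
    # guard skips it instead of passing nil to Regexp.escape.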
@@ -127,6 +131,8 @@ class Probot
 
  def clean_value = raw_value.split("#").first&.strip
 
+ def clean_url = clean_value&.then { URI(_1).to_s }
+
  def agent? = key == "user-agent"
 
  def disallow? = key == "disallow"
@@ -139,11 +145,13 @@
 
  def value
  return clean_value.to_f if crawl_delay?
- return URI(clean_value).to_s if disallow? || allow?
+ return clean_url if disallow? || allow?
 
  raw_value
  rescue URI::InvalidURIError
  raw_value
+ rescue ArgumentError
+ raw_value
  end
  end
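
Allow and Disallow values now go through clean_url, and value rescues ArgumentError in addition to URI::InvalidURIError, so a directive URI() cannot parse falls back to the raw text rather than aborting parsing. A rough illustration of that fallback, with a made-up directive value:

    require "uri"

    raw = "/docs/a page with spaces"   # hypothetical malformed Disallow value
    value = begin
      URI(raw).to_s
    rescue URI::InvalidURIError, ArgumentError
      raw                              # fall back to the unparsed text, as #value does
    end
    value  # => "/docs/a page with spaces"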
 
data/probot.gemspec CHANGED
@@ -13,6 +13,7 @@ Gem::Specification.new do |spec|
  spec.homepage = "http://github.com/dkam/probot"
  spec.license = "MIT"
  spec.required_ruby_version = ">= 3.0"
+ spec.platform = Gem::Platform::RUBY
 
  spec.metadata["homepage_uri"] = spec.homepage
  spec.metadata["source_code_uri"] = "http://github.com/dkam/probot"
@@ -29,4 +30,5 @@ Gem::Specification.new do |spec|
  spec.bindir = "exe"
  spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]
+ spec.add_development_dependency "debug"
  end
metadata CHANGED
@@ -1,15 +1,28 @@
  --- !ruby/object:Gem::Specification
  name: probot
  version: !ruby/object:Gem::Version
- version: 0.3.0
+ version: 0.5.0
  platform: ruby
  authors:
  - Dan Milne
- autorequire:
  bindir: exe
  cert_chain: []
- date: 2023-09-18 00:00:00.000000000 Z
- dependencies: []
+ date: 2024-12-23 00:00:00.000000000 Z
+ dependencies:
+ - !ruby/object:Gem::Dependency
+ name: debug
+ requirement: !ruby/object:Gem::Requirement
+ requirements:
+ - - ">="
+ - !ruby/object:Gem::Version
+ version: '0'
+ type: :development
+ prerelease: false
+ version_requirements: !ruby/object:Gem::Requirement
+ requirements:
+ - - ">="
+ - !ruby/object:Gem::Version
+ version: '0'
  description: A fully featured robots.txt parser.
  email:
  - d@nmilne.com
@@ -33,7 +46,6 @@ metadata:
  homepage_uri: http://github.com/dkam/probot
  source_code_uri: http://github.com/dkam/probot
  changelog_uri: http://github.com/dkam/probot/CHANGELOG.md
- post_install_message:
  rdoc_options: []
  require_paths:
  - lib
@@ -48,8 +60,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
  - !ruby/object:Gem::Version
  version: '0'
  requirements: []
- rubygems_version: 3.4.19
- signing_key:
+ rubygems_version: 3.6.0
  specification_version: 4
  summary: A robots.txt parser.
  test_files: []