probot 0.3.0 → 0.5.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
-   metadata.gz: 54e371737751fd5162dbcb07fbb7f24906f3d9684c17cec9c54f48ab5c99c35f
-   data.tar.gz: 5babb13f06601f07e58613649ddbc1396236c685e3858c4816f45fba95e95038
+   metadata.gz: 5a82d5e6f6baf600f7f6c22cf5358cefaed6ce9bf1119298c98245e440296c2e
+   data.tar.gz: c791777dfc59a614f7797b63765cb9d63d39ec732feab6df754025782e023825
  SHA512:
-   metadata.gz: c1250d28b604d70abedc9465c755f04e20c0318169fef2beab5c220692abadad073467130fd8f9d3c81ca443e3d659dded8eeec6af12403b594d9f3ad9a610af
-   data.tar.gz: 53f3d3fadba21111fd84c9a90501d22ea3456002966ce77ecff9757bff1d1544fce07d0971b3a30f0bf3683be21162771313ee6d9c1ed1b676fe11f63ccdbf03
+   metadata.gz: ada3169c4a078fe19a526ee07563ba355a6032d8674d70487db3ab6c14aabf65d518d989f3e3d5f6e7d4fbc55b1920b27d7b7828129412ae4d81f76b8f90df67
+   data.tar.gz: 9487fc153c81dc99ba73ec8a1330688f46b829e2e62db115e0a04651c1c41ca4418afe8cd22abc272272ef0e7dec70d1a81c0bcb4a36c0c7d3666eb7f32e43de
data/CHANGELOG.md CHANGED
@@ -1,5 +1,9 @@
  ## [Unreleased]

+ ## [0.4.0] - 2024-10-31
+
+ - Ensure VERSION is available
+
  ## [0.3.0] - 2023-09-18

  - Only return unique sitemaps.
data/lib/probot/version.rb CHANGED
@@ -1,3 +1,3 @@
  class Probot
-   VERSION = "0.3.0"
+   VERSION = "0.5.0"
  end
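
With lib/probot.rb now loading the version file via require_relative "probot/version" (next section), the bumped constant is defined as soon as the gem is required; the changelog entry "Ensure VERSION is available" suggests it was not reachable from a plain require before. A minimal check, assuming the released 0.5.0 gem is installed:

    require "probot"

    # The constant now loads together with the library rather than only when
    # probot/version is required explicitly.
    puts Probot::VERSION  # => "0.5.0"
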
data/lib/probot.rb CHANGED
@@ -2,6 +2,7 @@
 
  require "uri"
  require "net/http"
+ require_relative "probot/version"
 
  # https://moz.com/learn/seo/robotstxt
  # https://stackoverflow.com/questions/45293419/order-of-directives-in-robots-txt-do-they-overwrite-each-other-or-complement-ea
@@ -30,6 +31,7 @@ class Probot
  @current_agents = ["*"]
  @current_agents.each { |agent| @rules[agent] ||= {"disallow" => [], "allow" => [], "crawl_delay" => 0} }
  @sitemaps = []
+
  @site = URI(data) if data.start_with?("http")
  @doc = @site.nil? ? data : fetch_robots_txt(@site)
  parse(@doc)
@@ -91,7 +93,9 @@ class Probot
 
  # All Regex characters are escaped, then we unescape * and $ as they may be used in robots.txt
  if data.allow? || data.disallow?
-   @current_agents.each { |agent| rules[agent][data.key] << Regexp.new(Regexp.escape(data.value).gsub('\*', ".*").gsub('\$', "$")) }
+   @current_agents.each do |agent|
+     rules[agent][data.key] << Regexp.new(Regexp.escape(data.value).gsub('\*', ".*").gsub('\$', "$")) unless data.value.nil?
+   end
 
  # When user-agent strings are found on consecutive lines, they are considered to be part of the same record. Google ignores crawl_delay.
  subsequent_agent = false
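
The comment in this hunk explains the pattern handling: every regex metacharacter in the rule value is escaped, then "*" and "$" are unescaped so they keep their robots.txt meanings (any sequence of characters, end of URL). A standalone sketch of that transformation, using a made-up pattern:

    # Hypothetical robots.txt value, pushed through the same escape/unescape chain.
    pattern = "/search/*.php$"
    re = Regexp.new(Regexp.escape(pattern).gsub('\*', ".*").gsub('\$', "$"))

    re.match?("/search/results.php")         # => true  ("*" matched "results")
    re.match?("/search/results.php?page=2")  # => false ("$" anchors the end)
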
@@ -127,6 +131,8 @@ class Probot
 
  def clean_value = raw_value.split("#").first&.strip
 
+ def clean_url = clean_value&.then { URI(_1).to_s }
+
  def agent? = key == "user-agent"
 
  def disallow? = key == "disallow"
@@ -139,11 +145,13 @@ class Probot
 
  def value
    return clean_value.to_f if crawl_delay?
-   return URI(clean_value).to_s if disallow? || allow?
+   return clean_url if disallow? || allow?
 
    raw_value
  rescue URI::InvalidURIError
    raw_value
+ rescue ArgumentError
+   raw_value
  end
  end
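
Taken together, the parse and value changes above make the parser tolerant of directives it previously tripped on: a bare "Disallow:" line now yields a nil value and is skipped rather than becoming a rule, and a path that URI() rejects with ArgumentError falls back to the raw string, just as URI::InvalidURIError already did. A rough sketch, assuming Probot.new accepts raw robots.txt text (as the initialize hunk shows) and exposes the parsed rules through a rules reader:

    require "probot"

    robots = <<~TXT
      User-agent: *
      Disallow:
      Disallow: /private/
    TXT

    r = Probot.new(robots)
    # Only the non-empty directive becomes a rule; the bare "Disallow:" line is
    # ignored because its value is nil.
    p r.rules["*"]["disallow"]  # => [/\/private\//]
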
data/probot.gemspec CHANGED
@@ -13,6 +13,7 @@ Gem::Specification.new do |spec|
  spec.homepage = "http://github.com/dkam/probot"
  spec.license = "MIT"
  spec.required_ruby_version = ">= 3.0"
+ spec.platform = Gem::Platform::RUBY
 
  spec.metadata["homepage_uri"] = spec.homepage
  spec.metadata["source_code_uri"] = "http://github.com/dkam/probot"
@@ -29,4 +30,5 @@ Gem::Specification.new do |spec|
  spec.bindir = "exe"
  spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]
+ spec.add_development_dependency "debug"
  end
metadata CHANGED
@@ -1,15 +1,28 @@
  --- !ruby/object:Gem::Specification
  name: probot
  version: !ruby/object:Gem::Version
-   version: 0.3.0
+   version: 0.5.0
  platform: ruby
  authors:
  - Dan Milne
- autorequire:
  bindir: exe
  cert_chain: []
- date: 2023-09-18 00:00:00.000000000 Z
- dependencies: []
+ date: 2024-12-23 00:00:00.000000000 Z
+ dependencies:
+ - !ruby/object:Gem::Dependency
+   name: debug
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: '0'
+   type: :development
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: '0'
  description: A fully featured robots.txt parser.
  email:
  - d@nmilne.com
@@ -33,7 +46,6 @@ metadata:
    homepage_uri: http://github.com/dkam/probot
    source_code_uri: http://github.com/dkam/probot
    changelog_uri: http://github.com/dkam/probot/CHANGELOG.md
- post_install_message:
  rdoc_options: []
  require_paths:
  - lib
@@ -48,8 +60,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
      - !ruby/object:Gem::Version
        version: '0'
  requirements: []
- rubygems_version: 3.4.19
- signing_key:
+ rubygems_version: 3.6.0
  specification_version: 4
  summary: A robots.txt parser.
  test_files: []