probot 0.1.0 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 65dd2e16a696a2500937d32ae744262f74be414620e5505838fde49b8847d34d
4
- data.tar.gz: 5f50733228c4c4eec37218eda00ceb4c2dd7eca801df6dd7345dd9e7edc94516
3
+ metadata.gz: 54e371737751fd5162dbcb07fbb7f24906f3d9684c17cec9c54f48ab5c99c35f
4
+ data.tar.gz: 5babb13f06601f07e58613649ddbc1396236c685e3858c4816f45fba95e95038
5
5
  SHA512:
6
- metadata.gz: 33a44b9aba61643781e697e1b0fd54ac8d1afb40a61b1e2dc13d174aeea1b4ec1e6c2b762d122a2e1f5e50b447881b28c57cedaaff9a05f1174ce0d531c7f605
7
- data.tar.gz: 85615bf3573de8af826308f5bc36df231581de0da4346081a44632cd5882df8c7574ce9889f586dc7fdde2c881a6da13edf49b806bc9f76d0676b20cd516a789
6
+ metadata.gz: c1250d28b604d70abedc9465c755f04e20c0318169fef2beab5c220692abadad073467130fd8f9d3c81ca443e3d659dded8eeec6af12403b594d9f3ad9a610af
7
+ data.tar.gz: 53f3d3fadba21111fd84c9a90501d22ea3456002966ce77ecff9757bff1d1544fce07d0971b3a30f0bf3683be21162771313ee6d9c1ed1b676fe11f63ccdbf03
data/CHANGELOG.md CHANGED
@@ -1,5 +1,12 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ## [0.3.0] - 2023-09-18
4
+
5
+ - Only return unique sitemaps.
6
+
7
+ ## [0.2.0] - 2023-09-10
8
+
9
+ - Correctly handle multiple sitemaps + tests.
3
10
  ## [0.1.0] - 2023-09-09
4
11
 
5
12
  - Initial release
@@ -1,3 +1,3 @@
1
1
  class Probot
2
- VERSION = "0.1.0"
2
+ VERSION = "0.3.0"
3
3
  end
data/lib/probot.rb CHANGED
@@ -19,8 +19,8 @@ require "net/http"
19
19
  # Find the most specific rule for a given URL. We use the length of the regexp as a proxy for specificity.
20
20
 
21
21
  class Probot
22
- attr_reader :rules, :sitemap, :doc
23
- attr_accessor :agent
22
+ attr_reader :rules, :doc
23
+ attr_accessor :agent, :sitemaps, :site
24
24
 
25
25
  def initialize(data, agent: "*")
26
26
  raise ArgumentError, "The first argument must be a string" unless data.is_a?(String)
@@ -30,8 +30,8 @@ class Probot
30
30
  @current_agents = ["*"]
31
31
  @current_agents.each { |agent| @rules[agent] ||= {"disallow" => [], "allow" => [], "crawl_delay" => 0} }
32
32
  @sitemaps = []
33
-
34
- @doc = data.start_with?("http") ? fetch_robots_txt(data) : data
33
+ @site = URI(data) if data.start_with?("http")
34
+ @doc = @site.nil? ? data : fetch_robots_txt(@site)
35
35
  parse(@doc)
36
36
  end
37
37
 
@@ -90,11 +90,11 @@ class Probot
90
90
  end
91
91
 
92
92
  # All Regex characters are escaped, then we unescape * and $ as they may be used in robots.txt
93
-
94
93
  if data.allow? || data.disallow?
95
94
  @current_agents.each { |agent| rules[agent][data.key] << Regexp.new(Regexp.escape(data.value).gsub('\*', ".*").gsub('\$', "$")) }
96
95
 
97
- subsequent_agent = false # When user-agent strings are found on consecutive lines, they are considered to be part of the same record. Google ignores crawl_delay.
96
+ # When user-agent strings are found on consecutive lines, they are considered to be part of the same record. Google ignores crawl_delay.
97
+ subsequent_agent = false
98
98
  next
99
99
  end
100
100
 
@@ -103,8 +103,12 @@ class Probot
103
103
  next
104
104
  end
105
105
 
106
+ # Ensure we have an absolute URL
106
107
  if data.sitemap?
107
- @sitemap = URI(data.value).path
108
+ sitemap_uri = URI(data.value)
109
+ sitemap_uri = sitemap_uri.host.nil? ? URI.join(*[site, sitemap_uri].compact) : sitemap_uri
110
+ @sitemaps << sitemap_uri.to_s
111
+ @sitemaps.uniq!
108
112
  next
109
113
  end
110
114
 
data/probot.gemspec CHANGED
@@ -29,10 +29,4 @@ Gem::Specification.new do |spec|
29
29
  spec.bindir = "exe"
30
30
  spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
31
31
  spec.require_paths = ["lib"]
32
-
33
- # Uncomment to register a new dependency of your gem
34
- # spec.add_dependency "example-gem", "~> 1.0"
35
-
36
- # For more information and examples about making a new gem, check out our
37
- # guide at: https://bundler.io/guides/creating_gem.html
38
32
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: probot
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dan Milne
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-09-10 00:00:00.000000000 Z
11
+ date: 2023-09-18 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: A fully featured robots.txt parser.
14
14
  email: