indexmap 0.2.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +3 -3
- data/README.md +41 -0
- data/lib/indexmap/configuration.rb +8 -0
- data/lib/indexmap/google_configuration.rb +21 -0
- data/lib/indexmap/index_now_configuration.rb +44 -0
- data/lib/indexmap/parser.rb +202 -0
- data/lib/indexmap/path.rb +42 -0
- data/lib/indexmap/pinger/base.rb +56 -0
- data/lib/indexmap/pinger/google.rb +77 -0
- data/lib/indexmap/pinger/index_now.rb +173 -0
- data/lib/indexmap/task_runner.rb +9 -0
- data/lib/indexmap/validator.rb +41 -0
- data/lib/indexmap/version.rb +1 -1
- data/lib/indexmap.rb +9 -0
- data/lib/tasks/indexmap_tasks.rake +32 -0
- data/test/indexmap/configuration_test.rb +14 -0
- data/test/indexmap/parser_test.rb +81 -0
- data/test/indexmap/path_test.rb +28 -0
- data/test/indexmap/pinger/google_test.rb +71 -0
- data/test/indexmap/pinger/index_now_test.rb +118 -0
- data/test/indexmap/validator_test.rb +56 -0
- data/test/test_helper.rb +1 -0
- metadata +70 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 52c07631052b8a72f3745d578239e22f97d047a517d7e1f9bdd8172a98cef453
|
|
4
|
+
data.tar.gz: 6ee1835d41386d143dbea600f81e48eff7d147ddd29ad806797195ff8eb3b0e1
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 6b66319af63650f80ac5139cc0502cac097ec93d9d0613f0d856db1b695aa7c6b28a7f0d525eb0513bbd2b5989becae575baf00933fcfcf2b4cceec9388a054b
|
|
7
|
+
data.tar.gz: 01130bad021a01134530cbcd1e389e1d25c69637d40fa8f31b4023fbf4015b4caaf79c55895936e76cb54e493495aa35d75a7b86d3a5b77f452b7ba945d37059
|
data/CHANGELOG.md
CHANGED
|
@@ -5,9 +5,9 @@ All notable changes to this project will be documented in this file.
|
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|
6
6
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
7
|
|
|
8
|
-
## [0.
|
|
8
|
+
## [0.3.0] - 2026-04-22
|
|
9
9
|
|
|
10
|
-
### <!--
|
|
11
|
-
-
|
|
10
|
+
### <!-- 0 -->🚀 Features
|
|
11
|
+
- expand indexmap with sitemap parsing, validation, and search engine pinging
|
|
12
12
|
|
|
13
13
|
|
data/README.md
CHANGED
|
@@ -104,6 +104,47 @@ end
|
|
|
104
104
|
|
|
105
105
|
In `:single_file` mode, `indexmap` writes a `urlset` directly to `sitemap.xml`. In the default `:index` mode, it writes a sitemap index plus child sitemap files from `sections`.
|
|
106
106
|
|
|
107
|
+
## Validation and Parsing
|
|
108
|
+
|
|
109
|
+
`indexmap` also includes small utilities for working with generated sitemap files:
|
|
110
|
+
|
|
111
|
+
```ruby
|
|
112
|
+
parser = Indexmap::Parser.new(path: Rails.public_path.join("sitemap.xml"))
|
|
113
|
+
parser.paths
|
|
114
|
+
# => ["/", "/about", "/articles/example"]
|
|
115
|
+
|
|
116
|
+
Indexmap::Validator.new.validate!
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
The built-in validator checks for:
|
|
120
|
+
|
|
121
|
+
- missing sitemap files
|
|
122
|
+
- duplicate sitemap URLs
|
|
123
|
+
- parameterized URLs in sitemap entries
|
|
124
|
+
|
|
125
|
+
## Search Engine Ping
|
|
126
|
+
|
|
127
|
+
The gem can ping Google Search Console and IndexNow once your app config provides the required credentials.
|
|
128
|
+
|
|
129
|
+
```ruby
|
|
130
|
+
Indexmap.configure do |config|
|
|
131
|
+
config.google.credentials = -> { ENV["GOOGLE_SITEMAP"] }
|
|
132
|
+
config.index_now.key = -> { ENV["INDEXNOW_KEY"] }
|
|
133
|
+
end
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
When `config.index_now.key` is set, `sitemap:create` also writes the matching `public/<key>.txt` verification file automatically.
|
|
137
|
+
|
|
138
|
+
Available rake tasks:
|
|
139
|
+
|
|
140
|
+
```bash
|
|
141
|
+
bin/rails sitemap:validate
|
|
142
|
+
bin/rails sitemap:google:ping
|
|
143
|
+
bin/rails sitemap:index_now:ping
|
|
144
|
+
bin/rails sitemap:ping
|
|
145
|
+
bin/rails sitemap:index_now:write_key
|
|
146
|
+
```
|
|
147
|
+
|
|
107
148
|
## Development
|
|
108
149
|
|
|
109
150
|
Run tests:
|
|
@@ -24,10 +24,18 @@ module Indexmap
|
|
|
24
24
|
value.nil? ? :index : value.to_sym
|
|
25
25
|
end
|
|
26
26
|
|
|
27
|
+
# Nested Google Search Console settings. The GoogleConfiguration instance is
# created on first access and the same object is returned afterwards.
def google
  return @google if @google

  @google = GoogleConfiguration.new
end
|
|
30
|
+
|
|
27
31
|
# Configured index filename, passed through `resolve` before being returned.
def index_filename
  configured = @index_filename
  resolve(configured)
end
|
|
30
34
|
|
|
35
|
+
# Nested IndexNow settings. The IndexNowConfiguration instance is created on
# first access and the same object is returned afterwards.
def index_now
  return @index_now if @index_now

  @index_now = IndexNowConfiguration.new
end
|
|
38
|
+
|
|
31
39
|
def public_path
|
|
32
40
|
value = resolve(@public_path)
|
|
33
41
|
return Pathname("public") if value.nil?
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Indexmap
  # Holds the Google-related settings (`credentials` and `property`).
  #
  # Each setting can be assigned a plain value or any object responding to
  # `call`; callables are invoked on every read, so values such as
  # `-> { ENV["GOOGLE_SITEMAP"] }` are looked up lazily.
  class GoogleConfiguration
    attr_writer :credentials, :property

    # @return [Object, nil] the configured credentials (callable results included)
    def credentials
      configured = @credentials
      resolve(configured)
    end

    # @return [Object, nil] the configured property identifier
    def property
      configured = @property
      resolve(configured)
    end

    private

    # Invokes +value+ when it is callable, otherwise returns it untouched.
    def resolve(value)
      return value unless value.respond_to?(:call)

      value.call
    end
  end
end
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Indexmap
  # Holds the IndexNow-related settings.
  #
  # Every setting accepts either a plain value or an object responding to
  # `call`; callables are invoked on each read.
  class IndexNowConfiguration
    DEFAULT_ENDPOINT = "https://api.indexnow.org"
    DEFAULT_MAX_URLS_PER_REQUEST = 500

    attr_writer :dry_run, :endpoint, :key, :key_path, :max_urls_per_request

    # @return [Boolean] true when the setting resolves to `true` or to a value
    #   whose string form is "1"
    def dry_run?
      value = resolve(@dry_run)
      value == true || value.to_s == "1"
    end

    # @return [Object] the configured endpoint, or DEFAULT_ENDPOINT when the
    #   setting is unset or blank
    def endpoint
      value = resolve(@endpoint)
      blank?(value) ? DEFAULT_ENDPOINT : value
    end

    # @return [Object, nil] the configured key, exactly as configured
    def key
      resolve(@key)
    end

    # Location of the key verification file.
    #
    # @param public_path [String, Pathname] directory used when no explicit
    #   key path is configured
    # @return [Pathname, nil] the configured path, else `<public_path>/<key>.txt`,
    #   else nil when no key is configured
    def key_path(public_path:)
      configured_path = resolve(@key_path)
      # Plain blank check: it needs no ActiveSupport and never touches the
      # filesystem, whatever type the configured path has.
      return Pathname(configured_path) unless blank?(configured_path)

      # Strip the key so the filename is built from the same trimmed value
      # that the blank check uses.
      stripped_key = key.to_s.strip
      return if stripped_key.empty?

      Pathname(public_path).join("#{stripped_key}.txt")
    end

    # @return [Integer] the configured batch size; DEFAULT_MAX_URLS_PER_REQUEST
    #   when unset or when the configured value is not a positive integer
    #   (a zero or negative size cannot be used to slice a URL list)
    def max_urls_per_request
      value = resolve(@max_urls_per_request)
      return DEFAULT_MAX_URLS_PER_REQUEST if value.nil?

      count = value.to_i
      count.positive? ? count : DEFAULT_MAX_URLS_PER_REQUEST
    end

    private

    # Invokes +value+ when it is callable, otherwise returns it untouched.
    def resolve(value)
      value.respond_to?(:call) ? value.call : value
    end

    # True for nil, false, and values whose string form is empty/whitespace.
    def blank?(value)
      !value || value.to_s.strip.empty?
    end
  end
end
|
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "net/http"
|
|
4
|
+
require "nokogiri"
|
|
5
|
+
require "pathname"
|
|
6
|
+
require "uri"
|
|
7
|
+
|
|
8
|
+
require "set" # Set is only autoloaded from Ruby 3.2 onwards

module Indexmap
  # Reads a sitemap from a local file or an http(s) URL, follows sitemap
  # indexes into their child sitemaps, and exposes the collected entries.
  #
  # Results are memoized per instance; pass `reset: true` (or call #reset!)
  # to force the sources to be read again.
  class Parser
    Entry = Struct.new(:loc, :lastmod, :source_sitemap, keyword_init: true)

    # @param rebase_remote_children [Boolean] when true, absolute child sitemap
    #   URLs found in a remote index are rewritten onto the index's own
    #   scheme/host/port before being fetched
    # @param index_filename [String] index filename used to locate the default sitemap
    # @param public_path [String, Pathname] directory used to locate the default sitemap
    # @param path [String, Pathname] sitemap file path or http(s) URL
    #
    # `path` is declared last on purpose: its default is computed from
    # `public_path` and `index_filename`, which must already be bound when the
    # default expression runs. Keyword order is irrelevant to callers.
    def initialize(
      rebase_remote_children: false,
      index_filename: Indexmap.configuration.index_filename,
      public_path: Indexmap.configuration.public_path,
      path: Indexmap::Path.existing_public_path(public_path: public_path, index_filename: index_filename)
    )
      @source = path.to_s
      @rebase_remote_children = rebase_remote_children
      @index_filename = index_filename
      @public_path = public_path
    end

    # @param reset [Boolean] discard memoized results first
    # @return [Array<Entry>] every `<url>` entry reachable from the source
    def entries(reset: false)
      reset! if reset
      return @entries if defined?(@entries)

      @entries = parse_source(@source, visited: Set.new)
    end

    # @param reset [Boolean] discard memoized results first
    # @return [Array<String>] unique normalized paths ("/", "/about", ...) in
    #   first-seen order; entries whose loc cannot be parsed are skipped
    def paths(reset: false)
      reset! if reset
      return @paths if defined?(@paths)

      normalized = entries.map do |entry|
        path = extract_path(entry.loc)
        normalize_path(path) unless path.nil?
      end
      @paths = normalized.compact.uniq
    end

    # Rebuilds absolute URLs for every path on top of +base_url+.
    #
    # @param base_url [String] origin to prepend, e.g. "https://example.com"
    # @param reset [Boolean] discard memoized results first
    # @return [Array<String>]
    def urls(base_url:, reset: false)
      reset! if reset

      target = URI.parse(base_url)
      # Only omit the port when it is the default for this scheme.
      explicit_port = target.port && target.port != target.default_port
      port_suffix = explicit_port ? ":#{target.port}" : ""

      paths.map do |path|
        "#{target.scheme}://#{target.host}#{port_suffix}#{path}"
      end
    end

    # Drops memoized entries and paths.
    #
    # @return [self]
    def reset!
      remove_instance_variable(:@entries) if defined?(@entries)
      remove_instance_variable(:@paths) if defined?(@paths)
      self
    end

    private

    attr_reader :index_filename, :public_path

    # Parses one sitemap source. `visited` holds the normalized sources already
    # handled so a sitemap is never read twice (which also stops cycles).
    def parse_source(source, visited:)
      normalized_source = normalize_source(source)
      return [] if normalized_source.nil? || visited.include?(normalized_source)

      visited.add(normalized_source)
      xml = read_source(normalized_source)
      return [] if xml.to_s.strip.empty?

      document = Nokogiri::XML(xml)
      # Namespaces are removed so the plain XPath expressions below match.
      document.remove_namespaces!

      if document.at_xpath("/sitemapindex")
        document.xpath("//sitemap/loc").flat_map do |node|
          child_source = resolve_child_sitemap(normalized_source, node.text.to_s.strip)
          next [] if child_source.nil?

          parse_source(child_source, visited: visited)
        end
      else
        document.xpath("//url").map do |url_node|
          loc = url_node.at_xpath("loc")&.text.to_s.strip
          next if loc.empty?

          lastmod = url_node.at_xpath("lastmod")&.text.to_s.strip
          Entry.new(loc: loc, lastmod: lastmod.empty? ? nil : lastmod, source_sitemap: normalized_source)
        end.compact
      end
    end

    # Works out where a child sitemap listed in an index should be read from:
    #   remote parent + remote loc   -> the loc (optionally rebased onto the parent)
    #   remote parent + relative loc -> joined onto the parent URL
    #   local parent  + remote loc   -> file with the same basename next to the parent
    #   local parent  + relative loc -> path resolved against the parent's directory
    def resolve_child_sitemap(parent_source, loc)
      return if loc.empty?

      if remote_source?(parent_source)
        parent_uri = URI.parse(parent_source)
        if remote_source?(loc)
          remote_child_source(parent_uri, loc)
        else
          URI.join(parent_uri.to_s, loc).to_s
        end
      elsif remote_source?(loc)
        uri = URI.parse(loc)
        File.join(File.dirname(parent_source), File.basename(uri.path))
      else
        File.expand_path(loc, File.dirname(parent_source))
      end
    rescue URI::InvalidURIError
      File.expand_path(loc, File.dirname(parent_source))
    end

    # Returns the child URL as-is, or moved onto the parent's origin when
    # rebasing is enabled and the origins differ.
    def remote_child_source(parent_uri, loc)
      child_uri = URI.parse(loc)
      return child_uri.to_s unless @rebase_remote_children

      same_origin = child_uri.host == parent_uri.host &&
        child_uri.port == parent_uri.port &&
        child_uri.scheme == parent_uri.scheme
      return child_uri.to_s if same_origin

      child_uri.scheme = parent_uri.scheme
      child_uri.host = parent_uri.host
      child_uri.port = parent_uri.port
      child_uri.to_s
    end

    # Canonical string for a source: the parsed URL for remote sources, the
    # expanded absolute path for local ones, nil for blank/unparseable input.
    def normalize_source(source)
      return if source.to_s.strip.empty?

      if remote_source?(source)
        URI.parse(source).to_s
      else
        Pathname(source).expand_path.to_s
      end
    rescue URI::InvalidURIError
      nil
    end

    # True when +value+ parses as an http or https URL.
    def remote_source?(value)
      uri = URI.parse(value.to_s)
      uri.is_a?(URI::HTTP) || uri.is_a?(URI::HTTPS)
    rescue URI::InvalidURIError
      false
    end

    # Returns the raw XML for a source, or nil when a local file is missing.
    def read_source(source)
      if remote_source?(source)
        fetch_remote_source(source)
      elsif File.exist?(source)
        File.read(source, encoding: "UTF-8")
      end
    end

    # GETs a remote sitemap, following at most `redirects_remaining` redirects.
    # Returns the body on success and nil for any other response, an empty
    # Location header, an exhausted redirect budget, or an invalid URL.
    def fetch_remote_source(source, redirects_remaining: 3)
      uri = URI.parse(source)
      request = Net::HTTP::Get.new(uri)
      request["User-Agent"] = "Indexmap::Parser/1.0"

      response = Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == "https", open_timeout: 10, read_timeout: 20) do |http|
        http.request(request)
      end

      case response
      when Net::HTTPSuccess
        response.body
      when Net::HTTPRedirection
        return if redirects_remaining <= 0

        location = response["location"].to_s
        return if location.empty?

        redirected = URI.join(source, location).to_s
        fetch_remote_source(redirected, redirects_remaining: redirects_remaining - 1)
      end
    rescue URI::InvalidURIError
      nil
    end

    # Path component of a loc; absolute URLs without a path map to "/".
    # Returns nil for blank or unparseable locs.
    def extract_path(loc)
      return if loc.to_s.strip.empty?

      if loc.start_with?("http://", "https://")
        path = URI.parse(loc).path
        (path.nil? || path.empty?) ? "/" : path
      elsif loc.start_with?("/")
        loc
      else
        "/#{loc}"
      end
    rescue URI::InvalidURIError
      nil
    end

    # Ensures a leading slash and removes one trailing slash (root stays "/").
    def normalize_path(path)
      return "/" if path == "/"

      normalized = path.start_with?("/") ? path : "/#{path}"
      normalized.delete_suffix("/")
    end
  end
end
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "pathname"
|
|
4
|
+
require "uri"
|
|
5
|
+
|
|
6
|
+
module Indexmap
  # Helpers for locating the sitemap index on disk and building its public URL.
  module Path
    INDEX_FILENAME = "sitemap.xml"
    LEGACY_FILENAME = "sitemap_index.xml"

    module_function

    # @return [Pathname] `<public_path>/<index_filename>`
    def canonical_public_path(public_path: default_public_path_root, index_filename: default_index_filename)
      Pathname(public_path).join(index_filename)
    end

    # Picks the sitemap index file to read.
    #
    # @return [Pathname] the canonical index path when that file exists;
    #   otherwise `<public_path>/<legacy_filename>` (returned without checking
    #   that it exists, so callers must still handle a missing file)
    def existing_public_path(public_path: default_public_path_root, index_filename: default_index_filename, legacy_filename: LEGACY_FILENAME)
      index_path = canonical_public_path(public_path: public_path, index_filename: index_filename)
      return index_path if index_path.exist?

      Pathname(public_path).join(legacy_filename)
    end

    # @param base_url [String] site origin, e.g. "https://example.com"
    # @return [String] absolute URL of the index file at the root of +base_url+
    def canonical_url(base_url, index_filename: default_index_filename)
      URI.join(base_url, "/#{index_filename}").to_s
    end

    # @return [String] the configured index filename, or INDEX_FILENAME when it
    #   is blank or the configuration cannot be read
    def default_index_filename
      configured = Indexmap.configuration.index_filename
      return INDEX_FILENAME if !configured || configured.to_s.strip.empty?

      configured
    rescue StandardError
      # Best effort: any configuration failure falls back to the stock filename.
      INDEX_FILENAME
    end

    # @return [Pathname] Rails' public path when Rails provides one, else "public"
    def default_public_path_root
      # Rails can be loaded without exposing a usable public path (no
      # `public_path` method, or a nil result), so fall back instead of failing.
      rails_path = Rails.public_path if defined?(Rails) && Rails.respond_to?(:public_path)
      rails_path || Pathname("public")
    end
  end
end
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "active_support/tagged_logging"
|
|
4
|
+
require "uri"
|
|
5
|
+
|
|
6
|
+
module Indexmap
  module Pinger
    # Shared behaviour for search-engine pingers: finds the sitemap files in
    # the configured public directory and hands each one to #ping_sitemap,
    # which subclasses must implement.
    class Base
      # Convenience wrapper: builds a pinger with the given arguments and pings.
      def self.ping(...)
        new(...).ping
      end

      # @param configuration [#public_path, #base_url] settings object; defaults
      #   to the global Indexmap configuration
      def initialize(configuration: Indexmap.configuration)
        @configuration = configuration
      end

      # Calls #ping_sitemap once per discovered sitemap file.
      def ping
        sitemap_files.each do |sitemap_file|
          ping_sitemap(sitemap_file)
        end
      end

      # @return [Logger] Rails.logger when available, otherwise a tagged logger
      #   writing to standard output (memoized)
      def logger
        @logger ||= if defined?(Rails) && Rails.respond_to?(:logger) && Rails.logger
          Rails.logger
        else
          ActiveSupport::TaggedLogging.new(Logger.new($stdout))
        end
      end

      private

      attr_reader :configuration

      # The configured base URL of the site.
      def host
        configuration.base_url
      end

      # Host component of the base URL.
      def hostname
        URI.parse(host).host
      end

      # Hostname with a leading "www." removed.
      def root_domain
        hostname.sub(/\Awww\./, "")
      end

      # @return [Array<String>] sorted paths of `sitemap*.xml` files directly
      #   inside the public directory
      def sitemap_files
        root = configuration.public_path
        # Glob relative to `base:` so that glob metacharacters occurring in the
        # directory name itself ("[", "{", "*", "?") are not read as pattern syntax.
        Dir.glob("sitemap*.xml", base: root.to_s).map { |name| root.join(name).to_s }.sort
      end

      # Subclass hook: submit a single sitemap file.
      #
      # @raise [NotImplementedError] always, in this base class
      def ping_sitemap(_sitemap_file)
        raise NotImplementedError
      end
    end
  end
end
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "google/apis/searchconsole_v1"
|
|
4
|
+
require "googleauth"
|
|
5
|
+
require "json"
|
|
6
|
+
require "stringio"
|
|
7
|
+
|
|
8
|
+
module Indexmap
  module Pinger
    # Submits every sitemap file found in the public directory to Google
    # Search Console through the Search Console API.
    class Google < Base
      # @param configuration [Object] Indexmap configuration (must expose `google`)
      # @param service [Object, nil] API service to use instead of a new
      #   SearchConsoleService
      # @param credentials_builder [#call, nil] optional replacement for the
      #   service-account credential factory; called with `credentials:` (JSON
      #   string) and `scope:`
      def initialize(configuration: Indexmap.configuration, service: nil, credentials_builder: nil)
        super(configuration: configuration)
        @service = service
        @credentials_builder = credentials_builder
      end

      # Pings Google for each sitemap file; does nothing (beyond a debug log)
      # when no credentials are configured.
      def ping
        if google_configuration.credentials.to_s.strip.empty?
          logger.debug("Google sitemap credentials not configured.")
          return
        end

        super
      end

      private

      attr_reader :credentials_builder

      def google_configuration
        configuration.google
      end

      # Submits one sitemap. The public URL is the base URL joined with the
      # file's basename. Client errors from the API are logged, not raised.
      def ping_sitemap(sitemap_file)
        sitemap_url = URI.join(host, File.basename(sitemap_file)).to_s

        unless authorized?
          logger.error("Google Search Console does not have access to the site: #{root_domain}")
          return
        end

        webmasters_service.submit_sitemap(property_identifier, sitemap_url)
        logger.debug { "Successfully pinged Google with sitemap: #{sitemap_url}" }
      rescue ::Google::Apis::ClientError => e
        logger.debug { "Failed to ping Google for #{sitemap_url}. Status: #{e.status_code}, Body: #{e.body}" }
      end

      # True when any site visible to the credentials mentions the root domain.
      def authorized?
        # NOTE(review): the API client presumably leaves `site_entry` nil, not
        # [], when the account has no sites — confirm. Array() covers both.
        sites = Array(webmasters_service.list_sites.site_entry)
        sites.any? { |site| site.site_url.to_s.include?(root_domain) }
      end

      # Configured property, or the domain property derived from the base URL.
      def property_identifier
        google_configuration.property.presence || "sc-domain:#{root_domain}"
      end

      # Memoized API service with authorization attached.
      def webmasters_service
        @webmasters_service ||= begin
          service = @service || ::Google::Apis::SearchconsoleV1::SearchConsoleService.new
          service.authorization = authorizer
          service
        end
      end

      # Builds credentials from the configured JSON. The JSON is parsed and
      # re-serialized first, so malformed input raises JSON::ParserError here.
      def authorizer
        json_key = JSON.parse(google_configuration.credentials).to_json
        scope = "https://www.googleapis.com/auth/webmasters"

        return credentials_builder.call(credentials: json_key, scope: scope) if credentials_builder

        ::Google::Auth::ServiceAccountCredentials.make_creds(
          json_key_io: StringIO.new(json_key),
          scope: scope
        )
      end
    end
  end
end
|
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "faraday"
|
|
4
|
+
require "json"
|
|
5
|
+
require "time"
|
|
6
|
+
|
|
7
|
+
require "fileutils" # write_key_file relies on FileUtils.mkdir_p

module Indexmap
  module Pinger
    # Submits sitemap URLs to IndexNow in batches and manages the key
    # verification file.
    #
    # Environment variables read at ping time:
    #   SINCE                          ISO 8601 cutoff; only newer entries are sent
    #   INDEXNOW_RECENT_HOURS          cutoff as "hours ago" (used when SINCE is blank)
    #   INDEXNOW_MAX_URLS_PER_REQUEST  batch size override
    #   INDEXNOW_DRY_RUN               "1" logs instead of submitting
    class IndexNow < Base
      # Shape a public `*.txt` basename must have to be considered a key file.
      KEY_FILENAME_PATTERN = /\A[a-zA-Z0-9-]{8,128}\z/

      # @param configuration [Object] Indexmap configuration (must expose `index_now`)
      # @param connection [Object, nil] HTTP connection to use instead of a new
      #   Faraday connection to the configured endpoint
      def initialize(configuration: Indexmap.configuration, connection: nil)
        super(configuration: configuration)
        @connection = connection
      end

      # Submits the selected sitemap URLs. Returns early (with a debug log)
      # when no API key is available or no URL matches the cutoff.
      def ping
        api_key = read_api_key
        unless api_key
          logger.debug("IndexNow API key is not configured.")
          return
        end

        entries = entries_to_ping
        if entries.empty?
          logger.debug("IndexNow: no URLs matched the current filter.")
          return
        end

        entries.each_slice(max_urls_per_request) do |batch|
          urls = batch.map(&:loc)

          if dry_run?
            logger.debug { "IndexNow dry-run: would ping #{urls.count} URLs." }
            next
          end

          submit_batch(api_key: api_key, urls: urls)
        end
      end

      # Writes the configured key (plus a newline) to the key file, creating
      # the parent directory when needed.
      #
      # @return [Pathname, nil] the written path, or nil when no key is configured
      def write_key_file
        key = index_now_configuration.key.to_s.strip
        return if key.empty?

        path = index_now_configuration.key_path(public_path: configuration.public_path)
        FileUtils.mkdir_p(path.dirname)
        File.write(path, "#{key}\n")
        path
      end

      private

      attr_reader :connection

      def index_now_configuration
        configuration.index_now
      end

      # Sitemap files to read URLs from. When several files exist the index
      # file is dropped so only child sitemaps are parsed; if that would leave
      # nothing, the full list is kept.
      def sitemap_files
        files = super
        return files if files.one?

        child_files = files.reject { |file| File.basename(file) == configuration.index_filename }
        child_files.empty? ? files : child_files
      end

      # Entries to submit: all of them without a cutoff; otherwise those whose
      # lastmod is at or after the cutoff, plus those with no lastmod at all.
      def entries_to_ping
        cutoff = since_cutoff
        unless cutoff
          logger.debug("IndexNow: no cutoff provided, submitting all sitemap URLs.")
          return current_entries.values
        end

        logger.debug { "IndexNow: submitting sitemap URLs with lastmod >= #{cutoff.iso8601}." }

        current_entries.values.select do |entry|
          lastmod_after_cutoff?(entry, cutoff) || entry.lastmod.to_s.strip.empty?
        end
      end

      # Entries from every sitemap file keyed by loc (the last occurrence of a
      # loc wins).
      def current_entries
        sitemap_files.each_with_object({}) do |sitemap_file, entries|
          # Hand the injected configuration to the parser so it does not fall
          # back to the global one for its unused defaults.
          parser = Parser.new(
            path: sitemap_file,
            index_filename: configuration.index_filename,
            public_path: configuration.public_path
          )

          parser.entries.each do |entry|
            next if entry.loc.to_s.strip.empty?

            entries[entry.loc] = entry
          end
        end
      end

      # Cutoff time from SINCE, else from INDEXNOW_RECENT_HOURS, else nil.
      #
      # @raise [ArgumentError] when SINCE is not valid ISO 8601, or when
      #   INDEXNOW_RECENT_HOURS is invalid (see #recent_cutoff)
      def since_cutoff
        raw_value = ENV["SINCE"].to_s.strip
        return recent_cutoff if raw_value.empty?

        # The rescue is scoped to the SINCE parse only, so an invalid
        # INDEXNOW_RECENT_HOURS error from #recent_cutoff keeps its own message.
        begin
          Time.iso8601(raw_value).utc
        rescue ArgumentError
          raise ArgumentError, "Invalid SINCE value: #{raw_value.inspect}. Use ISO 8601, e.g. 2026-04-18T10:30:00Z."
        end
      end

      # Cutoff derived from INDEXNOW_RECENT_HOURS, or nil when it is not set.
      #
      # @raise [ArgumentError] when the value is not a positive integer
      def recent_cutoff
        hours = ENV["INDEXNOW_RECENT_HOURS"].to_s.strip
        return if hours.empty?

        hours_ago = Integer(hours, exception: false)
        unless hours_ago&.positive?
          raise ArgumentError, "Invalid INDEXNOW_RECENT_HOURS value: #{hours.inspect}. Use a positive integer."
        end

        Time.now.utc - (hours_ago * 3600)
      end

      def lastmod_after_cutoff?(entry, cutoff)
        lastmod = entry_lastmod(entry)
        return false unless lastmod

        lastmod >= cutoff
      end

      # Parsed lastmod in UTC; nil when absent or not valid ISO 8601.
      def entry_lastmod(entry)
        return if entry.lastmod.to_s.strip.empty?

        Time.iso8601(entry.lastmod.to_s).utc
      rescue ArgumentError
        logger.debug { "IndexNow: skipping invalid sitemap lastmod #{entry.lastmod.inspect} for #{entry.loc}" }
        nil
      end

      # Batch size: the environment override when set, else the configured
      # value. A non-positive result falls back to the library default because
      # `each_slice` rejects a slice size of zero or less.
      def max_urls_per_request
        limit = ENV.fetch("INDEXNOW_MAX_URLS_PER_REQUEST", index_now_configuration.max_urls_per_request).to_i
        limit.positive? ? limit : IndexNowConfiguration::DEFAULT_MAX_URLS_PER_REQUEST
      end

      # POSTs one batch of URLs.
      #
      # @return [Boolean] whether the endpoint reported success
      def submit_batch(api_key:, urls:)
        payload = {host: hostname, key: api_key, urlList: urls}
        response = index_now_connection.post("/indexnow") do |request|
          request.headers["Content-Type"] = "application/json"
          request.body = payload.to_json
        end

        if response.success?
          logger.debug { "Successfully pinged IndexNow with #{urls.count} URLs." }
          true
        else
          logger.debug { "Failed to ping IndexNow. Status: #{response.status}, Body: #{response.body}" }
          false
        end
      end

      def index_now_connection
        @index_now_connection ||= connection || Faraday.new(url: index_now_configuration.endpoint) do |faraday|
          faraday.request :json
        end
      end

      def dry_run?
        ENV["INDEXNOW_DRY_RUN"] == "1" || index_now_configuration.dry_run?
      end

      # The configured key, or a key discovered from a public `<key>.txt` file
      # whose stripped content equals its own basename. Nil when neither exists.
      def read_api_key
        configured_key = index_now_configuration.key.to_s.strip
        return configured_key unless configured_key.empty?

        configuration.public_path.glob("*.txt").each do |file|
          filename = file.basename(".txt").to_s
          next unless filename.match?(KEY_FILENAME_PATTERN)

          # Content equals the basename, so the basename is the key; no second read.
          return filename if File.read(file).strip == filename
        end

        nil
      end
    end
  end
end
|
data/lib/indexmap/task_runner.rb
CHANGED
|
@@ -11,6 +11,7 @@ module Indexmap
|
|
|
11
11
|
# Regenerates the sitemap output. The three steps run in this fixed order:
# remove_existing_sitemap_files (presumably clears earlier output — defined
# outside this excerpt), the configured writer's `write`, and finally
# write_index_now_key, whose return value is also this method's return value.
def create
|
|
12
12
|
remove_existing_sitemap_files
|
|
13
13
|
configuration.writer.write
|
|
14
|
+
write_index_now_key
|
|
14
15
|
end
|
|
15
16
|
|
|
16
17
|
def format
|
|
@@ -28,6 +29,14 @@ module Indexmap
|
|
|
28
29
|
end
|
|
29
30
|
end
|
|
30
31
|
|
|
32
|
+
# Runs the sitemap validator against this runner's configuration and returns
# whatever Validator#validate! returns.
def validate
  validator = Validator.new(configuration: configuration)
  validator.validate!
end
|
|
35
|
+
|
|
36
|
+
# Delegates to Pinger::IndexNow#write_key_file using this runner's
# configuration and returns its result.
def write_index_now_key
  pinger = Pinger::IndexNow.new(configuration: configuration)
  pinger.write_key_file
end
|
|
39
|
+
|
|
31
40
|
private
|
|
32
41
|
|
|
33
42
|
attr_reader :configuration
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Indexmap
  # Checks a generated sitemap for three problems: a missing sitemap file,
  # duplicate `<loc>` values, and `<loc>` values containing a query string.
  class Validator
    # @param configuration [Object] Indexmap configuration; used to locate the
    #   sitemap when +path+ is not given
    # @param path [String, Pathname, nil] explicit sitemap file to validate
    def initialize(configuration: Indexmap.configuration, path: nil)
      @configuration = configuration
      @path = path
    end

    # @return [true] when every check passes
    # @raise [ValidationError] describing the first failed check
    def validate!
      sitemap_path = path || Indexmap::Path.existing_public_path(
        public_path: configuration.public_path,
        index_filename: configuration.index_filename
      )
      raise ValidationError, "Missing sitemap file: #{sitemap_path}" unless File.exist?(sitemap_path)

      # Pass the injected configuration through so the parser does not evaluate
      # its global-configuration defaults.
      parser = Parser.new(
        path: sitemap_path,
        index_filename: configuration.index_filename,
        public_path: configuration.public_path
      )
      entries = parser.entries

      validate_duplicates!(entries)
      validate_parameterized_urls!(entries)
      true
    end

    private

    attr_reader :configuration, :path

    # Raises when any loc occurs more than once (reports up to five of them).
    def validate_duplicates!(entries)
      duplicates = entries.map(&:loc).tally.select { |_url, count| count > 1 }.keys
      return if duplicates.empty?

      raise ValidationError, "Duplicate sitemap URLs detected: #{duplicates.first(5).join(", ")}"
    end

    # Raises when any loc contains "?" (reports up to five of them).
    def validate_parameterized_urls!(entries)
      param_urls = entries.map(&:loc).select { |url| url&.include?("?") }
      return if param_urls.empty?

      raise ValidationError, "Parameterized sitemap URLs detected: #{param_urls.first(5).join(", ")}"
    end
  end
end
|
data/lib/indexmap/version.rb
CHANGED
data/lib/indexmap.rb
CHANGED
|
@@ -7,16 +7,25 @@ require "pathname"
|
|
|
7
7
|
require "time"
|
|
8
8
|
|
|
9
9
|
require_relative "indexmap/version"
|
|
10
|
+
require_relative "indexmap/google_configuration"
|
|
11
|
+
require_relative "indexmap/index_now_configuration"
|
|
10
12
|
require_relative "indexmap/configuration"
|
|
11
13
|
require_relative "indexmap/entry"
|
|
14
|
+
require_relative "indexmap/path"
|
|
15
|
+
require_relative "indexmap/parser"
|
|
16
|
+
require_relative "indexmap/pinger/base"
|
|
17
|
+
require_relative "indexmap/pinger/google"
|
|
18
|
+
require_relative "indexmap/pinger/index_now"
|
|
12
19
|
require_relative "indexmap/section"
|
|
13
20
|
require_relative "indexmap/task_runner"
|
|
21
|
+
require_relative "indexmap/validator"
|
|
14
22
|
require_relative "indexmap/writer"
|
|
15
23
|
|
|
16
24
|
module Indexmap
|
|
17
25
|
class Error < StandardError; end
|
|
18
26
|
|
|
19
27
|
class ConfigurationError < Error; end
|
|
28
|
+
class ValidationError < Error; end
|
|
20
29
|
|
|
21
30
|
class << self
|
|
22
31
|
def configuration
|
|
@@ -4,10 +4,42 @@ namespace :sitemap do
|
|
|
4
4
|
runner = Indexmap::TaskRunner.new
|
|
5
5
|
runner.create
|
|
6
6
|
runner.format
|
|
7
|
+
runner.validate
|
|
7
8
|
end
|
|
8
9
|
|
|
9
10
|
desc "Format sitemap files for better readability"
task format: :environment do
  runner = Indexmap::TaskRunner.new
  runner.format
end

desc "Validate sitemap shape and URL hygiene"
task validate: :environment do
  runner = Indexmap::TaskRunner.new
  runner.validate
end

# Runs the engine-specific ping tasks, IndexNow first and then Google.
desc "Ping all configured search engines"
task ping: :environment do
  %w[sitemap:index_now:ping sitemap:google:ping].each do |task_name|
    Rake::Task[task_name].invoke
  end
end

namespace :google do
  desc "Ping Google Search Console"
  task ping: :environment do
    Indexmap::Pinger::Google.ping
  end
end

namespace :index_now do
  desc "Ping IndexNow. ENV: SINCE=2026-04-18T10:30:00Z or INDEXNOW_RECENT_HOURS=24"
  task ping: :environment do
    Indexmap::Pinger::IndexNow.ping
  end

  # Prints the written path only when a key file was actually written.
  desc "Write the IndexNow key file into public/"
  task write_key: :environment do
    written_path = Indexmap::TaskRunner.new.write_index_now_key
    puts "Wrote #{written_path}" if written_path
  end
end
|
|
13
45
|
end
|
|
@@ -66,4 +66,18 @@ class IndexmapConfigurationTest < Minitest::Test
|
|
|
66
66
|
|
|
67
67
|
assert_equal "Indexmap format must be one of: index, single_file", error.message
|
|
68
68
|
end
|
|
69
|
+
|
|
70
|
+
# Values assigned as lambdas to the nested google/index_now settings must be
# readable back as their resolved values.
def test_exposes_nested_google_and_index_now_configuration
  credentials_json = "{\"type\":\"service_account\"}"

  Indexmap.configure do |config|
    google = config.google
    google.credentials = -> { credentials_json }
    google.property = -> { "sc-domain:example.com" }

    index_now = config.index_now
    index_now.key = -> { "example-key" }
    index_now.max_urls_per_request = -> { 250 }
  end

  google_settings = Indexmap.configuration.google
  index_now_settings = Indexmap.configuration.index_now

  assert_equal credentials_json, google_settings.credentials
  assert_equal "sc-domain:example.com", google_settings.property
  assert_equal "example-key", index_now_settings.key
  assert_equal 250, index_now_settings.max_urls_per_request
end
|
|
69
83
|
end
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "test_helper"
|
|
4
|
+
|
|
5
|
+
class IndexmapParserTest < Minitest::Test
|
|
6
|
+
def test_parses_remote_sitemap_urlset
|
|
7
|
+
stub_request(:get, "https://www.example.com/sitemap.xml")
|
|
8
|
+
.to_return(
|
|
9
|
+
status: 200,
|
|
10
|
+
body: <<~XML
|
|
11
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
|
12
|
+
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
|
|
13
|
+
<url><loc>https://www.example.com/</loc></url>
|
|
14
|
+
<url><loc>https://www.example.com/pages/features</loc></url>
|
|
15
|
+
</urlset>
|
|
16
|
+
XML
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
parser = Indexmap::Parser.new(path: "https://www.example.com/sitemap.xml")
|
|
20
|
+
|
|
21
|
+
assert_equal ["/", "/pages/features"], parser.paths
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
def test_parses_remote_sitemap_index_with_child_sitemap
|
|
25
|
+
stub_request(:get, "https://www.example.com/sitemap.xml")
|
|
26
|
+
.to_return(
|
|
27
|
+
status: 200,
|
|
28
|
+
body: <<~XML
|
|
29
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
|
30
|
+
<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
|
|
31
|
+
<sitemap><loc>/sitemaps/content.xml</loc></sitemap>
|
|
32
|
+
</sitemapindex>
|
|
33
|
+
XML
|
|
34
|
+
)
|
|
35
|
+
|
|
36
|
+
stub_request(:get, "https://www.example.com/sitemaps/content.xml")
|
|
37
|
+
.to_return(
|
|
38
|
+
status: 200,
|
|
39
|
+
body: <<~XML
|
|
40
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
|
41
|
+
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
|
|
42
|
+
<url><loc>https://www.example.com/tools/google-reviews-calculator</loc></url>
|
|
43
|
+
</urlset>
|
|
44
|
+
XML
|
|
45
|
+
)
|
|
46
|
+
|
|
47
|
+
parser = Indexmap::Parser.new(path: "https://www.example.com/sitemap.xml")
|
|
48
|
+
|
|
49
|
+
assert_equal ["/tools/google-reviews-calculator"], parser.paths
|
|
50
|
+
assert_equal ["https://www.reviato.com/tools/google-reviews-calculator"], parser.urls(base_url: "https://www.reviato.com")
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
def test_can_rebase_remote_child_sitemap_urls_to_the_fetched_sitemap_origin
|
|
54
|
+
stub_request(:get, "http://localhost:3001/sitemap.xml")
|
|
55
|
+
.to_return(
|
|
56
|
+
status: 200,
|
|
57
|
+
body: <<~XML
|
|
58
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
|
59
|
+
<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
|
|
60
|
+
<sitemap><loc>https://www.reviato.com/sitemap-marketing.xml</loc></sitemap>
|
|
61
|
+
</sitemapindex>
|
|
62
|
+
XML
|
|
63
|
+
)
|
|
64
|
+
|
|
65
|
+
stub_request(:get, "http://localhost:3001/sitemap-marketing.xml")
|
|
66
|
+
.to_return(
|
|
67
|
+
status: 200,
|
|
68
|
+
body: <<~XML
|
|
69
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
|
70
|
+
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
|
|
71
|
+
<url><loc>https://www.reviato.com/pages/pricing</loc></url>
|
|
72
|
+
</urlset>
|
|
73
|
+
XML
|
|
74
|
+
)
|
|
75
|
+
|
|
76
|
+
parser = Indexmap::Parser.new(path: "http://localhost:3001/sitemap.xml", rebase_remote_children: true)
|
|
77
|
+
|
|
78
|
+
assert_equal ["/pages/pricing"], parser.paths
|
|
79
|
+
assert_equal ["http://localhost:3001/pages/pricing"], parser.urls(base_url: "http://localhost:3001")
|
|
80
|
+
end
|
|
81
|
+
end
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "test_helper"
|
|
4
|
+
|
|
5
|
+
class IndexmapPathTest < Minitest::Test
|
|
6
|
+
def test_existing_public_path_prefers_sitemap_index_when_present
|
|
7
|
+
Dir.mktmpdir do |dir|
|
|
8
|
+
public_path = Pathname(dir)
|
|
9
|
+
public_path.join("sitemap_index.xml").write("<urlset/>")
|
|
10
|
+
public_path.join("sitemap.xml").write("<sitemapindex/>")
|
|
11
|
+
|
|
12
|
+
assert_equal public_path.join("sitemap.xml"), Indexmap::Path.existing_public_path(public_path: public_path)
|
|
13
|
+
end
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
def test_existing_public_path_falls_back_to_legacy_sitemap_path
|
|
17
|
+
Dir.mktmpdir do |dir|
|
|
18
|
+
public_path = Pathname(dir)
|
|
19
|
+
public_path.join("sitemap_index.xml").write("<sitemapindex/>")
|
|
20
|
+
|
|
21
|
+
assert_equal public_path.join("sitemap_index.xml"), Indexmap::Path.existing_public_path(public_path: public_path)
|
|
22
|
+
end
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
def test_canonical_url_targets_sitemap_index
|
|
26
|
+
assert_equal "https://www.example.com/sitemap.xml", Indexmap::Path.canonical_url("https://www.example.com")
|
|
27
|
+
end
|
|
28
|
+
end
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "test_helper"
|
|
4
|
+
|
|
5
|
+
class IndexmapPingerGoogleTest < Minitest::Test
|
|
6
|
+
SiteEntry = Struct.new(:site_url)
|
|
7
|
+
SiteList = Struct.new(:site_entry)
|
|
8
|
+
|
|
9
|
+
class FakeWebmastersService
|
|
10
|
+
attr_accessor :authorization
|
|
11
|
+
attr_reader :submitted
|
|
12
|
+
|
|
13
|
+
def initialize(site_urls:)
|
|
14
|
+
@site_urls = site_urls
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
def list_sites
|
|
18
|
+
SiteList.new(@site_urls.map { |site_url| SiteEntry.new(site_url) })
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
def submit_sitemap(property, sitemap_url)
|
|
22
|
+
@submitted = [property, sitemap_url]
|
|
23
|
+
end
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
def test_pings_google_for_each_sitemap_file
|
|
27
|
+
Dir.mktmpdir do |dir|
|
|
28
|
+
public_path = Pathname(dir)
|
|
29
|
+
public_path.join("sitemap.xml").write("<sitemapindex/>")
|
|
30
|
+
|
|
31
|
+
configuration = Indexmap::Configuration.new
|
|
32
|
+
configuration.base_url = "https://www.example.com"
|
|
33
|
+
configuration.public_path = public_path
|
|
34
|
+
configuration.google.credentials = "{\"type\":\"service_account\"}"
|
|
35
|
+
|
|
36
|
+
service = FakeWebmastersService.new(site_urls: ["sc-domain:example.com"])
|
|
37
|
+
builder_calls = []
|
|
38
|
+
credentials_builder = lambda do |credentials:, scope:|
|
|
39
|
+
builder_calls << [credentials, scope]
|
|
40
|
+
:fake_authorizer
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
Indexmap::Pinger::Google.new(
|
|
44
|
+
configuration: configuration,
|
|
45
|
+
service: service,
|
|
46
|
+
credentials_builder: credentials_builder
|
|
47
|
+
).ping
|
|
48
|
+
|
|
49
|
+
assert_equal [["{\"type\":\"service_account\"}", "https://www.googleapis.com/auth/webmasters"]], builder_calls
|
|
50
|
+
assert_equal :fake_authorizer, service.authorization
|
|
51
|
+
assert_equal ["sc-domain:example.com", "https://www.example.com/sitemap.xml"], service.submitted
|
|
52
|
+
end
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
def test_skips_google_ping_when_credentials_are_missing
|
|
56
|
+
Dir.mktmpdir do |dir|
|
|
57
|
+
public_path = Pathname(dir)
|
|
58
|
+
public_path.join("sitemap.xml").write("<sitemapindex/>")
|
|
59
|
+
|
|
60
|
+
configuration = Indexmap::Configuration.new
|
|
61
|
+
configuration.base_url = "https://www.example.com"
|
|
62
|
+
configuration.public_path = public_path
|
|
63
|
+
|
|
64
|
+
service = FakeWebmastersService.new(site_urls: ["sc-domain:example.com"])
|
|
65
|
+
|
|
66
|
+
Indexmap::Pinger::Google.new(configuration: configuration, service: service).ping
|
|
67
|
+
|
|
68
|
+
assert_nil service.submitted
|
|
69
|
+
end
|
|
70
|
+
end
|
|
71
|
+
end
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "test_helper"
|
|
4
|
+
|
|
5
|
+
class IndexmapPingerIndexNowTest < Minitest::Test
|
|
6
|
+
def test_writes_key_file_from_configuration
|
|
7
|
+
Dir.mktmpdir do |dir|
|
|
8
|
+
configuration = Indexmap::Configuration.new
|
|
9
|
+
configuration.base_url = "https://www.example.com"
|
|
10
|
+
configuration.public_path = Pathname(dir)
|
|
11
|
+
configuration.index_now.key = "test-key"
|
|
12
|
+
|
|
13
|
+
path = Indexmap::Pinger::IndexNow.new(configuration: configuration).write_key_file
|
|
14
|
+
|
|
15
|
+
assert_equal Pathname(dir).join("test-key.txt"), path
|
|
16
|
+
assert_equal "test-key\n", path.read
|
|
17
|
+
end
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
def test_pings_all_sitemap_urls_when_no_cutoff_is_provided
|
|
21
|
+
Dir.mktmpdir do |dir|
|
|
22
|
+
public_path = Pathname(dir)
|
|
23
|
+
write_sitemap_files(
|
|
24
|
+
public_path,
|
|
25
|
+
marketing_lastmod: "2026-04-18T00:00:00Z",
|
|
26
|
+
insights_lastmod: "2026-04-10T00:00:00Z"
|
|
27
|
+
)
|
|
28
|
+
|
|
29
|
+
configuration = Indexmap::Configuration.new
|
|
30
|
+
configuration.base_url = "https://www.example.com"
|
|
31
|
+
configuration.public_path = public_path
|
|
32
|
+
configuration.index_now.key = "test-key"
|
|
33
|
+
|
|
34
|
+
indexnow_url = "https://api.indexnow.org/indexnow"
|
|
35
|
+
stub_request(:post, indexnow_url).to_return(status: 200, body: "", headers: {})
|
|
36
|
+
|
|
37
|
+
Indexmap::Pinger::IndexNow.new(configuration: configuration).ping
|
|
38
|
+
|
|
39
|
+
assert_requested(:post, indexnow_url, times: 1) do |request|
|
|
40
|
+
payload = JSON.parse(request.body)
|
|
41
|
+
assert_equal [
|
|
42
|
+
"https://www.example.com/pages/features",
|
|
43
|
+
"https://www.example.com/insights/us/restaurants/overview"
|
|
44
|
+
].sort, payload.fetch("urlList").sort
|
|
45
|
+
end
|
|
46
|
+
end
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
def test_pings_only_sitemap_urls_newer_than_since
|
|
50
|
+
Dir.mktmpdir do |dir|
|
|
51
|
+
public_path = Pathname(dir)
|
|
52
|
+
write_sitemap_files(
|
|
53
|
+
public_path,
|
|
54
|
+
marketing_lastmod: "2026-04-18T00:00:00Z",
|
|
55
|
+
insights_lastmod: "2026-04-10T00:00:00Z"
|
|
56
|
+
)
|
|
57
|
+
|
|
58
|
+
configuration = Indexmap::Configuration.new
|
|
59
|
+
configuration.base_url = "https://www.example.com"
|
|
60
|
+
configuration.public_path = public_path
|
|
61
|
+
configuration.index_now.key = "test-key"
|
|
62
|
+
|
|
63
|
+
indexnow_url = "https://api.indexnow.org/indexnow"
|
|
64
|
+
stub_request(:post, indexnow_url).to_return(status: 200, body: "", headers: {})
|
|
65
|
+
|
|
66
|
+
with_env("SINCE" => "2026-04-15T00:00:00Z") do
|
|
67
|
+
Indexmap::Pinger::IndexNow.new(configuration: configuration).ping
|
|
68
|
+
end
|
|
69
|
+
|
|
70
|
+
assert_requested(:post, indexnow_url, times: 1) do |request|
|
|
71
|
+
payload = JSON.parse(request.body)
|
|
72
|
+
assert_equal ["https://www.example.com/pages/features"], payload.fetch("urlList")
|
|
73
|
+
end
|
|
74
|
+
end
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
private
|
|
78
|
+
|
|
79
|
+
def with_env(overrides)
|
|
80
|
+
previous_values = overrides.to_h { |key, _value| [key, ENV[key]] }
|
|
81
|
+
overrides.each { |key, value| ENV[key] = value }
|
|
82
|
+
yield
|
|
83
|
+
ensure
|
|
84
|
+
previous_values.each do |key, value|
|
|
85
|
+
value.nil? ? ENV.delete(key) : ENV[key] = value
|
|
86
|
+
end
|
|
87
|
+
end
|
|
88
|
+
|
|
89
|
+
def write_sitemap_files(public_path, marketing_lastmod:, insights_lastmod:)
|
|
90
|
+
public_path.join("sitemap.xml").write(<<~XML)
|
|
91
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
|
92
|
+
<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
|
|
93
|
+
<sitemap><loc>https://www.example.com/sitemap-marketing.xml</loc></sitemap>
|
|
94
|
+
<sitemap><loc>https://www.example.com/sitemap-insights.xml</loc></sitemap>
|
|
95
|
+
</sitemapindex>
|
|
96
|
+
XML
|
|
97
|
+
|
|
98
|
+
public_path.join("sitemap-marketing.xml").write(<<~XML)
|
|
99
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
|
100
|
+
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
|
|
101
|
+
<url>
|
|
102
|
+
<loc>https://www.example.com/pages/features</loc>
|
|
103
|
+
<lastmod>#{marketing_lastmod}</lastmod>
|
|
104
|
+
</url>
|
|
105
|
+
</urlset>
|
|
106
|
+
XML
|
|
107
|
+
|
|
108
|
+
public_path.join("sitemap-insights.xml").write(<<~XML)
|
|
109
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
|
110
|
+
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
|
|
111
|
+
<url>
|
|
112
|
+
<loc>https://www.example.com/insights/us/restaurants/overview</loc>
|
|
113
|
+
<lastmod>#{insights_lastmod}</lastmod>
|
|
114
|
+
</url>
|
|
115
|
+
</urlset>
|
|
116
|
+
XML
|
|
117
|
+
end
|
|
118
|
+
end
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "test_helper"
|
|
4
|
+
|
|
5
|
+
class IndexmapValidatorTest < Minitest::Test
|
|
6
|
+
def test_validate_raises_for_duplicate_urls
|
|
7
|
+
Dir.mktmpdir do |directory|
|
|
8
|
+
path = Pathname(directory).join("sitemap.xml")
|
|
9
|
+
path.write(<<~XML)
|
|
10
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
|
11
|
+
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
|
|
12
|
+
<url><loc>https://example.com/about</loc></url>
|
|
13
|
+
<url><loc>https://example.com/about</loc></url>
|
|
14
|
+
</urlset>
|
|
15
|
+
XML
|
|
16
|
+
|
|
17
|
+
error = assert_raises(Indexmap::ValidationError) do
|
|
18
|
+
Indexmap::Validator.new(path: path).validate!
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
assert_equal "Duplicate sitemap URLs detected: https://example.com/about", error.message
|
|
22
|
+
end
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
def test_validate_raises_for_parameterized_urls
|
|
26
|
+
Dir.mktmpdir do |directory|
|
|
27
|
+
path = Pathname(directory).join("sitemap.xml")
|
|
28
|
+
path.write(<<~XML)
|
|
29
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
|
30
|
+
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
|
|
31
|
+
<url><loc>https://example.com/about?ref=test</loc></url>
|
|
32
|
+
</urlset>
|
|
33
|
+
XML
|
|
34
|
+
|
|
35
|
+
error = assert_raises(Indexmap::ValidationError) do
|
|
36
|
+
Indexmap::Validator.new(path: path).validate!
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
assert_equal "Parameterized sitemap URLs detected: https://example.com/about?ref=test", error.message
|
|
40
|
+
end
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
def test_validate_passes_for_valid_sitemap
|
|
44
|
+
Dir.mktmpdir do |directory|
|
|
45
|
+
path = Pathname(directory).join("sitemap.xml")
|
|
46
|
+
path.write(<<~XML)
|
|
47
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
|
48
|
+
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
|
|
49
|
+
<url><loc>https://example.com/about</loc></url>
|
|
50
|
+
</urlset>
|
|
51
|
+
XML
|
|
52
|
+
|
|
53
|
+
assert Indexmap::Validator.new(path: path).validate!
|
|
54
|
+
end
|
|
55
|
+
end
|
|
56
|
+
end
|
data/test/test_helper.rb
CHANGED
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: indexmap
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
  version: 0.2.1
|
|
4
|
+
version: 0.3.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Paulo Fidalgo
|
|
@@ -24,6 +24,48 @@ dependencies:
|
|
|
24
24
|
- - ">="
|
|
25
25
|
- !ruby/object:Gem::Version
|
|
26
26
|
version: '7.1'
|
|
27
|
+
- !ruby/object:Gem::Dependency
|
|
28
|
+
name: faraday
|
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
|
30
|
+
requirements:
|
|
31
|
+
- - ">="
|
|
32
|
+
- !ruby/object:Gem::Version
|
|
33
|
+
version: '2.0'
|
|
34
|
+
type: :runtime
|
|
35
|
+
prerelease: false
|
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
37
|
+
requirements:
|
|
38
|
+
- - ">="
|
|
39
|
+
- !ruby/object:Gem::Version
|
|
40
|
+
version: '2.0'
|
|
41
|
+
- !ruby/object:Gem::Dependency
|
|
42
|
+
name: google-apis-searchconsole_v1
|
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
|
44
|
+
requirements:
|
|
45
|
+
- - ">="
|
|
46
|
+
- !ruby/object:Gem::Version
|
|
47
|
+
version: '0.18'
|
|
48
|
+
type: :runtime
|
|
49
|
+
prerelease: false
|
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
51
|
+
requirements:
|
|
52
|
+
- - ">="
|
|
53
|
+
- !ruby/object:Gem::Version
|
|
54
|
+
version: '0.18'
|
|
55
|
+
- !ruby/object:Gem::Dependency
|
|
56
|
+
name: googleauth
|
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
|
58
|
+
requirements:
|
|
59
|
+
- - ">="
|
|
60
|
+
- !ruby/object:Gem::Version
|
|
61
|
+
version: '1.12'
|
|
62
|
+
type: :runtime
|
|
63
|
+
prerelease: false
|
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
65
|
+
requirements:
|
|
66
|
+
- - ">="
|
|
67
|
+
- !ruby/object:Gem::Version
|
|
68
|
+
version: '1.12'
|
|
27
69
|
- !ruby/object:Gem::Dependency
|
|
28
70
|
name: nokogiri
|
|
29
71
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -94,6 +136,20 @@ dependencies:
|
|
|
94
136
|
- - "~>"
|
|
95
137
|
- !ruby/object:Gem::Version
|
|
96
138
|
version: '1.0'
|
|
139
|
+
- !ruby/object:Gem::Dependency
|
|
140
|
+
name: webmock
|
|
141
|
+
requirement: !ruby/object:Gem::Requirement
|
|
142
|
+
requirements:
|
|
143
|
+
- - "~>"
|
|
144
|
+
- !ruby/object:Gem::Version
|
|
145
|
+
version: '3.0'
|
|
146
|
+
type: :development
|
|
147
|
+
prerelease: false
|
|
148
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
149
|
+
requirements:
|
|
150
|
+
- - "~>"
|
|
151
|
+
- !ruby/object:Gem::Version
|
|
152
|
+
version: '3.0'
|
|
97
153
|
description: A small Ruby gem for generating sitemap indexes and child sitemaps from
|
|
98
154
|
explicit section definitions, with optional Rails rake task integration.
|
|
99
155
|
email:
|
|
@@ -108,13 +164,26 @@ files:
|
|
|
108
164
|
- lib/indexmap.rb
|
|
109
165
|
- lib/indexmap/configuration.rb
|
|
110
166
|
- lib/indexmap/entry.rb
|
|
167
|
+
- lib/indexmap/google_configuration.rb
|
|
168
|
+
- lib/indexmap/index_now_configuration.rb
|
|
169
|
+
- lib/indexmap/parser.rb
|
|
170
|
+
- lib/indexmap/path.rb
|
|
171
|
+
- lib/indexmap/pinger/base.rb
|
|
172
|
+
- lib/indexmap/pinger/google.rb
|
|
173
|
+
- lib/indexmap/pinger/index_now.rb
|
|
111
174
|
- lib/indexmap/railtie.rb
|
|
112
175
|
- lib/indexmap/section.rb
|
|
113
176
|
- lib/indexmap/task_runner.rb
|
|
177
|
+
- lib/indexmap/validator.rb
|
|
114
178
|
- lib/indexmap/version.rb
|
|
115
179
|
- lib/indexmap/writer.rb
|
|
116
180
|
- lib/tasks/indexmap_tasks.rake
|
|
117
181
|
- test/indexmap/configuration_test.rb
|
|
182
|
+
- test/indexmap/parser_test.rb
|
|
183
|
+
- test/indexmap/path_test.rb
|
|
184
|
+
- test/indexmap/pinger/google_test.rb
|
|
185
|
+
- test/indexmap/pinger/index_now_test.rb
|
|
186
|
+
- test/indexmap/validator_test.rb
|
|
118
187
|
- test/indexmap/writer_test.rb
|
|
119
188
|
- test/test_helper.rb
|
|
120
189
|
homepage: https://www.ethos-link.com/opensource/indexmap
|