indexmap 0.4.2 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -3
- data/README.md +103 -14
- data/lib/indexmap/configuration.rb +24 -20
- data/lib/indexmap/creator.rb +75 -0
- data/lib/indexmap/output.rb +74 -0
- data/lib/indexmap/parser.rb +0 -1
- data/lib/indexmap/path.rb +0 -1
- data/lib/indexmap/pinger/google.rb +16 -5
- data/lib/indexmap/task_runner.rb +9 -9
- data/lib/indexmap/validator.rb +120 -0
- data/lib/indexmap/version.rb +1 -1
- data/lib/indexmap/writer.rb +13 -5
- data/lib/indexmap.rb +6 -1
- data/lib/tasks/indexmap_tasks.rake +26 -26
- data/test/indexmap/configuration_test.rb +161 -16
- data/test/indexmap/pinger/google_test.rb +52 -1
- data/test/indexmap/task_runner_test.rb +29 -2
- data/test/indexmap/validator_test.rb +114 -0
- metadata +3 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: ef452a5928b87f84f65ecb9ba2afcab5340d9d09c5d8c8dfd8c4ffb3217a68fd
|
|
4
|
+
data.tar.gz: 03b11e1d9360bbd797d6b61886c99d1619247877dfd2c4676efeca9a1119764c
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: a04a772029e2438636df90b65004a27c54e1cf3360205fd556feff615768fd527668fe2a3e3063f2831b9af9afb4edd983e48e73b922278c4c10a63d936b938e
|
|
7
|
+
data.tar.gz: 6ce68d9ee6343aaf7c4bbff6860a712d743899390b4a8a3a09c712694c95cff32cc0acd85f30a43e2ba37f0c8ff671d2af11028e63976649ae90e5100d5e0a1b
|
data/CHANGELOG.md
CHANGED
|
@@ -5,12 +5,14 @@ All notable changes to this project will be documented in this file.
|
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|
6
6
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
7
|
|
|
8
|
-
## [0.
|
|
8
|
+
## [0.6.0] - 2026-05-01
|
|
9
9
|
|
|
10
10
|
|
|
11
|
-
###
|
|
11
|
+
### Added
|
|
12
12
|
|
|
13
|
-
-
|
|
13
|
+
- add the url count to google ping output (#9)
|
|
14
|
+
|
|
15
|
+
- support named sitemap outputs (#10)
|
|
14
16
|
|
|
15
17
|
|
|
16
18
|
|
data/README.md
CHANGED
|
@@ -80,12 +80,15 @@ end
|
|
|
80
80
|
Then run:
|
|
81
81
|
|
|
82
82
|
```bash
|
|
83
|
-
bin/rails sitemap:create
|
|
84
|
-
bin/rails sitemap:format
|
|
85
|
-
bin/rails sitemap:validate
|
|
83
|
+
bin/rails indexmap:sitemap:create
|
|
84
|
+
bin/rails indexmap:sitemap:format
|
|
85
|
+
bin/rails indexmap:sitemap:validate
|
|
86
86
|
```
|
|
87
87
|
|
|
88
|
-
`sitemap:create` is the main task. It writes sitemap files
|
|
88
|
+
`indexmap:sitemap:create` is the main task. It writes sitemap files to a local
|
|
89
|
+
temporary directory, formats them, validates the result, then replaces the final
|
|
90
|
+
XML files. Existing sitemap files are left untouched if generation or validation
|
|
91
|
+
fails.
|
|
89
92
|
|
|
90
93
|
### Default Index Mode
|
|
91
94
|
|
|
@@ -114,6 +117,85 @@ end
|
|
|
114
117
|
|
|
115
118
|
In `:single_file` mode, `indexmap` writes a `urlset` directly to `sitemap.xml` and reads entries from `config.entries` instead of `config.sections`.
|
|
116
119
|
|
|
120
|
+
### Named Outputs
|
|
121
|
+
|
|
122
|
+
Most apps only need the default output. Use named outputs when one part of the
|
|
123
|
+
sitemap must be generated separately, for example when static pages can be
|
|
124
|
+
generated during deploy but database-heavy pages should refresh later. Named
|
|
125
|
+
outputs still write normal sitemap XML files to a filesystem path; storage and
|
|
126
|
+
serving are application concerns.
|
|
127
|
+
|
|
128
|
+
```ruby
|
|
129
|
+
Indexmap.configure do |config|
|
|
130
|
+
config.base_url = -> { "https://example.com" }
|
|
131
|
+
config.public_path = -> { Rails.root.join("storage/sitemaps") }
|
|
132
|
+
config.sections = -> { Sitemap.sections }
|
|
133
|
+
|
|
134
|
+
config.output :insights_data do |output|
|
|
135
|
+
output.format = :single_file
|
|
136
|
+
output.index_filename = "sitemap-insights-data.xml"
|
|
137
|
+
output.entries = -> { Sitemap.insights_data_entries }
|
|
138
|
+
end
|
|
139
|
+
end
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
Generate the default output:
|
|
143
|
+
|
|
144
|
+
```ruby
|
|
145
|
+
Indexmap.create
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
Generate only the named output:
|
|
149
|
+
|
|
150
|
+
```ruby
|
|
151
|
+
Indexmap.create(:insights_data)
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
Named outputs inherit `base_url`, `public_path`, and `format` from the main
|
|
155
|
+
configuration unless you override them.
|
|
156
|
+
|
|
157
|
+
`Indexmap.create` uses the same safe local publish flow as the rake task:
|
|
158
|
+
generate in a temporary directory, format, validate, and then replace the final
|
|
159
|
+
XML file or files.
|
|
160
|
+
|
|
161
|
+
### Deferred Dynamic Sections
|
|
162
|
+
|
|
163
|
+
Use `after_create` when `indexmap:sitemap:create` should publish the default
|
|
164
|
+
sitemap first, then schedule slower dynamic sections for the background. The
|
|
165
|
+
callback runs only after the generated files have been formatted, validated, and
|
|
166
|
+
replaced successfully.
|
|
167
|
+
|
|
168
|
+
```ruby
|
|
169
|
+
Indexmap.configure do |config|
|
|
170
|
+
config.base_url = -> { "https://example.com" }
|
|
171
|
+
config.public_path = -> { Rails.root.join("storage/sitemaps") }
|
|
172
|
+
config.sections = -> { Sitemap.sections }
|
|
173
|
+
|
|
174
|
+
config.output :insights_data do |output|
|
|
175
|
+
output.format = :single_file
|
|
176
|
+
output.index_filename = "sitemap-insights-data.xml"
|
|
177
|
+
output.entries = -> { Sitemap.insights_data_entries }
|
|
178
|
+
end
|
|
179
|
+
|
|
180
|
+
config.after_create do
|
|
181
|
+
Insights::SitemapRefreshJob.perform_later
|
|
182
|
+
end
|
|
183
|
+
end
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
Then the job can stay small:
|
|
187
|
+
|
|
188
|
+
```ruby
|
|
189
|
+
class Insights::SitemapRefreshJob < ApplicationJob
|
|
190
|
+
def perform
|
|
191
|
+
Indexmap.create(:insights_data)
|
|
192
|
+
end
|
|
193
|
+
end
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
This keeps deploys fast: the deploy only waits for `indexmap:sitemap:create`,
|
|
197
|
+
while database-dependent output is refreshed by the job backend.
|
|
198
|
+
|
|
117
199
|
## Validation And Parsing
|
|
118
200
|
|
|
119
201
|
`indexmap` also includes small utilities for working with generated sitemap files:
|
|
@@ -129,8 +211,15 @@ Indexmap::Validator.new.validate!
|
|
|
129
211
|
The built-in validator checks for:
|
|
130
212
|
|
|
131
213
|
- missing sitemap files
|
|
214
|
+
- malformed sitemap XML
|
|
215
|
+
- empty sitemap files
|
|
216
|
+
- missing or duplicate child sitemap references
|
|
132
217
|
- duplicate sitemap URLs
|
|
133
218
|
- parameterized URLs in sitemap entries
|
|
219
|
+
- fragment URLs in sitemap entries
|
|
220
|
+
- non-HTTP or relative URLs
|
|
221
|
+
- URLs outside the configured `base_url`
|
|
222
|
+
- invalid `lastmod` values
|
|
134
223
|
|
|
135
224
|
## Search Engine Ping
|
|
136
225
|
|
|
@@ -139,11 +228,11 @@ The built-in validator checks for:
|
|
|
139
228
|
Available rake tasks:
|
|
140
229
|
|
|
141
230
|
```bash
|
|
142
|
-
bin/rails sitemap:validate
|
|
143
|
-
bin/rails
|
|
144
|
-
bin/rails
|
|
145
|
-
bin/rails
|
|
146
|
-
bin/rails
|
|
231
|
+
bin/rails indexmap:sitemap:validate
|
|
232
|
+
bin/rails indexmap:google:ping
|
|
233
|
+
bin/rails indexmap:index_now:ping
|
|
234
|
+
bin/rails indexmap:index_now:write_key
|
|
235
|
+
bin/rails indexmap:ping
|
|
147
236
|
```
|
|
148
237
|
|
|
149
238
|
### Google Search Console
|
|
@@ -156,7 +245,7 @@ Indexmap.configure do |config|
|
|
|
156
245
|
end
|
|
157
246
|
```
|
|
158
247
|
|
|
159
|
-
If `config.google.credentials` is blank, `
|
|
248
|
+
If `config.google.credentials` is blank, `indexmap:google:ping` skips Google submission.
|
|
160
249
|
|
|
161
250
|
You can optionally override the Search Console property identifier:
|
|
162
251
|
|
|
@@ -184,21 +273,21 @@ Indexmap.configure do |config|
|
|
|
184
273
|
end
|
|
185
274
|
```
|
|
186
275
|
|
|
187
|
-
If `config.index_now.key` is set, `sitemap:create` also writes the matching `public/<key>.txt` verification file automatically.
|
|
276
|
+
If `config.index_now.key` is set, `indexmap:sitemap:create` also writes the matching `public/<key>.txt` verification file automatically.
|
|
188
277
|
|
|
189
278
|
If you prefer the file-based flow, run:
|
|
190
279
|
|
|
191
280
|
```bash
|
|
192
|
-
bin/rails
|
|
281
|
+
bin/rails indexmap:index_now:write_key
|
|
193
282
|
```
|
|
194
283
|
|
|
195
284
|
That task:
|
|
196
285
|
|
|
197
286
|
- reuses an existing valid key file when present
|
|
198
287
|
- otherwise generates a new key in `public/<key>.txt`
|
|
199
|
-
- makes that key available to `
|
|
288
|
+
- makes that key available to `indexmap:index_now:ping` without adding `config.index_now.key`
|
|
200
289
|
|
|
201
|
-
If neither a configured key nor a valid key file is present, `
|
|
290
|
+
If neither a configured key nor a valid key file is present, `indexmap:index_now:ping` skips IndexNow submission.
|
|
202
291
|
|
|
203
292
|
## Development
|
|
204
293
|
|
|
@@ -9,6 +9,8 @@ module Indexmap
|
|
|
9
9
|
def initialize
|
|
10
10
|
@format = :index
|
|
11
11
|
@index_filename = "sitemap.xml"
|
|
12
|
+
@after_create_callbacks = []
|
|
13
|
+
@outputs = {}
|
|
12
14
|
end
|
|
13
15
|
|
|
14
16
|
def base_url
|
|
@@ -47,27 +49,29 @@ module Indexmap
|
|
|
47
49
|
Array(resolve(@sections))
|
|
48
50
|
end
|
|
49
51
|
|
|
52
|
+
def output(name)
|
|
53
|
+
output = output_for(name)
|
|
54
|
+
yield(output) if block_given?
|
|
55
|
+
output
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
def output_for(name = :default)
|
|
59
|
+
normalized_name = name.to_sym
|
|
60
|
+
@outputs[normalized_name] ||= Output.new(configuration: self)
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
def after_create(&block)
|
|
64
|
+
raise ArgumentError, "after_create requires a block" unless block
|
|
65
|
+
|
|
66
|
+
@after_create_callbacks << block
|
|
67
|
+
end
|
|
68
|
+
|
|
69
|
+
def run_after_create_callbacks
|
|
70
|
+
@after_create_callbacks.each(&:call)
|
|
71
|
+
end
|
|
72
|
+
|
|
50
73
|
def writer
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
unless VALID_FORMATS.include?(format)
|
|
54
|
-
raise ConfigurationError, "Indexmap format must be one of: #{VALID_FORMATS.join(", ")}"
|
|
55
|
-
end
|
|
56
|
-
|
|
57
|
-
if format == :single_file
|
|
58
|
-
raise ConfigurationError, "Indexmap entries are not configured" if entries.empty?
|
|
59
|
-
elsif sections.empty?
|
|
60
|
-
raise ConfigurationError, "Indexmap sections are not configured" if sections.empty?
|
|
61
|
-
end
|
|
62
|
-
|
|
63
|
-
Writer.new(
|
|
64
|
-
entries: entries,
|
|
65
|
-
format: format,
|
|
66
|
-
sections: sections,
|
|
67
|
-
public_path: public_path,
|
|
68
|
-
base_url: base_url,
|
|
69
|
-
index_filename: index_filename
|
|
70
|
-
)
|
|
74
|
+
output_for(:default).writer
|
|
71
75
|
end
|
|
72
76
|
|
|
73
77
|
private
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "nokogiri"
|
|
4
|
+
require "tmpdir"
|
|
5
|
+
|
|
6
|
+
module Indexmap
|
|
7
|
+
class Creator
|
|
8
|
+
ValidationConfiguration = Struct.new(:base_url, keyword_init: true)
|
|
9
|
+
|
|
10
|
+
def initialize(output:)
|
|
11
|
+
@output = output
|
|
12
|
+
end
|
|
13
|
+
|
|
14
|
+
def create
|
|
15
|
+
FileUtils.mkdir_p(output.public_path.dirname)
|
|
16
|
+
|
|
17
|
+
Dir.mktmpdir("indexmap", output.public_path.dirname) do |dir|
|
|
18
|
+
staging_path = Pathname(dir)
|
|
19
|
+
written_files = write_to(staging_path)
|
|
20
|
+
sitemap_files = sitemap_files_in(staging_path)
|
|
21
|
+
|
|
22
|
+
format(sitemap_files)
|
|
23
|
+
validate(staging_path.join(output.index_filename))
|
|
24
|
+
|
|
25
|
+
publish(sitemap_files)
|
|
26
|
+
written_files.map { |path| output.public_path.join(path.basename) }
|
|
27
|
+
end
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
private
|
|
31
|
+
|
|
32
|
+
attr_reader :output
|
|
33
|
+
|
|
34
|
+
def write_to(staging_path)
|
|
35
|
+
output.writer.tap do |writer|
|
|
36
|
+
writer.public_path = staging_path
|
|
37
|
+
end.write
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
def sitemap_files_in(path)
|
|
41
|
+
path.glob("sitemap*.xml").sort
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
def format(files)
|
|
45
|
+
files.each do |file_path|
|
|
46
|
+
document = Nokogiri::XML(
|
|
47
|
+
file_path.read,
|
|
48
|
+
nil,
|
|
49
|
+
nil,
|
|
50
|
+
Nokogiri::XML::ParseOptions::DEFAULT_XML | Nokogiri::XML::ParseOptions::NOBLANKS
|
|
51
|
+
)
|
|
52
|
+
save_options = Nokogiri::XML::Node::SaveOptions::FORMAT | Nokogiri::XML::Node::SaveOptions::AS_XML
|
|
53
|
+
|
|
54
|
+
file_path.write(document.to_xml(indent: 2, save_with: save_options))
|
|
55
|
+
end
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
def validate(index_path)
|
|
59
|
+
Validator.new(
|
|
60
|
+
configuration: ValidationConfiguration.new(base_url: output.base_url),
|
|
61
|
+
path: index_path
|
|
62
|
+
).validate!
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
def publish(files)
|
|
66
|
+
FileUtils.mkdir_p(output.public_path)
|
|
67
|
+
|
|
68
|
+
files.map do |file_path|
|
|
69
|
+
final_path = output.public_path.join(file_path.basename)
|
|
70
|
+
File.rename(file_path, final_path)
|
|
71
|
+
final_path
|
|
72
|
+
end
|
|
73
|
+
end
|
|
74
|
+
end
|
|
75
|
+
end
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Indexmap
|
|
4
|
+
class Output
|
|
5
|
+
VALID_FORMATS = %i[index single_file].freeze
|
|
6
|
+
|
|
7
|
+
attr_writer :base_url, :entries, :format, :index_filename, :public_path, :sections
|
|
8
|
+
|
|
9
|
+
def initialize(configuration:)
|
|
10
|
+
@configuration = configuration
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
def base_url
|
|
14
|
+
resolve(@base_url) || configuration.base_url
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
def entries
|
|
18
|
+
resolved_entries = resolve(@entries)
|
|
19
|
+
|
|
20
|
+
Array(resolved_entries.nil? ? configuration.entries : resolved_entries)
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
def format
|
|
24
|
+
value = resolve(@format) || configuration.format
|
|
25
|
+
value.nil? ? :index : value.to_sym
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
def index_filename
|
|
29
|
+
resolve(@index_filename) || configuration.index_filename
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
def public_path
|
|
33
|
+
value = resolve(@public_path) || configuration.public_path
|
|
34
|
+
Pathname(value)
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
def sections
|
|
38
|
+
resolved_sections = resolve(@sections)
|
|
39
|
+
|
|
40
|
+
Array(resolved_sections.nil? ? configuration.sections : resolved_sections)
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
def writer
|
|
44
|
+
raise ConfigurationError, "Indexmap base_url is not configured" if base_url.to_s.strip.empty?
|
|
45
|
+
|
|
46
|
+
unless VALID_FORMATS.include?(format)
|
|
47
|
+
raise ConfigurationError, "Indexmap format must be one of: #{VALID_FORMATS.join(", ")}"
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
if format == :single_file
|
|
51
|
+
raise ConfigurationError, "Indexmap entries are not configured" if entries.empty?
|
|
52
|
+
elsif sections.empty?
|
|
53
|
+
raise ConfigurationError, "Indexmap sections are not configured" if sections.empty?
|
|
54
|
+
end
|
|
55
|
+
|
|
56
|
+
Writer.new(
|
|
57
|
+
entries: entries,
|
|
58
|
+
format: format,
|
|
59
|
+
sections: sections,
|
|
60
|
+
public_path: public_path,
|
|
61
|
+
base_url: base_url,
|
|
62
|
+
index_filename: index_filename
|
|
63
|
+
)
|
|
64
|
+
end
|
|
65
|
+
|
|
66
|
+
private
|
|
67
|
+
|
|
68
|
+
attr_reader :configuration
|
|
69
|
+
|
|
70
|
+
def resolve(value)
|
|
71
|
+
value.respond_to?(:call) ? value.call : value
|
|
72
|
+
end
|
|
73
|
+
end
|
|
74
|
+
end
|
data/lib/indexmap/parser.rb
CHANGED
data/lib/indexmap/path.rb
CHANGED
|
@@ -20,10 +20,11 @@ module Indexmap
|
|
|
20
20
|
return {status: :skipped, reason: :missing_credentials}
|
|
21
21
|
end
|
|
22
22
|
|
|
23
|
-
|
|
23
|
+
files = sitemap_files
|
|
24
|
+
results = files.map { |sitemap_file| ping_sitemap(sitemap_file) }
|
|
24
25
|
return {status: :skipped, reason: :no_sitemaps} if results.empty?
|
|
25
26
|
|
|
26
|
-
summarize_results(results)
|
|
27
|
+
summarize_results(results, url_count: sitemap_url_count(files))
|
|
27
28
|
end
|
|
28
29
|
|
|
29
30
|
private
|
|
@@ -90,16 +91,26 @@ module Indexmap
|
|
|
90
91
|
)
|
|
91
92
|
end
|
|
92
93
|
|
|
93
|
-
def
|
|
94
|
+
def sitemap_url_count(files)
|
|
95
|
+
files.each_with_object(Set.new) do |sitemap_file, urls|
|
|
96
|
+
Parser.new(path: sitemap_file).entries.each do |entry|
|
|
97
|
+
loc = entry.loc.to_s.strip
|
|
98
|
+
urls.add(loc) unless loc.empty?
|
|
99
|
+
end
|
|
100
|
+
end.count
|
|
101
|
+
end
|
|
102
|
+
|
|
103
|
+
def summarize_results(results, url_count:)
|
|
94
104
|
submitted = results.select { |result| result[:status] == :submitted }
|
|
95
105
|
failures = results.select { |result| result[:status] == :failed }
|
|
96
106
|
|
|
97
|
-
return {status: :submitted, sitemap_count: submitted.count, submitted: submitted} if failures.empty?
|
|
98
|
-
return {status: :failed, sitemap_count: 0, failures: failures} if submitted.empty?
|
|
107
|
+
return {status: :submitted, sitemap_count: submitted.count, url_count: url_count, submitted: submitted} if failures.empty?
|
|
108
|
+
return {status: :failed, sitemap_count: 0, url_count: 0, failures: failures} if submitted.empty?
|
|
99
109
|
|
|
100
110
|
{
|
|
101
111
|
status: :partial,
|
|
102
112
|
sitemap_count: submitted.count,
|
|
113
|
+
url_count: url_count,
|
|
103
114
|
submitted: submitted,
|
|
104
115
|
failures: failures
|
|
105
116
|
}
|
data/lib/indexmap/task_runner.rb
CHANGED
|
@@ -9,9 +9,11 @@ module Indexmap
|
|
|
9
9
|
end
|
|
10
10
|
|
|
11
11
|
def create
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
12
|
+
written_files = Indexmap.create(configuration: configuration)
|
|
13
|
+
index_now_key_path = write_index_now_key
|
|
14
|
+
configuration.run_after_create_callbacks
|
|
15
|
+
|
|
16
|
+
{files: written_files.map(&:to_s), written_files: written_files, index_now_key_path: index_now_key_path}
|
|
15
17
|
end
|
|
16
18
|
|
|
17
19
|
def format
|
|
@@ -44,21 +46,19 @@ module Indexmap
|
|
|
44
46
|
end
|
|
45
47
|
|
|
46
48
|
def public_path
|
|
47
|
-
|
|
49
|
+
default_output.public_path
|
|
48
50
|
end
|
|
49
51
|
|
|
50
52
|
private
|
|
51
53
|
|
|
52
54
|
attr_reader :configuration
|
|
53
55
|
|
|
54
|
-
def
|
|
55
|
-
|
|
56
|
-
File.delete(file_path)
|
|
57
|
-
end
|
|
56
|
+
def default_output
|
|
57
|
+
configuration.output_for(:default)
|
|
58
58
|
end
|
|
59
59
|
|
|
60
60
|
def sitemap_files
|
|
61
|
-
Dir.glob(
|
|
61
|
+
Dir.glob(public_path.join("sitemap*.xml")).sort
|
|
62
62
|
end
|
|
63
63
|
end
|
|
64
64
|
end
|
data/lib/indexmap/validator.rb
CHANGED
|
@@ -1,5 +1,10 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require "nokogiri"
|
|
4
|
+
require "date"
|
|
5
|
+
require "time"
|
|
6
|
+
require "uri"
|
|
7
|
+
|
|
3
8
|
module Indexmap
|
|
4
9
|
class Validator
|
|
5
10
|
def initialize(configuration: Indexmap.configuration, path: nil)
|
|
@@ -14,9 +19,15 @@ module Indexmap
|
|
|
14
19
|
)
|
|
15
20
|
raise ValidationError, "Missing sitemap file: #{sitemap_path}" unless File.exist?(sitemap_path)
|
|
16
21
|
|
|
22
|
+
validate_sitemap_file!(sitemap_path)
|
|
17
23
|
entries = Parser.new(path: sitemap_path).entries
|
|
24
|
+
validate_presence!(entries)
|
|
18
25
|
validate_duplicates!(entries)
|
|
19
26
|
validate_parameterized_urls!(entries)
|
|
27
|
+
validate_fragment_urls!(entries)
|
|
28
|
+
validate_absolute_http_urls!(entries)
|
|
29
|
+
validate_same_host_urls!(entries)
|
|
30
|
+
validate_lastmods!(entries)
|
|
20
31
|
true
|
|
21
32
|
end
|
|
22
33
|
|
|
@@ -24,6 +35,65 @@ module Indexmap
|
|
|
24
35
|
|
|
25
36
|
attr_reader :configuration, :path
|
|
26
37
|
|
|
38
|
+
def validate_sitemap_file!(sitemap_path)
|
|
39
|
+
document = read_xml_document(sitemap_path)
|
|
40
|
+
root_name = document.root&.name
|
|
41
|
+
|
|
42
|
+
case root_name
|
|
43
|
+
when "urlset"
|
|
44
|
+
validate_urlset_document!(document, sitemap_path)
|
|
45
|
+
when "sitemapindex"
|
|
46
|
+
validate_sitemap_index_document!(document, sitemap_path)
|
|
47
|
+
else
|
|
48
|
+
raise ValidationError, "Invalid sitemap root element in #{sitemap_path}: #{root_name || "none"}"
|
|
49
|
+
end
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
def read_xml_document(file_path)
|
|
53
|
+
document = Nokogiri::XML(File.read(file_path, encoding: "UTF-8")) { |config| config.strict }
|
|
54
|
+
document.remove_namespaces!
|
|
55
|
+
document
|
|
56
|
+
rescue Nokogiri::XML::SyntaxError => error
|
|
57
|
+
raise ValidationError, "Invalid sitemap XML in #{file_path}: #{error.message.lines.first.strip}"
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
def validate_urlset_document!(document, sitemap_path)
|
|
61
|
+
return if document.xpath("/urlset/url/loc").any?
|
|
62
|
+
|
|
63
|
+
raise ValidationError, "Sitemap has no URLs: #{sitemap_path}"
|
|
64
|
+
end
|
|
65
|
+
|
|
66
|
+
def validate_sitemap_index_document!(document, sitemap_path)
|
|
67
|
+
child_locations = document.xpath("/sitemapindex/sitemap/loc").map { |node| node.text.to_s.strip }.reject(&:empty?)
|
|
68
|
+
raise ValidationError, "Sitemap index has no child sitemap URLs: #{sitemap_path}" if child_locations.empty?
|
|
69
|
+
|
|
70
|
+
duplicate_children = child_locations.group_by(&:itself).select { |_loc, values| values.size > 1 }.keys
|
|
71
|
+
unless duplicate_children.empty?
|
|
72
|
+
raise ValidationError, "Duplicate child sitemap URLs detected: #{duplicate_children.first(5).join(", ")}"
|
|
73
|
+
end
|
|
74
|
+
|
|
75
|
+
child_locations.each do |location|
|
|
76
|
+
child_path = local_child_path(sitemap_path, location)
|
|
77
|
+
raise ValidationError, "Missing child sitemap file: #{child_path}" unless File.exist?(child_path)
|
|
78
|
+
|
|
79
|
+
validate_sitemap_file!(child_path)
|
|
80
|
+
end
|
|
81
|
+
end
|
|
82
|
+
|
|
83
|
+
def local_child_path(sitemap_path, location)
|
|
84
|
+
uri = URI.parse(location)
|
|
85
|
+
filename = (uri.absolute? || location.start_with?("/")) ? File.basename(uri.path) : location
|
|
86
|
+
File.expand_path(filename, File.dirname(sitemap_path))
|
|
87
|
+
rescue URI::InvalidURIError
|
|
88
|
+
File.expand_path(location, File.dirname(sitemap_path))
|
|
89
|
+
end
|
|
90
|
+
|
|
91
|
+
def validate_presence!(entries)
|
|
92
|
+
return unless entries.empty?
|
|
93
|
+
|
|
94
|
+
raise ValidationError, "Sitemap has no URLs"
|
|
95
|
+
end
|
|
96
|
+
|
|
27
97
|
def validate_duplicates!(entries)
|
|
28
98
|
duplicates = entries.map(&:loc).group_by(&:itself).select { |_url, values| values.size > 1 }.keys
|
|
29
99
|
return if duplicates.empty?
|
|
@@ -37,5 +107,55 @@ module Indexmap
|
|
|
37
107
|
|
|
38
108
|
raise ValidationError, "Parameterized sitemap URLs detected: #{param_urls.first(5).join(", ")}"
|
|
39
109
|
end
|
|
110
|
+
|
|
111
|
+
def validate_fragment_urls!(entries)
|
|
112
|
+
fragment_urls = entries.map(&:loc).select { |url| parse_uri(url)&.fragment }
|
|
113
|
+
return if fragment_urls.empty?
|
|
114
|
+
|
|
115
|
+
raise ValidationError, "Fragment sitemap URLs detected: #{fragment_urls.first(5).join(", ")}"
|
|
116
|
+
end
|
|
117
|
+
|
|
118
|
+
def validate_absolute_http_urls!(entries)
|
|
119
|
+
invalid_urls = entries.map(&:loc).reject do |url|
|
|
120
|
+
uri = parse_uri(url)
|
|
121
|
+
uri&.absolute? && %w[http https].include?(uri.scheme)
|
|
122
|
+
end
|
|
123
|
+
return if invalid_urls.empty?
|
|
124
|
+
|
|
125
|
+
raise ValidationError, "Invalid sitemap URLs detected: #{invalid_urls.first(5).join(", ")}"
|
|
126
|
+
end
|
|
127
|
+
|
|
128
|
+
def validate_same_host_urls!(entries)
|
|
129
|
+
base_uri = parse_uri(configuration.base_url)
|
|
130
|
+
return unless base_uri&.host
|
|
131
|
+
|
|
132
|
+
invalid_urls = entries.map(&:loc).reject do |url|
|
|
133
|
+
uri = parse_uri(url)
|
|
134
|
+
uri&.host == base_uri.host && uri&.scheme == base_uri.scheme && uri&.port == base_uri.port
|
|
135
|
+
end
|
|
136
|
+
return if invalid_urls.empty?
|
|
137
|
+
|
|
138
|
+
raise ValidationError, "Sitemap URLs outside configured base URL detected: #{invalid_urls.first(5).join(", ")}"
|
|
139
|
+
end
|
|
140
|
+
|
|
141
|
+
def validate_lastmods!(entries)
|
|
142
|
+
invalid_entries = entries.select do |entry|
|
|
143
|
+
next false if entry.lastmod.nil?
|
|
144
|
+
|
|
145
|
+
Date.iso8601(entry.lastmod)
|
|
146
|
+
false
|
|
147
|
+
rescue ArgumentError
|
|
148
|
+
true
|
|
149
|
+
end
|
|
150
|
+
return if invalid_entries.empty?
|
|
151
|
+
|
|
152
|
+
raise ValidationError, "Invalid sitemap lastmod values detected: #{invalid_entries.first(5).map(&:loc).join(", ")}"
|
|
153
|
+
end
|
|
154
|
+
|
|
155
|
+
def parse_uri(url)
|
|
156
|
+
URI.parse(url.to_s)
|
|
157
|
+
rescue URI::InvalidURIError
|
|
158
|
+
nil
|
|
159
|
+
end
|
|
40
160
|
end
|
|
41
161
|
end
|
data/lib/indexmap/version.rb
CHANGED
data/lib/indexmap/writer.rb
CHANGED
|
@@ -16,18 +16,20 @@ module Indexmap
|
|
|
16
16
|
def write
|
|
17
17
|
FileUtils.mkdir_p(public_path)
|
|
18
18
|
|
|
19
|
-
return
|
|
19
|
+
return [write_file(index_filename, urlset_xml(entries))] if single_file?
|
|
20
20
|
|
|
21
|
-
sections.
|
|
22
|
-
|
|
21
|
+
paths = sections.map do |section|
|
|
22
|
+
write_file(section.filename, urlset_xml(section.entries))
|
|
23
23
|
end
|
|
24
24
|
|
|
25
|
-
|
|
25
|
+
paths + [write_file(index_filename, index_xml(sections))]
|
|
26
26
|
end
|
|
27
27
|
|
|
28
|
+
attr_accessor :public_path
|
|
29
|
+
|
|
28
30
|
private
|
|
29
31
|
|
|
30
|
-
attr_reader :base_url, :entries, :format, :index_filename, :
|
|
32
|
+
attr_reader :base_url, :entries, :format, :index_filename, :sections
|
|
31
33
|
|
|
32
34
|
def normalize_entries(raw_entries)
|
|
33
35
|
Array(raw_entries).map { |entry| normalize_entry(entry) }
|
|
@@ -55,6 +57,12 @@ module Indexmap
|
|
|
55
57
|
format == :single_file
|
|
56
58
|
end
|
|
57
59
|
|
|
60
|
+
def write_file(filename, body)
|
|
61
|
+
path = public_path.join(filename)
|
|
62
|
+
path.write(body)
|
|
63
|
+
path
|
|
64
|
+
end
|
|
65
|
+
|
|
58
66
|
def urlset_xml(entries)
|
|
59
67
|
lines = [
|
|
60
68
|
%(<?xml version="1.0" encoding="UTF-8"?>),
|
data/lib/indexmap.rb
CHANGED
|
@@ -3,14 +3,15 @@
|
|
|
3
3
|
require "cgi"
|
|
4
4
|
require "date"
|
|
5
5
|
require "fileutils"
|
|
6
|
-
require "pathname"
|
|
7
6
|
require "time"
|
|
8
7
|
|
|
9
8
|
require_relative "indexmap/version"
|
|
10
9
|
require_relative "indexmap/google_configuration"
|
|
11
10
|
require_relative "indexmap/index_now_configuration"
|
|
12
11
|
require_relative "indexmap/configuration"
|
|
12
|
+
require_relative "indexmap/creator"
|
|
13
13
|
require_relative "indexmap/entry"
|
|
14
|
+
require_relative "indexmap/output"
|
|
14
15
|
require_relative "indexmap/path"
|
|
15
16
|
require_relative "indexmap/parser"
|
|
16
17
|
require_relative "indexmap/pinger/base"
|
|
@@ -39,6 +40,10 @@ module Indexmap
|
|
|
39
40
|
def reset!
|
|
40
41
|
@configuration = Configuration.new
|
|
41
42
|
end
|
|
43
|
+
|
|
44
|
+
def create(output_name = :default, configuration: self.configuration)
|
|
45
|
+
Creator.new(output: configuration.output_for(output_name)).create
|
|
46
|
+
end
|
|
42
47
|
end
|
|
43
48
|
end
|
|
44
49
|
|
|
@@ -1,35 +1,35 @@
|
|
|
1
|
-
namespace :
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
end
|
|
1
|
+
namespace :indexmap do
|
|
2
|
+
namespace :sitemap do
|
|
3
|
+
desc "Create sitemap files"
|
|
4
|
+
task create: :environment do
|
|
5
|
+
runner = Indexmap::TaskRunner.new
|
|
6
|
+
create_result = runner.create
|
|
7
|
+
|
|
8
|
+
puts "Created, formatted, and validated #{file_count(create_result[:files])} in #{public_directory(runner)}."
|
|
9
|
+
puts "IndexNow key file: #{create_result[:index_now_key_path]}" if create_result[:index_now_key_path]
|
|
10
|
+
end
|
|
12
11
|
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
12
|
+
desc "Format sitemap files for better readability"
|
|
13
|
+
task format: :environment do
|
|
14
|
+
runner = Indexmap::TaskRunner.new
|
|
15
|
+
formatted_files = runner.format
|
|
17
16
|
|
|
18
|
-
|
|
19
|
-
|
|
17
|
+
puts "Formatted #{file_count(formatted_files)} in #{public_directory(runner)}."
|
|
18
|
+
end
|
|
20
19
|
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
20
|
+
desc "Validate sitemap shape and URL hygiene"
|
|
21
|
+
task validate: :environment do
|
|
22
|
+
runner = Indexmap::TaskRunner.new
|
|
23
|
+
validated_files = runner.validate
|
|
25
24
|
|
|
26
|
-
|
|
25
|
+
puts "Validated #{file_count(validated_files)} for sitemap shape and URL hygiene."
|
|
26
|
+
end
|
|
27
27
|
end
|
|
28
28
|
|
|
29
29
|
desc "Ping all configured search engines"
|
|
30
30
|
task ping: :environment do
|
|
31
|
-
Rake::Task["
|
|
32
|
-
Rake::Task["
|
|
31
|
+
Rake::Task["indexmap:index_now:ping"].invoke
|
|
32
|
+
Rake::Task["indexmap:google:ping"].invoke
|
|
33
33
|
end
|
|
34
34
|
|
|
35
35
|
namespace :google do
|
|
@@ -39,9 +39,9 @@ namespace :sitemap do
|
|
|
39
39
|
|
|
40
40
|
case result[:status]
|
|
41
41
|
when :submitted
|
|
42
|
-
puts "Submitted #{result[:sitemap_count]} sitemap #{(result[:sitemap_count] == 1) ? "file" : "files"} to Google Search Console."
|
|
42
|
+
puts "Submitted #{result[:sitemap_count]} sitemap #{(result[:sitemap_count] == 1) ? "file" : "files"} with #{result[:url_count]} URL#{"s" unless result[:url_count] == 1} to Google Search Console."
|
|
43
43
|
when :partial
|
|
44
|
-
puts "Submitted #{result[:sitemap_count]} sitemap #{(result[:sitemap_count] == 1) ? "file" : "files"} to Google Search Console, with #{result[:failures].count} failure#{"s" unless result[:failures].count == 1}."
|
|
44
|
+
puts "Submitted #{result[:sitemap_count]} sitemap #{(result[:sitemap_count] == 1) ? "file" : "files"} with #{result[:url_count]} URL#{"s" unless result[:url_count] == 1} to Google Search Console, with #{result[:failures].count} failure#{"s" unless result[:failures].count == 1}."
|
|
45
45
|
result[:failures].each { |failure| puts format_google_ping_failure(failure) }
|
|
46
46
|
when :failed
|
|
47
47
|
result[:failures].each { |failure| puts format_google_ping_failure(failure) }
|
|
@@ -8,30 +8,41 @@ class IndexmapConfigurationTest < Minitest::Test
|
|
|
8
8
|
end
|
|
9
9
|
|
|
10
10
|
def test_writer_builds_from_configured_callables
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
11
|
+
Dir.mktmpdir do |dir|
|
|
12
|
+
public_path = Pathname(dir)
|
|
13
|
+
|
|
14
|
+
Indexmap.configure do |config|
|
|
15
|
+
config.base_url = -> { "https://example.com" }
|
|
16
|
+
config.public_path = -> { public_path }
|
|
17
|
+
config.sections = -> do
|
|
18
|
+
[Indexmap::Section.new(filename: "sitemap-pages.xml", entries: [Indexmap::Entry.new(loc: "https://example.com/")])]
|
|
19
|
+
end
|
|
16
20
|
end
|
|
17
|
-
end
|
|
18
21
|
|
|
19
|
-
|
|
22
|
+
Indexmap.configuration.writer.write
|
|
20
23
|
|
|
21
|
-
|
|
24
|
+
assert_includes public_path.join("sitemap.xml").read, "<loc>https://example.com/sitemap-pages.xml</loc>"
|
|
25
|
+
assert_includes public_path.join("sitemap-pages.xml").read, "<loc>https://example.com/</loc>"
|
|
26
|
+
end
|
|
22
27
|
end
|
|
23
28
|
|
|
24
29
|
def test_writer_builds_single_file_writer_from_configured_entries
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
+
Dir.mktmpdir do |dir|
|
|
31
|
+
public_path = Pathname(dir)
|
|
32
|
+
|
|
33
|
+
Indexmap.configure do |config|
|
|
34
|
+
config.base_url = "https://example.com"
|
|
35
|
+
config.public_path = public_path
|
|
36
|
+
config.format = :single_file
|
|
37
|
+
config.entries = -> { [Indexmap::Entry.new(loc: "https://example.com/")] }
|
|
38
|
+
end
|
|
30
39
|
|
|
31
|
-
|
|
40
|
+
Indexmap.configuration.writer.write
|
|
32
41
|
|
|
33
|
-
|
|
34
|
-
|
|
42
|
+
assert_includes public_path.join("sitemap.xml").read, "<urlset"
|
|
43
|
+
assert_includes public_path.join("sitemap.xml").read, "<loc>https://example.com/</loc>"
|
|
44
|
+
refute public_path.join("sitemap-pages.xml").exist?
|
|
45
|
+
end
|
|
35
46
|
end
|
|
36
47
|
|
|
37
48
|
def test_writer_raises_without_base_url
|
|
@@ -80,4 +91,138 @@ class IndexmapConfigurationTest < Minitest::Test
|
|
|
80
91
|
assert_equal "example-key", Indexmap.configuration.index_now.key
|
|
81
92
|
assert_equal 250, Indexmap.configuration.index_now.max_urls_per_request
|
|
82
93
|
end
|
|
94
|
+
|
|
95
|
+
def test_named_outputs_inherit_configuration_defaults
|
|
96
|
+
Dir.mktmpdir do |dir|
|
|
97
|
+
public_path = Pathname(dir)
|
|
98
|
+
|
|
99
|
+
Indexmap.configure do |config|
|
|
100
|
+
config.base_url = "https://example.com"
|
|
101
|
+
config.public_path = public_path
|
|
102
|
+
config.output :reports do |output|
|
|
103
|
+
output.sections = [
|
|
104
|
+
Indexmap::Section.new(
|
|
105
|
+
filename: "sitemap-reports.xml",
|
|
106
|
+
entries: [Indexmap::Entry.new(loc: "https://example.com/reports")]
|
|
107
|
+
)
|
|
108
|
+
]
|
|
109
|
+
end
|
|
110
|
+
end
|
|
111
|
+
|
|
112
|
+
files = Indexmap.create(:reports)
|
|
113
|
+
|
|
114
|
+
assert_equal [
|
|
115
|
+
public_path.join("sitemap-reports.xml"),
|
|
116
|
+
public_path.join("sitemap.xml")
|
|
117
|
+
], files
|
|
118
|
+
assert_includes public_path.join("sitemap.xml").read, "https://example.com/sitemap-reports.xml"
|
|
119
|
+
end
|
|
120
|
+
end
|
|
121
|
+
|
|
122
|
+
def test_create_writes_named_output_to_public_path
|
|
123
|
+
Dir.mktmpdir do |dir|
|
|
124
|
+
public_path = Pathname(dir)
|
|
125
|
+
|
|
126
|
+
Indexmap.configure do |config|
|
|
127
|
+
config.base_url = "https://example.com"
|
|
128
|
+
config.public_path = public_path
|
|
129
|
+
config.output :dynamic do |output|
|
|
130
|
+
output.sections = [
|
|
131
|
+
Indexmap::Section.new(
|
|
132
|
+
filename: "sitemap-dynamic.xml",
|
|
133
|
+
entries: [Indexmap::Entry.new(loc: "https://example.com/dynamic")]
|
|
134
|
+
)
|
|
135
|
+
]
|
|
136
|
+
end
|
|
137
|
+
end
|
|
138
|
+
|
|
139
|
+
files = Indexmap.create(:dynamic)
|
|
140
|
+
|
|
141
|
+
assert_equal [
|
|
142
|
+
public_path.join("sitemap-dynamic.xml"),
|
|
143
|
+
public_path.join("sitemap.xml")
|
|
144
|
+
], files
|
|
145
|
+
assert_includes public_path.join("sitemap-dynamic.xml").read, "https://example.com/dynamic"
|
|
146
|
+
assert_includes public_path.join("sitemap.xml").read, "https://example.com/sitemap-dynamic.xml"
|
|
147
|
+
end
|
|
148
|
+
end
|
|
149
|
+
|
|
150
|
+
def test_create_preserves_existing_files_when_validation_fails
|
|
151
|
+
Dir.mktmpdir do |dir|
|
|
152
|
+
public_path = Pathname(dir)
|
|
153
|
+
public_path.join("sitemap.xml").write("old index")
|
|
154
|
+
public_path.join("sitemap-pages.xml").write("old child")
|
|
155
|
+
|
|
156
|
+
Indexmap.configure do |config|
|
|
157
|
+
config.base_url = "https://example.com"
|
|
158
|
+
config.public_path = public_path
|
|
159
|
+
config.sections = [
|
|
160
|
+
Indexmap::Section.new(
|
|
161
|
+
filename: "sitemap-pages.xml",
|
|
162
|
+
entries: [Indexmap::Entry.new(loc: "https://example.com/about?utm_source=test")]
|
|
163
|
+
)
|
|
164
|
+
]
|
|
165
|
+
end
|
|
166
|
+
|
|
167
|
+
error = assert_raises(Indexmap::ValidationError) { Indexmap.create }
|
|
168
|
+
|
|
169
|
+
assert_match "Parameterized sitemap URLs detected", error.message
|
|
170
|
+
assert_equal "old index", public_path.join("sitemap.xml").read
|
|
171
|
+
assert_equal "old child", public_path.join("sitemap-pages.xml").read
|
|
172
|
+
end
|
|
173
|
+
end
|
|
174
|
+
|
|
175
|
+
def test_create_writes_single_file_named_output_without_default_index
|
|
176
|
+
Dir.mktmpdir do |dir|
|
|
177
|
+
public_path = Pathname(dir)
|
|
178
|
+
|
|
179
|
+
Indexmap.configure do |config|
|
|
180
|
+
config.base_url = "https://example.com"
|
|
181
|
+
config.public_path = public_path
|
|
182
|
+
config.output :dynamic do |output|
|
|
183
|
+
output.format = :single_file
|
|
184
|
+
output.index_filename = "sitemap-dynamic.xml"
|
|
185
|
+
output.entries = [
|
|
186
|
+
Indexmap::Entry.new(loc: "https://example.com/dynamic")
|
|
187
|
+
]
|
|
188
|
+
end
|
|
189
|
+
end
|
|
190
|
+
|
|
191
|
+
files = Indexmap.create(:dynamic)
|
|
192
|
+
|
|
193
|
+
assert_equal [public_path.join("sitemap-dynamic.xml")], files
|
|
194
|
+
refute public_path.join("sitemap.xml").exist?
|
|
195
|
+
assert_includes public_path.join("sitemap-dynamic.xml").read, "https://example.com/dynamic"
|
|
196
|
+
end
|
|
197
|
+
end
|
|
198
|
+
|
|
199
|
+
def test_create_preserves_existing_named_output_when_validation_fails
|
|
200
|
+
Dir.mktmpdir do |dir|
|
|
201
|
+
public_path = Pathname(dir)
|
|
202
|
+
public_path.join("sitemap-dynamic.xml").write("old dynamic")
|
|
203
|
+
|
|
204
|
+
Indexmap.configure do |config|
|
|
205
|
+
config.base_url = "https://example.com"
|
|
206
|
+
config.public_path = public_path
|
|
207
|
+
config.output :dynamic do |output|
|
|
208
|
+
output.format = :single_file
|
|
209
|
+
output.index_filename = "sitemap-dynamic.xml"
|
|
210
|
+
output.entries = [
|
|
211
|
+
Indexmap::Entry.new(loc: "https://example.com/dynamic?utm_source=test")
|
|
212
|
+
]
|
|
213
|
+
end
|
|
214
|
+
end
|
|
215
|
+
|
|
216
|
+
error = assert_raises(Indexmap::ValidationError) { Indexmap.create(:dynamic) }
|
|
217
|
+
|
|
218
|
+
assert_match "Parameterized sitemap URLs detected", error.message
|
|
219
|
+
assert_equal "old dynamic", public_path.join("sitemap-dynamic.xml").read
|
|
220
|
+
end
|
|
221
|
+
end
|
|
222
|
+
|
|
223
|
+
def test_after_create_requires_a_block
|
|
224
|
+
error = assert_raises(ArgumentError) { Indexmap.configuration.after_create }
|
|
225
|
+
|
|
226
|
+
assert_equal "after_create requires a block", error.message
|
|
227
|
+
end
|
|
83
228
|
end
|
|
@@ -8,10 +8,11 @@ class IndexmapPingerGoogleTest < Minitest::Test
|
|
|
8
8
|
|
|
9
9
|
class FakeWebmastersService
|
|
10
10
|
attr_accessor :authorization
|
|
11
|
-
attr_reader :submitted, :list_sites_calls
|
|
11
|
+
attr_reader :submitted, :submissions, :list_sites_calls
|
|
12
12
|
|
|
13
13
|
def initialize(site_urls:)
|
|
14
14
|
@site_urls = site_urls
|
|
15
|
+
@submissions = []
|
|
15
16
|
@list_sites_calls = 0
|
|
16
17
|
end
|
|
17
18
|
|
|
@@ -22,6 +23,7 @@ class IndexmapPingerGoogleTest < Minitest::Test
|
|
|
22
23
|
|
|
23
24
|
def submit_sitemap(property, sitemap_url)
|
|
24
25
|
@submitted = [property, sitemap_url]
|
|
26
|
+
@submissions << @submitted
|
|
25
27
|
end
|
|
26
28
|
end
|
|
27
29
|
|
|
@@ -53,10 +55,59 @@ class IndexmapPingerGoogleTest < Minitest::Test
|
|
|
53
55
|
assert_equal ["sc-domain:example.com", "https://www.example.com/sitemap.xml"], service.submitted
|
|
54
56
|
assert_equal :submitted, result[:status]
|
|
55
57
|
assert_equal 1, result[:sitemap_count]
|
|
58
|
+
assert_equal 0, result[:url_count]
|
|
56
59
|
assert_equal 1, service.list_sites_calls
|
|
57
60
|
end
|
|
58
61
|
end
|
|
59
62
|
|
|
63
|
+
def test_reports_unique_url_count_from_submitted_sitemaps
|
|
64
|
+
Dir.mktmpdir do |dir|
|
|
65
|
+
public_path = Pathname(dir)
|
|
66
|
+
public_path.join("sitemap.xml").write(<<~XML)
|
|
67
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
|
68
|
+
<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
|
|
69
|
+
<sitemap><loc>https://www.example.com/sitemap-pages.xml</loc></sitemap>
|
|
70
|
+
<sitemap><loc>https://www.example.com/sitemap-posts.xml</loc></sitemap>
|
|
71
|
+
</sitemapindex>
|
|
72
|
+
XML
|
|
73
|
+
public_path.join("sitemap-pages.xml").write(<<~XML)
|
|
74
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
|
75
|
+
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
|
|
76
|
+
<url><loc>https://www.example.com/</loc></url>
|
|
77
|
+
<url><loc>https://www.example.com/about</loc></url>
|
|
78
|
+
</urlset>
|
|
79
|
+
XML
|
|
80
|
+
public_path.join("sitemap-posts.xml").write(<<~XML)
|
|
81
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
|
82
|
+
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
|
|
83
|
+
<url><loc>https://www.example.com/about</loc></url>
|
|
84
|
+
<url><loc>https://www.example.com/blog</loc></url>
|
|
85
|
+
</urlset>
|
|
86
|
+
XML
|
|
87
|
+
|
|
88
|
+
configuration = Indexmap::Configuration.new
|
|
89
|
+
configuration.base_url = "https://www.example.com"
|
|
90
|
+
configuration.public_path = public_path
|
|
91
|
+
configuration.google.credentials = "{\"type\":\"service_account\"}"
|
|
92
|
+
|
|
93
|
+
service = FakeWebmastersService.new(site_urls: ["sc-domain:example.com"])
|
|
94
|
+
result = Indexmap::Pinger::Google.new(
|
|
95
|
+
configuration: configuration,
|
|
96
|
+
service: service,
|
|
97
|
+
credentials_builder: ->(**) { :fake_authorizer }
|
|
98
|
+
).ping
|
|
99
|
+
|
|
100
|
+
assert_equal :submitted, result[:status]
|
|
101
|
+
assert_equal 3, result[:sitemap_count]
|
|
102
|
+
assert_equal 3, result[:url_count]
|
|
103
|
+
assert_equal [
|
|
104
|
+
["sc-domain:example.com", "https://www.example.com/sitemap-pages.xml"],
|
|
105
|
+
["sc-domain:example.com", "https://www.example.com/sitemap-posts.xml"],
|
|
106
|
+
["sc-domain:example.com", "https://www.example.com/sitemap.xml"]
|
|
107
|
+
], service.submissions
|
|
108
|
+
end
|
|
109
|
+
end
|
|
110
|
+
|
|
60
111
|
def test_skips_google_ping_when_credentials_are_missing
|
|
61
112
|
Dir.mktmpdir do |dir|
|
|
62
113
|
public_path = Pathname(dir)
|
|
@@ -5,11 +5,12 @@ require "test_helper"
|
|
|
5
5
|
class IndexmapTaskRunnerTest < Minitest::Test
|
|
6
6
|
VALID_KEY = "1234567890abcdef1234567890abcdef"
|
|
7
7
|
|
|
8
|
-
def
|
|
8
|
+
def test_create_writes_new_sitemap_and_key_file_without_deleting_unrelated_files
|
|
9
9
|
Dir.mktmpdir do |dir|
|
|
10
10
|
public_path = Pathname(dir)
|
|
11
11
|
public_path.join("sitemap.xml").write("old")
|
|
12
12
|
public_path.join("sitemap-pages.xml.gz").write("old")
|
|
13
|
+
public_path.join("sitemap-extra.xml").write("existing")
|
|
13
14
|
|
|
14
15
|
configuration = Indexmap::Configuration.new
|
|
15
16
|
configuration.base_url = "https://example.com"
|
|
@@ -24,14 +25,40 @@ class IndexmapTaskRunnerTest < Minitest::Test
|
|
|
24
25
|
|
|
25
26
|
result = Indexmap::TaskRunner.new(configuration: configuration).create
|
|
26
27
|
|
|
27
|
-
|
|
28
|
+
assert public_path.join("sitemap-pages.xml.gz").exist?
|
|
29
|
+
assert_equal "existing", public_path.join("sitemap-extra.xml").read
|
|
28
30
|
assert_includes public_path.join("sitemap.xml").read, "<sitemapindex"
|
|
29
31
|
assert_equal VALID_KEY, public_path.join("#{VALID_KEY}.txt").read
|
|
30
32
|
assert_equal [public_path.join("sitemap-pages.xml").to_s, public_path.join("sitemap.xml").to_s], result[:files]
|
|
33
|
+
assert_equal [public_path.join("sitemap-pages.xml"), public_path.join("sitemap.xml")], result[:written_files]
|
|
31
34
|
assert_equal public_path.join("#{VALID_KEY}.txt"), result[:index_now_key_path]
|
|
32
35
|
end
|
|
33
36
|
end
|
|
34
37
|
|
|
38
|
+
def test_create_runs_after_create_callbacks_after_validation
|
|
39
|
+
Dir.mktmpdir do |dir|
|
|
40
|
+
calls = []
|
|
41
|
+
public_path = Pathname(dir)
|
|
42
|
+
configuration = Indexmap::Configuration.new
|
|
43
|
+
configuration.base_url = "https://example.com"
|
|
44
|
+
configuration.public_path = public_path
|
|
45
|
+
configuration.sections = [
|
|
46
|
+
Indexmap::Section.new(
|
|
47
|
+
filename: "sitemap-pages.xml",
|
|
48
|
+
entries: [Indexmap::Entry.new(loc: "https://example.com/about")]
|
|
49
|
+
)
|
|
50
|
+
]
|
|
51
|
+
configuration.after_create do
|
|
52
|
+
calls << :called
|
|
53
|
+
calls << public_path.join("sitemap.xml").read.include?("<sitemapindex")
|
|
54
|
+
end
|
|
55
|
+
|
|
56
|
+
Indexmap::TaskRunner.new(configuration: configuration).create
|
|
57
|
+
|
|
58
|
+
assert_equal [:called, true], calls
|
|
59
|
+
end
|
|
60
|
+
end
|
|
61
|
+
|
|
35
62
|
def test_write_index_now_key_returns_nil_when_key_is_not_configured
|
|
36
63
|
Dir.mktmpdir do |dir|
|
|
37
64
|
configuration = Indexmap::Configuration.new
|
|
@@ -52,6 +52,120 @@ class IndexmapValidatorTest < Minitest::Test
|
|
|
52
52
|
end
|
|
53
53
|
end
|
|
54
54
|
|
|
55
|
+
def test_validate_raises_for_fragment_urls
|
|
56
|
+
Dir.mktmpdir do |directory|
|
|
57
|
+
path = Pathname(directory).join("sitemap.xml")
|
|
58
|
+
path.write(<<~XML)
|
|
59
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
|
60
|
+
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
|
|
61
|
+
<url><loc>https://example.com/about#team</loc></url>
|
|
62
|
+
</urlset>
|
|
63
|
+
XML
|
|
64
|
+
|
|
65
|
+
error = assert_raises(Indexmap::ValidationError) do
|
|
66
|
+
Indexmap::Validator.new(path: path).validate!
|
|
67
|
+
end
|
|
68
|
+
|
|
69
|
+
assert_equal "Fragment sitemap URLs detected: https://example.com/about#team", error.message
|
|
70
|
+
end
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
def test_validate_raises_for_relative_urls
|
|
74
|
+
Dir.mktmpdir do |directory|
|
|
75
|
+
path = Pathname(directory).join("sitemap.xml")
|
|
76
|
+
path.write(<<~XML)
|
|
77
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
|
78
|
+
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
|
|
79
|
+
<url><loc>/about</loc></url>
|
|
80
|
+
</urlset>
|
|
81
|
+
XML
|
|
82
|
+
|
|
83
|
+
error = assert_raises(Indexmap::ValidationError) do
|
|
84
|
+
Indexmap::Validator.new(path: path).validate!
|
|
85
|
+
end
|
|
86
|
+
|
|
87
|
+
assert_equal "Invalid sitemap URLs detected: /about", error.message
|
|
88
|
+
end
|
|
89
|
+
end
|
|
90
|
+
|
|
91
|
+
def test_validate_raises_for_urls_outside_configured_base_url
|
|
92
|
+
Dir.mktmpdir do |directory|
|
|
93
|
+
path = Pathname(directory).join("sitemap.xml")
|
|
94
|
+
path.write(<<~XML)
|
|
95
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
|
96
|
+
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
|
|
97
|
+
<url><loc>https://other.example.com/about</loc></url>
|
|
98
|
+
</urlset>
|
|
99
|
+
XML
|
|
100
|
+
|
|
101
|
+
configuration = Indexmap::Configuration.new
|
|
102
|
+
configuration.base_url = "https://example.com"
|
|
103
|
+
|
|
104
|
+
error = assert_raises(Indexmap::ValidationError) do
|
|
105
|
+
Indexmap::Validator.new(configuration: configuration, path: path).validate!
|
|
106
|
+
end
|
|
107
|
+
|
|
108
|
+
assert_equal "Sitemap URLs outside configured base URL detected: https://other.example.com/about", error.message
|
|
109
|
+
end
|
|
110
|
+
end
|
|
111
|
+
|
|
112
|
+
def test_validate_raises_for_invalid_lastmod_values
|
|
113
|
+
Dir.mktmpdir do |directory|
|
|
114
|
+
path = Pathname(directory).join("sitemap.xml")
|
|
115
|
+
path.write(<<~XML)
|
|
116
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
|
117
|
+
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
|
|
118
|
+
<url>
|
|
119
|
+
<loc>https://example.com/about</loc>
|
|
120
|
+
<lastmod>not-a-date</lastmod>
|
|
121
|
+
</url>
|
|
122
|
+
</urlset>
|
|
123
|
+
XML
|
|
124
|
+
|
|
125
|
+
error = assert_raises(Indexmap::ValidationError) do
|
|
126
|
+
Indexmap::Validator.new(path: path).validate!
|
|
127
|
+
end
|
|
128
|
+
|
|
129
|
+
assert_equal "Invalid sitemap lastmod values detected: https://example.com/about", error.message
|
|
130
|
+
end
|
|
131
|
+
end
|
|
132
|
+
|
|
133
|
+
def test_validate_raises_for_empty_sitemaps
|
|
134
|
+
Dir.mktmpdir do |directory|
|
|
135
|
+
path = Pathname(directory).join("sitemap.xml")
|
|
136
|
+
path.write(<<~XML)
|
|
137
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
|
138
|
+
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
|
|
139
|
+
</urlset>
|
|
140
|
+
XML
|
|
141
|
+
|
|
142
|
+
error = assert_raises(Indexmap::ValidationError) do
|
|
143
|
+
Indexmap::Validator.new(path: path).validate!
|
|
144
|
+
end
|
|
145
|
+
|
|
146
|
+
assert_equal "Sitemap has no URLs: #{path}", error.message
|
|
147
|
+
end
|
|
148
|
+
end
|
|
149
|
+
|
|
150
|
+
def test_validate_raises_for_missing_child_sitemap_files
|
|
151
|
+
Dir.mktmpdir do |directory|
|
|
152
|
+
path = Pathname(directory).join("sitemap.xml")
|
|
153
|
+
child_path = Pathname(directory).join("sitemap-pages.xml")
|
|
154
|
+
path.write(<<~XML)
|
|
155
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
|
156
|
+
<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
|
|
157
|
+
<sitemap><loc>https://example.com/sitemap-pages.xml</loc></sitemap>
|
|
158
|
+
</sitemapindex>
|
|
159
|
+
XML
|
|
160
|
+
|
|
161
|
+
error = assert_raises(Indexmap::ValidationError) do
|
|
162
|
+
Indexmap::Validator.new(path: path).validate!
|
|
163
|
+
end
|
|
164
|
+
|
|
165
|
+
assert_equal "Missing child sitemap file: #{child_path}", error.message
|
|
166
|
+
end
|
|
167
|
+
end
|
|
168
|
+
|
|
55
169
|
def test_validate_passes_for_valid_sitemap
|
|
56
170
|
Dir.mktmpdir do |directory|
|
|
57
171
|
path = Pathname(directory).join("sitemap.xml")
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: indexmap
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 0.6.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Paulo Fidalgo
|
|
@@ -177,9 +177,11 @@ files:
|
|
|
177
177
|
- README.md
|
|
178
178
|
- lib/indexmap.rb
|
|
179
179
|
- lib/indexmap/configuration.rb
|
|
180
|
+
- lib/indexmap/creator.rb
|
|
180
181
|
- lib/indexmap/entry.rb
|
|
181
182
|
- lib/indexmap/google_configuration.rb
|
|
182
183
|
- lib/indexmap/index_now_configuration.rb
|
|
184
|
+
- lib/indexmap/output.rb
|
|
183
185
|
- lib/indexmap/parser.rb
|
|
184
186
|
- lib/indexmap/path.rb
|
|
185
187
|
- lib/indexmap/pinger/base.rb
|