indexmap 0.5.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +2 -2
- data/README.md +162 -10
- data/lib/indexmap/configuration.rb +27 -26
- data/lib/indexmap/creator.rb +62 -0
- data/lib/indexmap/index_now_configuration.rb +12 -5
- data/lib/indexmap/output.rb +72 -0
- data/lib/indexmap/parser.rb +26 -15
- data/lib/indexmap/pinger/base.rb +5 -1
- data/lib/indexmap/pinger/google.rb +17 -6
- data/lib/indexmap/pinger/index_now.rb +21 -24
- data/lib/indexmap/storage/active_storage.rb +105 -0
- data/lib/indexmap/storage/file.rb +11 -0
- data/lib/indexmap/storage/filesystem.rb +77 -0
- data/lib/indexmap/storage/memory.rb +61 -0
- data/lib/indexmap/task_runner.rb +13 -13
- data/lib/indexmap/validator.rb +42 -30
- data/lib/indexmap/version.rb +1 -1
- data/lib/indexmap/writer.rb +10 -9
- data/lib/indexmap.rb +10 -2
- data/lib/tasks/indexmap_tasks.rake +7 -9
- data/test/indexmap/configuration_test.rb +129 -26
- data/test/indexmap/parser_test.rb +44 -3
- data/test/indexmap/pinger/google_test.rb +125 -96
- data/test/indexmap/pinger/index_now_test.rb +148 -179
- data/test/indexmap/storage_test.rb +123 -0
- data/test/indexmap/task_runner_test.rb +97 -41
- data/test/indexmap/validator_test.rb +96 -92
- data/test/indexmap/writer_test.rb +63 -74
- metadata +8 -3
- data/lib/indexmap/path.rb +0 -43
- data/test/indexmap/path_test.rb +0 -28
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 90b723b27367659107827ca3ff1f7473c6a4a917b62ae1cdb4ae2cfcf440f3f2
|
|
4
|
+
data.tar.gz: cd5be4391a83b1378efcad593408648822dabff4dc8e5479e6ae3e3c12d93ac3
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: '099bf5472fae8a8be3d17ad40003ea5802dd410799431af367e604dca3cd0fce04e98555526dfb36b98f270774b41eec160a47c5abdc9fd81ac148080901d96f'
|
|
7
|
+
data.tar.gz: e53e45fe051e02b0000e05c5000f8f23c2e1cc7ab78a952cad6a4a33891b108c8811cd67d9fcadea6cf39bea4abf74eae9736d9e85402e6a1988ef49ace9c0dc
|
data/CHANGELOG.md
CHANGED
|
@@ -5,12 +5,12 @@ All notable changes to this project will be documented in this file.
|
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|
6
6
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
7
|
|
|
8
|
-
## [0.
|
|
8
|
+
## [0.7.0] - 2026-05-08
|
|
9
9
|
|
|
10
10
|
|
|
11
11
|
### Fixed
|
|
12
12
|
|
|
13
|
-
-
|
|
13
|
+
- avoid rewriting existing IndexNow key files (#11)
|
|
14
14
|
|
|
15
15
|
|
|
16
16
|
|
data/README.md
CHANGED
|
@@ -34,6 +34,10 @@ Or install it directly:
|
|
|
34
34
|
gem install indexmap
|
|
35
35
|
```
|
|
36
36
|
|
|
37
|
+
Upgrading an existing app? Read [UPGRADE.md](UPGRADE.md) before deploying,
|
|
38
|
+
especially if the app uses custom storage or stores sitemap files under a
|
|
39
|
+
directory prefix such as `sitemaps/`.
|
|
40
|
+
|
|
37
41
|
## Ruby Usage
|
|
38
42
|
|
|
39
43
|
```ruby
|
|
@@ -51,7 +55,6 @@ sections = [
|
|
|
51
55
|
|
|
52
56
|
Indexmap::Writer.new(
|
|
53
57
|
sections: sections,
|
|
54
|
-
public_path: Pathname("public"),
|
|
55
58
|
base_url: "https://example.com"
|
|
56
59
|
).write
|
|
57
60
|
```
|
|
@@ -63,7 +66,12 @@ In an initializer:
|
|
|
63
66
|
```ruby
|
|
64
67
|
Indexmap.configure do |config|
|
|
65
68
|
config.base_url = -> { "https://example.com" }
|
|
66
|
-
config.
|
|
69
|
+
config.storage = -> do
|
|
70
|
+
Indexmap::Storage::Filesystem.new(
|
|
71
|
+
path: Rails.public_path,
|
|
72
|
+
public_url: config.base_url
|
|
73
|
+
)
|
|
74
|
+
end
|
|
67
75
|
config.sections = -> do
|
|
68
76
|
[
|
|
69
77
|
Indexmap::Section.new(
|
|
@@ -85,23 +93,26 @@ bin/rails indexmap:sitemap:format
|
|
|
85
93
|
bin/rails indexmap:sitemap:validate
|
|
86
94
|
```
|
|
87
95
|
|
|
88
|
-
`indexmap:sitemap:create` is the main task. It
|
|
96
|
+
`indexmap:sitemap:create` is the main task. It builds sitemap files in memory,
|
|
97
|
+
formats them, validates the result, then writes the final XML files to the
|
|
98
|
+
configured storage. Existing sitemap files are left untouched if generation or
|
|
99
|
+
validation fails.
|
|
89
100
|
|
|
90
101
|
### Default Index Mode
|
|
91
102
|
|
|
92
103
|
This is the default behavior. `indexmap` writes:
|
|
93
104
|
|
|
94
|
-
- `
|
|
105
|
+
- `sitemap.xml` as a sitemap index
|
|
95
106
|
- one or more child sitemap files from `config.sections`
|
|
96
107
|
|
|
97
108
|
### Single-File Mode
|
|
98
109
|
|
|
99
|
-
For sites that only want one `
|
|
110
|
+
For sites that only want one `sitemap.xml` file:
|
|
100
111
|
|
|
101
112
|
```ruby
|
|
102
113
|
Indexmap.configure do |config|
|
|
103
114
|
config.base_url = -> { "https://example.com" }
|
|
104
|
-
config.
|
|
115
|
+
config.storage = -> { Indexmap::Storage::Filesystem.new(path: Rails.public_path, public_url: config.base_url) }
|
|
105
116
|
config.format = :single_file
|
|
106
117
|
config.entries = -> do
|
|
107
118
|
[
|
|
@@ -114,12 +125,135 @@ end
|
|
|
114
125
|
|
|
115
126
|
In `:single_file` mode, `indexmap` writes a `urlset` directly to `sitemap.xml` and reads entries from `config.entries` instead of `config.sections`.
|
|
116
127
|
|
|
128
|
+
### Named Outputs
|
|
129
|
+
|
|
130
|
+
Most apps only need the default output. Use named outputs when one part of the
|
|
131
|
+
sitemap must be generated separately, for example when static pages can be
|
|
132
|
+
generated during deploy but database-heavy pages should refresh later. Named
|
|
133
|
+
outputs write through the same configured storage as the default output.
|
|
134
|
+
|
|
135
|
+
```ruby
|
|
136
|
+
Indexmap.configure do |config|
|
|
137
|
+
config.base_url = -> { "https://example.com" }
|
|
138
|
+
config.storage = -> { Indexmap::Storage::Filesystem.new(path: Rails.root.join("storage/sitemaps"), public_url: config.base_url) }
|
|
139
|
+
config.sections = -> { Sitemap.sections }
|
|
140
|
+
|
|
141
|
+
config.output :insights_data do |output|
|
|
142
|
+
output.format = :single_file
|
|
143
|
+
output.index_filename = "sitemap-insights-data.xml"
|
|
144
|
+
output.entries = -> { Sitemap.insights_data_entries }
|
|
145
|
+
end
|
|
146
|
+
end
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
Generate the default output:
|
|
150
|
+
|
|
151
|
+
```ruby
|
|
152
|
+
Indexmap.create
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
Generate only the named output:
|
|
156
|
+
|
|
157
|
+
```ruby
|
|
158
|
+
Indexmap.create(:insights_data)
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
Named outputs inherit `base_url`, `format`, `index_filename`, `entries`, and `sections` from the main configuration unless
|
|
162
|
+
you override them. Storage is configured once and shared by every output.
|
|
163
|
+
|
|
164
|
+
`Indexmap.create` uses the same safe publish flow as the rake task: build,
|
|
165
|
+
format, validate, and then write the final XML file or files to storage.
|
|
166
|
+
|
|
167
|
+
### Storage
|
|
168
|
+
|
|
169
|
+
Every `indexmap` operation reads and writes through `config.storage`. The storage
|
|
170
|
+
object is the source of truth for generation, validation, parsing, Google
|
|
171
|
+
submission, IndexNow submission, and IndexNow verification files.
|
|
172
|
+
|
|
173
|
+
The filesystem adapter stores files in a directory and exposes public URLs from
|
|
174
|
+
the same filenames:
|
|
175
|
+
|
|
176
|
+
```ruby
|
|
177
|
+
Indexmap.configure do |config|
|
|
178
|
+
config.base_url = "https://example.com"
|
|
179
|
+
config.storage = Indexmap::Storage::Filesystem.new(
|
|
180
|
+
path: Rails.public_path,
|
|
181
|
+
public_url: "https://example.com"
|
|
182
|
+
)
|
|
183
|
+
end
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
Rails apps that store sitemap files in Active Storage can use the optional
|
|
187
|
+
adapter. `indexmap` does not depend on `activestorage`; this adapter only uses
|
|
188
|
+
the model and attachment object you pass in.
|
|
189
|
+
|
|
190
|
+
```ruby
|
|
191
|
+
Indexmap.configure do |config|
|
|
192
|
+
config.base_url = "https://example.com"
|
|
193
|
+
config.storage = Indexmap::Storage::ActiveStorage.new(
|
|
194
|
+
model: SitemapArtifact,
|
|
195
|
+
filename_column: :filename,
|
|
196
|
+
attachment: :file,
|
|
197
|
+
public_url: "https://example.com"
|
|
198
|
+
)
|
|
199
|
+
end
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
Custom storage backends can implement the same small interface:
|
|
203
|
+
|
|
204
|
+
```ruby
|
|
205
|
+
storage.write(filename, body, content_type:)
|
|
206
|
+
storage.read(filename)
|
|
207
|
+
storage.exist?(filename)
|
|
208
|
+
storage.list(prefix:, suffix:)
|
|
209
|
+
storage.delete(filename)
|
|
210
|
+
storage.public_url(filename)
|
|
211
|
+
```
|
|
212
|
+
|
|
213
|
+
### Deferred Dynamic Sections
|
|
214
|
+
|
|
215
|
+
Use `after_create` when `indexmap:sitemap:create` should publish the default
|
|
216
|
+
sitemap first, then schedule slower dynamic sections for the background. The
|
|
217
|
+
callback runs only after the generated files have been formatted, validated, and
|
|
218
|
+
replaced successfully.
|
|
219
|
+
|
|
220
|
+
```ruby
|
|
221
|
+
Indexmap.configure do |config|
|
|
222
|
+
config.base_url = -> { "https://example.com" }
|
|
223
|
+
config.storage = -> { Indexmap::Storage::Filesystem.new(path: Rails.root.join("storage/sitemaps"), public_url: config.base_url) }
|
|
224
|
+
config.sections = -> { Sitemap.sections }
|
|
225
|
+
|
|
226
|
+
config.output :insights_data do |output|
|
|
227
|
+
output.format = :single_file
|
|
228
|
+
output.index_filename = "sitemap-insights-data.xml"
|
|
229
|
+
output.entries = -> { Sitemap.insights_data_entries }
|
|
230
|
+
end
|
|
231
|
+
|
|
232
|
+
config.after_create do
|
|
233
|
+
Insights::SitemapRefreshJob.perform_later
|
|
234
|
+
end
|
|
235
|
+
end
|
|
236
|
+
```
|
|
237
|
+
|
|
238
|
+
Then the job can stay small:
|
|
239
|
+
|
|
240
|
+
```ruby
|
|
241
|
+
class Insights::SitemapRefreshJob < ApplicationJob
|
|
242
|
+
def perform
|
|
243
|
+
Indexmap.create(:insights_data)
|
|
244
|
+
end
|
|
245
|
+
end
|
|
246
|
+
```
|
|
247
|
+
|
|
248
|
+
This keeps deploys fast: the deploy only waits for `indexmap:sitemap:create`,
|
|
249
|
+
while database-dependent output is refreshed by the job backend.
|
|
250
|
+
|
|
117
251
|
## Validation And Parsing
|
|
118
252
|
|
|
119
253
|
`indexmap` also includes small utilities for working with generated sitemap files:
|
|
120
254
|
|
|
121
255
|
```ruby
|
|
122
|
-
parser = Indexmap::Parser.new(
|
|
256
|
+
parser = Indexmap::Parser.new(source: "sitemap.xml")
|
|
123
257
|
parser.paths
|
|
124
258
|
# => ["/", "/about", "/articles/example"]
|
|
125
259
|
|
|
@@ -181,7 +315,7 @@ If `config.google.property` is not set, `indexmap` defaults to `sc-domain:<host>
|
|
|
181
315
|
IndexNow submission requires a key. `indexmap` supports two ways to provide it:
|
|
182
316
|
|
|
183
317
|
- set `config.index_now.key`
|
|
184
|
-
- or keep a valid verification file
|
|
318
|
+
- or keep a valid verification file in the configured storage as `<key>.txt`
|
|
185
319
|
|
|
186
320
|
Configured-key example:
|
|
187
321
|
|
|
@@ -191,7 +325,25 @@ Indexmap.configure do |config|
|
|
|
191
325
|
end
|
|
192
326
|
```
|
|
193
327
|
|
|
194
|
-
If `config.index_now.key` is set, `indexmap:sitemap:create` also
|
|
328
|
+
If `config.index_now.key` is set, `indexmap:sitemap:create` also ensures the matching `<key>.txt` verification file exists in storage. It leaves an existing valid key file unchanged.
|
|
329
|
+
|
|
330
|
+
The verification filename defaults to `<key>.txt`. If you need a different filename, set `config.index_now.key_filename` explicitly:
|
|
331
|
+
|
|
332
|
+
```ruby
|
|
333
|
+
Indexmap.configure do |config|
|
|
334
|
+
config.index_now.key = -> { ENV["INDEXNOW_KEY"] }
|
|
335
|
+
config.index_now.key_filename = -> { "#{ENV.fetch("INDEXNOW_KEY")}.txt" }
|
|
336
|
+
end
|
|
337
|
+
```
|
|
338
|
+
|
|
339
|
+
You can also disable automatic key-file writes entirely:
|
|
340
|
+
|
|
341
|
+
```ruby
|
|
342
|
+
Indexmap.configure do |config|
|
|
343
|
+
config.index_now.key = -> { ENV["INDEXNOW_KEY"] }
|
|
344
|
+
config.index_now.write_key_file = false
|
|
345
|
+
end
|
|
346
|
+
```
|
|
195
347
|
|
|
196
348
|
If you prefer the file-based flow, run:
|
|
197
349
|
|
|
@@ -202,7 +354,7 @@ bin/rails indexmap:index_now:write_key
|
|
|
202
354
|
That task:
|
|
203
355
|
|
|
204
356
|
- reuses an existing valid key file when present
|
|
205
|
-
- otherwise generates a new key in
|
|
357
|
+
- otherwise generates a new key and writes it to `<key>.txt` in the configured storage
|
|
206
358
|
- makes that key available to `indexmap:index_now:ping` without adding `config.index_now.key`
|
|
207
359
|
|
|
208
360
|
If neither a configured key nor a valid key file is present, `indexmap:index_now:ping` skips IndexNow submission.
|
|
@@ -4,11 +4,13 @@ module Indexmap
|
|
|
4
4
|
class Configuration
|
|
5
5
|
VALID_FORMATS = %i[index single_file].freeze
|
|
6
6
|
|
|
7
|
-
attr_writer :base_url, :entries, :format, :index_filename, :
|
|
7
|
+
attr_writer :base_url, :entries, :format, :index_filename, :sections, :storage
|
|
8
8
|
|
|
9
9
|
def initialize
|
|
10
10
|
@format = :index
|
|
11
11
|
@index_filename = "sitemap.xml"
|
|
12
|
+
@after_create_callbacks = []
|
|
13
|
+
@outputs = {}
|
|
12
14
|
end
|
|
13
15
|
|
|
14
16
|
def base_url
|
|
@@ -36,38 +38,37 @@ module Indexmap
|
|
|
36
38
|
@index_now ||= IndexNowConfiguration.new
|
|
37
39
|
end
|
|
38
40
|
|
|
39
|
-
def
|
|
40
|
-
|
|
41
|
-
return Pathname("public") if value.nil?
|
|
42
|
-
|
|
43
|
-
Pathname(value)
|
|
41
|
+
def storage
|
|
42
|
+
resolve(@storage) || Storage::Filesystem.new(path: "public", public_url: base_url)
|
|
44
43
|
end
|
|
45
44
|
|
|
46
45
|
def sections
|
|
47
46
|
Array(resolve(@sections))
|
|
48
47
|
end
|
|
49
48
|
|
|
49
|
+
def output(name)
|
|
50
|
+
output = output_for(name)
|
|
51
|
+
yield(output) if block_given?
|
|
52
|
+
output
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
def output_for(name = :default)
|
|
56
|
+
normalized_name = name.to_sym
|
|
57
|
+
@outputs[normalized_name] ||= Output.new(configuration: self)
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
def after_create(&block)
|
|
61
|
+
raise ArgumentError, "after_create requires a block" unless block
|
|
62
|
+
|
|
63
|
+
@after_create_callbacks << block
|
|
64
|
+
end
|
|
65
|
+
|
|
66
|
+
def run_after_create_callbacks
|
|
67
|
+
@after_create_callbacks.each(&:call)
|
|
68
|
+
end
|
|
69
|
+
|
|
50
70
|
def writer
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
unless VALID_FORMATS.include?(format)
|
|
54
|
-
raise ConfigurationError, "Indexmap format must be one of: #{VALID_FORMATS.join(", ")}"
|
|
55
|
-
end
|
|
56
|
-
|
|
57
|
-
if format == :single_file
|
|
58
|
-
raise ConfigurationError, "Indexmap entries are not configured" if entries.empty?
|
|
59
|
-
elsif sections.empty?
|
|
60
|
-
raise ConfigurationError, "Indexmap sections are not configured" if sections.empty?
|
|
61
|
-
end
|
|
62
|
-
|
|
63
|
-
Writer.new(
|
|
64
|
-
entries: entries,
|
|
65
|
-
format: format,
|
|
66
|
-
sections: sections,
|
|
67
|
-
public_path: public_path,
|
|
68
|
-
base_url: base_url,
|
|
69
|
-
index_filename: index_filename
|
|
70
|
-
)
|
|
71
|
+
output_for(:default).writer
|
|
71
72
|
end
|
|
72
73
|
|
|
73
74
|
private
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "nokogiri"
|
|
4
|
+
|
|
5
|
+
module Indexmap
|
|
6
|
+
class Creator
|
|
7
|
+
ValidationConfiguration = Struct.new(:base_url, :index_filename, :storage, keyword_init: true)
|
|
8
|
+
|
|
9
|
+
def initialize(output:)
|
|
10
|
+
@output = output
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
def create
|
|
14
|
+
files = format(write)
|
|
15
|
+
validate(files)
|
|
16
|
+
publish(files)
|
|
17
|
+
files.map(&:filename)
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
private
|
|
21
|
+
|
|
22
|
+
attr_reader :output
|
|
23
|
+
|
|
24
|
+
def write
|
|
25
|
+
output.writer.write
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
def format(files)
|
|
29
|
+
files.map do |file|
|
|
30
|
+
document = Nokogiri::XML(
|
|
31
|
+
file.body,
|
|
32
|
+
nil,
|
|
33
|
+
nil,
|
|
34
|
+
Nokogiri::XML::ParseOptions::DEFAULT_XML | Nokogiri::XML::ParseOptions::NOBLANKS
|
|
35
|
+
)
|
|
36
|
+
save_options = Nokogiri::XML::Node::SaveOptions::FORMAT | Nokogiri::XML::Node::SaveOptions::AS_XML
|
|
37
|
+
|
|
38
|
+
Storage::File.new(
|
|
39
|
+
filename: file.filename,
|
|
40
|
+
body: document.to_xml(indent: 2, save_with: save_options),
|
|
41
|
+
content_type: file.content_type
|
|
42
|
+
)
|
|
43
|
+
end
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
def validate(files)
|
|
47
|
+
Validator.new(
|
|
48
|
+
configuration: ValidationConfiguration.new(
|
|
49
|
+
base_url: output.base_url,
|
|
50
|
+
index_filename: output.index_filename,
|
|
51
|
+
storage: Storage::Memory.new(files)
|
|
52
|
+
)
|
|
53
|
+
).validate!
|
|
54
|
+
end
|
|
55
|
+
|
|
56
|
+
def publish(files)
|
|
57
|
+
files.each do |file|
|
|
58
|
+
output.storage.write(file.filename, file.body, content_type: file.content_type)
|
|
59
|
+
end
|
|
60
|
+
end
|
|
61
|
+
end
|
|
62
|
+
end
|
|
@@ -5,7 +5,7 @@ module Indexmap
|
|
|
5
5
|
DEFAULT_ENDPOINT = "https://api.indexnow.org"
|
|
6
6
|
DEFAULT_MAX_URLS_PER_REQUEST = 500
|
|
7
7
|
|
|
8
|
-
attr_writer :dry_run, :endpoint, :key, :
|
|
8
|
+
attr_writer :dry_run, :endpoint, :key, :key_filename, :max_urls_per_request, :write_key_file
|
|
9
9
|
|
|
10
10
|
def dry_run?
|
|
11
11
|
value = resolve(@dry_run)
|
|
@@ -21,12 +21,19 @@ module Indexmap
|
|
|
21
21
|
resolve(@key)
|
|
22
22
|
end
|
|
23
23
|
|
|
24
|
-
def
|
|
25
|
-
|
|
26
|
-
return
|
|
24
|
+
def write_key_file?
|
|
25
|
+
value = resolve(@write_key_file)
|
|
26
|
+
return !key.to_s.strip.empty? if value.nil?
|
|
27
|
+
|
|
28
|
+
value == true || value.to_s == "1"
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
def key_filename(key: self.key)
|
|
32
|
+
configured_filename = resolve(@key_filename)
|
|
33
|
+
return configured_filename unless configured_filename.to_s.strip.empty?
|
|
27
34
|
return if key.to_s.strip.empty?
|
|
28
35
|
|
|
29
|
-
|
|
36
|
+
"#{key}.txt"
|
|
30
37
|
end
|
|
31
38
|
|
|
32
39
|
def max_urls_per_request
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Indexmap
|
|
4
|
+
class Output
|
|
5
|
+
VALID_FORMATS = %i[index single_file].freeze
|
|
6
|
+
|
|
7
|
+
attr_writer :base_url, :entries, :format, :index_filename, :sections
|
|
8
|
+
|
|
9
|
+
def initialize(configuration:)
|
|
10
|
+
@configuration = configuration
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
def base_url
|
|
14
|
+
resolve(@base_url) || configuration.base_url
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
def entries
|
|
18
|
+
resolved_entries = resolve(@entries)
|
|
19
|
+
|
|
20
|
+
Array(resolved_entries.nil? ? configuration.entries : resolved_entries)
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
def format
|
|
24
|
+
value = resolve(@format) || configuration.format
|
|
25
|
+
value.nil? ? :index : value.to_sym
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
def index_filename
|
|
29
|
+
resolve(@index_filename) || configuration.index_filename
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
def storage
|
|
33
|
+
configuration.storage
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
def sections
|
|
37
|
+
resolved_sections = resolve(@sections)
|
|
38
|
+
|
|
39
|
+
Array(resolved_sections.nil? ? configuration.sections : resolved_sections)
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
def writer
|
|
43
|
+
raise ConfigurationError, "Indexmap base_url is not configured" if base_url.to_s.strip.empty?
|
|
44
|
+
|
|
45
|
+
unless VALID_FORMATS.include?(format)
|
|
46
|
+
raise ConfigurationError, "Indexmap format must be one of: #{VALID_FORMATS.join(", ")}"
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
if format == :single_file
|
|
50
|
+
raise ConfigurationError, "Indexmap entries are not configured" if entries.empty?
|
|
51
|
+
elsif sections.empty?
|
|
52
|
+
raise ConfigurationError, "Indexmap sections are not configured" if sections.empty?
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
Writer.new(
|
|
56
|
+
entries: entries,
|
|
57
|
+
format: format,
|
|
58
|
+
sections: sections,
|
|
59
|
+
base_url: base_url,
|
|
60
|
+
index_filename: index_filename
|
|
61
|
+
)
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
private
|
|
65
|
+
|
|
66
|
+
attr_reader :configuration
|
|
67
|
+
|
|
68
|
+
def resolve(value)
|
|
69
|
+
value.respond_to?(:call) ? value.call : value
|
|
70
|
+
end
|
|
71
|
+
end
|
|
72
|
+
end
|
data/lib/indexmap/parser.rb
CHANGED
|
@@ -2,18 +2,17 @@
|
|
|
2
2
|
|
|
3
3
|
require "net/http"
|
|
4
4
|
require "nokogiri"
|
|
5
|
-
require "pathname"
|
|
6
5
|
require "uri"
|
|
7
6
|
|
|
8
7
|
module Indexmap
|
|
9
8
|
class Parser
|
|
10
9
|
Entry = Struct.new(:loc, :lastmod, :source_sitemap, keyword_init: true)
|
|
11
10
|
|
|
12
|
-
def initialize(
|
|
13
|
-
@source =
|
|
11
|
+
def initialize(source: nil, rebase_remote_children: false, index_filename: Indexmap.configuration.index_filename, storage: Indexmap.configuration.storage)
|
|
12
|
+
@source = (source || index_filename).to_s
|
|
14
13
|
@rebase_remote_children = rebase_remote_children
|
|
15
14
|
@index_filename = index_filename
|
|
16
|
-
@
|
|
15
|
+
@storage = storage
|
|
17
16
|
end
|
|
18
17
|
|
|
19
18
|
def entries(reset: false)
|
|
@@ -59,11 +58,7 @@ module Indexmap
|
|
|
59
58
|
|
|
60
59
|
private
|
|
61
60
|
|
|
62
|
-
attr_reader :index_filename, :
|
|
63
|
-
|
|
64
|
-
def default_path
|
|
65
|
-
Indexmap::Path.existing_public_path(public_path: public_path, index_filename: index_filename)
|
|
66
|
-
end
|
|
61
|
+
attr_reader :index_filename, :storage
|
|
67
62
|
|
|
68
63
|
def parse_source(source, visited:)
|
|
69
64
|
normalized_source = normalize_source(source)
|
|
@@ -106,12 +101,21 @@ module Indexmap
|
|
|
106
101
|
end
|
|
107
102
|
elsif remote_source?(loc)
|
|
108
103
|
uri = URI.parse(loc)
|
|
109
|
-
|
|
104
|
+
normalize_local_source(uri.path)
|
|
110
105
|
else
|
|
111
|
-
|
|
106
|
+
resolve_local_child_sitemap(parent_source, loc)
|
|
112
107
|
end
|
|
113
108
|
rescue URI::InvalidURIError
|
|
114
|
-
|
|
109
|
+
resolve_local_child_sitemap(parent_source, loc)
|
|
110
|
+
end
|
|
111
|
+
|
|
112
|
+
def resolve_local_child_sitemap(parent_source, loc)
|
|
113
|
+
if loc.start_with?("/")
|
|
114
|
+
normalize_local_source(loc)
|
|
115
|
+
else
|
|
116
|
+
parent_directory = File.dirname(parent_source)
|
|
117
|
+
normalize_local_source((parent_directory == ".") ? loc : File.join(parent_directory, loc))
|
|
118
|
+
end
|
|
115
119
|
end
|
|
116
120
|
|
|
117
121
|
def remote_child_source(parent_uri, loc)
|
|
@@ -131,12 +135,19 @@ module Indexmap
|
|
|
131
135
|
if remote_source?(source)
|
|
132
136
|
URI.parse(source).to_s
|
|
133
137
|
else
|
|
134
|
-
|
|
138
|
+
normalize_local_source(source)
|
|
135
139
|
end
|
|
136
140
|
rescue URI::InvalidURIError
|
|
137
141
|
nil
|
|
138
142
|
end
|
|
139
143
|
|
|
144
|
+
def normalize_local_source(source)
|
|
145
|
+
normalized = Pathname(source.to_s).cleanpath.to_s.sub(%r{\A/+}, "")
|
|
146
|
+
return if normalized.empty? || normalized == ".." || normalized.start_with?("../")
|
|
147
|
+
|
|
148
|
+
normalized
|
|
149
|
+
end
|
|
150
|
+
|
|
140
151
|
def remote_source?(value)
|
|
141
152
|
uri = URI.parse(value.to_s)
|
|
142
153
|
uri.is_a?(URI::HTTP) || uri.is_a?(URI::HTTPS)
|
|
@@ -147,8 +158,8 @@ module Indexmap
|
|
|
147
158
|
def read_source(source)
|
|
148
159
|
if remote_source?(source)
|
|
149
160
|
fetch_remote_source(source)
|
|
150
|
-
elsif
|
|
151
|
-
|
|
161
|
+
elsif storage.exist?(source)
|
|
162
|
+
storage.read(source)
|
|
152
163
|
end
|
|
153
164
|
end
|
|
154
165
|
|
data/lib/indexmap/pinger/base.rb
CHANGED
|
@@ -46,8 +46,12 @@ module Indexmap
|
|
|
46
46
|
hostname.sub(/\Awww\./, "")
|
|
47
47
|
end
|
|
48
48
|
|
|
49
|
+
def storage
|
|
50
|
+
configuration.storage
|
|
51
|
+
end
|
|
52
|
+
|
|
49
53
|
def sitemap_files
|
|
50
|
-
|
|
54
|
+
storage.list(prefix: "sitemap", suffix: ".xml")
|
|
51
55
|
end
|
|
52
56
|
|
|
53
57
|
def ping_sitemap(_sitemap_file)
|
|
@@ -20,10 +20,11 @@ module Indexmap
|
|
|
20
20
|
return {status: :skipped, reason: :missing_credentials}
|
|
21
21
|
end
|
|
22
22
|
|
|
23
|
-
|
|
23
|
+
files = sitemap_files
|
|
24
|
+
results = files.map { |sitemap_file| ping_sitemap(sitemap_file) }
|
|
24
25
|
return {status: :skipped, reason: :no_sitemaps} if results.empty?
|
|
25
26
|
|
|
26
|
-
summarize_results(results)
|
|
27
|
+
summarize_results(results, url_count: sitemap_url_count(files))
|
|
27
28
|
end
|
|
28
29
|
|
|
29
30
|
private
|
|
@@ -35,7 +36,7 @@ module Indexmap
|
|
|
35
36
|
end
|
|
36
37
|
|
|
37
38
|
def ping_sitemap(sitemap_file)
|
|
38
|
-
sitemap_url =
|
|
39
|
+
sitemap_url = storage.public_url(sitemap_file)
|
|
39
40
|
|
|
40
41
|
unless authorized?
|
|
41
42
|
logger.debug("Google Search Console does not have access to the site: #{root_domain}")
|
|
@@ -90,16 +91,26 @@ module Indexmap
|
|
|
90
91
|
)
|
|
91
92
|
end
|
|
92
93
|
|
|
93
|
-
def
|
|
94
|
+
def sitemap_url_count(files)
|
|
95
|
+
files.each_with_object(Set.new) do |sitemap_file, urls|
|
|
96
|
+
Parser.new(source: sitemap_file, storage: storage).entries.each do |entry|
|
|
97
|
+
loc = entry.loc.to_s.strip
|
|
98
|
+
urls.add(loc) unless loc.empty?
|
|
99
|
+
end
|
|
100
|
+
end.count
|
|
101
|
+
end
|
|
102
|
+
|
|
103
|
+
def summarize_results(results, url_count:)
|
|
94
104
|
submitted = results.select { |result| result[:status] == :submitted }
|
|
95
105
|
failures = results.select { |result| result[:status] == :failed }
|
|
96
106
|
|
|
97
|
-
return {status: :submitted, sitemap_count: submitted.count, submitted: submitted} if failures.empty?
|
|
98
|
-
return {status: :failed, sitemap_count: 0, failures: failures} if submitted.empty?
|
|
107
|
+
return {status: :submitted, sitemap_count: submitted.count, url_count: url_count, submitted: submitted} if failures.empty?
|
|
108
|
+
return {status: :failed, sitemap_count: 0, url_count: 0, failures: failures} if submitted.empty?
|
|
99
109
|
|
|
100
110
|
{
|
|
101
111
|
status: :partial,
|
|
102
112
|
sitemap_count: submitted.count,
|
|
113
|
+
url_count: url_count,
|
|
103
114
|
submitted: submitted,
|
|
104
115
|
failures: failures
|
|
105
116
|
}
|