indexmap 0.6.0 → 0.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +112 -2
- data/README.md +100 -78
- data/lib/indexmap/configuration.rb +3 -6
- data/lib/indexmap/creator.rb +22 -35
- data/lib/indexmap/index_now_configuration.rb +12 -5
- data/lib/indexmap/output.rb +3 -5
- data/lib/indexmap/parser.rb +26 -14
- data/lib/indexmap/pinger/base.rb +5 -1
- data/lib/indexmap/pinger/google.rb +2 -2
- data/lib/indexmap/pinger/index_now.rb +21 -24
- data/lib/indexmap/storage/active_storage.rb +105 -0
- data/lib/indexmap/storage/file.rb +11 -0
- data/lib/indexmap/storage/filesystem.rb +77 -0
- data/lib/indexmap/storage/memory.rb +61 -0
- data/lib/indexmap/task_runner.rb +35 -8
- data/lib/indexmap/validator.rb +42 -30
- data/lib/indexmap/version.rb +1 -1
- data/lib/indexmap/writer.rb +2 -9
- data/lib/indexmap.rb +4 -1
- data/lib/tasks/indexmap_tasks.rake +13 -5
- data/test/indexmap/configuration_test.rb +98 -129
- data/test/indexmap/parser_test.rb +44 -3
- data/test/indexmap/pinger/google_test.rb +101 -123
- data/test/indexmap/pinger/index_now_test.rb +148 -179
- data/test/indexmap/storage_test.rb +123 -0
- data/test/indexmap/task_runner_test.rb +104 -63
- data/test/indexmap/validator_test.rb +96 -92
- data/test/indexmap/writer_test.rb +63 -74
- data/test/release_task_test.rb +86 -0
- metadata +8 -4
- data/lib/indexmap/path.rb +0 -42
- data/test/indexmap/path_test.rb +0 -28
data/lib/indexmap/parser.rb
CHANGED
|
@@ -8,11 +8,11 @@ module Indexmap
|
|
|
8
8
|
class Parser
|
|
9
9
|
Entry = Struct.new(:loc, :lastmod, :source_sitemap, keyword_init: true)
|
|
10
10
|
|
|
11
|
-
def initialize(
|
|
12
|
-
@source =
|
|
11
|
+
def initialize(source: nil, rebase_remote_children: false, index_filename: Indexmap.configuration.index_filename, storage: Indexmap.configuration.storage)
|
|
12
|
+
@source = (source || index_filename).to_s
|
|
13
13
|
@rebase_remote_children = rebase_remote_children
|
|
14
14
|
@index_filename = index_filename
|
|
15
|
-
@
|
|
15
|
+
@storage = storage
|
|
16
16
|
end
|
|
17
17
|
|
|
18
18
|
def entries(reset: false)
|
|
@@ -58,11 +58,7 @@ module Indexmap
|
|
|
58
58
|
|
|
59
59
|
private
|
|
60
60
|
|
|
61
|
-
attr_reader :index_filename, :
|
|
62
|
-
|
|
63
|
-
def default_path
|
|
64
|
-
Indexmap::Path.existing_public_path(public_path: public_path, index_filename: index_filename)
|
|
65
|
-
end
|
|
61
|
+
attr_reader :index_filename, :storage
|
|
66
62
|
|
|
67
63
|
def parse_source(source, visited:)
|
|
68
64
|
normalized_source = normalize_source(source)
|
|
@@ -105,12 +101,21 @@ module Indexmap
|
|
|
105
101
|
end
|
|
106
102
|
elsif remote_source?(loc)
|
|
107
103
|
uri = URI.parse(loc)
|
|
108
|
-
|
|
104
|
+
normalize_local_source(uri.path)
|
|
109
105
|
else
|
|
110
|
-
|
|
106
|
+
resolve_local_child_sitemap(parent_source, loc)
|
|
111
107
|
end
|
|
112
108
|
rescue URI::InvalidURIError
|
|
113
|
-
|
|
109
|
+
resolve_local_child_sitemap(parent_source, loc)
|
|
110
|
+
end
|
|
111
|
+
|
|
112
|
+
def resolve_local_child_sitemap(parent_source, loc)
|
|
113
|
+
if loc.start_with?("/")
|
|
114
|
+
normalize_local_source(loc)
|
|
115
|
+
else
|
|
116
|
+
parent_directory = File.dirname(parent_source)
|
|
117
|
+
normalize_local_source((parent_directory == ".") ? loc : File.join(parent_directory, loc))
|
|
118
|
+
end
|
|
114
119
|
end
|
|
115
120
|
|
|
116
121
|
def remote_child_source(parent_uri, loc)
|
|
@@ -130,12 +135,19 @@ module Indexmap
|
|
|
130
135
|
if remote_source?(source)
|
|
131
136
|
URI.parse(source).to_s
|
|
132
137
|
else
|
|
133
|
-
|
|
138
|
+
normalize_local_source(source)
|
|
134
139
|
end
|
|
135
140
|
rescue URI::InvalidURIError
|
|
136
141
|
nil
|
|
137
142
|
end
|
|
138
143
|
|
|
144
|
+
def normalize_local_source(source)
|
|
145
|
+
normalized = Pathname(source.to_s).cleanpath.to_s.sub(%r{\A/+}, "")
|
|
146
|
+
return if normalized.empty? || normalized == ".." || normalized.start_with?("../")
|
|
147
|
+
|
|
148
|
+
normalized
|
|
149
|
+
end
|
|
150
|
+
|
|
139
151
|
def remote_source?(value)
|
|
140
152
|
uri = URI.parse(value.to_s)
|
|
141
153
|
uri.is_a?(URI::HTTP) || uri.is_a?(URI::HTTPS)
|
|
@@ -146,8 +158,8 @@ module Indexmap
|
|
|
146
158
|
def read_source(source)
|
|
147
159
|
if remote_source?(source)
|
|
148
160
|
fetch_remote_source(source)
|
|
149
|
-
elsif
|
|
150
|
-
|
|
161
|
+
elsif storage.exist?(source)
|
|
162
|
+
storage.read(source)
|
|
151
163
|
end
|
|
152
164
|
end
|
|
153
165
|
|
data/lib/indexmap/pinger/base.rb
CHANGED
|
@@ -46,8 +46,12 @@ module Indexmap
|
|
|
46
46
|
hostname.sub(/\Awww\./, "")
|
|
47
47
|
end
|
|
48
48
|
|
|
49
|
+
def storage
|
|
50
|
+
configuration.storage
|
|
51
|
+
end
|
|
52
|
+
|
|
49
53
|
def sitemap_files
|
|
50
|
-
|
|
54
|
+
storage.list(prefix: "sitemap", suffix: ".xml")
|
|
51
55
|
end
|
|
52
56
|
|
|
53
57
|
def ping_sitemap(_sitemap_file)
|
|
@@ -36,7 +36,7 @@ module Indexmap
|
|
|
36
36
|
end
|
|
37
37
|
|
|
38
38
|
def ping_sitemap(sitemap_file)
|
|
39
|
-
sitemap_url =
|
|
39
|
+
sitemap_url = storage.public_url(sitemap_file)
|
|
40
40
|
|
|
41
41
|
unless authorized?
|
|
42
42
|
logger.debug("Google Search Console does not have access to the site: #{root_domain}")
|
|
@@ -93,7 +93,7 @@ module Indexmap
|
|
|
93
93
|
|
|
94
94
|
def sitemap_url_count(files)
|
|
95
95
|
files.each_with_object(Set.new) do |sitemap_file, urls|
|
|
96
|
-
Parser.new(
|
|
96
|
+
Parser.new(source: sitemap_file, storage: storage).entries.each do |entry|
|
|
97
97
|
loc = entry.loc.to_s.strip
|
|
98
98
|
urls.add(loc) unless loc.empty?
|
|
99
99
|
end
|
|
@@ -42,14 +42,15 @@ module Indexmap
|
|
|
42
42
|
summarize_results(results)
|
|
43
43
|
end
|
|
44
44
|
|
|
45
|
-
def write_key_file(key: index_now_configuration.key,
|
|
45
|
+
def write_key_file(key: index_now_configuration.key, filename: nil)
|
|
46
46
|
key = normalized_configured_key(key)
|
|
47
47
|
return if key.empty?
|
|
48
48
|
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
49
|
+
filename ||= index_now_configuration.key_filename(key: key)
|
|
50
|
+
return filename if valid_key_file?(filename)
|
|
51
|
+
|
|
52
|
+
storage.write(filename, key, content_type: "text/plain")
|
|
53
|
+
filename
|
|
53
54
|
end
|
|
54
55
|
|
|
55
56
|
def ensure_key_file
|
|
@@ -60,7 +61,7 @@ module Indexmap
|
|
|
60
61
|
return existing_path if existing_path
|
|
61
62
|
|
|
62
63
|
key = generated_key
|
|
63
|
-
write_key_file(key: key,
|
|
64
|
+
write_key_file(key: key, filename: "#{key}.txt")
|
|
64
65
|
end
|
|
65
66
|
|
|
66
67
|
private
|
|
@@ -75,7 +76,7 @@ module Indexmap
|
|
|
75
76
|
files = super
|
|
76
77
|
return files if files.one?
|
|
77
78
|
|
|
78
|
-
child_files = files.reject { |file|
|
|
79
|
+
child_files = files.reject { |file| file == configuration.index_filename }
|
|
79
80
|
child_files.empty? ? files : child_files
|
|
80
81
|
end
|
|
81
82
|
|
|
@@ -95,7 +96,7 @@ module Indexmap
|
|
|
95
96
|
|
|
96
97
|
def current_entries
|
|
97
98
|
sitemap_files.each_with_object({}) do |sitemap_file, entries|
|
|
98
|
-
Parser.new(
|
|
99
|
+
Parser.new(source: sitemap_file, storage: storage).entries.each do |entry|
|
|
99
100
|
next if entry.loc.to_s.strip.empty?
|
|
100
101
|
|
|
101
102
|
entries[entry.loc] = entry
|
|
@@ -177,36 +178,32 @@ module Indexmap
|
|
|
177
178
|
configured_key = normalized_configured_key(index_now_configuration.key)
|
|
178
179
|
return configured_key unless configured_key.empty?
|
|
179
180
|
|
|
180
|
-
existing_key_file
|
|
181
|
+
storage.read(existing_key_file) if existing_key_file
|
|
181
182
|
end
|
|
182
183
|
|
|
183
184
|
def existing_key_file
|
|
184
|
-
|
|
185
|
-
return
|
|
185
|
+
configured_filename = index_now_configuration.key_filename
|
|
186
|
+
return configured_filename if valid_key_file?(configured_filename)
|
|
186
187
|
|
|
187
|
-
|
|
188
|
+
storage.list(suffix: ".txt").find { |filename| valid_key_file?(filename) }
|
|
188
189
|
end
|
|
189
190
|
|
|
190
191
|
def key_location(api_key:)
|
|
191
|
-
|
|
192
|
-
return unless
|
|
193
|
-
|
|
194
|
-
public_path = configuration.public_path.expand_path
|
|
195
|
-
key_path = path.expand_path
|
|
196
|
-
relative_path = key_path.relative_path_from(public_path)
|
|
192
|
+
filename = index_now_configuration.key_filename(key: api_key) || existing_key_file
|
|
193
|
+
return unless filename
|
|
197
194
|
|
|
198
|
-
|
|
195
|
+
storage.public_url(filename)
|
|
199
196
|
rescue ArgumentError
|
|
200
197
|
nil
|
|
201
198
|
end
|
|
202
199
|
|
|
203
|
-
def valid_key_file?(
|
|
204
|
-
return false unless
|
|
200
|
+
def valid_key_file?(filename)
|
|
201
|
+
return false unless filename && storage.exist?(filename)
|
|
205
202
|
|
|
206
|
-
|
|
207
|
-
return false unless
|
|
203
|
+
key = File.basename(filename, ".txt").to_s
|
|
204
|
+
return false unless key.match?(KEY_FORMAT)
|
|
208
205
|
|
|
209
|
-
|
|
206
|
+
storage.read(filename) == key
|
|
210
207
|
end
|
|
211
208
|
|
|
212
209
|
def generated_key
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "stringio"
|
|
4
|
+
require "uri"
|
|
5
|
+
|
|
6
|
+
module Indexmap
|
|
7
|
+
module Storage
|
|
8
|
+
class ActiveStorage
|
|
9
|
+
DEFAULT_CONTENT_TYPE = "application/xml"
|
|
10
|
+
|
|
11
|
+
def initialize(model:, public_url:, filename_column: :filename, attachment: :file, content_type: DEFAULT_CONTENT_TYPE)
|
|
12
|
+
@model = model
|
|
13
|
+
@public_url_base = public_url
|
|
14
|
+
@filename_column = filename_column
|
|
15
|
+
@attachment = attachment
|
|
16
|
+
@default_content_type = content_type
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
def write(filename, body, content_type: nil)
|
|
20
|
+
attachment_content_type = content_type || default_content_type
|
|
21
|
+
record = find_or_initialize(filename)
|
|
22
|
+
record.save! unless record.persisted?
|
|
23
|
+
record.public_send(attachment).attach(
|
|
24
|
+
io: StringIO.new(body.to_s),
|
|
25
|
+
filename: filename,
|
|
26
|
+
content_type: attachment_content_type
|
|
27
|
+
)
|
|
28
|
+
|
|
29
|
+
File.new(filename: filename, body: body.to_s, content_type: attachment_content_type)
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
def read(filename)
|
|
33
|
+
record = find_record(filename)
|
|
34
|
+
return unless attached?(record)
|
|
35
|
+
|
|
36
|
+
record.public_send(attachment).download
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
def exist?(filename)
|
|
40
|
+
attached?(find_record(filename))
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
def list(prefix: nil, suffix: nil)
|
|
44
|
+
relation = relation_for(prefix: prefix, suffix: suffix)
|
|
45
|
+
|
|
46
|
+
relation.filter_map do |record|
|
|
47
|
+
filename = record.public_send(filename_column).to_s
|
|
48
|
+
next if prefix && !filename.start_with?(prefix)
|
|
49
|
+
next if suffix && !filename.end_with?(suffix)
|
|
50
|
+
next unless attached?(record)
|
|
51
|
+
|
|
52
|
+
filename
|
|
53
|
+
end.sort
|
|
54
|
+
end
|
|
55
|
+
|
|
56
|
+
def delete(filename)
|
|
57
|
+
record = find_record(filename)
|
|
58
|
+
record&.public_send(attachment)&.purge
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
def public_url(filename)
|
|
62
|
+
URI.join("#{public_url_base}/", filename).to_s
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
def inspect
|
|
66
|
+
"#<#{self.class.name} model=#{model}>"
|
|
67
|
+
end
|
|
68
|
+
|
|
69
|
+
private
|
|
70
|
+
|
|
71
|
+
attr_reader :model, :filename_column, :attachment, :default_content_type
|
|
72
|
+
|
|
73
|
+
def find_or_initialize(filename)
|
|
74
|
+
model.find_or_initialize_by(filename_column => filename)
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
def find_record(filename)
|
|
78
|
+
model.find_by(filename_column => filename)
|
|
79
|
+
end
|
|
80
|
+
|
|
81
|
+
def relation_for(prefix:, suffix:)
|
|
82
|
+
if !model.respond_to?(:where) || (!prefix && !suffix)
|
|
83
|
+
return model.all
|
|
84
|
+
end
|
|
85
|
+
|
|
86
|
+
column = model.connection.quote_column_name(filename_column)
|
|
87
|
+
if prefix && suffix
|
|
88
|
+
model.where("#{column} LIKE ?", "#{prefix}%#{suffix}")
|
|
89
|
+
elsif prefix
|
|
90
|
+
model.where("#{column} LIKE ?", "#{prefix}%")
|
|
91
|
+
else
|
|
92
|
+
model.where("#{column} LIKE ?", "%#{suffix}")
|
|
93
|
+
end
|
|
94
|
+
end
|
|
95
|
+
|
|
96
|
+
def attached?(record)
|
|
97
|
+
record&.public_send(attachment)&.attached?
|
|
98
|
+
end
|
|
99
|
+
|
|
100
|
+
def public_url_base
|
|
101
|
+
@public_url_base.to_s.delete_suffix("/")
|
|
102
|
+
end
|
|
103
|
+
end
|
|
104
|
+
end
|
|
105
|
+
end
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "uri"
|
|
4
|
+
|
|
5
|
+
module Indexmap
|
|
6
|
+
module Storage
|
|
7
|
+
class Filesystem
|
|
8
|
+
DEFAULT_CONTENT_TYPE = "application/xml"
|
|
9
|
+
|
|
10
|
+
def initialize(path:, public_url: nil)
|
|
11
|
+
@path = Pathname(path)
|
|
12
|
+
@public_url_base = public_url
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
def write(filename, body, content_type: DEFAULT_CONTENT_TYPE)
|
|
16
|
+
target = path_for(filename)
|
|
17
|
+
target.dirname.mkpath
|
|
18
|
+
target.write(body.to_s)
|
|
19
|
+
|
|
20
|
+
File.new(
|
|
21
|
+
filename: normalize_filename(filename),
|
|
22
|
+
body: body.to_s,
|
|
23
|
+
content_type: content_type || DEFAULT_CONTENT_TYPE
|
|
24
|
+
)
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
def read(filename)
|
|
28
|
+
path_for(filename).read(encoding: "UTF-8")
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
def exist?(filename)
|
|
32
|
+
path_for(filename).file?
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
def list(prefix: nil, suffix: nil)
|
|
36
|
+
path.glob("**/*").select(&:file?).filter_map do |file|
|
|
37
|
+
filename = file.relative_path_from(path).to_s
|
|
38
|
+
next if prefix && !filename.start_with?(prefix)
|
|
39
|
+
next if suffix && !filename.end_with?(suffix)
|
|
40
|
+
|
|
41
|
+
filename
|
|
42
|
+
end.sort
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
def delete(filename)
|
|
46
|
+
path_for(filename).delete if exist?(filename)
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
def public_url(filename)
|
|
50
|
+
return normalize_filename(filename) if public_url_base.to_s.strip.empty?
|
|
51
|
+
|
|
52
|
+
URI.join("#{public_url_base.to_s.delete_suffix("/")}/", normalize_filename(filename)).to_s
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
def inspect
|
|
56
|
+
"#<#{self.class.name} path=#{path}>"
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
private
|
|
60
|
+
|
|
61
|
+
attr_reader :path, :public_url_base
|
|
62
|
+
|
|
63
|
+
def path_for(filename)
|
|
64
|
+
path.join(normalize_filename(filename))
|
|
65
|
+
end
|
|
66
|
+
|
|
67
|
+
def normalize_filename(filename)
|
|
68
|
+
normalized = Pathname(filename.to_s).cleanpath
|
|
69
|
+
if normalized.absolute? || normalized.to_s.start_with?("../") || normalized.to_s == ".."
|
|
70
|
+
raise ArgumentError, "Storage filename must be relative: #{filename.inspect}"
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
normalized.to_s
|
|
74
|
+
end
|
|
75
|
+
end
|
|
76
|
+
end
|
|
77
|
+
end
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "uri"
|
|
4
|
+
|
|
5
|
+
module Indexmap
|
|
6
|
+
module Storage
|
|
7
|
+
class Memory
|
|
8
|
+
DEFAULT_CONTENT_TYPE = "application/xml"
|
|
9
|
+
|
|
10
|
+
def initialize(files = [], public_url: nil)
|
|
11
|
+
@files = {}
|
|
12
|
+
@public_url_base = public_url
|
|
13
|
+
Array(files).each do |file|
|
|
14
|
+
write(file.filename, file.body, content_type: file.content_type)
|
|
15
|
+
end
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
def write(filename, body, content_type: DEFAULT_CONTENT_TYPE)
|
|
19
|
+
normalized = normalize_filename(filename)
|
|
20
|
+
files[normalized] = File.new(
|
|
21
|
+
filename: normalized,
|
|
22
|
+
body: body.to_s,
|
|
23
|
+
content_type: content_type || DEFAULT_CONTENT_TYPE
|
|
24
|
+
)
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
def read(filename)
|
|
28
|
+
files.fetch(normalize_filename(filename)).body
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
def exist?(filename)
|
|
32
|
+
files.key?(normalize_filename(filename))
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
def list(prefix: nil, suffix: nil)
|
|
36
|
+
files.keys.select do |filename|
|
|
37
|
+
(prefix.nil? || filename.start_with?(prefix)) &&
|
|
38
|
+
(suffix.nil? || filename.end_with?(suffix))
|
|
39
|
+
end.sort
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
def delete(filename)
|
|
43
|
+
files.delete(normalize_filename(filename))
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
def public_url(filename)
|
|
47
|
+
return normalize_filename(filename) if public_url_base.to_s.strip.empty?
|
|
48
|
+
|
|
49
|
+
URI.join("#{public_url_base.to_s.delete_suffix("/")}/", normalize_filename(filename)).to_s
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
private
|
|
53
|
+
|
|
54
|
+
attr_reader :files, :public_url_base
|
|
55
|
+
|
|
56
|
+
def normalize_filename(filename)
|
|
57
|
+
filename.to_s
|
|
58
|
+
end
|
|
59
|
+
end
|
|
60
|
+
end
|
|
61
|
+
end
|
data/lib/indexmap/task_runner.rb
CHANGED
|
@@ -10,15 +10,20 @@ module Indexmap
|
|
|
10
10
|
|
|
11
11
|
def create
|
|
12
12
|
written_files = Indexmap.create(configuration: configuration)
|
|
13
|
-
|
|
13
|
+
index_now_key_filename = write_index_now_key if configuration.index_now.write_key_file?
|
|
14
14
|
configuration.run_after_create_callbacks
|
|
15
15
|
|
|
16
|
-
{
|
|
16
|
+
{
|
|
17
|
+
files: written_files.map(&:to_s),
|
|
18
|
+
written_files: written_files,
|
|
19
|
+
sitemaps: sitemap_details(written_files),
|
|
20
|
+
index_now_key_filename: index_now_key_filename
|
|
21
|
+
}
|
|
17
22
|
end
|
|
18
23
|
|
|
19
24
|
def format
|
|
20
|
-
sitemap_files.each do |
|
|
21
|
-
content =
|
|
25
|
+
sitemap_files.each do |filename|
|
|
26
|
+
content = storage.read(filename)
|
|
22
27
|
document = Nokogiri::XML(
|
|
23
28
|
content,
|
|
24
29
|
nil,
|
|
@@ -27,7 +32,7 @@ module Indexmap
|
|
|
27
32
|
)
|
|
28
33
|
save_options = Nokogiri::XML::Node::SaveOptions::FORMAT | Nokogiri::XML::Node::SaveOptions::AS_XML
|
|
29
34
|
|
|
30
|
-
|
|
35
|
+
storage.write(filename, document.to_xml(indent: 2, save_with: save_options), content_type: "application/xml")
|
|
31
36
|
end
|
|
32
37
|
|
|
33
38
|
sitemap_files
|
|
@@ -45,8 +50,8 @@ module Indexmap
|
|
|
45
50
|
pinger.write_key_file
|
|
46
51
|
end
|
|
47
52
|
|
|
48
|
-
def
|
|
49
|
-
|
|
53
|
+
def storage
|
|
54
|
+
configuration.storage
|
|
50
55
|
end
|
|
51
56
|
|
|
52
57
|
private
|
|
@@ -58,7 +63,29 @@ module Indexmap
|
|
|
58
63
|
end
|
|
59
64
|
|
|
60
65
|
def sitemap_files
|
|
61
|
-
|
|
66
|
+
storage.list(prefix: "sitemap", suffix: ".xml")
|
|
67
|
+
end
|
|
68
|
+
|
|
69
|
+
def sitemap_details(files)
|
|
70
|
+
files.map do |filename|
|
|
71
|
+
{
|
|
72
|
+
filename: filename.to_s,
|
|
73
|
+
location: sitemap_location(filename),
|
|
74
|
+
link_count: sitemap_link_count(filename)
|
|
75
|
+
}
|
|
76
|
+
end
|
|
77
|
+
end
|
|
78
|
+
|
|
79
|
+
def sitemap_location(filename)
|
|
80
|
+
return storage.public_url(filename) if storage.respond_to?(:public_url)
|
|
81
|
+
|
|
82
|
+
filename.to_s
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
def sitemap_link_count(filename)
|
|
86
|
+
document = Nokogiri::XML(storage.read(filename.to_s))
|
|
87
|
+
document.remove_namespaces!
|
|
88
|
+
document.xpath("//loc").count
|
|
62
89
|
end
|
|
63
90
|
end
|
|
64
91
|
end
|
data/lib/indexmap/validator.rb
CHANGED
|
@@ -7,20 +7,17 @@ require "uri"
|
|
|
7
7
|
|
|
8
8
|
module Indexmap
|
|
9
9
|
class Validator
|
|
10
|
-
def initialize(configuration: Indexmap.configuration,
|
|
10
|
+
def initialize(configuration: Indexmap.configuration, filename: nil)
|
|
11
11
|
@configuration = configuration
|
|
12
|
-
@
|
|
12
|
+
@filename = filename
|
|
13
13
|
end
|
|
14
14
|
|
|
15
15
|
def validate!
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
)
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
validate_sitemap_file!(sitemap_path)
|
|
23
|
-
entries = Parser.new(path: sitemap_path).entries
|
|
16
|
+
sitemap_filename = filename || configuration.index_filename
|
|
17
|
+
raise ValidationError, "Missing sitemap file: #{sitemap_filename}" unless storage.exist?(sitemap_filename)
|
|
18
|
+
|
|
19
|
+
validate_sitemap_file!(sitemap_filename)
|
|
20
|
+
entries = Parser.new(source: sitemap_filename, storage: storage, index_filename: configuration.index_filename).entries
|
|
24
21
|
validate_presence!(entries)
|
|
25
22
|
validate_duplicates!(entries)
|
|
26
23
|
validate_parameterized_urls!(entries)
|
|
@@ -33,39 +30,43 @@ module Indexmap
|
|
|
33
30
|
|
|
34
31
|
private
|
|
35
32
|
|
|
36
|
-
attr_reader :configuration, :
|
|
33
|
+
attr_reader :configuration, :filename
|
|
34
|
+
|
|
35
|
+
def storage
|
|
36
|
+
configuration.storage
|
|
37
|
+
end
|
|
37
38
|
|
|
38
|
-
def validate_sitemap_file!(
|
|
39
|
-
document = read_xml_document(
|
|
39
|
+
def validate_sitemap_file!(sitemap_filename)
|
|
40
|
+
document = read_xml_document(sitemap_filename)
|
|
40
41
|
root_name = document.root&.name
|
|
41
42
|
|
|
42
43
|
case root_name
|
|
43
44
|
when "urlset"
|
|
44
|
-
validate_urlset_document!(document,
|
|
45
|
+
validate_urlset_document!(document, sitemap_filename)
|
|
45
46
|
when "sitemapindex"
|
|
46
|
-
validate_sitemap_index_document!(document,
|
|
47
|
+
validate_sitemap_index_document!(document, sitemap_filename)
|
|
47
48
|
else
|
|
48
|
-
raise ValidationError, "Invalid sitemap root element in #{
|
|
49
|
+
raise ValidationError, "Invalid sitemap root element in #{sitemap_filename}: #{root_name || "none"}"
|
|
49
50
|
end
|
|
50
51
|
end
|
|
51
52
|
|
|
52
|
-
def read_xml_document(
|
|
53
|
-
document = Nokogiri::XML(
|
|
53
|
+
def read_xml_document(filename)
|
|
54
|
+
document = Nokogiri::XML(storage.read(filename)) { |config| config.strict }
|
|
54
55
|
document.remove_namespaces!
|
|
55
56
|
document
|
|
56
57
|
rescue Nokogiri::XML::SyntaxError => error
|
|
57
|
-
raise ValidationError, "Invalid sitemap XML in #{
|
|
58
|
+
raise ValidationError, "Invalid sitemap XML in #{filename}: #{error.message.lines.first.strip}"
|
|
58
59
|
end
|
|
59
60
|
|
|
60
|
-
def validate_urlset_document!(document,
|
|
61
|
+
def validate_urlset_document!(document, sitemap_filename)
|
|
61
62
|
return if document.xpath("/urlset/url/loc").any?
|
|
62
63
|
|
|
63
|
-
raise ValidationError, "Sitemap has no URLs: #{
|
|
64
|
+
raise ValidationError, "Sitemap has no URLs: #{sitemap_filename}"
|
|
64
65
|
end
|
|
65
66
|
|
|
66
|
-
def validate_sitemap_index_document!(document,
|
|
67
|
+
def validate_sitemap_index_document!(document, sitemap_filename)
|
|
67
68
|
child_locations = document.xpath("/sitemapindex/sitemap/loc").map { |node| node.text.to_s.strip }.reject(&:empty?)
|
|
68
|
-
raise ValidationError, "Sitemap index has no child sitemap URLs: #{
|
|
69
|
+
raise ValidationError, "Sitemap index has no child sitemap URLs: #{sitemap_filename}" if child_locations.empty?
|
|
69
70
|
|
|
70
71
|
duplicate_children = child_locations.group_by(&:itself).select { |_loc, values| values.size > 1 }.keys
|
|
71
72
|
unless duplicate_children.empty?
|
|
@@ -73,19 +74,30 @@ module Indexmap
|
|
|
73
74
|
end
|
|
74
75
|
|
|
75
76
|
child_locations.each do |location|
|
|
76
|
-
|
|
77
|
-
raise ValidationError, "Missing child sitemap file: #{
|
|
77
|
+
child_filename = local_child_filename(location, parent_filename: sitemap_filename)
|
|
78
|
+
raise ValidationError, "Missing child sitemap file: #{child_filename}" unless storage.exist?(child_filename)
|
|
78
79
|
|
|
79
|
-
validate_sitemap_file!(
|
|
80
|
+
validate_sitemap_file!(child_filename)
|
|
80
81
|
end
|
|
81
82
|
end
|
|
82
83
|
|
|
83
|
-
def
|
|
84
|
+
def local_child_filename(location, parent_filename:)
|
|
84
85
|
uri = URI.parse(location)
|
|
85
|
-
|
|
86
|
-
|
|
86
|
+
if uri.absolute? || location.start_with?("/")
|
|
87
|
+
normalize_local_filename(uri.path)
|
|
88
|
+
else
|
|
89
|
+
parent_directory = File.dirname(parent_filename)
|
|
90
|
+
normalize_local_filename((parent_directory == ".") ? location : File.join(parent_directory, location))
|
|
91
|
+
end
|
|
87
92
|
rescue URI::InvalidURIError
|
|
88
|
-
|
|
93
|
+
normalize_local_filename(location)
|
|
94
|
+
end
|
|
95
|
+
|
|
96
|
+
def normalize_local_filename(filename)
|
|
97
|
+
normalized = Pathname(filename.to_s).cleanpath.to_s.sub(%r{\A/+}, "")
|
|
98
|
+
return if normalized.empty? || normalized == ".." || normalized.start_with?("../")
|
|
99
|
+
|
|
100
|
+
normalized
|
|
89
101
|
end
|
|
90
102
|
|
|
91
103
|
def validate_presence!(entries)
|
data/lib/indexmap/version.rb
CHANGED