site_maps 0.0.1.beta3 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/main.yml +2 -4
- data/.rubocop.yml +4 -2
- data/.tool-versions +1 -1
- data/AGENTS.md +73 -0
- data/CHANGELOG.md +5 -0
- data/CLAUDE.md +77 -0
- data/Gemfile +1 -0
- data/Gemfile.lock +72 -56
- data/README.md +531 -393
- data/docs/README.md +67 -0
- data/docs/adapters.md +143 -0
- data/docs/api.md +154 -0
- data/docs/cli.md +93 -0
- data/docs/events.md +79 -0
- data/docs/extensions.md +141 -0
- data/docs/getting-started.md +138 -0
- data/docs/middleware.md +85 -0
- data/docs/processes.md +156 -0
- data/docs/rails.md +128 -0
- data/lib/site_maps/adapters/adapter.rb +35 -5
- data/lib/site_maps/adapters/aws_sdk/storage.rb +5 -2
- data/lib/site_maps/builder/sitemap_index/item.rb +1 -1
- data/lib/site_maps/builder/sitemap_index.rb +29 -5
- data/lib/site_maps/builder/url.rb +13 -10
- data/lib/site_maps/builder/url_set.rb +17 -7
- data/lib/site_maps/builder/xsl_stylesheet.rb +192 -0
- data/lib/site_maps/cli.rb +6 -2
- data/lib/site_maps/configuration.rb +8 -1
- data/lib/site_maps/incremental_location.rb +1 -1
- data/lib/site_maps/middleware.rb +197 -0
- data/lib/site_maps/notification/event.rb +1 -1
- data/lib/site_maps/notification/publisher.rb +1 -0
- data/lib/site_maps/notification.rb +1 -0
- data/lib/site_maps/ping.rb +35 -0
- data/lib/site_maps/{primitives → primitive}/array.rb +1 -1
- data/lib/site_maps/{primitives → primitive}/output.rb +1 -1
- data/lib/site_maps/primitive/string.rb +106 -0
- data/lib/site_maps/robots_txt.rb +21 -0
- data/lib/site_maps/runner/event_listener.rb +2 -2
- data/lib/site_maps/runner.rb +17 -3
- data/lib/site_maps/sitemap_builder.rb +16 -4
- data/lib/site_maps/sitemap_reader.rb +3 -0
- data/lib/site_maps/version.rb +1 -1
- data/lib/site_maps.rb +81 -10
- data/site_maps.gemspec +1 -1
- metadata +23 -10
- data/lib/site_maps/primitives/string.rb +0 -43
|
@@ -11,20 +11,23 @@ module SiteMaps::Builder
|
|
|
11
11
|
|
|
12
12
|
attr_reader :attributes
|
|
13
13
|
|
|
14
|
-
def initialize(link, **attributes)
|
|
15
|
-
|
|
14
|
+
def initialize(link, emit_priority: true, emit_changefreq: true, **attributes)
|
|
15
|
+
defaults = DEFAULTS.dup
|
|
16
|
+
defaults.delete(:priority) unless emit_priority
|
|
17
|
+
defaults.delete(:changefreq) unless emit_changefreq
|
|
18
|
+
@attributes = defaults.merge(attributes)
|
|
16
19
|
@attributes[:loc] = link
|
|
17
|
-
@attributes[:alternates] = SiteMaps::
|
|
18
|
-
@attributes[:videos] = SiteMaps::
|
|
19
|
-
@attributes[:images] = SiteMaps::
|
|
20
|
+
@attributes[:alternates] = SiteMaps::Primitive::Array.wrap(@attributes[:alternates])
|
|
21
|
+
@attributes[:videos] = SiteMaps::Primitive::Array.wrap(@attributes[:videos])
|
|
22
|
+
@attributes[:images] = SiteMaps::Primitive::Array.wrap(@attributes[:images])
|
|
20
23
|
if (video = @attributes.delete(:video))
|
|
21
|
-
@attributes[:videos].concat(SiteMaps::
|
|
24
|
+
@attributes[:videos].concat(SiteMaps::Primitive::Array.wrap(video))
|
|
22
25
|
end
|
|
23
26
|
if (alternate = @attributes.delete(:alternate))
|
|
24
|
-
@attributes[:alternates].concat(SiteMaps::
|
|
27
|
+
@attributes[:alternates].concat(SiteMaps::Primitive::Array.wrap(alternate))
|
|
25
28
|
end
|
|
26
29
|
if (image = @attributes.delete(:image))
|
|
27
|
-
@attributes[:images].concat(SiteMaps::
|
|
30
|
+
@attributes[:images].concat(SiteMaps::Primitive::Array.wrap(image))
|
|
28
31
|
end
|
|
29
32
|
@attributes[:images] = @attributes[:images][0...SiteMaps::MAX_LENGTH[:images]]
|
|
30
33
|
end
|
|
@@ -121,9 +124,9 @@ module SiteMaps::Builder
|
|
|
121
124
|
|
|
122
125
|
if self[:pagemap].is_a?(Hash) && (pagemap = self[:pagemap]).any?
|
|
123
126
|
builder.pagemap :PageMap do
|
|
124
|
-
SiteMaps::
|
|
127
|
+
SiteMaps::Primitive::Array.wrap(pagemap[:dataobjects]).each do |dataobject|
|
|
125
128
|
builder.pagemap :DataObject, type: dataobject[:type].to_s, id: dataobject[:id].to_s do
|
|
126
|
-
SiteMaps::
|
|
129
|
+
SiteMaps::Primitive::Array.wrap(dataobject[:attributes]).each do |attribute|
|
|
127
130
|
builder.pagemap :Attribute, attribute[:value].to_s, name: attribute[:name].to_s
|
|
128
131
|
end
|
|
129
132
|
end
|
|
@@ -10,32 +10,42 @@ module SiteMaps::Builder
|
|
|
10
10
|
"video" => "http://www.google.com/schemas/sitemap-video/1.1"
|
|
11
11
|
}.freeze
|
|
12
12
|
|
|
13
|
-
|
|
14
|
-
|
|
13
|
+
XML_DECLARATION = %(<?xml version="1.0" encoding="UTF-8"?>)
|
|
14
|
+
URLSET_OPEN = <<~URLSET_OPEN
|
|
15
15
|
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
|
|
16
16
|
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
|
17
17
|
xmlns:xhtml="http://www.w3.org/1999/xhtml"
|
|
18
18
|
xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"
|
|
19
19
|
#{SCHEMAS.map { |name, uri| " xmlns:#{name}=\"#{uri}\"" }.join("\n")}
|
|
20
20
|
>
|
|
21
|
-
|
|
21
|
+
URLSET_OPEN
|
|
22
|
+
HEADER = "#{XML_DECLARATION}\n#{URLSET_OPEN}"
|
|
22
23
|
FOOTER = "</urlset>"
|
|
23
24
|
FOOTER_BYTESIZE = FOOTER.bytesize
|
|
24
25
|
|
|
25
26
|
attr_reader :content, :links_count, :news_count
|
|
26
27
|
|
|
27
|
-
def initialize
|
|
28
|
+
def initialize(max_links: SiteMaps::MAX_LENGTH[:links], emit_priority: true, emit_changefreq: true, xsl_url: nil)
|
|
28
29
|
@content = StringIO.new
|
|
29
|
-
|
|
30
|
+
if xsl_url
|
|
31
|
+
@content.puts(XML_DECLARATION)
|
|
32
|
+
@content.puts(XSLStylesheet.processing_instruction(xsl_url))
|
|
33
|
+
@content.puts(URLSET_OPEN)
|
|
34
|
+
else
|
|
35
|
+
@content.puts(HEADER)
|
|
36
|
+
end
|
|
30
37
|
@links_count = 0
|
|
31
38
|
@news_count = 0
|
|
32
39
|
@last_modified = nil
|
|
40
|
+
@max_links = max_links
|
|
41
|
+
@emit_priority = emit_priority
|
|
42
|
+
@emit_changefreq = emit_changefreq
|
|
33
43
|
end
|
|
34
44
|
|
|
35
45
|
def add(link, **options)
|
|
36
46
|
raise SiteMaps::FullSitemapError if finalized?
|
|
37
47
|
|
|
38
|
-
url = SiteMaps::Builder::URL.new(link, **options)
|
|
48
|
+
url = SiteMaps::Builder::URL.new(link, emit_priority: @emit_priority, emit_changefreq: @emit_changefreq, **options)
|
|
39
49
|
raise SiteMaps::FullSitemapError unless fit?(url)
|
|
40
50
|
|
|
41
51
|
content.puts(url.to_xml)
|
|
@@ -83,7 +93,7 @@ module SiteMaps::Builder
|
|
|
83
93
|
|
|
84
94
|
# @param url [Builder::URL]
|
|
85
95
|
def fit?(url)
|
|
86
|
-
return false if links_count >=
|
|
96
|
+
return false if links_count >= @max_links
|
|
87
97
|
return false if url.news? && news_count >= SiteMaps::MAX_LENGTH[:news]
|
|
88
98
|
|
|
89
99
|
(bytesize + url.bytesize + FOOTER_BYTESIZE) <= SiteMaps::MAX_FILESIZE
|
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SiteMaps::Builder
|
|
4
|
+
class XSLStylesheet
|
|
5
|
+
URLSET_XSL = <<~XSL
|
|
6
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
|
7
|
+
<xsl:stylesheet version="2.0"
|
|
8
|
+
xmlns:html="http://www.w3.org/TR/REC-html40"
|
|
9
|
+
xmlns:sitemap="http://www.sitemaps.org/schemas/sitemap/0.9"
|
|
10
|
+
xmlns:image="http://www.google.com/schemas/sitemap-image/1.1"
|
|
11
|
+
xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
|
|
12
|
+
<xsl:output method="html" version="1.0" encoding="UTF-8" indent="yes"/>
|
|
13
|
+
<xsl:template match="/">
|
|
14
|
+
<html xmlns="http://www.w3.org/1999/xhtml">
|
|
15
|
+
<head>
|
|
16
|
+
<title>XML Sitemap</title>
|
|
17
|
+
<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
|
|
18
|
+
<style type="text/css">
|
|
19
|
+
body {
|
|
20
|
+
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Oxygen-Sans, Ubuntu, Cantarell, "Helvetica Neue", sans-serif;
|
|
21
|
+
font-size: 13px;
|
|
22
|
+
color: #545353;
|
|
23
|
+
}
|
|
24
|
+
a { color: #05809e; text-decoration: none; }
|
|
25
|
+
a:visited { color: #06577d; }
|
|
26
|
+
a:hover { text-decoration: underline; }
|
|
27
|
+
#content {
|
|
28
|
+
margin: 0 auto;
|
|
29
|
+
padding: 0 20px;
|
|
30
|
+
max-width: 1200px;
|
|
31
|
+
}
|
|
32
|
+
h1 { font-size: 24px; margin: 20px 0 10px; }
|
|
33
|
+
p.desc { color: #777; margin: 0 0 20px; }
|
|
34
|
+
table {
|
|
35
|
+
border: none;
|
|
36
|
+
border-collapse: collapse;
|
|
37
|
+
width: 100%;
|
|
38
|
+
margin: 0 0 20px;
|
|
39
|
+
}
|
|
40
|
+
th {
|
|
41
|
+
text-align: left;
|
|
42
|
+
padding: 10px 8px;
|
|
43
|
+
font-size: 12px;
|
|
44
|
+
border-bottom: 1px solid #ccc;
|
|
45
|
+
}
|
|
46
|
+
td {
|
|
47
|
+
padding: 8px;
|
|
48
|
+
font-size: 12px;
|
|
49
|
+
border-bottom: 1px solid #eee;
|
|
50
|
+
}
|
|
51
|
+
tr:nth-child(odd) td { background-color: #f8f8f8; }
|
|
52
|
+
tr:hover td { background-color: #e8e8e8; }
|
|
53
|
+
td.url { max-width: 0; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; }
|
|
54
|
+
#footer { margin: 10px 0 30px; font-size: 11px; color: #999; }
|
|
55
|
+
</style>
|
|
56
|
+
</head>
|
|
57
|
+
<body>
|
|
58
|
+
<div id="content">
|
|
59
|
+
<h1>XML Sitemap</h1>
|
|
60
|
+
<p class="desc">
|
|
61
|
+
This XML sitemap is used by search engines which follow the
|
|
62
|
+
<a href="https://www.sitemaps.org">XML sitemap standard</a>.
|
|
63
|
+
</p>
|
|
64
|
+
<table>
|
|
65
|
+
<tr>
|
|
66
|
+
<th style="width:80%">URL</th>
|
|
67
|
+
<th style="width:5%">Images</th>
|
|
68
|
+
<th style="width:15%">Last Modified</th>
|
|
69
|
+
</tr>
|
|
70
|
+
<xsl:for-each select="sitemap:urlset/sitemap:url">
|
|
71
|
+
<tr>
|
|
72
|
+
<td class="url">
|
|
73
|
+
<a href="{sitemap:loc}"><xsl:value-of select="sitemap:loc"/></a>
|
|
74
|
+
</td>
|
|
75
|
+
<td>
|
|
76
|
+
<xsl:value-of select="count(image:image)"/>
|
|
77
|
+
</td>
|
|
78
|
+
<td>
|
|
79
|
+
<xsl:value-of select="concat(substring(sitemap:lastmod, 0, 11), ' ', substring(sitemap:lastmod, 12, 5))"/>
|
|
80
|
+
</td>
|
|
81
|
+
</tr>
|
|
82
|
+
</xsl:for-each>
|
|
83
|
+
</table>
|
|
84
|
+
<p id="footer">
|
|
85
|
+
Generated by <a href="https://github.com/marcosgz/site_maps">SiteMaps</a>
|
|
86
|
+
</p>
|
|
87
|
+
</div>
|
|
88
|
+
</body>
|
|
89
|
+
</html>
|
|
90
|
+
</xsl:template>
|
|
91
|
+
</xsl:stylesheet>
|
|
92
|
+
XSL
|
|
93
|
+
|
|
94
|
+
INDEX_XSL = <<~XSL
|
|
95
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
|
96
|
+
<xsl:stylesheet version="2.0"
|
|
97
|
+
xmlns:html="http://www.w3.org/TR/REC-html40"
|
|
98
|
+
xmlns:sitemap="http://www.sitemaps.org/schemas/sitemap/0.9"
|
|
99
|
+
xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
|
|
100
|
+
<xsl:output method="html" version="1.0" encoding="UTF-8" indent="yes"/>
|
|
101
|
+
<xsl:template match="/">
|
|
102
|
+
<html xmlns="http://www.w3.org/1999/xhtml">
|
|
103
|
+
<head>
|
|
104
|
+
<title>XML Sitemap Index</title>
|
|
105
|
+
<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
|
|
106
|
+
<style type="text/css">
|
|
107
|
+
body {
|
|
108
|
+
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Oxygen-Sans, Ubuntu, Cantarell, "Helvetica Neue", sans-serif;
|
|
109
|
+
font-size: 13px;
|
|
110
|
+
color: #545353;
|
|
111
|
+
}
|
|
112
|
+
a { color: #05809e; text-decoration: none; }
|
|
113
|
+
a:visited { color: #06577d; }
|
|
114
|
+
a:hover { text-decoration: underline; }
|
|
115
|
+
#content {
|
|
116
|
+
margin: 0 auto;
|
|
117
|
+
padding: 0 20px;
|
|
118
|
+
max-width: 1200px;
|
|
119
|
+
}
|
|
120
|
+
h1 { font-size: 24px; margin: 20px 0 10px; }
|
|
121
|
+
p.desc { color: #777; margin: 0 0 20px; }
|
|
122
|
+
table {
|
|
123
|
+
border: none;
|
|
124
|
+
border-collapse: collapse;
|
|
125
|
+
width: 100%;
|
|
126
|
+
margin: 0 0 20px;
|
|
127
|
+
}
|
|
128
|
+
th {
|
|
129
|
+
text-align: left;
|
|
130
|
+
padding: 10px 8px;
|
|
131
|
+
font-size: 12px;
|
|
132
|
+
border-bottom: 1px solid #ccc;
|
|
133
|
+
}
|
|
134
|
+
td {
|
|
135
|
+
padding: 8px;
|
|
136
|
+
font-size: 12px;
|
|
137
|
+
border-bottom: 1px solid #eee;
|
|
138
|
+
}
|
|
139
|
+
tr:nth-child(odd) td { background-color: #f8f8f8; }
|
|
140
|
+
tr:hover td { background-color: #e8e8e8; }
|
|
141
|
+
td.url { max-width: 0; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; }
|
|
142
|
+
#footer { margin: 10px 0 30px; font-size: 11px; color: #999; }
|
|
143
|
+
</style>
|
|
144
|
+
</head>
|
|
145
|
+
<body>
|
|
146
|
+
<div id="content">
|
|
147
|
+
<h1>XML Sitemap Index</h1>
|
|
148
|
+
<p class="desc">
|
|
149
|
+
This XML sitemap index file contains
|
|
150
|
+
<xsl:value-of select="count(sitemap:sitemapindex/sitemap:sitemap)"/> sitemaps.
|
|
151
|
+
</p>
|
|
152
|
+
<table>
|
|
153
|
+
<tr>
|
|
154
|
+
<th style="width:75%">Sitemap</th>
|
|
155
|
+
<th style="width:25%">Last Modified</th>
|
|
156
|
+
</tr>
|
|
157
|
+
<xsl:for-each select="sitemap:sitemapindex/sitemap:sitemap">
|
|
158
|
+
<tr>
|
|
159
|
+
<td class="url">
|
|
160
|
+
<a href="{sitemap:loc}"><xsl:value-of select="sitemap:loc"/></a>
|
|
161
|
+
</td>
|
|
162
|
+
<td>
|
|
163
|
+
<xsl:value-of select="concat(substring(sitemap:lastmod, 0, 11), ' ', substring(sitemap:lastmod, 12, 5))"/>
|
|
164
|
+
</td>
|
|
165
|
+
</tr>
|
|
166
|
+
</xsl:for-each>
|
|
167
|
+
</table>
|
|
168
|
+
<p id="footer">
|
|
169
|
+
Generated by <a href="https://github.com/marcosgz/site_maps">SiteMaps</a>
|
|
170
|
+
</p>
|
|
171
|
+
</div>
|
|
172
|
+
</body>
|
|
173
|
+
</html>
|
|
174
|
+
</xsl:template>
|
|
175
|
+
</xsl:stylesheet>
|
|
176
|
+
XSL
|
|
177
|
+
|
|
178
|
+
class << self
|
|
179
|
+
def processing_instruction(url)
|
|
180
|
+
%(<?xml-stylesheet type="text/xsl" href="#{url}"?>)
|
|
181
|
+
end
|
|
182
|
+
|
|
183
|
+
def urlset_xsl
|
|
184
|
+
URLSET_XSL
|
|
185
|
+
end
|
|
186
|
+
|
|
187
|
+
def index_xsl
|
|
188
|
+
INDEX_XSL
|
|
189
|
+
end
|
|
190
|
+
end
|
|
191
|
+
end
|
|
192
|
+
end
|
data/lib/site_maps/cli.rb
CHANGED
|
@@ -9,6 +9,7 @@ module SiteMaps
|
|
|
9
9
|
method_option :max_threads, type: :numeric, aliases: "-c", default: 4
|
|
10
10
|
method_option :context, type: :hash, default: {}
|
|
11
11
|
method_option :enqueue_remaining, type: :boolean, default: false
|
|
12
|
+
method_option :ping, type: :boolean, default: false, desc: "Ping search engines after generation"
|
|
12
13
|
|
|
13
14
|
desc "generate 1st_process,2nd_process ... ,Nth_process", "Generate sitemap.xml files for the given processes"
|
|
14
15
|
default_command :start
|
|
@@ -26,14 +27,17 @@ module SiteMaps
|
|
|
26
27
|
|
|
27
28
|
SiteMaps::Notification.subscribe(SiteMaps::Runner::EventListener)
|
|
28
29
|
|
|
30
|
+
context = (opts[:context] || {}).transform_keys(&:to_sym)
|
|
29
31
|
runner = SiteMaps.generate(
|
|
30
32
|
config_file: opts[:config_file],
|
|
31
|
-
max_threads: opts[:max_threads]
|
|
33
|
+
max_threads: opts[:max_threads],
|
|
34
|
+
context: context.empty? ? nil : context,
|
|
35
|
+
ping: opts[:ping] || nil
|
|
32
36
|
)
|
|
33
37
|
if processes.empty?
|
|
34
38
|
runner.enqueue_all
|
|
35
39
|
else
|
|
36
|
-
kwargs =
|
|
40
|
+
kwargs = context
|
|
37
41
|
processes.split(",").each do |process|
|
|
38
42
|
runner.enqueue(process.strip.to_sym, **kwargs)
|
|
39
43
|
end
|
|
@@ -37,6 +37,13 @@ module SiteMaps
|
|
|
37
37
|
|
|
38
38
|
attribute :url
|
|
39
39
|
attribute :directory, default: "/tmp/sitemaps"
|
|
40
|
+
attribute :max_links, default: 50_000
|
|
41
|
+
attribute :emit_priority, default: true
|
|
42
|
+
attribute :emit_changefreq, default: true
|
|
43
|
+
attribute :xsl_stylesheet_url
|
|
44
|
+
attribute :xsl_index_stylesheet_url
|
|
45
|
+
attribute :ping_search_engines, default: false
|
|
46
|
+
attribute :ping_engines
|
|
40
47
|
|
|
41
48
|
def initialize(**options)
|
|
42
49
|
default_attributes.merge(options).each do |key, value|
|
|
@@ -90,7 +97,7 @@ module SiteMaps
|
|
|
90
97
|
|
|
91
98
|
def remote_sitemap_directory
|
|
92
99
|
path = ::URI.parse(url).path
|
|
93
|
-
path = path[1
|
|
100
|
+
path = path[1..] if path.start_with?("/")
|
|
94
101
|
path.split("/")[0..-2].join("/")
|
|
95
102
|
end
|
|
96
103
|
|
|
@@ -53,7 +53,7 @@ module SiteMaps
|
|
|
53
53
|
end
|
|
54
54
|
base = uri.dup.tap { |v| v.path = "" }.to_s
|
|
55
55
|
basename = File.basename(uri.path)
|
|
56
|
-
index_basename = basename.sub(
|
|
56
|
+
index_basename = basename.sub(/\.(xml|xml\.gz)$/, "#{PLACEHOLDER}.\\1")
|
|
57
57
|
|
|
58
58
|
@placeholder_url = File.join(base, File.join(File.dirname(uri.path), index_basename))
|
|
59
59
|
@uri = URI(File.join(base, File.join(File.dirname(uri.path), basename)))
|
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SiteMaps
|
|
4
|
+
class Middleware
|
|
5
|
+
DEFAULT_X_ROBOTS_TAG = "noindex, follow"
|
|
6
|
+
DEFAULT_CACHE_CONTROL = "public, max-age=3600"
|
|
7
|
+
URLSET_XSL_PATH = "/_sitemap-stylesheet.xsl"
|
|
8
|
+
INDEX_XSL_PATH = "/_sitemap-index-stylesheet.xsl"
|
|
9
|
+
|
|
10
|
+
# @param adapter [Object, #call, nil] Adapter instance, a callable (0-arg or 1-arg
|
|
11
|
+
# receiving the Rack env) that returns an adapter, or nil to fall back to
|
|
12
|
+
# SiteMaps.current_adapter.
|
|
13
|
+
#
|
|
14
|
+
# @param public_prefix [String, #call, nil] A prefix present in the **public URL**
|
|
15
|
+
# that is absent from the storage path. Stripped from the incoming request path
|
|
16
|
+
# to derive the internal lookup path.
|
|
17
|
+
#
|
|
18
|
+
# Example: sitemaps stored at `/sitemap.xml`, served publicly at
|
|
19
|
+
# `/sitemaps/tenant/sitemap.xml` → `public_prefix: "/sitemaps/tenant"`
|
|
20
|
+
#
|
|
21
|
+
# @param storage_prefix [String, #call, nil] A prefix present in the **storage
|
|
22
|
+
# path** that is absent from the public URL. Prepended to the incoming request
|
|
23
|
+
# path to derive the internal lookup path.
|
|
24
|
+
#
|
|
25
|
+
# Example: sitemaps stored at `/sitemaps/tenant/sitemap.xml`, served publicly at
|
|
26
|
+
# `/sitemap.xml` → `storage_prefix: "/sitemaps/tenant"`
|
|
27
|
+
#
|
|
28
|
+
# Both options accept a callable (0-arg or 1-arg receiving env), which is useful
|
|
29
|
+
# in multi-tenant setups where the prefix depends on the current request/site.
|
|
30
|
+
#
|
|
31
|
+
def initialize(
|
|
32
|
+
app,
|
|
33
|
+
adapter: nil,
|
|
34
|
+
public_prefix: nil,
|
|
35
|
+
storage_prefix: nil,
|
|
36
|
+
x_robots_tag: DEFAULT_X_ROBOTS_TAG,
|
|
37
|
+
cache_control: DEFAULT_CACHE_CONTROL
|
|
38
|
+
)
|
|
39
|
+
@app = app
|
|
40
|
+
@adapter = adapter
|
|
41
|
+
@public_prefix = public_prefix
|
|
42
|
+
@storage_prefix = storage_prefix
|
|
43
|
+
@x_robots_tag = x_robots_tag
|
|
44
|
+
@cache_control = cache_control
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
def call(env)
|
|
48
|
+
path = env["PATH_INFO"]
|
|
49
|
+
|
|
50
|
+
if xsl_request?(path)
|
|
51
|
+
serve_xsl(path)
|
|
52
|
+
elsif path.end_with?(".xml", ".xml.gz")
|
|
53
|
+
pub_prefix = resolve_value(@public_prefix, env)
|
|
54
|
+
sto_prefix = resolve_value(@storage_prefix, env)
|
|
55
|
+
|
|
56
|
+
# Strip public prefix (nil = no match when prefix is configured but doesn't match)
|
|
57
|
+
stripped = strip_prefix(path, pub_prefix)
|
|
58
|
+
|
|
59
|
+
# Prepend storage prefix to get the internal path used for adapter lookups
|
|
60
|
+
internal_path = stripped && prepend_prefix(stripped, sto_prefix)
|
|
61
|
+
|
|
62
|
+
# Only resolve the adapter (potentially expensive: DB lookup, callable) when
|
|
63
|
+
# the path already looks like a sitemap file and passed prefix checks.
|
|
64
|
+
current_adapter = resolve_adapter(env) if internal_path
|
|
65
|
+
if current_adapter && sitemap_request?(internal_path, current_adapter)
|
|
66
|
+
serve_sitemap(internal_path, current_adapter, pub_prefix: pub_prefix, sto_prefix: sto_prefix)
|
|
67
|
+
else
|
|
68
|
+
@app.call(env)
|
|
69
|
+
end
|
|
70
|
+
else
|
|
71
|
+
@app.call(env)
|
|
72
|
+
end
|
|
73
|
+
end
|
|
74
|
+
|
|
75
|
+
private
|
|
76
|
+
|
|
77
|
+
def resolve_adapter(env)
|
|
78
|
+
if @adapter.respond_to?(:call)
|
|
79
|
+
call_with_env(@adapter, env)
|
|
80
|
+
else
|
|
81
|
+
@adapter || SiteMaps.current_adapter
|
|
82
|
+
end
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
# Resolves a string-or-callable option, normalising the trailing slash.
|
|
86
|
+
def resolve_value(option, env)
|
|
87
|
+
value = option.respond_to?(:call) ? call_with_env(option, env) : option
|
|
88
|
+
value&.chomp("/")
|
|
89
|
+
end
|
|
90
|
+
|
|
91
|
+
# Invokes +callable+ with the Rack env when it accepts an argument, and with
# no arguments otherwise. Supports both `-> { Current.site }` (0-arg, when
# upstream middleware already set thread-local state) and `->(env) { ... }`
# (1-arg).
#
# Procs and Method objects expose #arity themselves. Any other object that
# merely responds to #call (e.g. a plain service object) does not, so the
# arity of its #call method is inspected instead of raising NoMethodError.
#
# @param callable [#call] proc, lambda, method or object implementing #call
# @param env [Hash] Rack environment of the current request
# @return [Object] whatever the callable returns
def call_with_env(callable, env)
  arity = callable.respond_to?(:arity) ? callable.arity : callable.method(:call).arity
  arity.zero? ? callable.call : callable.call(env)
end
|
|
97
|
+
|
|
98
|
+
# Returns the path with the public prefix stripped.
#
# * No prefix configured (nil, empty or just "/") -> the original path.
# * Path equals the prefix -> "/".
# * Path lives under the prefix -> the remainder, starting with "/".
# * Anything else -> nil, so the middleware can pass the request through.
#
# The match is anchored on a path-segment boundary: a prefix of
# "/sitemaps/tenant" must not match "/sitemaps/tenant2/sitemap.xml".
#
# @param path [String] request path (PATH_INFO)
# @param prefix [String, nil] public prefix, with or without a trailing slash
# @return [String, nil]
def strip_prefix(path, prefix)
  normalized = prefix.to_s.chomp("/")
  return path if normalized.empty?
  return "/" if path == normalized
  # Require the "/" right after the prefix so sibling directories sharing
  # the same leading characters are not treated as matches.
  return nil unless path.start_with?("#{normalized}/")

  path[normalized.length..]
end
|
|
109
|
+
|
|
110
|
+
# Prepends a storage prefix to a path. A nil/empty prefix is a no-op.
|
|
111
|
+
def prepend_prefix(path, prefix)
|
|
112
|
+
return path if prefix.nil? || prefix.empty?
|
|
113
|
+
|
|
114
|
+
"#{prefix}#{path}"
|
|
115
|
+
end
|
|
116
|
+
|
|
117
|
+
def sitemap_request?(path, adapter)
|
|
118
|
+
sitemap_dir = adapter.config.remote_sitemap_directory
|
|
119
|
+
prefix = sitemap_dir.empty? ? "/" : "/#{sitemap_dir}/"
|
|
120
|
+
path.start_with?(prefix) && path.end_with?(".xml", ".xml.gz")
|
|
121
|
+
end
|
|
122
|
+
|
|
123
|
+
def xsl_request?(path)
|
|
124
|
+
path == URLSET_XSL_PATH || path == INDEX_XSL_PATH
|
|
125
|
+
end
|
|
126
|
+
|
|
127
|
+
def serve_sitemap(path, adapter, pub_prefix: nil, sto_prefix: nil)
|
|
128
|
+
url = "#{adapter.config.base_uri}#{path}"
|
|
129
|
+
raw_data, metadata = adapter.read(url)
|
|
130
|
+
body = decompress(raw_data, metadata)
|
|
131
|
+
body = rewrite_locs(body, adapter.config.base_uri, pub_prefix, sto_prefix)
|
|
132
|
+
|
|
133
|
+
[200, sitemap_headers("text/xml; charset=UTF-8"), [body]]
|
|
134
|
+
rescue SiteMaps::FileNotFoundError
|
|
135
|
+
@app.call({"PATH_INFO" => path, "REQUEST_METHOD" => "GET"})
|
|
136
|
+
end
|
|
137
|
+
|
|
138
|
+
# Rewrites <loc> URLs in served XML so they match the public paths the
|
|
139
|
+
# middleware actually handles, not the internal storage paths.
|
|
140
|
+
#
|
|
141
|
+
# storage_prefix case: strips the storage prefix from all <loc> URLs.
|
|
142
|
+
# stored: https://example.com/sitemaps/tenant/static/sitemap.xml
|
|
143
|
+
# public: https://example.com/static/sitemap.xml
|
|
144
|
+
#
|
|
145
|
+
# public_prefix case: prepends the public prefix to <loc> URLs in sitemap
|
|
146
|
+
# index files only (URL sets contain page URLs that must not be touched).
|
|
147
|
+
# stored: https://example.com/static/sitemap.xml
|
|
148
|
+
# public: https://example.com/sitemaps/tenant/static/sitemap.xml
|
|
149
|
+
def rewrite_locs(body, base_uri, pub_prefix, sto_prefix)
|
|
150
|
+
base = base_uri.to_s
|
|
151
|
+
|
|
152
|
+
if sto_prefix && !sto_prefix.empty?
|
|
153
|
+
body.gsub("#{base}#{sto_prefix}/", "#{base}/")
|
|
154
|
+
elsif pub_prefix && !pub_prefix.empty? && body.include?("<sitemapindex")
|
|
155
|
+
body.gsub("<loc>#{base}/", "<loc>#{base}#{pub_prefix}/")
|
|
156
|
+
else
|
|
157
|
+
body
|
|
158
|
+
end
|
|
159
|
+
end
|
|
160
|
+
|
|
161
|
+
# The adapter may return gzip-compressed data (raw bytes) or already-decompressed
|
|
162
|
+
# XML. Always serve as plain XML so sitemaps are browsable with XSL stylesheets.
|
|
163
|
+
def decompress(raw_data, metadata)
|
|
164
|
+
return raw_data unless metadata && metadata[:content_type] == "application/gzip"
|
|
165
|
+
|
|
166
|
+
Zlib::GzipReader.new(StringIO.new(raw_data)).read
|
|
167
|
+
rescue Zlib::GzipFile::Error
|
|
168
|
+
# Data was already decompressed (e.g., FileSystem adapter decompresses on read)
|
|
169
|
+
raw_data
|
|
170
|
+
end
|
|
171
|
+
|
|
172
|
+
def serve_xsl(path)
|
|
173
|
+
body = if path == INDEX_XSL_PATH
|
|
174
|
+
Builder::XSLStylesheet.index_xsl
|
|
175
|
+
else
|
|
176
|
+
Builder::XSLStylesheet.urlset_xsl
|
|
177
|
+
end
|
|
178
|
+
|
|
179
|
+
[200, xsl_headers, [body]]
|
|
180
|
+
end
|
|
181
|
+
|
|
182
|
+
def sitemap_headers(content_type)
|
|
183
|
+
{
|
|
184
|
+
"content-type" => content_type,
|
|
185
|
+
"x-robots-tag" => @x_robots_tag,
|
|
186
|
+
"cache-control" => @cache_control
|
|
187
|
+
}
|
|
188
|
+
end
|
|
189
|
+
|
|
190
|
+
def xsl_headers
|
|
191
|
+
{
|
|
192
|
+
"content-type" => "text/xsl; charset=UTF-8",
|
|
193
|
+
"cache-control" => @cache_control
|
|
194
|
+
}
|
|
195
|
+
end
|
|
196
|
+
end
|
|
197
|
+
end
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "net/http"
|
|
4
|
+
|
|
5
|
+
module SiteMaps
  # Pings search engines with the URL of a sitemap.
  #
  # Each engine is described by a URL template containing a `%{url}`
  # placeholder, which is replaced by the percent-encoded sitemap URL.
  module Ping
    # Built-in engines: name => ping URL template.
    ENGINES = {
      bing: "https://www.bing.com/ping?sitemap=%{url}"
    }.freeze

    class << self
      # Issues one GET request per engine and collects the outcome.
      #
      # A failure for one engine (bad template, invalid URI, network error) is
      # logged as a warning and recorded in the result; it never aborts the
      # remaining pings and is never raised to the caller.
      #
      # @param sitemap_url [String, #to_s] public URL of the sitemap (index)
      # @param engines [Hash{Symbol=>String}, nil] name => URL template with a
      #   `%{url}` placeholder; defaults to {ENGINES}
      # @return [Hash{Symbol=>Hash}] per engine either
      #   `{status: Integer, url: String}` or
      #   `{status: nil, error: String, url: String|nil}`
      def ping(sitemap_url, engines: nil)
        engines ||= ENGINES
        # URI is loaded by net/http, so no extra require is needed. The
        # previous ERB::Util helper depended on "erb" having been required
        # somewhere else.
        encoded_url = URI.encode_www_form_component(sitemap_url)

        engines.each_with_object({}) do |(name, url_template), results|
          ping_url = url_template % {url: encoded_url}
          uri = URI.parse(ping_url)

          response = Net::HTTP.get_response(uri)
          results[name] = {status: response.code.to_i, url: ping_url}

          SiteMaps.logger.info("[SiteMaps] Pinged #{name}: #{response.code} - #{ping_url}")
        rescue => e
          # ping_url is nil here when the template itself could not be formatted.
          results[name] = {status: nil, error: e.message, url: ping_url}
          SiteMaps.logger.warn("[SiteMaps] Failed to ping #{name}: #{e.message}")
        end
      end

      # @return [Hash{Symbol=>String}] the built-in engine templates
      def default_engines
        ENGINES
      end
    end
  end
end
|