site_maps 0.0.1.beta3 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/main.yml +2 -4
  3. data/.rubocop.yml +4 -2
  4. data/.tool-versions +1 -1
  5. data/AGENTS.md +73 -0
  6. data/CHANGELOG.md +5 -0
  7. data/CLAUDE.md +77 -0
  8. data/Gemfile +1 -0
  9. data/Gemfile.lock +72 -56
  10. data/README.md +531 -393
  11. data/docs/README.md +67 -0
  12. data/docs/adapters.md +143 -0
  13. data/docs/api.md +154 -0
  14. data/docs/cli.md +93 -0
  15. data/docs/events.md +79 -0
  16. data/docs/extensions.md +141 -0
  17. data/docs/getting-started.md +138 -0
  18. data/docs/middleware.md +85 -0
  19. data/docs/processes.md +156 -0
  20. data/docs/rails.md +128 -0
  21. data/lib/site_maps/adapters/adapter.rb +35 -5
  22. data/lib/site_maps/adapters/aws_sdk/storage.rb +5 -2
  23. data/lib/site_maps/builder/sitemap_index/item.rb +1 -1
  24. data/lib/site_maps/builder/sitemap_index.rb +29 -5
  25. data/lib/site_maps/builder/url.rb +13 -10
  26. data/lib/site_maps/builder/url_set.rb +17 -7
  27. data/lib/site_maps/builder/xsl_stylesheet.rb +192 -0
  28. data/lib/site_maps/cli.rb +6 -2
  29. data/lib/site_maps/configuration.rb +8 -1
  30. data/lib/site_maps/incremental_location.rb +1 -1
  31. data/lib/site_maps/middleware.rb +197 -0
  32. data/lib/site_maps/notification/event.rb +1 -1
  33. data/lib/site_maps/notification/publisher.rb +1 -0
  34. data/lib/site_maps/notification.rb +1 -0
  35. data/lib/site_maps/ping.rb +35 -0
  36. data/lib/site_maps/{primitives → primitive}/array.rb +1 -1
  37. data/lib/site_maps/{primitives → primitive}/output.rb +1 -1
  38. data/lib/site_maps/primitive/string.rb +106 -0
  39. data/lib/site_maps/robots_txt.rb +21 -0
  40. data/lib/site_maps/runner/event_listener.rb +2 -2
  41. data/lib/site_maps/runner.rb +17 -3
  42. data/lib/site_maps/sitemap_builder.rb +16 -4
  43. data/lib/site_maps/sitemap_reader.rb +3 -0
  44. data/lib/site_maps/version.rb +1 -1
  45. data/lib/site_maps.rb +81 -10
  46. data/site_maps.gemspec +1 -1
  47. metadata +23 -10
  48. data/lib/site_maps/primitives/string.rb +0 -43
@@ -11,20 +11,23 @@ module SiteMaps::Builder
11
11
 
12
12
  attr_reader :attributes
13
13
 
14
- def initialize(link, **attributes)
15
- @attributes = DEFAULTS.merge(attributes)
14
+ def initialize(link, emit_priority: true, emit_changefreq: true, **attributes)
15
+ defaults = DEFAULTS.dup
16
+ defaults.delete(:priority) unless emit_priority
17
+ defaults.delete(:changefreq) unless emit_changefreq
18
+ @attributes = defaults.merge(attributes)
16
19
  @attributes[:loc] = link
17
- @attributes[:alternates] = SiteMaps::Primitives::Array.wrap(@attributes[:alternates])
18
- @attributes[:videos] = SiteMaps::Primitives::Array.wrap(@attributes[:videos])
19
- @attributes[:images] = SiteMaps::Primitives::Array.wrap(@attributes[:images])
20
+ @attributes[:alternates] = SiteMaps::Primitive::Array.wrap(@attributes[:alternates])
21
+ @attributes[:videos] = SiteMaps::Primitive::Array.wrap(@attributes[:videos])
22
+ @attributes[:images] = SiteMaps::Primitive::Array.wrap(@attributes[:images])
20
23
  if (video = @attributes.delete(:video))
21
- @attributes[:videos].concat(SiteMaps::Primitives::Array.wrap(video))
24
+ @attributes[:videos].concat(SiteMaps::Primitive::Array.wrap(video))
22
25
  end
23
26
  if (alternate = @attributes.delete(:alternate))
24
- @attributes[:alternates].concat(SiteMaps::Primitives::Array.wrap(alternate))
27
+ @attributes[:alternates].concat(SiteMaps::Primitive::Array.wrap(alternate))
25
28
  end
26
29
  if (image = @attributes.delete(:image))
27
- @attributes[:images].concat(SiteMaps::Primitives::Array.wrap(image))
30
+ @attributes[:images].concat(SiteMaps::Primitive::Array.wrap(image))
28
31
  end
29
32
  @attributes[:images] = @attributes[:images][0...SiteMaps::MAX_LENGTH[:images]]
30
33
  end
@@ -121,9 +124,9 @@ module SiteMaps::Builder
121
124
 
122
125
  if self[:pagemap].is_a?(Hash) && (pagemap = self[:pagemap]).any?
123
126
  builder.pagemap :PageMap do
124
- SiteMaps::Primitives::Array.wrap(pagemap[:dataobjects]).each do |dataobject|
127
+ SiteMaps::Primitive::Array.wrap(pagemap[:dataobjects]).each do |dataobject|
125
128
  builder.pagemap :DataObject, type: dataobject[:type].to_s, id: dataobject[:id].to_s do
126
- SiteMaps::Primitives::Array.wrap(dataobject[:attributes]).each do |attribute|
129
+ SiteMaps::Primitive::Array.wrap(dataobject[:attributes]).each do |attribute|
127
130
  builder.pagemap :Attribute, attribute[:value].to_s, name: attribute[:name].to_s
128
131
  end
129
132
  end
@@ -10,32 +10,42 @@ module SiteMaps::Builder
10
10
  "video" => "http://www.google.com/schemas/sitemap-video/1.1"
11
11
  }.freeze
12
12
 
13
- HEADER = <<~HEADER
14
- <?xml version="1.0" encoding="UTF-8"?>
13
+ XML_DECLARATION = %(<?xml version="1.0" encoding="UTF-8"?>)
14
+ URLSET_OPEN = <<~URLSET_OPEN
15
15
  <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
16
16
  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
17
17
  xmlns:xhtml="http://www.w3.org/1999/xhtml"
18
18
  xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"
19
19
  #{SCHEMAS.map { |name, uri| " xmlns:#{name}=\"#{uri}\"" }.join("\n")}
20
20
  >
21
- HEADER
21
+ URLSET_OPEN
22
+ HEADER = "#{XML_DECLARATION}\n#{URLSET_OPEN}"
22
23
  FOOTER = "</urlset>"
23
24
  FOOTER_BYTESIZE = FOOTER.bytesize
24
25
 
25
26
  attr_reader :content, :links_count, :news_count
26
27
 
27
- def initialize
28
+ def initialize(max_links: SiteMaps::MAX_LENGTH[:links], emit_priority: true, emit_changefreq: true, xsl_url: nil)
28
29
  @content = StringIO.new
29
- @content.puts(HEADER)
30
+ if xsl_url
31
+ @content.puts(XML_DECLARATION)
32
+ @content.puts(XSLStylesheet.processing_instruction(xsl_url))
33
+ @content.puts(URLSET_OPEN)
34
+ else
35
+ @content.puts(HEADER)
36
+ end
30
37
  @links_count = 0
31
38
  @news_count = 0
32
39
  @last_modified = nil
40
+ @max_links = max_links
41
+ @emit_priority = emit_priority
42
+ @emit_changefreq = emit_changefreq
33
43
  end
34
44
 
35
45
  def add(link, **options)
36
46
  raise SiteMaps::FullSitemapError if finalized?
37
47
 
38
- url = SiteMaps::Builder::URL.new(link, **options)
48
+ url = SiteMaps::Builder::URL.new(link, emit_priority: @emit_priority, emit_changefreq: @emit_changefreq, **options)
39
49
  raise SiteMaps::FullSitemapError unless fit?(url)
40
50
 
41
51
  content.puts(url.to_xml)
@@ -83,7 +93,7 @@ module SiteMaps::Builder
83
93
 
84
94
  # @param url [Builder::URL]
85
95
  def fit?(url)
86
- return false if links_count >= SiteMaps::MAX_LENGTH[:links]
96
+ return false if links_count >= @max_links
87
97
  return false if url.news? && news_count >= SiteMaps::MAX_LENGTH[:news]
88
98
 
89
99
  (bytesize + url.bytesize + FOOTER_BYTESIZE) <= SiteMaps::MAX_FILESIZE
@@ -0,0 +1,192 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SiteMaps::Builder
4
+ class XSLStylesheet
5
+ URLSET_XSL = <<~XSL
6
+ <?xml version="1.0" encoding="UTF-8"?>
7
+ <xsl:stylesheet version="2.0"
8
+ xmlns:html="http://www.w3.org/TR/REC-html40"
9
+ xmlns:sitemap="http://www.sitemaps.org/schemas/sitemap/0.9"
10
+ xmlns:image="http://www.google.com/schemas/sitemap-image/1.1"
11
+ xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
12
+ <xsl:output method="html" version="1.0" encoding="UTF-8" indent="yes"/>
13
+ <xsl:template match="/">
14
+ <html xmlns="http://www.w3.org/1999/xhtml">
15
+ <head>
16
+ <title>XML Sitemap</title>
17
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
18
+ <style type="text/css">
19
+ body {
20
+ font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Oxygen-Sans, Ubuntu, Cantarell, "Helvetica Neue", sans-serif;
21
+ font-size: 13px;
22
+ color: #545353;
23
+ }
24
+ a { color: #05809e; text-decoration: none; }
25
+ a:visited { color: #06577d; }
26
+ a:hover { text-decoration: underline; }
27
+ #content {
28
+ margin: 0 auto;
29
+ padding: 0 20px;
30
+ max-width: 1200px;
31
+ }
32
+ h1 { font-size: 24px; margin: 20px 0 10px; }
33
+ p.desc { color: #777; margin: 0 0 20px; }
34
+ table {
35
+ border: none;
36
+ border-collapse: collapse;
37
+ width: 100%;
38
+ margin: 0 0 20px;
39
+ }
40
+ th {
41
+ text-align: left;
42
+ padding: 10px 8px;
43
+ font-size: 12px;
44
+ border-bottom: 1px solid #ccc;
45
+ }
46
+ td {
47
+ padding: 8px;
48
+ font-size: 12px;
49
+ border-bottom: 1px solid #eee;
50
+ }
51
+ tr:nth-child(odd) td { background-color: #f8f8f8; }
52
+ tr:hover td { background-color: #e8e8e8; }
53
+ td.url { max-width: 0; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; }
54
+ #footer { margin: 10px 0 30px; font-size: 11px; color: #999; }
55
+ </style>
56
+ </head>
57
+ <body>
58
+ <div id="content">
59
+ <h1>XML Sitemap</h1>
60
+ <p class="desc">
61
+ This XML sitemap is used by search engines which follow the
62
+ <a href="https://www.sitemaps.org">XML sitemap standard</a>.
63
+ </p>
64
+ <table>
65
+ <tr>
66
+ <th style="width:80%">URL</th>
67
+ <th style="width:5%">Images</th>
68
+ <th style="width:15%">Last Modified</th>
69
+ </tr>
70
+ <xsl:for-each select="sitemap:urlset/sitemap:url">
71
+ <tr>
72
+ <td class="url">
73
+ <a href="{sitemap:loc}"><xsl:value-of select="sitemap:loc"/></a>
74
+ </td>
75
+ <td>
76
+ <xsl:value-of select="count(image:image)"/>
77
+ </td>
78
+ <td>
79
+ <xsl:value-of select="concat(substring(sitemap:lastmod, 0, 11), ' ', substring(sitemap:lastmod, 12, 5))"/>
80
+ </td>
81
+ </tr>
82
+ </xsl:for-each>
83
+ </table>
84
+ <p id="footer">
85
+ Generated by <a href="https://github.com/marcosgz/site_maps">SiteMaps</a>
86
+ </p>
87
+ </div>
88
+ </body>
89
+ </html>
90
+ </xsl:template>
91
+ </xsl:stylesheet>
92
+ XSL
93
+
94
+ INDEX_XSL = <<~XSL
95
+ <?xml version="1.0" encoding="UTF-8"?>
96
+ <xsl:stylesheet version="2.0"
97
+ xmlns:html="http://www.w3.org/TR/REC-html40"
98
+ xmlns:sitemap="http://www.sitemaps.org/schemas/sitemap/0.9"
99
+ xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
100
+ <xsl:output method="html" version="1.0" encoding="UTF-8" indent="yes"/>
101
+ <xsl:template match="/">
102
+ <html xmlns="http://www.w3.org/1999/xhtml">
103
+ <head>
104
+ <title>XML Sitemap Index</title>
105
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
106
+ <style type="text/css">
107
+ body {
108
+ font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Oxygen-Sans, Ubuntu, Cantarell, "Helvetica Neue", sans-serif;
109
+ font-size: 13px;
110
+ color: #545353;
111
+ }
112
+ a { color: #05809e; text-decoration: none; }
113
+ a:visited { color: #06577d; }
114
+ a:hover { text-decoration: underline; }
115
+ #content {
116
+ margin: 0 auto;
117
+ padding: 0 20px;
118
+ max-width: 1200px;
119
+ }
120
+ h1 { font-size: 24px; margin: 20px 0 10px; }
121
+ p.desc { color: #777; margin: 0 0 20px; }
122
+ table {
123
+ border: none;
124
+ border-collapse: collapse;
125
+ width: 100%;
126
+ margin: 0 0 20px;
127
+ }
128
+ th {
129
+ text-align: left;
130
+ padding: 10px 8px;
131
+ font-size: 12px;
132
+ border-bottom: 1px solid #ccc;
133
+ }
134
+ td {
135
+ padding: 8px;
136
+ font-size: 12px;
137
+ border-bottom: 1px solid #eee;
138
+ }
139
+ tr:nth-child(odd) td { background-color: #f8f8f8; }
140
+ tr:hover td { background-color: #e8e8e8; }
141
+ td.url { max-width: 0; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; }
142
+ #footer { margin: 10px 0 30px; font-size: 11px; color: #999; }
143
+ </style>
144
+ </head>
145
+ <body>
146
+ <div id="content">
147
+ <h1>XML Sitemap Index</h1>
148
+ <p class="desc">
149
+ This XML sitemap index file contains
150
+ <xsl:value-of select="count(sitemap:sitemapindex/sitemap:sitemap)"/> sitemaps.
151
+ </p>
152
+ <table>
153
+ <tr>
154
+ <th style="width:75%">Sitemap</th>
155
+ <th style="width:25%">Last Modified</th>
156
+ </tr>
157
+ <xsl:for-each select="sitemap:sitemapindex/sitemap:sitemap">
158
+ <tr>
159
+ <td class="url">
160
+ <a href="{sitemap:loc}"><xsl:value-of select="sitemap:loc"/></a>
161
+ </td>
162
+ <td>
163
+ <xsl:value-of select="concat(substring(sitemap:lastmod, 0, 11), ' ', substring(sitemap:lastmod, 12, 5))"/>
164
+ </td>
165
+ </tr>
166
+ </xsl:for-each>
167
+ </table>
168
+ <p id="footer">
169
+ Generated by <a href="https://github.com/marcosgz/site_maps">SiteMaps</a>
170
+ </p>
171
+ </div>
172
+ </body>
173
+ </html>
174
+ </xsl:template>
175
+ </xsl:stylesheet>
176
+ XSL
177
+
178
+ class << self
179
+ def processing_instruction(url)
180
+ %(<?xml-stylesheet type="text/xsl" href="#{url}"?>)
181
+ end
182
+
183
+ def urlset_xsl
184
+ URLSET_XSL
185
+ end
186
+
187
+ def index_xsl
188
+ INDEX_XSL
189
+ end
190
+ end
191
+ end
192
+ end
data/lib/site_maps/cli.rb CHANGED
@@ -9,6 +9,7 @@ module SiteMaps
9
9
  method_option :max_threads, type: :numeric, aliases: "-c", default: 4
10
10
  method_option :context, type: :hash, default: {}
11
11
  method_option :enqueue_remaining, type: :boolean, default: false
12
+ method_option :ping, type: :boolean, default: false, desc: "Ping search engines after generation"
12
13
 
13
14
  desc "generate 1st_process,2nd_process ... ,Nth_process", "Generate sitemap.xml files for the given processes"
14
15
  default_command :start
@@ -26,14 +27,17 @@ module SiteMaps
26
27
 
27
28
  SiteMaps::Notification.subscribe(SiteMaps::Runner::EventListener)
28
29
 
30
+ context = (opts[:context] || {}).transform_keys(&:to_sym)
29
31
  runner = SiteMaps.generate(
30
32
  config_file: opts[:config_file],
31
- max_threads: opts[:max_threads]
33
+ max_threads: opts[:max_threads],
34
+ context: context.empty? ? nil : context,
35
+ ping: opts[:ping] || nil
32
36
  )
33
37
  if processes.empty?
34
38
  runner.enqueue_all
35
39
  else
36
- kwargs = (opts[:context] || {}).transform_keys(&:to_sym)
40
+ kwargs = context
37
41
  processes.split(",").each do |process|
38
42
  runner.enqueue(process.strip.to_sym, **kwargs)
39
43
  end
@@ -37,6 +37,13 @@ module SiteMaps
37
37
 
38
38
  attribute :url
39
39
  attribute :directory, default: "/tmp/sitemaps"
40
+ attribute :max_links, default: 50_000
41
+ attribute :emit_priority, default: true
42
+ attribute :emit_changefreq, default: true
43
+ attribute :xsl_stylesheet_url
44
+ attribute :xsl_index_stylesheet_url
45
+ attribute :ping_search_engines, default: false
46
+ attribute :ping_engines
40
47
 
41
48
  def initialize(**options)
42
49
  default_attributes.merge(options).each do |key, value|
@@ -90,7 +97,7 @@ module SiteMaps
90
97
 
91
98
  def remote_sitemap_directory
92
99
  path = ::URI.parse(url).path
93
- path = path[1..-1] if path.start_with?("/")
100
+ path = path[1..] if path.start_with?("/")
94
101
  path.split("/")[0..-2].join("/")
95
102
  end
96
103
 
@@ -53,7 +53,7 @@ module SiteMaps
53
53
  end
54
54
  base = uri.dup.tap { |v| v.path = "" }.to_s
55
55
  basename = File.basename(uri.path)
56
- index_basename = basename.sub(/[\.](xml|xml\.gz)$/, "#{PLACEHOLDER}.\\1")
56
+ index_basename = basename.sub(/\.(xml|xml\.gz)$/, "#{PLACEHOLDER}.\\1")
57
57
 
58
58
  @placeholder_url = File.join(base, File.join(File.dirname(uri.path), index_basename))
59
59
  @uri = URI(File.join(base, File.join(File.dirname(uri.path), basename)))
@@ -0,0 +1,197 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SiteMaps
4
+ class Middleware
5
+ DEFAULT_X_ROBOTS_TAG = "noindex, follow"
6
+ DEFAULT_CACHE_CONTROL = "public, max-age=3600"
7
+ URLSET_XSL_PATH = "/_sitemap-stylesheet.xsl"
8
+ INDEX_XSL_PATH = "/_sitemap-index-stylesheet.xsl"
9
+
10
+ # @param adapter [Object, #call, nil] Adapter instance, a callable (0-arg or 1-arg
11
+ # receiving the Rack env) that returns an adapter, or nil to fall back to
12
+ # SiteMaps.current_adapter.
13
+ #
14
+ # @param public_prefix [String, #call, nil] A prefix present in the **public URL**
15
+ # that is absent from the storage path. Stripped from the incoming request path
16
+ # to derive the internal lookup path.
17
+ #
18
+ # Example: sitemaps stored at `/sitemap.xml`, served publicly at
19
+ # `/sitemaps/tenant/sitemap.xml` → `public_prefix: "/sitemaps/tenant"`
20
+ #
21
+ # @param storage_prefix [String, #call, nil] A prefix present in the **storage
22
+ # path** that is absent from the public URL. Prepended to the incoming request
23
+ # path to derive the internal lookup path.
24
+ #
25
+ # Example: sitemaps stored at `/sitemaps/tenant/sitemap.xml`, served publicly at
26
+ # `/sitemap.xml` → `storage_prefix: "/sitemaps/tenant"`
27
+ #
28
+ # Both options accept a callable (0-arg or 1-arg receiving env), which is useful
29
+ # in multi-tenant setups where the prefix depends on the current request/site.
30
+ #
31
+ def initialize(
32
+ app,
33
+ adapter: nil,
34
+ public_prefix: nil,
35
+ storage_prefix: nil,
36
+ x_robots_tag: DEFAULT_X_ROBOTS_TAG,
37
+ cache_control: DEFAULT_CACHE_CONTROL
38
+ )
39
+ @app = app
40
+ @adapter = adapter
41
+ @public_prefix = public_prefix
42
+ @storage_prefix = storage_prefix
43
+ @x_robots_tag = x_robots_tag
44
+ @cache_control = cache_control
45
+ end
46
+
47
+ def call(env)
48
+ path = env["PATH_INFO"]
49
+
50
+ if xsl_request?(path)
51
+ serve_xsl(path)
52
+ elsif path.end_with?(".xml", ".xml.gz")
53
+ pub_prefix = resolve_value(@public_prefix, env)
54
+ sto_prefix = resolve_value(@storage_prefix, env)
55
+
56
+ # Strip public prefix (nil = no match when prefix is configured but doesn't match)
57
+ stripped = strip_prefix(path, pub_prefix)
58
+
59
+ # Prepend storage prefix to get the internal path used for adapter lookups
60
+ internal_path = stripped && prepend_prefix(stripped, sto_prefix)
61
+
62
+ # Only resolve the adapter (potentially expensive: DB lookup, callable) when
63
+ # the path already looks like a sitemap file and passed prefix checks.
64
+ current_adapter = resolve_adapter(env) if internal_path
65
+ if current_adapter && sitemap_request?(internal_path, current_adapter)
66
+ serve_sitemap(internal_path, current_adapter, pub_prefix: pub_prefix, sto_prefix: sto_prefix)
67
+ else
68
+ @app.call(env)
69
+ end
70
+ else
71
+ @app.call(env)
72
+ end
73
+ end
74
+
75
+ private
76
+
77
+ def resolve_adapter(env)
78
+ if @adapter.respond_to?(:call)
79
+ call_with_env(@adapter, env)
80
+ else
81
+ @adapter || SiteMaps.current_adapter
82
+ end
83
+ end
84
+
85
+ # Resolves a string-or-callable option, normalising the trailing slash.
86
+ def resolve_value(option, env)
87
+ value = option.respond_to?(:call) ? call_with_env(option, env) : option
88
+ value&.chomp("/")
89
+ end
90
+
91
+ # Calls a callable with env if it accepts an argument, otherwise with no
92
+ # arguments. Supports both `-> { Current.site }` (0-arg, when upstream
93
+ # middleware already set thread-local state) and `->(env) { ... }` (1-arg).
94
+ def call_with_env(callable, env)
95
+ callable.arity.zero? ? callable.call : callable.call(env)
96
+ end
97
+
98
+ # Returns the path with the prefix stripped.
99
+ # Returns nil when a prefix is configured but the path doesn't start with it
100
+ # (so the middleware can pass through non-matching requests).
101
+ # Returns the original path when no prefix is configured.
102
+ def strip_prefix(path, prefix)
103
+ return path if prefix.nil? || prefix.empty?
104
+ return nil unless path.start_with?(prefix)
105
+
106
+ stripped = path[prefix.length..]
107
+ stripped.start_with?("/") ? stripped : "/#{stripped}"
108
+ end
109
+
110
+ # Prepends a storage prefix to a path. A nil/empty prefix is a no-op.
111
+ def prepend_prefix(path, prefix)
112
+ return path if prefix.nil? || prefix.empty?
113
+
114
+ "#{prefix}#{path}"
115
+ end
116
+
117
+ def sitemap_request?(path, adapter)
118
+ sitemap_dir = adapter.config.remote_sitemap_directory
119
+ prefix = sitemap_dir.empty? ? "/" : "/#{sitemap_dir}/"
120
+ path.start_with?(prefix) && path.end_with?(".xml", ".xml.gz")
121
+ end
122
+
123
+ def xsl_request?(path)
124
+ path == URLSET_XSL_PATH || path == INDEX_XSL_PATH
125
+ end
126
+
127
+ def serve_sitemap(path, adapter, pub_prefix: nil, sto_prefix: nil)
128
+ url = "#{adapter.config.base_uri}#{path}"
129
+ raw_data, metadata = adapter.read(url)
130
+ body = decompress(raw_data, metadata)
131
+ body = rewrite_locs(body, adapter.config.base_uri, pub_prefix, sto_prefix)
132
+
133
+ [200, sitemap_headers("text/xml; charset=UTF-8"), [body]]
134
+ rescue SiteMaps::FileNotFoundError
135
+ @app.call({"PATH_INFO" => path, "REQUEST_METHOD" => "GET"})
136
+ end
137
+
138
+ # Rewrites <loc> URLs in served XML so they match the public paths the
139
+ # middleware actually handles, not the internal storage paths.
140
+ #
141
+ # storage_prefix case: strips the storage prefix from all <loc> URLs.
142
+ # stored: https://example.com/sitemaps/tenant/static/sitemap.xml
143
+ # public: https://example.com/static/sitemap.xml
144
+ #
145
+ # public_prefix case: prepends the public prefix to <loc> URLs in sitemap
146
+ # index files only (URL sets contain page URLs that must not be touched).
147
+ # stored: https://example.com/static/sitemap.xml
148
+ # public: https://example.com/sitemaps/tenant/static/sitemap.xml
149
+ def rewrite_locs(body, base_uri, pub_prefix, sto_prefix)
150
+ base = base_uri.to_s
151
+
152
+ if sto_prefix && !sto_prefix.empty?
153
+ body.gsub("#{base}#{sto_prefix}/", "#{base}/")
154
+ elsif pub_prefix && !pub_prefix.empty? && body.include?("<sitemapindex")
155
+ body.gsub("<loc>#{base}/", "<loc>#{base}#{pub_prefix}/")
156
+ else
157
+ body
158
+ end
159
+ end
160
+
161
+ # The adapter may return gzip-compressed data (raw bytes) or already-decompressed
162
+ # XML. Always serve as plain XML so sitemaps are browsable with XSL stylesheets.
163
+ def decompress(raw_data, metadata)
164
+ return raw_data unless metadata && metadata[:content_type] == "application/gzip"
165
+
166
+ Zlib::GzipReader.new(StringIO.new(raw_data)).read
167
+ rescue Zlib::GzipFile::Error
168
+ # Data was already decompressed (e.g., FileSystem adapter decompresses on read)
169
+ raw_data
170
+ end
171
+
172
+ def serve_xsl(path)
173
+ body = if path == INDEX_XSL_PATH
174
+ Builder::XSLStylesheet.index_xsl
175
+ else
176
+ Builder::XSLStylesheet.urlset_xsl
177
+ end
178
+
179
+ [200, xsl_headers, [body]]
180
+ end
181
+
182
+ def sitemap_headers(content_type)
183
+ {
184
+ "content-type" => content_type,
185
+ "x-robots-tag" => @x_robots_tag,
186
+ "cache-control" => @cache_control
187
+ }
188
+ end
189
+
190
+ def xsl_headers
191
+ {
192
+ "content-type" => "text/xsl; charset=UTF-8",
193
+ "cache-control" => @cache_control
194
+ }
195
+ end
196
+ end
197
+ end
@@ -43,7 +43,7 @@ module SiteMaps
43
43
 
44
44
  # @api private
45
45
  def listener_method
46
- @listener_method ||= Primitives::String.new("on_#{id}").underscore.to_sym
46
+ @listener_method ||= Primitive::String.new("on_#{id}").underscore.to_sym
47
47
  end
48
48
  end
49
49
  end
@@ -11,6 +11,7 @@ module SiteMaps::Notification
11
11
  # @api public
12
12
  module ClassMethods
13
13
  extend Forwardable
14
+
14
15
  def_delegators :bus, :publish, :subscribed?, :unsubscribe
15
16
 
16
17
  # Register a new event type
@@ -32,5 +32,6 @@ module SiteMaps
32
32
  register_event "sitemaps.before_process_execution"
33
33
  register_event "sitemaps.enqueue_process"
34
34
  register_event "sitemaps.process_execution"
35
+ register_event "sitemaps.ping"
35
36
  end
36
37
  end
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "net/http"
4
+
5
+ module SiteMaps
6
+ module Ping
7
+ ENGINES = {
8
+ bing: "https://www.bing.com/ping?sitemap=%{url}"
9
+ }.freeze
10
+
11
+ class << self
12
+ def ping(sitemap_url, engines: nil)
13
+ engines ||= ENGINES
14
+ encoded_url = ERB::Util.url_encode(sitemap_url)
15
+
16
+ engines.each_with_object({}) do |(name, url_template), results|
17
+ ping_url = url_template % {url: encoded_url}
18
+ uri = URI.parse(ping_url)
19
+
20
+ response = Net::HTTP.get_response(uri)
21
+ results[name] = {status: response.code.to_i, url: ping_url}
22
+
23
+ SiteMaps.logger.info("[SiteMaps] Pinged #{name}: #{response.code} - #{ping_url}")
24
+ rescue => e
25
+ results[name] = {status: nil, error: e.message, url: ping_url}
26
+ SiteMaps.logger.warn("[SiteMaps] Failed to ping #{name}: #{e.message}")
27
+ end
28
+ end
29
+
30
+ def default_engines
31
+ ENGINES
32
+ end
33
+ end
34
+ end
35
+ end
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- module SiteMaps::Primitives
3
+ module SiteMaps::Primitive
4
4
  class Array < ::Array
5
5
  def self.wrap(object)
6
6
  if object.nil?
@@ -6,7 +6,7 @@ rescue LoadError
6
6
  end
7
7
 
8
8
  module SiteMaps
9
- module Primitives
9
+ module Primitive
10
10
  module Output
11
11
  module_function
12
12