@blamejs/blamejs-shop 0.0.65 → 0.0.70
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +10 -0
- package/lib/assembly-instructions.js +777 -0
- package/lib/auto-replenish.js +933 -0
- package/lib/business-hours.js +980 -0
- package/lib/click-and-collect.js +711 -0
- package/lib/clickstream.js +713 -0
- package/lib/cost-layers.js +774 -0
- package/lib/credit-limits.js +752 -0
- package/lib/currency-rounding.js +525 -0
- package/lib/customer-activity.js +862 -0
- package/lib/customer-notes.js +712 -0
- package/lib/customer-risk-profile.js +593 -0
- package/lib/customer-surveys.js +1012 -0
- package/lib/damage-photos.js +473 -0
- package/lib/discount-allocation.js +557 -0
- package/lib/dropship-forwarding.js +645 -0
- package/lib/email-templates.js +817 -0
- package/lib/index.js +45 -0
- package/lib/inventory-allocations.js +559 -0
- package/lib/inventory-writeoffs.js +636 -0
- package/lib/knowledge-base.js +1104 -0
- package/lib/locale-router.js +1077 -0
- package/lib/operator-roles.js +768 -0
- package/lib/order-escalation.js +951 -0
- package/lib/order-ratings.js +495 -0
- package/lib/order-tags.js +944 -0
- package/lib/packing-slips.js +810 -0
- package/lib/payment-retries.js +816 -0
- package/lib/pick-lists.js +639 -0
- package/lib/pixel-events.js +995 -0
- package/lib/preorder.js +595 -0
- package/lib/print-queue.js +681 -0
- package/lib/product-qa.js +749 -0
- package/lib/promo-bundles.js +835 -0
- package/lib/push-notifications.js +937 -0
- package/lib/refund-automation.js +853 -0
- package/lib/reorder-reminders.js +798 -0
- package/lib/robots-config.js +753 -0
- package/lib/seller-signup.js +1052 -0
- package/lib/site-redirects.js +690 -0
- package/lib/sitemap-generator.js +717 -0
- package/lib/subscription-gifts.js +710 -0
- package/lib/tax-cert-renewals.js +632 -0
- package/lib/theme-assets.js +711 -0
- package/lib/tier-benefits.js +776 -0
- package/lib/vendor/MANIFEST.json +2 -2
- package/lib/vendor/blamejs/CHANGELOG.md +2 -0
- package/lib/vendor/blamejs/api-snapshot.json +2 -2
- package/lib/vendor/blamejs/lib/metrics.js +68 -4
- package/lib/vendor/blamejs/package.json +1 -1
- package/lib/vendor/blamejs/release-notes/v0.12.5.json +40 -0
- package/lib/wishlist-alerts.js +842 -0
- package/lib/wishlist-sharing.js +718 -0
- package/package.json +1 -1
|
@@ -0,0 +1,717 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* @module shop.sitemapGenerator
|
|
4
|
+
* @title Sitemap generator — sitemap.xml + sitemap-index.xml emission
|
|
5
|
+
*
|
|
6
|
+
* @intro
|
|
7
|
+
* Crawler-facing sitemap surface. Generates the bytes a search-
|
|
8
|
+
* engine fetches at `/sitemap.xml` (the index) and at each chunk
|
|
9
|
+
* filename listed inside the index. The primitive does NOT persist
|
|
10
|
+
* the bytes — the operator's worker writes the returned artifact
|
|
11
|
+
* list to the storefront's static-asset surface (R2 / CDN / disk)
|
|
12
|
+
* and calls `recordGeneration({ artifacts })` to log what shipped.
|
|
13
|
+
*
|
|
14
|
+
* Sections:
|
|
15
|
+
* - `defineSection({ slug, source, base_path, priority, changefreq, max_urls? })`
|
|
16
|
+
* — register an emitting section. `source` picks the URL stream
|
|
17
|
+
* the section reads from:
|
|
18
|
+
* 'product' — every active catalog product, slug-rooted
|
|
19
|
+
* 'collection' — every non-archived collection
|
|
20
|
+
* 'storefront_page' — every published storefront page
|
|
21
|
+
* 'custom' — operator supplies the URLs at generate
|
|
22
|
+
* time via `create({ custom: { <slug>: fn }})`
|
|
23
|
+
*
|
|
24
|
+
* Generation:
|
|
25
|
+
* - `generate({ origin_url, sections_filter? })` — walks every
|
|
26
|
+
* active, non-archived section (or only those whose slug is in
|
|
27
|
+
* `sections_filter`), pulls the URL stream, applies the
|
|
28
|
+
* per-section `max_urls` cap, splits the result at the sitemap
|
|
29
|
+
* spec's hard limits (50,000 URLs per file, ~50 MB serialized
|
|
30
|
+
* per file), and returns an array of `{ filename, content }`
|
|
31
|
+
* artifacts — the chunk files first, the index file last. The
|
|
32
|
+
* index file lists every chunk's loc relative to `origin_url`.
|
|
33
|
+
* Returns an empty array (no index, no chunks) when no section
|
|
34
|
+
* produced any URLs.
|
|
35
|
+
*
|
|
36
|
+
* Audit:
|
|
37
|
+
* - `recordGeneration({ artifacts })` — appends the audit row
|
|
38
|
+
* (artifact_count + total_url_count + total_byte_size +
|
|
39
|
+
* generated_at). The operator's worker calls this after the
|
|
40
|
+
* bytes ship so `lastGeneration()` can answer the dashboard's
|
|
41
|
+
* "when did the sitemap last update" question.
|
|
42
|
+
* - `lastGeneration()` — the newest audit row, or null when
|
|
43
|
+
* nothing has shipped yet.
|
|
44
|
+
*
|
|
45
|
+
* Read helpers:
|
|
46
|
+
* - `sections({ active_only? })` — enumerate the registered
|
|
47
|
+
* sections.
|
|
48
|
+
* - `archiveSection(slug)` — soft-delete the section. The bytes
|
|
49
|
+
* drop out of the next `generate()`; the config row stays so
|
|
50
|
+
* an operator can audit "what we used to emit".
|
|
51
|
+
*
|
|
52
|
+
* URL validation:
|
|
53
|
+
* - `validateOriginUrl({ origin_url })` — through `b.safeUrl`
|
|
54
|
+
* with `{ allowedProtocols: ["https:"] }`. The origin URL is
|
|
55
|
+
* what every `<loc>` in the index resolves against; cleartext
|
|
56
|
+
* would let an MITM swap the canonical host.
|
|
57
|
+
*
|
|
58
|
+
* URL encoding (sitemap spec compliance):
|
|
59
|
+
* - Every emitted URL is percent-encoded for path-segment
|
|
60
|
+
* reserved characters (`%`, `?`, `#`, ` `, etc.) AND then
|
|
61
|
+
* XML-escaped (`&` → `&amp;`, `'` → `&apos;`, `"` → `&quot;`,
|
|
62
|
+
* `<` → `&lt;`, `>` → `&gt;`). Both passes are required by
|
|
63
|
+
* the spec; a slug containing an ampersand reaches the crawler
|
|
64
|
+
* as `%26` inside a `<loc>` element (a raw `&` would appear as `&amp;`).
|
|
65
|
+
*
|
|
66
|
+
* Composes ONLY blamejs:
|
|
67
|
+
* - `b.framework.safeUrl.parse` — origin URL validation (https-only).
|
|
68
|
+
* - `b.framework.template.escapeHtml` — XML-escape of operator-
|
|
69
|
+
* sourced URL fragments (the escape catalog covers the five
|
|
70
|
+
* XML special characters).
|
|
71
|
+
* - `b.framework.uuid.v7` — audit-row id.
|
|
72
|
+
*
|
|
73
|
+
* @primitive sitemapGenerator
|
|
74
|
+
* @related b.safeUrl.parse, b.template.escapeHtml, b.uuid.v7
|
|
75
|
+
*/
|
|
76
|
+
|
|
77
|
+
// Hard ceilings from the sitemap protocol
// (https://www.sitemaps.org/protocol.html): one sitemap file carries at
// most 50,000 URLs and at most 50 MB uncompressed. Crossing either
// ceiling starts a new chunk file.
var URL_HARD_CAP = 50000;
var BYTE_HARD_CAP = 50 * 1024 * 1024;

// Section slugs are interpolated into chunk filenames
// (`sitemap-<slug>-N.xml`), so the accepted alphabet is kept narrow:
// an alphanumeric first character followed by alphanumerics, dot,
// underscore or hyphen. The `{0,79}` quantifier must stay in step with
// MAX_SECTION_SLUG_LEN.
var MAX_SECTION_SLUG_LEN = 80;
var SECTION_SLUG_RE = /^[A-Za-z0-9][A-Za-z0-9._-]{0,79}$/;

// Length ceilings applied by the base_path / origin_url validators.
var MAX_BASE_PATH_LEN = 256;
var MAX_ORIGIN_URL_LEN = 2048;

// URL streams a section may read from.
var ALLOWED_SOURCES = Object.freeze(["product", "collection", "storefront_page", "custom"]);

// `<changefreq>` values a section may declare.
var ALLOWED_CHANGEFREQS = Object.freeze([
  "always", "hourly", "daily", "weekly", "monthly", "yearly", "never",
]);

// C0 control characters plus DEL; rejected in base_path and origin_url.
var CONTROL_BYTE_RE = /[\x00-\x1f\x7f]/;
|
|
111
|
+
|
|
112
|
+
var bShop;

/**
 * Return the `framework` property of the package entry point.
 *
 * `./index` is required on first use and cached in `bShop`
 * (presumably to sidestep a circular require between this module and
 * the index — confirm against `./index`).
 */
function _b() {
  bShop = bShop || require("./index");
  return bShop.framework;
}
|
|
117
|
+
|
|
118
|
+
// ---- validators ---------------------------------------------------------
|
|
119
|
+
|
|
120
|
+
/**
 * Validate a section slug against SECTION_SLUG_RE.
 *
 * @param {*} s candidate slug
 * @returns {string} the slug, unchanged
 * @throws {TypeError} when `s` is not a string or does not match
 */
function _slug(s) {
  var acceptable = typeof s === "string" && SECTION_SLUG_RE.test(s);
  if (acceptable) return s;
  throw new TypeError(
    "sitemapGenerator: slug must match /^[A-Za-z0-9][A-Za-z0-9._-]*$/ " +
    "(<= " + MAX_SECTION_SLUG_LEN + " chars)"
  );
}
|
|
129
|
+
|
|
130
|
+
/**
 * Validate a section's `source` against ALLOWED_SOURCES.
 *
 * @param {*} s candidate source name
 * @returns {string} the source, unchanged
 * @throws {TypeError} when `s` is not one of the allowed strings
 */
function _source(s) {
  if (typeof s === "string" && ALLOWED_SOURCES.indexOf(s) !== -1) {
    return s;
  }
  throw new TypeError("sitemapGenerator: source must be one of " + JSON.stringify(ALLOWED_SOURCES));
}
|
|
136
|
+
|
|
137
|
+
/**
 * Validate a section's `base_path`.
 *
 * Accepts a string of 1..MAX_BASE_PATH_LEN characters that contains no
 * control bytes, starts with a single `/`, and contains no `..`
 * sequence anywhere.
 *
 * @param {*} s candidate base path
 * @returns {string} the path, unchanged
 * @throws {TypeError} on any violated rule
 */
function _basePath(s) {
  var sized = typeof s === "string" && s.length >= 1 && s.length <= MAX_BASE_PATH_LEN;
  if (!sized) {
    throw new TypeError("sitemapGenerator: base_path must be a string 1.." + MAX_BASE_PATH_LEN + " chars");
  }
  if (CONTROL_BYTE_RE.test(s)) {
    throw new TypeError("sitemapGenerator: base_path contains control bytes");
  }
  if (!s.startsWith("/")) {
    throw new TypeError("sitemapGenerator: base_path must start with '/'");
  }
  // A protocol-relative `//host` prefix would let a section emit URLs
  // that point at an arbitrary domain.
  if (s.startsWith("//")) {
    throw new TypeError("sitemapGenerator: base_path must not start with '//'");
  }
  if (s.indexOf("..") !== -1) {
    throw new TypeError("sitemapGenerator: base_path must not contain '..'");
  }
  return s;
}
|
|
157
|
+
|
|
158
|
+
/**
 * Validate a sitemap `<priority>` value.
 *
 * @param {*} n candidate priority
 * @returns {number} the priority, unchanged
 * @throws {TypeError} unless `n` is a finite number within 0..1 inclusive
 */
function _priority(n) {
  var inRange = typeof n === "number" && isFinite(n) && n >= 0 && n <= 1;
  if (inRange) return n;
  throw new TypeError("sitemapGenerator: priority must be a number 0.0..1.0");
}
|
|
164
|
+
|
|
165
|
+
/**
 * Validate a section's `changefreq` against ALLOWED_CHANGEFREQS.
 *
 * @param {*} s candidate change frequency
 * @returns {string} the value, unchanged
 * @throws {TypeError} when `s` is not one of the allowed strings
 */
function _changefreq(s) {
  if (typeof s === "string" && ALLOWED_CHANGEFREQS.indexOf(s) !== -1) {
    return s;
  }
  throw new TypeError("sitemapGenerator: changefreq must be one of " + JSON.stringify(ALLOWED_CHANGEFREQS));
}
|
|
171
|
+
|
|
172
|
+
/**
 * Validate the optional per-section URL cap.
 *
 * @param {*} n candidate cap; `null`/`undefined` means "no cap"
 * @returns {number|null} the cap, or `null` when none was supplied
 * @throws {TypeError} when `n` is supplied but is not an integer >= 1
 */
function _maxUrls(n) {
  if (n === null || n === undefined) return null;
  if (Number.isInteger(n) && n >= 1) return n;
  throw new TypeError("sitemapGenerator: max_urls must be a positive integer or null");
}
|
|
179
|
+
|
|
180
|
+
/**
 * Validate and normalize the origin URL every `<loc>` is built on.
 *
 * The value must be a string of 1..MAX_ORIGIN_URL_LEN characters with
 * no control bytes, and must be accepted by the framework's
 * `safeUrl.parse` with `https:` as the only allowed protocol.
 *
 * @param {*} u candidate origin URL
 * @returns {string} the origin with all trailing `/` characters removed
 * @throws {TypeError} when any check fails; a `safeUrl.parse` failure
 *   is re-thrown as a TypeError carrying the parser's message
 */
function _originUrl(u) {
  if (typeof u !== "string" || u.length < 1 || u.length > MAX_ORIGIN_URL_LEN) {
    throw new TypeError("sitemapGenerator: origin_url must be a string 1.." + MAX_ORIGIN_URL_LEN + " chars");
  }
  if (CONTROL_BYTE_RE.test(u)) {
    throw new TypeError("sitemapGenerator: origin_url contains control bytes");
  }
  try {
    _b().safeUrl.parse(u, { allowedProtocols: ["https:"] });
  } catch (e) {
    throw new TypeError("sitemapGenerator: origin_url — " + (e && e.message || "must be a valid https:// URL"));
  }
  // Strip every trailing slash, not just one. Paths are joined with a
  // leading `/`, so any slash left on the origin would emit
  // `https://shop.example.com//products/...`.
  var end = u.length;
  while (end > 0 && u.charCodeAt(end - 1) === 47 /* "/" */) {
    end -= 1;
  }
  return u.slice(0, end);
}
|
|
199
|
+
|
|
200
|
+
/**
 * XML-escape a value for use inside a sitemap element.
 *
 * Delegates to the framework's `template.escapeHtml`; the file relies
 * on that helper covering `&`, `<`, `>`, `"` and `'`, the characters
 * the sitemap spec requires to be escaped.
 *
 * @param {*} s value to escape (coerced with `String`)
 * @returns {string} whatever `escapeHtml` returns for the coerced value
 */
function _xmlEscape(s) {
  var tpl = _b().template;
  return tpl.escapeHtml(String(s));
}
|
|
207
|
+
|
|
208
|
+
/**
 * Percent-encode a URL path for a sitemap `<loc>`.
 *
 * Every character outside `A-Z a-z 0-9 - _ . ~ /` is handed to
 * `encodeURIComponent`. The forward slash is preserved because
 * base_path + slug joins are path-shaped (one or more segments).
 * `encodeURIComponent` itself leaves `! * ' ( )` untouched, so those
 * pass through as-is. Callers emitting URLs with embedded query strings
 * should encode those upstream before they reach the `loc` field.
 *
 * The pattern carries the `u` flag so matching is per code point. Without
 * it an astral character (for example an emoji) is split into two
 * surrogate halves, and `encodeURIComponent` throws `URIError` on each
 * half.
 *
 * @param {*} s path to encode (coerced with `String`)
 * @returns {string} the percent-encoded path
 * @throws {URIError} when the input contains an unpaired surrogate
 */
function _percentEncode(s) {
  return String(s).replace(/[^A-Za-z0-9_.~\/-]/gu, function (codePoint) {
    return encodeURIComponent(codePoint);
  });
}
|
|
219
|
+
|
|
220
|
+
/**
 * Build the text of a sitemap `<loc>` element.
 *
 * The origin URL is concatenated as-is (callers pass the value returned
 * by `_originUrl`), the path is percent-encoded first, and the joined
 * string is XML-escaped last. Encoding before escaping means an
 * ampersand in a slug reaches the crawler as `%26` rather than as an
 * XML entity.
 *
 * @param {string} originUrl validated origin without a trailing slash
 * @param {string} path path starting with `/`
 * @returns {string} the escaped `<loc>` value
 */
function _encodeLoc(originUrl, path) {
  var encodedPath = _percentEncode(path);
  var joined = originUrl + encodedPath;
  return _xmlEscape(joined);
}
|
|
231
|
+
|
|
232
|
+
/**
 * Format an epoch-millisecond timestamp as a UTC datetime with second
 * precision, `YYYY-MM-DDTHH:MM:SSZ`. The sitemap spec accepts either a
 * date-only or a full datetime `lastmod`; this module emits the full
 * datetime form. Milliseconds are dropped.
 *
 * @param {number} epochMs milliseconds since the Unix epoch
 * @returns {string} the formatted timestamp
 */
function _iso8601(epochMs) {
  var when = new Date(epochMs);
  var two = function (value) {
    return value < 10 ? "0" + value : String(value);
  };
  var day = [
    when.getUTCFullYear(),
    two(when.getUTCMonth() + 1),
    two(when.getUTCDate()),
  ].join("-");
  var clock = [
    two(when.getUTCHours()),
    two(when.getUTCMinutes()),
    two(when.getUTCSeconds()),
  ].join(":");
  return day + "T" + clock + "Z";
}
|
|
241
|
+
|
|
242
|
+
// ---- row hydration ------------------------------------------------------
|
|
243
|
+
|
|
244
|
+
/**
 * Convert a raw `sitemap_sections` row into the section object callers
 * receive: numeric columns are coerced with `Number`, `active` becomes
 * a boolean (true only when the column equals 1), and nullable columns
 * stay `null` when the row holds `null`/`undefined`.
 *
 * @param {object|null|undefined} r raw row
 * @returns {object|null} the hydrated section, or `null` for a falsy row
 */
function _hydrateSection(r) {
  if (!r) return null;
  var numberOrNull = function (value) {
    return value == null ? null : Number(value);
  };
  var section = {};
  section.slug = r.slug;
  section.source = r.source;
  section.base_path = r.base_path;
  section.priority = Number(r.priority);
  section.changefreq = r.changefreq;
  section.max_urls = numberOrNull(r.max_urls);
  section.active = Number(r.active) === 1;
  section.archived_at = numberOrNull(r.archived_at);
  section.created_at = Number(r.created_at);
  section.updated_at = Number(r.updated_at);
  return section;
}
|
|
259
|
+
|
|
260
|
+
/**
 * Convert a raw `sitemap_generations` row into the audit object callers
 * receive; the count, size and timestamp columns are coerced with
 * `Number`.
 *
 * @param {object|null|undefined} r raw row
 * @returns {object|null} the hydrated audit row, or `null` for a falsy row
 */
function _hydrateGeneration(r) {
  if (!r) return null;
  var audit = { id: r.id, origin_url: r.origin_url };
  ["artifact_count", "total_url_count", "total_byte_size", "generated_at"].forEach(function (column) {
    audit[column] = Number(r[column]);
  });
  return audit;
}
|
|
271
|
+
|
|
272
|
+
// ---- chunk serialization ------------------------------------------------
|
|
273
|
+
//
|
|
274
|
+
// Serialize an array of `{ loc, lastmod, priority, changefreq }`
|
|
275
|
+
// entries into a sitemap-spec XML document. The function returns
|
|
276
|
+
// chunks — when the URL count or the byte size crosses the hard
|
|
277
|
+
// caps, the entries are split into N strings.
|
|
278
|
+
//
|
|
279
|
+
// The hard caps:
|
|
280
|
+
// - 50,000 URLs per file (URL_HARD_CAP)
|
|
281
|
+
// - 50 MB serialized per file (BYTE_HARD_CAP)
|
|
282
|
+
//
|
|
283
|
+
// Both are required by the sitemap protocol. Crawlers refuse files
|
|
284
|
+
// exceeding either limit.
|
|
285
|
+
|
|
286
|
+
// Envelope of a chunk file: XML declaration + opening `<urlset>` tag,
// and the matching closing tag.
var URLSET_OPEN = [
  "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n",
  "<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">\n",
].join("");
var URLSET_CLOSE = "</urlset>\n";

/**
 * Serialize one URL entry as a `<url>` element.
 *
 * `entry.loc` and `entry.lastmod` are written verbatim, so the caller
 * must have escaped them already. `<lastmod>` is omitted when
 * `entry.lastmod` is `null`/`undefined`. The priority is rendered with
 * one fractional digit; the sitemap spec accepts any decimal form and
 * a single digit is the convention.
 *
 * @param {{loc: string, lastmod: ?string, changefreq: string, priority: number}} entry
 * @returns {string} the `<url>…</url>` fragment, newline-terminated
 */
function _renderUrlEntry(entry) {
  var parts = [" <url>\n"];
  parts.push(" <loc>" + entry.loc + "</loc>\n");
  if (entry.lastmod != null) {
    parts.push(" <lastmod>" + entry.lastmod + "</lastmod>\n");
  }
  parts.push(" <changefreq>" + entry.changefreq + "</changefreq>\n");
  parts.push(" <priority>" + entry.priority.toFixed(1) + "</priority>\n");
  parts.push(" </url>\n");
  return parts.join("");
}
|
|
304
|
+
|
|
305
|
+
/**
 * Render every entry and group the rendered fragments into chunks that
 * respect the per-file limits.
 *
 * A new chunk starts when adding the next fragment would push the
 * current chunk past URL_HARD_CAP entries or past BYTE_HARD_CAP bytes
 * (envelope included). A chunk is never closed while empty, so a single
 * fragment larger than the byte cap still lands in a chunk of its own.
 *
 * @param {Array<object>} entries entries accepted by `_renderUrlEntry`
 * @returns {Array<Array<string>>} rendered fragments grouped per chunk
 */
function _chunkEntries(entries) {
  var envelopeBytes = URLSET_OPEN.length + URLSET_CLOSE.length;
  var finished = [];
  var open = [];
  var openBytes = envelopeBytes;
  for (var entry of entries) {
    var fragment = _renderUrlEntry(entry);
    var fragmentBytes = Buffer.byteLength(fragment, "utf8");
    var overCount = open.length >= URL_HARD_CAP;
    var overBytes = openBytes + fragmentBytes > BYTE_HARD_CAP;
    if (open.length > 0 && (overCount || overBytes)) {
      finished.push(open);
      open = [];
      openBytes = envelopeBytes;
    }
    open.push(fragment);
    openBytes += fragmentBytes;
  }
  if (open.length > 0) {
    finished.push(open);
  }
  return finished;
}
|
|
325
|
+
|
|
326
|
+
/**
 * Wrap one chunk's rendered `<url>` fragments in the `<urlset>`
 * envelope.
 *
 * @param {Array<string>} renderedEntries fragments from `_chunkEntries`
 * @returns {string} the complete chunk document
 */
function _renderChunk(renderedEntries) {
  var body = renderedEntries.join("");
  return `${URLSET_OPEN}${body}${URLSET_CLOSE}`;
}
|
|
329
|
+
|
|
330
|
+
// Envelope of the index file: XML declaration + opening
// `<sitemapindex>` tag, and the matching closing tag.
var SITEMAPINDEX_OPEN = [
  "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n",
  "<sitemapindex xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">\n",
].join("");
var SITEMAPINDEX_CLOSE = "</sitemapindex>\n";

/**
 * Render the sitemap index document.
 *
 * Each chunk filename becomes one `<sitemap>` element whose `<loc>` is
 * `originUrl + "/" + filename`, XML-escaped. Every element carries the
 * same `<lastmod>`, derived once from `lastmodMs`.
 *
 * @param {string} originUrl validated origin without a trailing slash
 * @param {Array<string>} chunkFilenames filenames of the chunk artifacts
 * @param {number} lastmodMs epoch milliseconds stamped on every entry
 * @returns {string} the complete index document
 */
function _renderIndex(originUrl, chunkFilenames, lastmodMs) {
  var stamp = _iso8601(lastmodMs);
  var items = [];
  for (var k = 0; k < chunkFilenames.length; k += 1) {
    var target = _xmlEscape(originUrl + "/" + chunkFilenames[k]);
    items.push(
      " <sitemap>\n" +
      " <loc>" + target + "</loc>\n" +
      " <lastmod>" + stamp + "</lastmod>\n" +
      " </sitemap>\n"
    );
  }
  return SITEMAPINDEX_OPEN + items.join("") + SITEMAPINDEX_CLOSE;
}
|
|
346
|
+
|
|
347
|
+
// ---- factory ------------------------------------------------------------
|
|
348
|
+
|
|
349
|
+
function create(opts) {
|
|
350
|
+
opts = opts || {};
|
|
351
|
+
var query = opts.query;
|
|
352
|
+
if (!query) {
|
|
353
|
+
query = function (sql, params) { return _b().externalDb.query(sql, params); };
|
|
354
|
+
}
|
|
355
|
+
// The catalog / collections / storefrontPages primitives aren't
|
|
356
|
+
// required at construct time — the source SQL we run is direct
|
|
357
|
+
// (sitemap generation reads the underlying tables, not the
|
|
358
|
+
// primitive's hydrated rows). The optional opts.catalog etc.
|
|
359
|
+
// entries are accepted for forward compatibility with callers
|
|
360
|
+
// that wire the whole bShop bag in one shot.
|
|
361
|
+
var customAdapters = opts.custom && typeof opts.custom === "object" ? opts.custom : {};
|
|
362
|
+
|
|
363
|
+
// Per-factory clock that never repeats or goes backwards. When the
// wall clock has not advanced past the last value handed out (for
// example a defineSection + archiveSection pair inside one millisecond)
// the previous value plus one is returned instead, so `updated_at`
// stays strictly increasing.
var _lastTs = 0;
function _monotonicTs() {
  _lastTs = Math.max(Date.now(), _lastTs + 1);
  return _lastTs;
}
|
|
374
|
+
|
|
375
|
+
// ---- defineSection ---------------------------------------------------
|
|
376
|
+
|
|
377
|
+
/**
 * Register a new emitting section.
 *
 * Validates every field (slug, source, base_path, priority, changefreq,
 * max_urls, in that order), inserts the row as active and un-archived
 * with `created_at` and `updated_at` set to the same timestamp, then
 * reads the row back.
 *
 * @param {object} input `{ slug, source, base_path, priority, changefreq, max_urls? }`
 * @returns {Promise<object|null>} the stored section as read back by `_getSection`
 * @throws {TypeError} on invalid input, or when the insert fails with a
 *   UNIQUE / PRIMARY KEY error (slug already registered); any other
 *   insert error is re-thrown untouched
 */
async function defineSection(input) {
  if (!input || typeof input !== "object") {
    throw new TypeError("sitemapGenerator.defineSection: input object required");
  }
  // Array elements are evaluated left to right, which keeps the
  // validation order (and therefore which error surfaces first) fixed.
  var values = [
    _slug(input.slug),
    _source(input.source),
    _basePath(input.base_path),
    _priority(input.priority),
    _changefreq(input.changefreq),
    _maxUrls(input.max_urls),
  ];
  values.push(_monotonicTs());
  var slug = values[0];

  var insertSql =
    "INSERT INTO sitemap_sections (slug, source, base_path, priority, changefreq, " +
    "max_urls, active, archived_at, created_at, updated_at) " +
    "VALUES (?1, ?2, ?3, ?4, ?5, ?6, 1, NULL, ?7, ?7)";
  try {
    await query(insertSql, values);
  } catch (err) {
    var duplicate = /UNIQUE|PRIMARY KEY/i.test(String(err && err.message));
    if (!duplicate) throw err;
    throw new TypeError("sitemapGenerator.defineSection: slug " + JSON.stringify(slug) + " already registered");
  }
  return await _getSection(slug);
}
|
|
404
|
+
|
|
405
|
+
/**
 * Load one section by slug.
 *
 * @param {string} slug section slug
 * @returns {Promise<object|null>} the hydrated section, or `null` when
 *   no row matches
 */
async function _getSection(slug) {
  var result = await query(
    "SELECT * FROM sitemap_sections WHERE slug = ?1 LIMIT 1",
    [slug]
  );
  var firstRow = result.rows[0];
  return _hydrateSection(firstRow);
}
|
|
412
|
+
|
|
413
|
+
// ---- sections --------------------------------------------------------
|
|
414
|
+
|
|
415
|
+
/**
 * List registered sections ordered by slug.
 *
 * @param {{active_only?: boolean}} [input] pass `active_only: true`
 *   (strictly `true`) to return only rows that are active and not
 *   archived; anything else returns every row
 * @returns {Promise<Array<object>>} hydrated sections
 */
async function sections(input) {
  var onlyActive = (input || {}).active_only === true;
  var sql = onlyActive
    ? "SELECT * FROM sitemap_sections WHERE active = 1 AND archived_at IS NULL " +
      "ORDER BY slug ASC"
    : "SELECT * FROM sitemap_sections ORDER BY slug ASC";
  var result = await query(sql, []);
  return result.rows.map(function (row) {
    return _hydrateSection(row);
  });
}
|
|
430
|
+
|
|
431
|
+
// ---- archiveSection --------------------------------------------------
|
|
432
|
+
|
|
433
|
+
/**
 * Soft-delete a section: stamp `archived_at`, clear `active`, bump
 * `updated_at`. The row itself is kept.
 *
 * Archiving an already-archived section is a no-op that returns the
 * stored row unchanged.
 *
 * @param {string} slug section slug
 * @returns {Promise<object|null>} the section after archiving
 * @throws {TypeError} when the slug is malformed or no such section exists
 */
async function archiveSection(slug) {
  _slug(slug);
  var existing = await _getSection(slug);
  if (!existing) {
    throw new TypeError("sitemapGenerator.archiveSection: slug " + JSON.stringify(slug) + " not found");
  }
  var alreadyArchived = existing.archived_at != null;
  if (!alreadyArchived) {
    var stamp = _monotonicTs();
    await query(
      "UPDATE sitemap_sections SET archived_at = ?1, active = 0, updated_at = ?1 WHERE slug = ?2",
      [stamp, slug]
    );
    return await _getSection(slug);
  }
  return existing;
}
|
|
447
|
+
|
|
448
|
+
// ---- validateOriginUrl ----------------------------------------------
|
|
449
|
+
|
|
450
|
+
/**
 * Public wrapper around the origin-URL validator.
 *
 * @param {{origin_url: string}} input
 * @returns {string} the normalized origin returned by `_originUrl`
 * @throws {TypeError} when `input` is not an object or the URL is rejected
 */
function validateOriginUrl(input) {
  var usable = Boolean(input) && typeof input === "object";
  if (usable) {
    return _originUrl(input.origin_url);
  }
  throw new TypeError("sitemapGenerator.validateOriginUrl: input object required");
}
|
|
456
|
+
|
|
457
|
+
// ---- per-source URL pullers ----------------------------------------
|
|
458
|
+
//
|
|
459
|
+
// Each puller returns an array of `{ slug, updated_at }` rows. The
|
|
460
|
+
// generate() composer joins the section's `base_path` with each
|
|
461
|
+
// row's slug, applies the section's priority + changefreq, and
|
|
462
|
+
// tags the row's updated_at as the URL's lastmod.
|
|
463
|
+
|
|
464
|
+
/**
 * Read the URL stream for a `product` section: every row of `products`
 * whose status is 'active', newest `updated_at` first, ties broken by
 * slug.
 *
 * @returns {Promise<Array<{slug: *, updated_at: number}>>}
 */
async function _pullProducts() {
  var sql =
    "SELECT slug, updated_at FROM products WHERE status = 'active' " +
    "ORDER BY updated_at DESC, slug ASC";
  var result = await query(sql, []);
  return result.rows.map((row) => ({
    slug: row.slug,
    updated_at: Number(row.updated_at),
  }));
}
|
|
474
|
+
|
|
475
|
+
/**
 * Read the URL stream for a `collection` section: every row of
 * `collections` with no `archived_at`, newest `updated_at` first, ties
 * broken by slug.
 *
 * @returns {Promise<Array<{slug: *, updated_at: number}>>}
 */
async function _pullCollections() {
  var sql =
    "SELECT slug, updated_at FROM collections WHERE archived_at IS NULL " +
    "ORDER BY updated_at DESC, slug ASC";
  var result = await query(sql, []);
  return result.rows.map((row) => ({
    slug: row.slug,
    updated_at: Number(row.updated_at),
  }));
}
|
|
485
|
+
|
|
486
|
+
/**
 * Read the URL stream for a `storefront_page` section: every row of
 * `storefront_pages` whose status is 'published', newest `updated_at`
 * first, ties broken by slug.
 *
 * @returns {Promise<Array<{slug: *, updated_at: number}>>}
 */
async function _pullStorefrontPages() {
  var sql =
    "SELECT slug, updated_at FROM storefront_pages WHERE status = 'published' " +
    "ORDER BY updated_at DESC, slug ASC";
  var result = await query(sql, []);
  return result.rows.map((row) => ({
    slug: row.slug,
    updated_at: Number(row.updated_at),
  }));
}
|
|
496
|
+
|
|
497
|
+
/**
 * Read the URL stream for a `custom` section by calling the adapter
 * registered under the section's slug in `create({ custom: { ... } })`.
 *
 * @param {string} sectionSlug slug of the section being generated
 * @returns {Promise<Array<{slug: string, updated_at: number}>>} rows
 *   with `updated_at` coerced through `Number`
 * @throws {TypeError} when no adapter is registered, the adapter is not
 *   a function, it does not return an array, or a row is not
 *   `{ slug: string, updated_at: <numeric> }`
 */
async function _pullCustom(sectionSlug) {
  var adapter = customAdapters[sectionSlug];
  // Slugs such as "constructor" or "toString" pass slug validation but
  // would resolve to Object.prototype built-ins on a plain adapter map.
  // Treat those as "not registered" unless the operator really supplied
  // a value under that key.
  if (
    adapter === Object.prototype[sectionSlug] &&
    !Object.prototype.hasOwnProperty.call(customAdapters, sectionSlug)
  ) {
    adapter = undefined;
  }
  if (!adapter) {
    throw new TypeError(
      "sitemapGenerator.generate: section " + JSON.stringify(sectionSlug) +
      " has source='custom' but no adapter was registered via create({ custom: { ... } })"
    );
  }
  if (typeof adapter !== "function") {
    throw new TypeError(
      "sitemapGenerator.generate: custom adapter for section " + JSON.stringify(sectionSlug) +
      " must be a function returning an array of { slug, updated_at }"
    );
  }
  var rows = await adapter();
  if (!Array.isArray(rows)) {
    throw new TypeError(
      "sitemapGenerator.generate: custom adapter for section " + JSON.stringify(sectionSlug) +
      " must return an array; got " + (rows === null ? "null" : typeof rows)
    );
  }
  return rows.map(function (r) {
    if (!r || typeof r !== "object" || typeof r.slug !== "string") {
      throw new TypeError(
        "sitemapGenerator.generate: custom adapter for section " + JSON.stringify(sectionSlug) +
        " must yield { slug: string, updated_at: number } rows"
      );
    }
    var u = Number(r.updated_at);
    if (!isFinite(u)) {
      throw new TypeError(
        "sitemapGenerator.generate: custom adapter for section " + JSON.stringify(sectionSlug) +
        " yielded a row with non-numeric updated_at"
      );
    }
    return { slug: r.slug, updated_at: u };
  });
}
|
|
535
|
+
|
|
536
|
+
/**
 * Dispatch to the puller that matches the section's `source`. Any
 * source other than the three table-backed ones is handled as
 * `custom`.
 *
 * @param {{source: string, slug: string}} section hydrated section
 * @returns {Promise<Array<{slug: *, updated_at: number}>>}
 */
async function _pullSection(section) {
  switch (section.source) {
    case "product":
      return await _pullProducts();
    case "collection":
      return await _pullCollections();
    case "storefront_page":
      return await _pullStorefrontPages();
    default:
      return await _pullCustom(section.slug);
  }
}
|
|
542
|
+
|
|
543
|
+
// ---- generate --------------------------------------------------------
|
|
544
|
+
|
|
545
|
+
/**
 * Build every sitemap artifact for the active, non-archived sections.
 *
 * For each section (optionally restricted to the slugs listed in
 * `sections_filter`) the URL stream is pulled, truncated to the
 * section's `max_urls`, rendered, and split into chunk files named
 * `sitemap-<slug>-<n>.xml`. The index `sitemap.xml` is appended last
 * and lists every chunk. Its `<lastmod>` is the newest `updated_at`
 * seen, or the current time when no row carried a positive timestamp.
 *
 * @param {{origin_url: string, sections_filter?: ?Array<string>}} input
 * @returns {Promise<Array<{filename: string, content: string}>>} chunk
 *   artifacts followed by the index, or `[]` when no section produced
 *   any URL
 * @throws {TypeError} on a malformed `input`, origin URL or filter
 */
async function generate(input) {
  if (!input || typeof input !== "object") {
    throw new TypeError("sitemapGenerator.generate: input object required");
  }
  var originUrl = _originUrl(input.origin_url);

  var filter = null;
  if (input.sections_filter != null) {
    if (!Array.isArray(input.sections_filter)) {
      throw new TypeError("sitemapGenerator.generate: sections_filter must be an array of slugs or null");
    }
    // Prototype-less lookup table. With a plain `{}` a section slugged
    // "constructor", "toString", etc. would find the inherited
    // Object.prototype member and pass the filter even when not listed.
    filter = Object.create(null);
    for (var fi = 0; fi < input.sections_filter.length; fi += 1) {
      var f = input.sections_filter[fi];
      if (typeof f !== "string") {
        throw new TypeError("sitemapGenerator.generate: sections_filter entries must be strings");
      }
      filter[f] = true;
    }
  }

  var allSections = await sections({ active_only: true });

  var artifacts = [];
  var maxLastmod = 0;
  var anyUrls = false;

  for (var si = 0; si < allSections.length; si += 1) {
    var section = allSections[si];
    if (filter && !filter[section.slug]) continue;

    var rows = await _pullSection(section);
    // The cap keeps the head of the stream in the order the puller
    // returned it.
    if (section.max_urls != null && rows.length > section.max_urls) {
      rows = rows.slice(0, section.max_urls);
    }

    var entries = [];
    for (var ri = 0; ri < rows.length; ri += 1) {
      var row = rows[ri];
      // Join base_path and slug with exactly one `/` between them.
      var path = section.base_path;
      if (path.charCodeAt(path.length - 1) !== 47 /* "/" */) path += "/";
      path += row.slug;
      // Rows without a positive, finite timestamp get no <lastmod>.
      var lastmod = isFinite(row.updated_at) && row.updated_at > 0
        ? _iso8601(row.updated_at)
        : null;
      if (row.updated_at > maxLastmod) maxLastmod = row.updated_at;
      entries.push({
        loc: _encodeLoc(originUrl, path),
        lastmod: lastmod,
        priority: section.priority,
        changefreq: section.changefreq,
      });
    }

    if (entries.length === 0) continue;
    anyUrls = true;

    var chunks = _chunkEntries(entries);
    for (var ci = 0; ci < chunks.length; ci += 1) {
      var filename = "sitemap-" + section.slug + "-" + (ci + 1) + ".xml";
      artifacts.push({
        filename: filename,
        content: _renderChunk(chunks[ci]),
      });
    }
  }

  if (!anyUrls) return [];

  var chunkFilenames = artifacts.map(function (a) { return a.filename; });
  var indexContent = _renderIndex(
    originUrl,
    chunkFilenames,
    maxLastmod > 0 ? maxLastmod : Date.now()
  );
  artifacts.push({ filename: "sitemap.xml", content: indexContent });

  return artifacts;
}
|
|
624
|
+
|
|
625
|
+
// ---- recordGeneration -----------------------------------------------
|
|
626
|
+
|
|
627
|
+
/**
 * Append an audit row describing a shipped generation.
 *
 * `total_byte_size` sums the UTF-8 size of every artifact's content.
 * `total_url_count` counts `<url>` markers, but only in artifacts whose
 * content contains `<urlset` (chunk files); the index's `<sitemap>`
 * entries are pointers, not URLs an operator cares about.
 *
 * `origin_url` is required so the audit row names the canonical host
 * the bytes were generated against, which lets a multi-origin operator
 * tell regenerations apart.
 *
 * @param {{artifacts: Array<{filename: string, content: string}>, origin_url: string}} input
 * @returns {Promise<object>} `{ id, origin_url, artifact_count,
 *   total_url_count, total_byte_size, generated_at }` as inserted
 * @throws {TypeError} on a malformed `input`, artifact or origin URL
 */
async function recordGeneration(input) {
  if (!input || typeof input !== "object") {
    throw new TypeError("sitemapGenerator.recordGeneration: input object required");
  }
  if (!Array.isArray(input.artifacts)) {
    throw new TypeError("sitemapGenerator.recordGeneration: artifacts must be an array");
  }
  var originUrl = _originUrl(input.origin_url);

  var list = input.artifacts;
  var urlTotal = 0;
  var byteTotal = 0;
  for (var idx = 0; idx < list.length; idx += 1) {
    var artifact = list[idx];
    var wellFormed = Boolean(artifact) && typeof artifact === "object" &&
      typeof artifact.filename === "string" && typeof artifact.content === "string";
    if (!wellFormed) {
      throw new TypeError(
        "sitemapGenerator.recordGeneration: artifacts[" + idx + "] must be " +
        "{ filename: string, content: string }"
      );
    }
    byteTotal += Buffer.byteLength(artifact.content, "utf8");
    if (artifact.content.indexOf("<urlset") !== -1) {
      // N occurrences of the marker split the text into N + 1 pieces.
      urlTotal += artifact.content.split("<url>").length - 1;
    }
  }

  var audit = {
    id: _b().uuid.v7(),
    origin_url: originUrl,
    artifact_count: list.length,
    total_url_count: urlTotal,
    total_byte_size: byteTotal,
    generated_at: _monotonicTs(),
  };
  await query(
    "INSERT INTO sitemap_generations (id, origin_url, artifact_count, total_url_count, " +
    "total_byte_size, generated_at) VALUES (?1, ?2, ?3, ?4, ?5, ?6)",
    [
      audit.id,
      audit.origin_url,
      audit.artifact_count,
      audit.total_url_count,
      audit.total_byte_size,
      audit.generated_at,
    ]
  );
  return audit;
}
|
|
683
|
+
|
|
684
|
+
// ---- lastGeneration -------------------------------------------------
|
|
685
|
+
|
|
686
|
+
/**
 * Fetch the newest audit row, ordered by `generated_at` then `id`,
 * both descending.
 *
 * @returns {Promise<object|null>} the hydrated row, or `null` when the
 *   audit table is empty
 */
async function lastGeneration() {
  var result = await query(
    "SELECT * FROM sitemap_generations ORDER BY generated_at DESC, id DESC LIMIT 1",
    []
  );
  var found = result.rows;
  return found.length ? _hydrateGeneration(found[0]) : null;
}
|
|
694
|
+
|
|
695
|
+
return {
|
|
696
|
+
URL_HARD_CAP: URL_HARD_CAP,
|
|
697
|
+
BYTE_HARD_CAP: BYTE_HARD_CAP,
|
|
698
|
+
ALLOWED_SOURCES: ALLOWED_SOURCES,
|
|
699
|
+
ALLOWED_CHANGEFREQS: ALLOWED_CHANGEFREQS,
|
|
700
|
+
|
|
701
|
+
defineSection: defineSection,
|
|
702
|
+
sections: sections,
|
|
703
|
+
archiveSection: archiveSection,
|
|
704
|
+
validateOriginUrl: validateOriginUrl,
|
|
705
|
+
generate: generate,
|
|
706
|
+
recordGeneration: recordGeneration,
|
|
707
|
+
lastGeneration: lastGeneration,
|
|
708
|
+
};
|
|
709
|
+
}
|
|
710
|
+
|
|
711
|
+
module.exports = {
|
|
712
|
+
create: create,
|
|
713
|
+
URL_HARD_CAP: URL_HARD_CAP,
|
|
714
|
+
BYTE_HARD_CAP: BYTE_HARD_CAP,
|
|
715
|
+
ALLOWED_SOURCES: ALLOWED_SOURCES,
|
|
716
|
+
ALLOWED_CHANGEFREQS: ALLOWED_CHANGEFREQS,
|
|
717
|
+
};
|