@blamejs/blamejs-shop 0.0.65 → 0.0.70

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. package/CHANGELOG.md +10 -0
  2. package/lib/assembly-instructions.js +777 -0
  3. package/lib/auto-replenish.js +933 -0
  4. package/lib/business-hours.js +980 -0
  5. package/lib/click-and-collect.js +711 -0
  6. package/lib/clickstream.js +713 -0
  7. package/lib/cost-layers.js +774 -0
  8. package/lib/credit-limits.js +752 -0
  9. package/lib/currency-rounding.js +525 -0
  10. package/lib/customer-activity.js +862 -0
  11. package/lib/customer-notes.js +712 -0
  12. package/lib/customer-risk-profile.js +593 -0
  13. package/lib/customer-surveys.js +1012 -0
  14. package/lib/damage-photos.js +473 -0
  15. package/lib/discount-allocation.js +557 -0
  16. package/lib/dropship-forwarding.js +645 -0
  17. package/lib/email-templates.js +817 -0
  18. package/lib/index.js +45 -0
  19. package/lib/inventory-allocations.js +559 -0
  20. package/lib/inventory-writeoffs.js +636 -0
  21. package/lib/knowledge-base.js +1104 -0
  22. package/lib/locale-router.js +1077 -0
  23. package/lib/operator-roles.js +768 -0
  24. package/lib/order-escalation.js +951 -0
  25. package/lib/order-ratings.js +495 -0
  26. package/lib/order-tags.js +944 -0
  27. package/lib/packing-slips.js +810 -0
  28. package/lib/payment-retries.js +816 -0
  29. package/lib/pick-lists.js +639 -0
  30. package/lib/pixel-events.js +995 -0
  31. package/lib/preorder.js +595 -0
  32. package/lib/print-queue.js +681 -0
  33. package/lib/product-qa.js +749 -0
  34. package/lib/promo-bundles.js +835 -0
  35. package/lib/push-notifications.js +937 -0
  36. package/lib/refund-automation.js +853 -0
  37. package/lib/reorder-reminders.js +798 -0
  38. package/lib/robots-config.js +753 -0
  39. package/lib/seller-signup.js +1052 -0
  40. package/lib/site-redirects.js +690 -0
  41. package/lib/sitemap-generator.js +717 -0
  42. package/lib/subscription-gifts.js +710 -0
  43. package/lib/tax-cert-renewals.js +632 -0
  44. package/lib/theme-assets.js +711 -0
  45. package/lib/tier-benefits.js +776 -0
  46. package/lib/vendor/MANIFEST.json +2 -2
  47. package/lib/vendor/blamejs/CHANGELOG.md +2 -0
  48. package/lib/vendor/blamejs/api-snapshot.json +2 -2
  49. package/lib/vendor/blamejs/lib/metrics.js +68 -4
  50. package/lib/vendor/blamejs/package.json +1 -1
  51. package/lib/vendor/blamejs/release-notes/v0.12.5.json +40 -0
  52. package/lib/wishlist-alerts.js +842 -0
  53. package/lib/wishlist-sharing.js +718 -0
  54. package/package.json +1 -1
@@ -0,0 +1,717 @@
1
+ "use strict";
2
+ /**
3
+ * @module shop.sitemapGenerator
4
+ * @title Sitemap generator — sitemap.xml + sitemap-index.xml emission
5
+ *
6
+ * @intro
7
+ * Crawler-facing sitemap surface. Generates the bytes a search-
8
+ * engine fetches at `/sitemap.xml` (the index) and at each chunk
9
+ * filename listed inside the index. The primitive does NOT persist
10
+ * the bytes — the operator's worker writes the returned artifact
11
+ * list to the storefront's static-asset surface (R2 / CDN / disk)
12
+ * and calls `recordGeneration({ artifacts })` to log what shipped.
13
+ *
14
+ * Sections:
15
+ * - `defineSection({ slug, source, base_path, priority, changefreq, max_urls? })`
16
+ * — register an emitting section. `source` picks the URL stream
17
+ * the section reads from:
18
+ * 'product' — every active catalog product, slug-rooted
19
+ * 'collection' — every non-archived collection
20
+ * 'storefront_page' — every published storefront page
21
+ * 'custom' — operator supplies the URLs at generate
22
+ * time via `create({ custom: { <slug>: fn }})`
23
+ *
24
+ * Generation:
25
+ * - `generate({ origin_url, sections_filter? })` — walks every
26
+ * active, non-archived section (or only those whose slug is in
27
+ * `sections_filter`), pulls the URL stream, applies the
28
+ * per-section `max_urls` cap, splits the result at the sitemap
29
+ * spec's hard limits (50,000 URLs per file, ~50 MB serialized
30
+ * per file), and returns an array of `{ filename, content }`
31
+ * artifacts — the chunk files first, the index file last. The
32
+ * index file lists every chunk's loc relative to `origin_url`.
33
+ * Returns an empty array (no index, no chunks) when no section
34
+ * produced any URLs.
35
+ *
36
+ * Audit:
37
+ * - `recordGeneration({ artifacts })` — appends the audit row
38
+ * (artifact_count + total_url_count + total_byte_size +
39
+ * generated_at). The operator's worker calls this after the
40
+ * bytes ship so `lastGeneration()` can answer the dashboard's
41
+ * "when did the sitemap last update" question.
42
+ * - `lastGeneration()` — the newest audit row, or null when
43
+ * nothing has shipped yet.
44
+ *
45
+ * Read helpers:
46
+ * - `sections({ active_only? })` — enumerate the registered
47
+ * sections.
48
+ * - `archiveSection(slug)` — soft-delete the section. The bytes
49
+ * drop out of the next `generate()`; the config row stays so
50
+ * an operator can audit "what we used to emit".
51
+ *
52
+ * URL validation:
53
+ * - `validateOriginUrl({ origin_url })` — through `b.safeUrl`
54
+ * with `{ allowedProtocols: ["https:"] }`. The origin URL is
55
+ * what every `<loc>` in the index resolves against; cleartext
56
+ * would let an MITM swap the canonical host.
57
+ *
58
+ * URL encoding (sitemap spec compliance):
59
+ * - Every emitted URL is percent-encoded for path-segment
60
+ * reserved characters (`%`, `?`, `#`, ` `, etc.) AND then
61
+ * XML-escaped (`&` → `&amp;`, `'` → `&apos;`, `"` → `&quot;`,
62
+ * `<` → `&lt;`, `>` → `&gt;`). Both passes are required by
63
+ * the spec; a slug containing an ampersand reaches the crawler
64
+ * as `%26` inside a `<loc>` element (percent-encoding runs first).
65
+ *
66
+ * Composes ONLY blamejs:
67
+ * - `b.framework.safeUrl.parse` — origin URL validation (https-only).
68
+ * - `b.framework.template.escapeHtml` — XML-escape of operator-
69
+ * sourced URL fragments (the escape catalog covers the five
70
+ * XML special characters).
71
+ * - `b.framework.uuid.v7` — audit-row id.
72
+ *
73
+ * @primitive sitemapGenerator
74
+ * @related b.safeUrl.parse, b.template.escapeHtml, b.uuid.v7
75
+ */
76
+
77
// Sitemap protocol hard limits (https://www.sitemaps.org/protocol.html).
// A single sitemap file may contain at most 50,000 URLs AND must not
// exceed 50 MB uncompressed; either limit triggers a chunk split.
// Both values are also re-exported on the module and on every factory
// instance.
var URL_HARD_CAP = 50000;
var BYTE_HARD_CAP = 50 * 1024 * 1024;

// Per-section slug shape. Reaches the chunk filename
// (`sitemap-<slug>-N.xml`) so a hostile slug would smuggle a path
// traversal — refuse everything outside the narrow alnum + dot +
// hyphen + underscore set.
// MAX_SECTION_SLUG_LEN is only used in the error message; the length
// limit itself is enforced by the `{0,79}` quantifier (1 leading
// character + up to 79 more), so the two must be kept in sync by hand.
var MAX_SECTION_SLUG_LEN = 80;
var SECTION_SLUG_RE = /^[A-Za-z0-9][A-Za-z0-9._-]{0,79}$/;

// Upper bounds (in UTF-16 code units, via `.length`) on the
// operator-supplied base_path and origin_url strings.
var MAX_BASE_PATH_LEN = 256;
var MAX_ORIGIN_URL_LEN = 2048;

// Accepted values for a section's `source` field. Frozen so callers
// that receive the exported array cannot mutate it.
var ALLOWED_SOURCES = Object.freeze([
  "product",
  "collection",
  "storefront_page",
  "custom",
]);

// Accepted values for a section's `changefreq` field, emitted verbatim
// inside each `<changefreq>` element.
var ALLOWED_CHANGEFREQS = Object.freeze([
  "always",
  "hourly",
  "daily",
  "weekly",
  "monthly",
  "yearly",
  "never",
]);

// Matches any C0 control character or DEL; used to reject such
// characters in base_path and origin_url.
var CONTROL_BYTE_RE = /[\x00-\x1f\x7f]/;
111
+
112
// Cached handle on the package root module; filled in on first use.
var bShop;

// Return the `framework` bag of the package root. The `require` is
// deferred until the first call and its result is memoized in `bShop`.
function _b() {
  bShop = bShop || require("./index");
  return bShop.framework;
}
117
+
118
+ // ---- validators ---------------------------------------------------------
119
+
120
// Validate a section slug against SECTION_SLUG_RE.
// Returns the slug unchanged; throws TypeError for non-strings and for
// strings that do not match the pattern.
function _slug(s) {
  var accepted = typeof s === "string" && SECTION_SLUG_RE.test(s);
  if (accepted) return s;
  throw new TypeError(
    "sitemapGenerator: slug must match /^[A-Za-z0-9][A-Za-z0-9._-]*$/ " +
    "(<= " + MAX_SECTION_SLUG_LEN + " chars)"
  );
}
129
+
130
// Validate a section `source` against ALLOWED_SOURCES.
// Returns the value unchanged; throws TypeError otherwise.
function _source(s) {
  if (typeof s === "string" && ALLOWED_SOURCES.indexOf(s) !== -1) return s;
  throw new TypeError("sitemapGenerator: source must be one of " + JSON.stringify(ALLOWED_SOURCES));
}
136
+
137
// Validate a section `base_path`. Checks run in a fixed order (length,
// control characters, leading '/', protocol-relative '//', '..') and the
// first failure throws a TypeError. Returns the path unchanged.
function _basePath(s) {
  function reject(detail) {
    throw new TypeError("sitemapGenerator: base_path " + detail);
  }
  var lengthOk = typeof s === "string" && s.length >= 1 && s.length <= MAX_BASE_PATH_LEN;
  if (!lengthOk) reject("must be a string 1.." + MAX_BASE_PATH_LEN + " chars");
  if (CONTROL_BYTE_RE.test(s)) reject("contains control bytes");
  if (s.charAt(0) !== "/") reject("must start with '/'");
  // A leading `//host` is protocol-relative and would let a section
  // emit URLs pointing at an arbitrary domain.
  if (s.charAt(1) === "/") reject("must not start with '//'");
  if (s.indexOf("..") >= 0) reject("must not contain '..'");
  return s;
}
157
+
158
// Validate a sitemap priority: a finite number in the closed range
// 0..1. Returns the number unchanged; throws TypeError otherwise.
function _priority(n) {
  var inRange = typeof n === "number" && isFinite(n) && n >= 0 && n <= 1;
  if (inRange) return n;
  throw new TypeError("sitemapGenerator: priority must be a number 0.0..1.0");
}
164
+
165
// Validate a `changefreq` value against ALLOWED_CHANGEFREQS.
// Returns the value unchanged; throws TypeError otherwise.
function _changefreq(s) {
  if (typeof s === "string" && ALLOWED_CHANGEFREQS.indexOf(s) !== -1) return s;
  throw new TypeError("sitemapGenerator: changefreq must be one of " + JSON.stringify(ALLOWED_CHANGEFREQS));
}
171
+
172
// Validate the optional per-section URL cap.
// null / undefined mean "no cap" and normalize to null; otherwise the
// value must be an integer >= 1 and is returned unchanged.
function _maxUrls(n) {
  if (n === null || n === undefined) return null;
  if (Number.isInteger(n) && n >= 1) return n;
  throw new TypeError("sitemapGenerator: max_urls must be a positive integer or null");
}
179
+
180
// Validate and normalize the origin URL every `<loc>` is built from.
//
// Checks, in order: string of 1..MAX_ORIGIN_URL_LEN chars, no control
// characters, and acceptance by the framework's `safeUrl.parse` with
// the protocol allow-list restricted to `https:`. Any failure surfaces
// as a TypeError (the parser's own message is forwarded when present).
//
// Returns the origin with ALL trailing slashes removed. The join logic
// always prepends `/`, so any slash left on the origin would emit
// `https://shop.example.com//products/...`.
function _originUrl(u) {
  if (typeof u !== "string" || u.length < 1 || u.length > MAX_ORIGIN_URL_LEN) {
    throw new TypeError("sitemapGenerator: origin_url must be a string 1.." + MAX_ORIGIN_URL_LEN + " chars");
  }
  if (CONTROL_BYTE_RE.test(u)) {
    throw new TypeError("sitemapGenerator: origin_url contains control bytes");
  }
  try {
    _b().safeUrl.parse(u, { allowedProtocols: ["https:"] });
  } catch (e) {
    throw new TypeError("sitemapGenerator: origin_url — " + (e && e.message || "must be a valid https:// URL"));
  }
  // Normalize rather than refuse: drop every trailing "/" (char code 47),
  // not just the last one, so `https://host//` cannot leak a slash.
  var end = u.length;
  while (end > 0 && u.charCodeAt(end - 1) === 47) end -= 1;
  return u.slice(0, end);
}
199
+
200
// Escape a value for inclusion in XML text content by delegating to
// the framework's `template.escapeHtml`. The input is coerced with
// String() first, so non-string values are accepted.
function _xmlEscape(s) {
  var template = _b().template;
  return template.escapeHtml(String(s));
}
207
+
208
// Percent-encode a path for use in a sitemap `<loc>`.
//
// Characters in the set `A-Z a-z 0-9 - _ . ~ /` pass through untouched —
// the forward slash stays unescaped because base_path + slug joins are
// path-shaped (one or more segments). Every other character is handed
// to encodeURIComponent, which emits its UTF-8 percent-encoding (note
// that encodeURIComponent itself leaves `! ' ( ) *` unescaped).
// Operators emitting URLs with embedded query strings should encode
// those upstream before they reach the `loc` field.
//
// The `u` flag makes the regex match whole code points. Without it an
// astral character (e.g. an emoji) is seen as two lone surrogates and
// encodeURIComponent throws URIError on each half. A genuinely lone
// surrogate in the input still throws URIError.
function _percentEncode(s) {
  return String(s).replace(/[^A-Za-z0-9._~/-]/gu, function (ch) {
    return encodeURIComponent(ch);
  });
}
219
+
220
// Build the text of a `<loc>` element from an origin and a path.
// The origin is used as given (callers pass the validated origin), the
// path goes through _percentEncode, and the concatenation is then
// XML-escaped. Because percent-encoding runs first, an `&` in a slug
// is emitted as `%26` rather than as an escaped raw ampersand.
function _encodeLoc(originUrl, path) {
  var absolute = originUrl + _percentEncode(path);
  return _xmlEscape(absolute);
}
231
+
232
// Format an epoch-milliseconds timestamp as a UTC datetime with second
// precision: `YYYY-MM-DDTHH:MM:SSZ`. Month, day, hour, minute and second
// are zero-padded to two digits; the year is emitted as-is.
function _iso8601(epochMs) {
  var when = new Date(epochMs);
  var two = function (n) { return n < 10 ? "0" + n : String(n); };
  var datePart = [
    when.getUTCFullYear(),
    two(when.getUTCMonth() + 1),
    two(when.getUTCDate()),
  ].join("-");
  var timePart = [
    two(when.getUTCHours()),
    two(when.getUTCMinutes()),
    two(when.getUTCSeconds()),
  ].join(":");
  return datePart + "T" + timePart + "Z";
}
241
+
242
+ // ---- row hydration ------------------------------------------------------
243
+
244
// Convert a raw `sitemap_sections` row into the public section shape.
// Numeric columns are coerced with Number(), `active` becomes a boolean
// (true only when the stored value is numerically 1), and nullable
// columns stay null. A falsy row yields null.
function _hydrateSection(r) {
  if (!r) return null;
  var numberOrNull = function (value) {
    return value == null ? null : Number(value);
  };
  return {
    slug: r.slug,
    source: r.source,
    base_path: r.base_path,
    priority: Number(r.priority),
    changefreq: r.changefreq,
    max_urls: numberOrNull(r.max_urls),
    active: Number(r.active) === 1,
    archived_at: numberOrNull(r.archived_at),
    created_at: Number(r.created_at),
    updated_at: Number(r.updated_at),
  };
}
259
+
260
// Convert a raw `sitemap_generations` row into the public audit shape.
// The four counters/timestamps are coerced with Number(); id and
// origin_url pass through. A falsy row yields null.
function _hydrateGeneration(r) {
  if (!r) return null;
  var hydrated = { id: r.id, origin_url: r.origin_url };
  hydrated.artifact_count = Number(r.artifact_count);
  hydrated.total_url_count = Number(r.total_url_count);
  hydrated.total_byte_size = Number(r.total_byte_size);
  hydrated.generated_at = Number(r.generated_at);
  return hydrated;
}
271
+
272
+ // ---- chunk serialization ------------------------------------------------
273
+ //
274
+ // Serialize an array of `{ loc, lastmod, priority, changefreq }`
275
+ // entries into a sitemap-spec XML document. The function returns
276
+ // chunks — when the URL count or the byte size crosses the hard
277
+ // caps, the entries are split into N strings.
278
+ //
279
+ // The hard caps:
280
+ // - 50,000 URLs per file (URL_HARD_CAP)
281
+ // - 50 MB serialized per file (BYTE_HARD_CAP)
282
+ //
283
+ // Both are required by the sitemap protocol. Crawlers refuse files
284
+ // exceeding either limit.
285
+
286
// Envelope wrapped around every chunk file's `<url>` entries.
var URLSET_OPEN = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" +
  "<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">\n";
var URLSET_CLOSE = "</urlset>\n";

// Serialize one `{ loc, lastmod, priority, changefreq }` entry as a
// `<url>` element. `loc`, `lastmod` and `changefreq` are inserted as-is
// (no escaping happens here), `<lastmod>` is omitted when the value is
// null/undefined, and priority is rendered with one fractional digit.
function _renderUrlEntry(entry) {
  var parts = [
    " <url>\n",
    " <loc>" + entry.loc + "</loc>\n",
  ];
  if (entry.lastmod != null) {
    parts.push(" <lastmod>" + entry.lastmod + "</lastmod>\n");
  }
  parts.push(" <changefreq>" + entry.changefreq + "</changefreq>\n");
  parts.push(" <priority>" + entry.priority.toFixed(1) + "</priority>\n");
  parts.push(" </url>\n");
  return parts.join("");
}
304
+
305
// Render every entry and group the rendered strings into chunks that
// respect both hard caps. Returns an array of arrays of rendered
// `<url>` strings, one inner array per output file.
//
// A new chunk is started when adding the next entry would push the
// current one past URL_HARD_CAP entries or BYTE_HARD_CAP bytes
// (envelope included). An entry is always placed in some chunk, so a
// single entry larger than the byte cap ends up alone in its own chunk.
function _chunkEntries(entries) {
  var envelopeBytes = URLSET_OPEN.length + URLSET_CLOSE.length;
  var chunks = [];
  var bucket = [];
  var bucketBytes = envelopeBytes;
  for (var entry of entries) {
    var xml = _renderUrlEntry(entry);
    var xmlBytes = Buffer.byteLength(xml, "utf8");
    var overflow = bucket.length >= URL_HARD_CAP ||
      bucketBytes + xmlBytes > BYTE_HARD_CAP;
    if (overflow && bucket.length > 0) {
      chunks.push(bucket);
      bucket = [];
      bucketBytes = envelopeBytes;
    }
    bucket.push(xml);
    bucketBytes += xmlBytes;
  }
  if (bucket.length > 0) chunks.push(bucket);
  return chunks;
}
325
+
326
// Wrap already-rendered `<url>` strings in the urlset envelope to
// produce one complete chunk document.
function _renderChunk(renderedEntries) {
  var body = renderedEntries.join("");
  return URLSET_OPEN + body + URLSET_CLOSE;
}
329
+
330
// Envelope wrapped around the index file's `<sitemap>` entries.
var SITEMAPINDEX_OPEN = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" +
  "<sitemapindex xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">\n";
var SITEMAPINDEX_CLOSE = "</sitemapindex>\n";

// Render the sitemap index document. Each chunk filename becomes a
// `<sitemap>` element whose `<loc>` is `originUrl + "/" + filename`
// (XML-escaped); every element carries the same `<lastmod>`, formatted
// from `lastmodMs`.
function _renderIndex(originUrl, chunkFilenames, lastmodMs) {
  var stamp = _iso8601(lastmodMs);
  var items = chunkFilenames.map(function (name) {
    var loc = _xmlEscape(originUrl + "/" + name);
    return " <sitemap>\n" +
      " <loc>" + loc + "</loc>\n" +
      " <lastmod>" + stamp + "</lastmod>\n" +
      " </sitemap>\n";
  });
  return SITEMAPINDEX_OPEN + items.join("") + SITEMAPINDEX_CLOSE;
}
346
+
347
+ // ---- factory ------------------------------------------------------------
348
+
349
+ function create(opts) {
350
+ opts = opts || {};
351
+ var query = opts.query;
352
+ if (!query) {
353
+ query = function (sql, params) { return _b().externalDb.query(sql, params); };
354
+ }
355
+ // The catalog / collections / storefrontPages primitives aren't
356
+ // required at construct time — the source SQL we run is direct
357
+ // (sitemap generation reads the underlying tables, not the
358
+ // primitive's hydrated rows). The optional opts.catalog etc.
359
+ // entries are accepted for forward compatibility with callers
360
+ // that wire the whole bShop bag in one shot.
361
+ var customAdapters = opts.custom && typeof opts.custom === "object" ? opts.custom : {};
362
+
363
// Strictly increasing millisecond clock, scoped to one factory
// instance. Returns the wall clock when it has advanced past the last
// value handed out; otherwise returns last + 1. This keeps timestamps
// written by back-to-back calls inside the same millisecond distinct
// and ordered.
var _lastTs = 0;
function _monotonicTs() {
  var now = Date.now();
  _lastTs = now > _lastTs ? now : _lastTs + 1;
  return _lastTs;
}
374
+
375
+ // ---- defineSection ---------------------------------------------------
376
+
377
// Register a new sitemap section.
//
// Validates every field (slug, source, base_path, priority,
// changefreq, max_urls — in that order, first failure throws), inserts
// the row as active and non-archived with created_at == updated_at,
// and returns the hydrated row as read back from the table.
//
// A driver error whose message mentions UNIQUE or PRIMARY KEY is
// translated into a TypeError naming the duplicate slug; any other
// error is rethrown untouched.
async function defineSection(input) {
  if (!input || typeof input !== "object") {
    throw new TypeError("sitemapGenerator.defineSection: input object required");
  }
  var slug = _slug(input.slug);
  var source = _source(input.source);
  var basePath = _basePath(input.base_path);
  var priority = _priority(input.priority);
  var changefreq = _changefreq(input.changefreq);
  var maxUrls = _maxUrls(input.max_urls);

  var ts = _monotonicTs();
  var insertSql =
    "INSERT INTO sitemap_sections (slug, source, base_path, priority, changefreq, " +
    "max_urls, active, archived_at, created_at, updated_at) " +
    "VALUES (?1, ?2, ?3, ?4, ?5, ?6, 1, NULL, ?7, ?7)";
  var params = [slug, source, basePath, priority, changefreq, maxUrls, ts];
  try {
    await query(insertSql, params);
  } catch (e) {
    var isDuplicate = /UNIQUE|PRIMARY KEY/i.test(String(e && e.message));
    if (!isDuplicate) throw e;
    throw new TypeError("sitemapGenerator.defineSection: slug " + JSON.stringify(slug) + " already registered");
  }
  var stored = await _getSection(slug);
  return stored;
}
404
+
405
// Load one section by slug. Resolves to the hydrated section, or null
// when no row matches.
async function _getSection(slug) {
  var result = await query(
    "SELECT * FROM sitemap_sections WHERE slug = ?1 LIMIT 1",
    [slug],
  );
  var firstRow = result.rows[0];
  return _hydrateSection(firstRow);
}
412
+
413
+ // ---- sections --------------------------------------------------------
414
+
415
// List registered sections ordered by slug.
// With `{ active_only: true }` only rows that are active AND not
// archived are returned; any other input (including none) lists every
// row.
async function sections(input) {
  var activeOnly = (input || {}).active_only === true;
  var sql = activeOnly
    ? "SELECT * FROM sitemap_sections WHERE active = 1 AND archived_at IS NULL " +
      "ORDER BY slug ASC"
    : "SELECT * FROM sitemap_sections ORDER BY slug ASC";
  var result = await query(sql, []);
  return result.rows.map(function (row) {
    return _hydrateSection(row);
  });
}
430
+
431
+ // ---- archiveSection --------------------------------------------------
432
+
433
// Soft-delete a section: stamp archived_at, clear `active`, bump
// updated_at. Returns the section as stored afterwards.
//
// Throws TypeError for an invalid slug or when no such section exists.
// Archiving an already-archived section is a no-op that returns the
// existing row without touching the table.
async function archiveSection(slug) {
  _slug(slug);
  var existing = await _getSection(slug);
  if (!existing) {
    throw new TypeError("sitemapGenerator.archiveSection: slug " + JSON.stringify(slug) + " not found");
  }
  if (existing.archived_at == null) {
    var ts = _monotonicTs();
    await query(
      "UPDATE sitemap_sections SET archived_at = ?1, active = 0, updated_at = ?1 WHERE slug = ?2",
      [ts, slug],
    );
    return await _getSection(slug);
  }
  return existing;
}
447
+
448
+ // ---- validateOriginUrl ----------------------------------------------
449
+
450
// Public wrapper around the origin-URL validator. Takes
// `{ origin_url }` and returns the normalized origin; throws TypeError
// when the input is not an object or the URL is rejected.
function validateOriginUrl(input) {
  var isObject = Boolean(input) && typeof input === "object";
  if (isObject) return _originUrl(input.origin_url);
  throw new TypeError("sitemapGenerator.validateOriginUrl: input object required");
}
456
+
457
+ // ---- per-source URL pullers ----------------------------------------
458
+ //
459
+ // Each puller returns an array of `{ slug, updated_at }` rows. The
460
+ // generate() composer joins the section's `base_path` with each
461
+ // row's slug, applies the section's priority + changefreq, and
462
+ // tags the row's updated_at as the URL's lastmod.
463
+
464
// URL stream for source 'product': every row of `products` whose
// status is 'active', newest update first (slug as tie-breaker).
// Resolves to `{ slug, updated_at }` rows with updated_at coerced to a
// number.
async function _pullProducts() {
  var sql = "SELECT slug, updated_at FROM products WHERE status = 'active' " +
    "ORDER BY updated_at DESC, slug ASC";
  var result = await query(sql, []);
  var out = [];
  for (var i = 0; i < result.rows.length; i += 1) {
    var r = result.rows[i];
    out.push({ slug: r.slug, updated_at: Number(r.updated_at) });
  }
  return out;
}
474
+
475
// URL stream for source 'collection': every row of `collections` with
// a NULL archived_at, newest update first (slug as tie-breaker).
// Resolves to `{ slug, updated_at }` rows with updated_at coerced to a
// number.
async function _pullCollections() {
  var sql = "SELECT slug, updated_at FROM collections WHERE archived_at IS NULL " +
    "ORDER BY updated_at DESC, slug ASC";
  var result = await query(sql, []);
  var out = [];
  for (var i = 0; i < result.rows.length; i += 1) {
    var r = result.rows[i];
    out.push({ slug: r.slug, updated_at: Number(r.updated_at) });
  }
  return out;
}
485
+
486
// URL stream for source 'storefront_page': every row of
// `storefront_pages` whose status is 'published', newest update first
// (slug as tie-breaker). Resolves to `{ slug, updated_at }` rows with
// updated_at coerced to a number.
async function _pullStorefrontPages() {
  var sql = "SELECT slug, updated_at FROM storefront_pages WHERE status = 'published' " +
    "ORDER BY updated_at DESC, slug ASC";
  var result = await query(sql, []);
  var out = [];
  for (var i = 0; i < result.rows.length; i += 1) {
    var r = result.rows[i];
    out.push({ slug: r.slug, updated_at: Number(r.updated_at) });
  }
  return out;
}
496
+
497
// URL stream for source 'custom': call the operator-supplied adapter
// registered under the section's slug and validate what it returns.
//
// Resolves to `{ slug, updated_at }` rows (updated_at coerced to a
// number). Throws TypeError when no adapter is registered, the adapter
// is not a function, it does not return an array, a row lacks a string
// `slug`, or a row's `updated_at` does not coerce to a finite number.
async function _pullCustom(sectionSlug) {
  var adapter = customAdapters[sectionSlug];
  // Slugs such as "constructor" or "toString" are valid section names
  // but also name members every plain object inherits. Treat a value
  // that is just the Object.prototype built-in as "not registered"
  // instead of invoking it as if the operator had supplied it.
  if (adapter === Object.prototype[sectionSlug]) adapter = undefined;
  if (!adapter) {
    throw new TypeError(
      "sitemapGenerator.generate: section " + JSON.stringify(sectionSlug) +
      " has source='custom' but no adapter was registered via create({ custom: { ... } })"
    );
  }
  if (typeof adapter !== "function") {
    throw new TypeError(
      "sitemapGenerator.generate: custom adapter for section " + JSON.stringify(sectionSlug) +
      " must be a function returning an array of { slug, updated_at }"
    );
  }
  // The adapter may be sync or async; `await` covers both.
  var rows = await adapter();
  if (!Array.isArray(rows)) {
    throw new TypeError(
      "sitemapGenerator.generate: custom adapter for section " + JSON.stringify(sectionSlug) +
      " must return an array; got " + (rows === null ? "null" : typeof rows)
    );
  }
  return rows.map(function (r) {
    if (!r || typeof r !== "object" || typeof r.slug !== "string") {
      throw new TypeError(
        "sitemapGenerator.generate: custom adapter for section " + JSON.stringify(sectionSlug) +
        " must yield { slug: string, updated_at: number } rows"
      );
    }
    var u = Number(r.updated_at);
    if (!isFinite(u)) {
      throw new TypeError(
        "sitemapGenerator.generate: custom adapter for section " + JSON.stringify(sectionSlug) +
        " yielded a row with non-numeric updated_at"
      );
    }
    return { slug: r.slug, updated_at: u };
  });
}
535
+
536
// Dispatch to the puller matching the section's `source`. Anything
// that is not one of the three table-backed sources is treated as
// 'custom' and resolved through the adapter registered for the slug.
async function _pullSection(section) {
  switch (section.source) {
    case "product":
      return await _pullProducts();
    case "collection":
      return await _pullCollections();
    case "storefront_page":
      return await _pullStorefrontPages();
    default:
      return await _pullCustom(section.slug);
  }
}
542
+
543
+ // ---- generate --------------------------------------------------------
544
+
545
// Build the sitemap artifacts for one origin.
//
// input.origin_url      — validated and normalized via _originUrl.
// input.sections_filter — optional array of section slugs; when given,
//                         only those sections are emitted.
//
// Walks every active, non-archived section, pulls its URL stream,
// applies the section's max_urls cap, renders the entries and splits
// them into chunk files named `sitemap-<slug>-<n>.xml`. Resolves to an
// array of `{ filename, content }` with the chunk files first and the
// index (`sitemap.xml`) last, or to an empty array when no section
// produced any URL.
//
// Throws TypeError for a malformed input object, origin URL or filter;
// errors from the pullers propagate.
async function generate(input) {
  if (!input || typeof input !== "object") {
    throw new TypeError("sitemapGenerator.generate: input object required");
  }
  var originUrl = _originUrl(input.origin_url);

  var filter = null;
  if (input.sections_filter != null) {
    if (!Array.isArray(input.sections_filter)) {
      throw new TypeError("sitemapGenerator.generate: sections_filter must be an array of slugs or null");
    }
    // Prototype-less lookup table: with a plain `{}` a section whose
    // slug is e.g. "constructor" or "toString" would test truthy via
    // Object.prototype and slip past the filter without being listed.
    filter = Object.create(null);
    for (var fi = 0; fi < input.sections_filter.length; fi += 1) {
      var f = input.sections_filter[fi];
      if (typeof f !== "string") {
        throw new TypeError("sitemapGenerator.generate: sections_filter entries must be strings");
      }
      filter[f] = true;
    }
  }

  var allSections = await sections({ active_only: true });

  var artifacts = [];
  // Newest updated_at seen across all emitted rows; becomes the index
  // file's lastmod.
  var maxLastmod = 0;
  var anyUrls = false;

  for (var si = 0; si < allSections.length; si += 1) {
    var section = allSections[si];
    if (filter && !filter[section.slug]) continue;

    var rows = await _pullSection(section);
    if (section.max_urls != null && rows.length > section.max_urls) {
      rows = rows.slice(0, section.max_urls);
    }

    var entries = [];
    for (var ri = 0; ri < rows.length; ri += 1) {
      var row = rows[ri];
      // Join base_path and slug with exactly one "/" between them.
      var path = section.base_path;
      if (path.charCodeAt(path.length - 1) !== 47) path += "/";
      path += row.slug;
      // Rows without a usable positive timestamp get no <lastmod>.
      var lastmod = isFinite(row.updated_at) && row.updated_at > 0
        ? _iso8601(row.updated_at)
        : null;
      if (row.updated_at > maxLastmod) maxLastmod = row.updated_at;
      entries.push({
        loc: _encodeLoc(originUrl, path),
        lastmod: lastmod,
        priority: section.priority,
        changefreq: section.changefreq,
      });
    }

    if (entries.length === 0) continue;
    anyUrls = true;

    var chunks = _chunkEntries(entries);
    for (var ci = 0; ci < chunks.length; ci += 1) {
      var filename = "sitemap-" + section.slug + "-" + (ci + 1) + ".xml";
      artifacts.push({
        filename: filename,
        content: _renderChunk(chunks[ci]),
      });
    }
  }

  if (!anyUrls) return [];

  var chunkFilenames = artifacts.map(function (a) { return a.filename; });
  // Fall back to "now" when no row carried a positive timestamp.
  var indexContent = _renderIndex(
    originUrl,
    chunkFilenames,
    maxLastmod > 0 ? maxLastmod : Date.now(),
  );
  artifacts.push({ filename: "sitemap.xml", content: indexContent });

  return artifacts;
}
624
+
625
+ // ---- recordGeneration -----------------------------------------------
626
+
627
// Append an audit row describing a shipped set of artifacts.
//
// input.artifacts  — array of `{ filename: string, content: string }`.
// input.origin_url — required; validated and normalized via _originUrl
//                    so the row names the host the bytes were
//                    generated against.
//
// Stores and returns `{ id, origin_url, artifact_count,
// total_url_count, total_byte_size, generated_at }`. The byte size sums
// the UTF-8 length of every artifact; the URL count only counts `<url>`
// markers inside artifacts that contain a `<urlset` tag, so the index
// file's `<sitemap>` entries are not counted.
async function recordGeneration(input) {
  if (!input || typeof input !== "object") {
    throw new TypeError("sitemapGenerator.recordGeneration: input object required");
  }
  if (!Array.isArray(input.artifacts)) {
    throw new TypeError("sitemapGenerator.recordGeneration: artifacts must be an array");
  }
  var originUrl = _originUrl(input.origin_url);

  var list = input.artifacts;
  var artifactCount = list.length;
  var totalUrlCount = 0;
  var totalByteSize = 0;
  for (var idx = 0; idx < list.length; idx += 1) {
    var artifact = list[idx];
    var wellFormed = Boolean(artifact) && typeof artifact === "object" &&
      typeof artifact.filename === "string" && typeof artifact.content === "string";
    if (!wellFormed) {
      throw new TypeError(
        "sitemapGenerator.recordGeneration: artifacts[" + idx + "] must be " +
        "{ filename: string, content: string }"
      );
    }
    var text = artifact.content;
    totalByteSize += Buffer.byteLength(text, "utf8");
    // Only chunk files (those carrying a urlset) contribute URLs.
    if (text.indexOf("<urlset") === -1) continue;
    for (var at = text.indexOf("<url>"); at !== -1; at = text.indexOf("<url>", at + 1)) {
      totalUrlCount += 1;
    }
  }

  var id = _b().uuid.v7();
  var ts = _monotonicTs();
  await query(
    "INSERT INTO sitemap_generations (id, origin_url, artifact_count, total_url_count, " +
    "total_byte_size, generated_at) VALUES (?1, ?2, ?3, ?4, ?5, ?6)",
    [id, originUrl, artifactCount, totalUrlCount, totalByteSize, ts],
  );
  var record = {
    id: id,
    origin_url: originUrl,
    artifact_count: artifactCount,
    total_url_count: totalUrlCount,
    total_byte_size: totalByteSize,
    generated_at: ts,
  };
  return record;
}
683
+
684
+ // ---- lastGeneration -------------------------------------------------
685
+
686
// Fetch the most recent audit row (newest generated_at, id as
// tie-breaker). Resolves to the hydrated row, or null when the table
// is empty.
async function lastGeneration() {
  var result = await query(
    "SELECT * FROM sitemap_generations ORDER BY generated_at DESC, id DESC LIMIT 1",
    [],
  );
  return result.rows.length ? _hydrateGeneration(result.rows[0]) : null;
}
694
+
695
+ return {
696
+ URL_HARD_CAP: URL_HARD_CAP,
697
+ BYTE_HARD_CAP: BYTE_HARD_CAP,
698
+ ALLOWED_SOURCES: ALLOWED_SOURCES,
699
+ ALLOWED_CHANGEFREQS: ALLOWED_CHANGEFREQS,
700
+
701
+ defineSection: defineSection,
702
+ sections: sections,
703
+ archiveSection: archiveSection,
704
+ validateOriginUrl: validateOriginUrl,
705
+ generate: generate,
706
+ recordGeneration: recordGeneration,
707
+ lastGeneration: lastGeneration,
708
+ };
709
+ }
710
+
711
// Public surface: the `create` factory plus the protocol limits and
// allow-lists, exposed so callers can read them without constructing
// an instance. The same four constants are also attached to every
// object `create` returns.
module.exports = {
  create: create,
  URL_HARD_CAP: URL_HARD_CAP,
  BYTE_HARD_CAP: BYTE_HARD_CAP,
  ALLOWED_SOURCES: ALLOWED_SOURCES,
  ALLOWED_CHANGEFREQS: ALLOWED_CHANGEFREQS,
};