@blamejs/blamejs-shop 0.0.65 → 0.0.70

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. package/CHANGELOG.md +10 -0
  2. package/lib/assembly-instructions.js +777 -0
  3. package/lib/auto-replenish.js +933 -0
  4. package/lib/business-hours.js +980 -0
  5. package/lib/click-and-collect.js +711 -0
  6. package/lib/clickstream.js +713 -0
  7. package/lib/cost-layers.js +774 -0
  8. package/lib/credit-limits.js +752 -0
  9. package/lib/currency-rounding.js +525 -0
  10. package/lib/customer-activity.js +862 -0
  11. package/lib/customer-notes.js +712 -0
  12. package/lib/customer-risk-profile.js +593 -0
  13. package/lib/customer-surveys.js +1012 -0
  14. package/lib/damage-photos.js +473 -0
  15. package/lib/discount-allocation.js +557 -0
  16. package/lib/dropship-forwarding.js +645 -0
  17. package/lib/email-templates.js +817 -0
  18. package/lib/index.js +45 -0
  19. package/lib/inventory-allocations.js +559 -0
  20. package/lib/inventory-writeoffs.js +636 -0
  21. package/lib/knowledge-base.js +1104 -0
  22. package/lib/locale-router.js +1077 -0
  23. package/lib/operator-roles.js +768 -0
  24. package/lib/order-escalation.js +951 -0
  25. package/lib/order-ratings.js +495 -0
  26. package/lib/order-tags.js +944 -0
  27. package/lib/packing-slips.js +810 -0
  28. package/lib/payment-retries.js +816 -0
  29. package/lib/pick-lists.js +639 -0
  30. package/lib/pixel-events.js +995 -0
  31. package/lib/preorder.js +595 -0
  32. package/lib/print-queue.js +681 -0
  33. package/lib/product-qa.js +749 -0
  34. package/lib/promo-bundles.js +835 -0
  35. package/lib/push-notifications.js +937 -0
  36. package/lib/refund-automation.js +853 -0
  37. package/lib/reorder-reminders.js +798 -0
  38. package/lib/robots-config.js +753 -0
  39. package/lib/seller-signup.js +1052 -0
  40. package/lib/site-redirects.js +690 -0
  41. package/lib/sitemap-generator.js +717 -0
  42. package/lib/subscription-gifts.js +710 -0
  43. package/lib/tax-cert-renewals.js +632 -0
  44. package/lib/theme-assets.js +711 -0
  45. package/lib/tier-benefits.js +776 -0
  46. package/lib/vendor/MANIFEST.json +2 -2
  47. package/lib/vendor/blamejs/CHANGELOG.md +2 -0
  48. package/lib/vendor/blamejs/api-snapshot.json +2 -2
  49. package/lib/vendor/blamejs/lib/metrics.js +68 -4
  50. package/lib/vendor/blamejs/package.json +1 -1
  51. package/lib/vendor/blamejs/release-notes/v0.12.5.json +40 -0
  52. package/lib/wishlist-alerts.js +842 -0
  53. package/lib/wishlist-sharing.js +718 -0
  54. package/package.json +1 -1
@@ -0,0 +1,753 @@
1
+ "use strict";
2
+ /**
3
+ * @module shop.robotsConfig
4
+ * @title Robots config — operator-editable robots.txt rules,
5
+ * AI-crawler opt-outs, sitemap declarations, optional
6
+ * canonical-host hint.
7
+ *
8
+ * @intro
9
+ * The bytes a crawler fetches at `/robots.txt`. Distinct from the
10
+ * worker's static fallback (which ships an allow-all default with
11
+ * one Sitemap declaration so a fresh deploy is still crawlable) —
12
+ * this primitive is the operator's editable surface:
13
+ *
14
+ * - Per-bot Allow / Disallow stanzas. Each `defineRule` call
15
+ * persists one (user_agent, allow[], disallow[], crawl_delay?)
16
+ * tuple. `render()` joins every active rule into the canonical
17
+ * robots.txt format, stable-sorted by `priority` ASC then
18
+ * `user_agent` ASC.
19
+ * - Sitemap declarations. `addSitemap(url)` records an absolute
20
+ * https:// URL the crawler should fetch the sitemap index from;
21
+ * `render()` emits each one on its own `Sitemap: <url>` line at
22
+ * the bottom of the file (the convention every search engine
23
+ * follows). Duplicate adds are idempotent (the URL is the PK).
24
+ * - Host directive. `setHostDirective(host)` upserts the optional
25
+ * `Host: <canonical-host>` hint. The directive is non-standard
26
+ * but widely-honored — Yandex's primary canonical signal, a soft
27
+ * hint to Bing, ignored by Google. The line is omitted entirely
28
+ * when no host has been set.
29
+ * - Predefined templates. `predefinedTemplates()` returns the
30
+ * four common bot-block payloads operators reach for first:
31
+ * `block_ai_crawlers` (GPTBot / ClaudeBot / CCBot / anthropic-ai /
32
+ * Google-Extended / Bytespider / Amazonbot / FacebookBot — every
33
+ * AI-training scraper currently in the wild), `block_all`
34
+ * (one `User-agent: *` / `Disallow: /` stanza), `open_all`
35
+ * (allow-everything, suitable for a fresh launch), and
36
+ * `standard_with_admin_disallow` (open allow plus a
37
+ * `Disallow: /admin/` to keep the admin surface out of search
38
+ * indices). `applyTemplate({ template_slug })` archives every
39
+ * active rule and writes the template's stanzas as the new
40
+ * baseline — the prior rules remain in the table for audit but
41
+ * no longer affect `render()`.
42
+ *
43
+ * Render contract. The emitted bytes are the canonical robots.txt
44
+ * format: one stanza per rule, each stanza is
45
+ * User-agent: <ua>
46
+ * [Crawl-delay: <n>]
47
+ * Allow: <path> (one line per allow entry)
48
+ * Disallow: <path> (one line per disallow entry)
49
+ * followed by a blank line. Stanzas with the same user_agent are
50
+ * emitted as separate blocks (operators sometimes want two
51
+ * priorities against the same bot — e.g. priority=10 for the
52
+ * permissive "allow /search" line and priority=20 for the broader
53
+ * "disallow /admin" line). The `Host:` line (when set) and every
54
+ * `Sitemap:` line follow at the bottom of the file.
55
+ *
56
+ * Validation discipline.
57
+ * - Sitemap URLs run through `b.safeUrl.parse` with the
58
+ * `{ allowedProtocols: ["https:"] }` allowlist. Cleartext is
59
+ * refused; an MITM rewriting the sitemap location would let an
60
+ * attacker swap the crawler's view of the site.
61
+ * - The host directive accepts a hostname or `host:port` shape —
62
+ * no scheme, no path. The host is just the canonical hostname
63
+ * the operator wants crawlers to attribute results to.
64
+ * - User-agent strings are length-capped and refuse control bytes
65
+ * — robots.txt is a newline-delimited format and a CR/LF in the
66
+ * UA field would break the stanza boundary.
67
+ * - Allow / Disallow path entries are `/`-rooted absolute paths
68
+ * (relative paths are meaningless in robots.txt). Wildcards
69
+ * (`*` and `$`) are permitted — every major crawler honors the
70
+ * extended-syntax wildcards even though they're not in the
71
+ * original 1994 spec.
72
+ *
73
+ * Composes ONLY blamejs:
74
+ * - `b.framework.safeUrl.parse` — sitemap URL validation
75
+ * (https-only).
76
+ * - `b.framework.uuid.v7` — rule-row id; lexicographically
77
+ * sortable so a `created_at` tiebreak is rarely needed.
78
+ *
79
+ * Storage (migration `0147_robots_config.sql`):
80
+ * - `robots_rules` — one row per operator-defined stanza
81
+ * - `robots_sitemaps` — one row per declared sitemap URL
82
+ * - `robots_host_directive` — singleton row (id=1) holding the
83
+ * canonical-host hint
84
+ *
85
+ * @primitive robotsConfig
86
+ * @related shop.sitemapGenerator, b.safeUrl.parse, b.uuid.v7
87
+ */
88
+
89
// Validation ceilings shared by the validators in this module.
const MAX_USER_AGENT_LEN = 200;   // longest accepted user_agent string
const MAX_PATH_LEN = 512;         // longest accepted Allow / Disallow entry
const MAX_PATHS_PER_RULE = 200;   // most entries in one allow / disallow array
const MAX_SITEMAP_URL_LEN = 2048; // longest accepted sitemap URL
const MAX_HOST_LEN = 255;         // longest accepted host directive value
const MAX_CRAWL_DELAY = 86400;    // seconds (24 h) — anything larger is a typo
const MAX_PRIORITY = 1000000;     // largest accepted rule priority
const MAX_RULE_ID_LEN = 80;       // longest accepted rule_id string

// Matches any ASCII control byte (0x00-0x1f) or DEL (0x7f).
const CONTROL_BYTE_RE = /[\x00-\x1f\x7f]/;
99
+
100
// Cached handle on the shop index module. The require is deferred to the
// first `_b()` call and reused afterwards — the same lazy pattern the
// other shop primitives use.
var bShop;

/**
 * Returns the framework object exposed by the shop index module.
 *
 * @returns {object} `require("./index").framework`
 */
function _b() {
  if (bShop) {
    return bShop.framework;
  }
  bShop = require("./index");
  return bShop.framework;
}
106
+
107
// ---- predefined templates ---------------------------------------------
//
// The catalog of canned payloads `applyTemplate` writes into the
// `robots_rules` table, keyed by slug. Each entry carries a slug, a
// label, a description and the stanza set to write. Every level is
// frozen — the catalog, each template, each rules array, each stanza,
// and each stanza's allow / disallow arrays — so no caller holding a
// reference can alter what a later `applyTemplate` writes.
// `predefinedTemplates()` hands out plain copies for the dashboard.

/**
 * Builds one fully-frozen template stanza.
 *
 * Freezing only the stanza object would leave its `allow` / `disallow`
 * arrays mutable (Object.freeze is shallow), so both arrays are frozen
 * here as well.
 *
 * @param {string}   userAgent  value for the `User-agent:` line
 * @param {string[]} disallow   paths for the `Disallow:` lines
 * @param {number}   priority   render ordering key (ascending)
 * @returns {object} frozen `{ user_agent, allow, disallow, priority }`
 */
function _templateStanza(userAgent, disallow, priority) {
  return Object.freeze({
    user_agent: userAgent,
    allow: Object.freeze([]),
    disallow: Object.freeze(disallow.slice()),
    priority: priority,
  });
}

var PREDEFINED_TEMPLATES = Object.freeze({
  block_ai_crawlers: Object.freeze({
    slug: "block_ai_crawlers",
    label: "Block AI training crawlers",
    description:
      "Refuse every AI-training scraper currently in the wild — " +
      "GPTBot (OpenAI), ClaudeBot + anthropic-ai (Anthropic), " +
      "CCBot (Common Crawl, the corpus most LLMs train on), " +
      "Google-Extended (Google's training opt-out token), " +
      "Bytespider (ByteDance), Amazonbot, FacebookBot. Leaves " +
      "the wildcard `*` stanza permissive so search-engine indexing " +
      "continues to work.",
    rules: Object.freeze([
      _templateStanza("GPTBot", ["/"], 10),
      _templateStanza("ClaudeBot", ["/"], 10),
      _templateStanza("anthropic-ai", ["/"], 10),
      _templateStanza("CCBot", ["/"], 10),
      _templateStanza("Google-Extended", ["/"], 10),
      _templateStanza("Bytespider", ["/"], 10),
      _templateStanza("Amazonbot", ["/"], 10),
      _templateStanza("FacebookBot", ["/"], 10),
      // Wildcard stanza sorts last (priority 100) and blocks nothing.
      _templateStanza("*", [], 100),
    ]),
  }),
  block_all: Object.freeze({
    slug: "block_all",
    label: "Block every crawler",
    description:
      "Refuse every crawler at every path. Suitable for a staging " +
      "environment or a pre-launch site that must not leak into " +
      "search indices.",
    rules: Object.freeze([
      _templateStanza("*", ["/"], 100),
    ]),
  }),
  open_all: Object.freeze({
    slug: "open_all",
    label: "Open to every crawler",
    description:
      "Permit every crawler to fetch every path. The default shape " +
      "for a freshly-launched storefront that wants maximum search " +
      "visibility.",
    rules: Object.freeze([
      _templateStanza("*", [], 100),
    ]),
  }),
  standard_with_admin_disallow: Object.freeze({
    slug: "standard_with_admin_disallow",
    label: "Open with /admin/ disallowed",
    description:
      "Permit every crawler except inside `/admin/`. Keeps the " +
      "operator-only surface out of search indices while leaving " +
      "the storefront fully crawlable.",
    rules: Object.freeze([
      _templateStanza("*", ["/admin/"], 100),
    ]),
  }),
});

// Slugs in catalog declaration order; also the allowlist `_templateSlug`
// checks against.
var TEMPLATE_SLUGS = Object.freeze(Object.keys(PREDEFINED_TEMPLATES));
181
+
182
+ // ---- validators -------------------------------------------------------
183
+
184
/**
 * Validates a value destined for a `User-agent:` line.
 *
 * @param {*} s candidate user-agent
 * @returns {string} `s` unchanged when valid
 * @throws {TypeError} when `s` is not a string of 1..MAX_USER_AGENT_LEN
 *   chars, contains a control byte, or contains a colon
 */
function _userAgent(s) {
  var withinBounds =
    typeof s === "string" && s.length >= 1 && s.length <= MAX_USER_AGENT_LEN;
  if (!withinBounds) {
    throw new TypeError(
      `robotsConfig: user_agent must be a string 1..${MAX_USER_AGENT_LEN} chars`
    );
  }
  // CR / LF (or any other control byte) would break the line-oriented
  // stanza layout.
  if (CONTROL_BYTE_RE.test(s)) {
    throw new TypeError("robotsConfig: user_agent must not contain control bytes");
  }
  // Lines are `key: value`; a colon inside the value would shift the
  // parser onto a different key.
  if (s.includes(":")) {
    throw new TypeError("robotsConfig: user_agent must not contain ':'");
  }
  return s;
}
200
+
201
/**
 * Validates one Allow / Disallow path entry.
 *
 * The only structural requirement is the leading `/`; `*` and `$`
 * wildcards pass through untouched.
 *
 * @param {*} s candidate path
 * @param {string} label field name ("allow" / "disallow") used in messages
 * @returns {string} `s` unchanged when valid
 * @throws {TypeError} when `s` is not a string of 1..MAX_PATH_LEN chars,
 *   contains a control byte, or does not start with `/`
 */
function _pathEntry(s, label) {
  var prefix = "robotsConfig: " + label + " entry ";
  var withinBounds =
    typeof s === "string" && s.length >= 1 && s.length <= MAX_PATH_LEN;
  if (!withinBounds) {
    throw new TypeError(prefix + "must be a string 1.." + MAX_PATH_LEN + " chars");
  }
  if (CONTROL_BYTE_RE.test(s)) {
    throw new TypeError(prefix + "must not contain control bytes");
  }
  if (s[0] !== "/") {
    throw new TypeError(
      prefix + "must be a /-rooted absolute path; got " + JSON.stringify(s)
    );
  }
  return s;
}
221
+
222
/**
 * Validates an allow / disallow list and returns a fresh copy.
 *
 * @param {*} arr candidate list; `null` / `undefined` mean "no entries"
 * @param {string} label field name ("allow" / "disallow") used in messages
 * @returns {string[]} new array holding each validated entry in order
 * @throws {TypeError} when `arr` is not an array, exceeds
 *   MAX_PATHS_PER_RULE entries, or any entry fails `_pathEntry`
 */
function _pathArray(arr, label) {
  if (arr == null) {
    return [];
  }
  if (!Array.isArray(arr)) {
    throw new TypeError("robotsConfig: " + label + " must be an array of paths");
  }
  if (arr.length > MAX_PATHS_PER_RULE) {
    throw new TypeError(
      "robotsConfig: " + label + " must have <= " + MAX_PATHS_PER_RULE + " entries"
    );
  }
  // Array.from (unlike Array.prototype.map) visits holes as `undefined`,
  // so a sparse array is rejected by `_pathEntry` rather than copied
  // through.
  return Array.from(arr, function (entry) {
    return _pathEntry(entry, label);
  });
}
238
+
239
/**
 * Validates an optional crawl delay in seconds.
 *
 * @param {*} n candidate delay; `null` / `undefined` mean "not set"
 * @returns {number|null} the integer delay, or `null` when not set
 * @throws {TypeError} when `n` is not an integer in 0..MAX_CRAWL_DELAY
 */
function _crawlDelay(n) {
  if (n == null) {
    return null;
  }
  if (Number.isInteger(n) && n >= 0 && n <= MAX_CRAWL_DELAY) {
    return n;
  }
  throw new TypeError(
    `robotsConfig: crawl_delay must be an integer 0..${MAX_CRAWL_DELAY} seconds, or null`
  );
}
248
+
249
/**
 * Validates a rule priority, defaulting to 100 when none is supplied.
 *
 * @param {*} n candidate priority; `null` / `undefined` select the default
 * @returns {number} the integer priority
 * @throws {TypeError} when `n` is not an integer in 0..MAX_PRIORITY
 */
function _priority(n) {
  if (n == null) {
    return 100;
  }
  if (Number.isInteger(n) && n >= 0 && n <= MAX_PRIORITY) {
    return n;
  }
  throw new TypeError(
    `robotsConfig: priority must be an integer 0..${MAX_PRIORITY}`
  );
}
258
+
259
/**
 * Validates a rule id before it is used as a query parameter.
 *
 * @param {*} s candidate rule id
 * @returns {string} `s` unchanged when valid
 * @throws {TypeError} when `s` is not a string of 1..MAX_RULE_ID_LEN
 *   chars or contains a control byte
 */
function _ruleId(s) {
  var withinBounds =
    typeof s === "string" && s.length >= 1 && s.length <= MAX_RULE_ID_LEN;
  if (!withinBounds) {
    throw new TypeError(
      `robotsConfig: rule_id must be a string 1..${MAX_RULE_ID_LEN} chars`
    );
  }
  if (CONTROL_BYTE_RE.test(s)) {
    throw new TypeError("robotsConfig: rule_id must not contain control bytes");
  }
  return s;
}
270
+
271
/**
 * Validates a sitemap URL for a `Sitemap:` line.
 *
 * After the length and control-byte checks, the URL is handed to the
 * framework's `safeUrl.parse` with an https-only protocol allowlist; any
 * error it throws is re-raised as a TypeError carrying its message.
 *
 * @param {*} u candidate URL
 * @returns {string} `u` unchanged when valid (the parse result is not used)
 * @throws {TypeError} when `u` is not a string of 1..MAX_SITEMAP_URL_LEN
 *   chars, contains a control byte, or is rejected by `safeUrl.parse`
 */
function _sitemapUrl(u) {
  var withinBounds =
    typeof u === "string" && u.length >= 1 && u.length <= MAX_SITEMAP_URL_LEN;
  if (!withinBounds) {
    throw new TypeError(
      `robotsConfig: sitemap url must be a string 1..${MAX_SITEMAP_URL_LEN} chars`
    );
  }
  if (CONTROL_BYTE_RE.test(u)) {
    throw new TypeError("robotsConfig: sitemap url must not contain control bytes");
  }
  // https-only: a cleartext sitemap location could be rewritten in
  // transit, changing the crawler's view of the site.
  try {
    _b().safeUrl.parse(u, { allowedProtocols: ["https:"] });
  } catch (err) {
    var reason = (err && err.message) || "must be a valid https:// URL";
    throw new TypeError("robotsConfig: sitemap url — " + reason);
  }
  return u;
}
293
+
294
/**
 * Validates the value for the optional `Host:` line — a bare hostname,
 * optionally followed by `:port`.
 *
 * @param {*} s candidate host
 * @returns {string} `s` unchanged when valid
 * @throws {TypeError} when `s` is not a string of 1..MAX_HOST_LEN chars,
 *   contains a control byte, looks like a `scheme://` URL, contains a
 *   path separator or more than one colon, contains characters outside
 *   the hostname class, or carries a port above 65535
 */
function _hostDirective(s) {
  if (typeof s !== "string" || s.length < 1 || s.length > MAX_HOST_LEN) {
    throw new TypeError(
      "robotsConfig: host must be a string 1.." + MAX_HOST_LEN + " chars"
    );
  }
  if (CONTROL_BYTE_RE.test(s)) {
    throw new TypeError("robotsConfig: host must not contain control bytes");
  }
  // Check for a pasted origin URL first. Any `scheme://` value also
  // contains `/`, so this test must run before the generic slash check or
  // its more specific message can never be reached.
  if (s.indexOf("://") !== -1) {
    throw new TypeError(
      "robotsConfig: host must be a bare hostname, not a scheme://host URL"
    );
  }
  // No path component, and at most one colon (the port separator).
  if (s.indexOf("/") !== -1 || s.indexOf(":") !== s.lastIndexOf(":")) {
    throw new TypeError(
      "robotsConfig: host must be a bare hostname (optionally host:port), no scheme / path"
    );
  }
  // Hostname character class — letters, digits, dot, hyphen — plus an
  // optional numeric :port suffix, captured for the range check below.
  var shape = /^[A-Za-z0-9.\-]+(?::([0-9]+))?$/.exec(s);
  if (!shape) {
    throw new TypeError(
      "robotsConfig: host must match hostname (optionally host:port) syntax"
    );
  }
  // A port is a 16-bit number; anything larger cannot be a real port.
  if (shape[1] !== undefined && Number(shape[1]) > 65535) {
    throw new TypeError("robotsConfig: host port must be in the range 0..65535");
  }
  return s;
}
325
+
326
/**
 * Validates a template slug against the TEMPLATE_SLUGS allowlist.
 *
 * @param {*} s candidate slug
 * @returns {string} `s` unchanged when it names a known template
 * @throws {TypeError} when `s` is not a non-empty string or is not one
 *   of TEMPLATE_SLUGS
 */
function _templateSlug(s) {
  if (typeof s !== "string" || s.length < 1) {
    throw new TypeError("robotsConfig: template_slug must be a non-empty string");
  }
  if (TEMPLATE_SLUGS.includes(s)) {
    return s;
  }
  var known = TEMPLATE_SLUGS.join(", ");
  throw new TypeError(
    "robotsConfig: template_slug must be one of " + known + "; got " + JSON.stringify(s)
  );
}
338
+
339
+ // ---- row hydration ----------------------------------------------------
340
+
341
/**
 * Converts a `robots_rules` row into the rule shape the API returns.
 *
 * The JSON path columns are decoded best-effort: unparseable JSON, or
 * JSON that is not an array, yields an empty list rather than throwing.
 * Numeric columns go through `Number()`; `crawl_delay` and `archived_at`
 * stay `null` when the column is null.
 *
 * @param {object|null|undefined} row database row, or a falsy value
 * @returns {object|null} hydrated rule, or `null` for a falsy `row`
 */
function _hydrateRule(row) {
  if (!row) {
    return null;
  }

  // Decodes one JSON list column, falling back to [] on any problem.
  function decodePaths(json) {
    var decoded;
    try {
      decoded = JSON.parse(json || "[]");
    } catch (_e) {
      return [];
    }
    return Array.isArray(decoded) ? decoded : [];
  }

  // Keeps SQL NULL as null; coerces everything else to a number.
  function numberOrNull(value) {
    return value == null ? null : Number(value);
  }

  var allowPaths = decodePaths(row.allow_json);
  var disallowPaths = decodePaths(row.disallow_json);
  return {
    id: row.id,
    user_agent: row.user_agent,
    allow: allowPaths,
    disallow: disallowPaths,
    crawl_delay: numberOrNull(row.crawl_delay),
    priority: Number(row.priority),
    archived_at: numberOrNull(row.archived_at),
    created_at: Number(row.created_at),
    updated_at: Number(row.updated_at),
  };
}
359
+
360
+ // ---- render -----------------------------------------------------------
361
+
362
/**
 * Renders one rule as a robots.txt stanza (no trailing newline).
 *
 * Line order: `User-agent:`, then `Crawl-delay:` when set, then one
 * `Allow:` line per allow entry, then one `Disallow:` line per disallow
 * entry. The caller is responsible for the blank line between stanzas.
 *
 * A rule with no allow and no disallow entries is rendered with an
 * explicit `Allow: /`. Without it the stanza would contain no rule line,
 * and parsers that follow the RFC 9309 group grammar treat consecutive
 * `User-agent:` lines (blank lines included) as one group — so a
 * permissive stanza would silently inherit the rules of whichever stanza
 * is rendered after it. `Allow: /` matches the open-all fallback `render`
 * emits for an empty rule table.
 *
 * @param {object} rule hydrated rule with `user_agent`, `crawl_delay`,
 *   `allow` and `disallow`
 * @returns {string} the stanza's lines joined with "\n"
 */
function _renderRule(rule) {
  var lines = ["User-agent: " + rule.user_agent];
  if (rule.crawl_delay != null) {
    lines.push("Crawl-delay: " + rule.crawl_delay);
  }
  for (var ai = 0; ai < rule.allow.length; ai += 1) {
    lines.push("Allow: " + rule.allow[ai]);
  }
  for (var di = 0; di < rule.disallow.length; di += 1) {
    lines.push("Disallow: " + rule.disallow[di]);
  }
  // Close the group explicitly so it cannot merge with the next stanza.
  if (rule.allow.length === 0 && rule.disallow.length === 0) {
    lines.push("Allow: /");
  }
  return lines.join("\n");
}
379
+
380
+ // ---- factory ----------------------------------------------------------
381
+
382
+ function create(opts) {
383
+ opts = opts || {};
384
+ var query = opts.query;
385
+ if (!query) {
386
+ query = function (sql, params) { return _b().externalDb.query(sql, params); };
387
+ }
388
+
389
// Last timestamp handed out by this factory instance.
var _lastTs = 0;

/**
 * Returns a strictly increasing millisecond timestamp.
 *
 * Uses the wall clock when it has moved past the previous value;
 * otherwise bumps the previous value by one, so several writes made
 * inside the same millisecond still get distinct, ordered timestamps.
 *
 * @returns {number} timestamp greater than every earlier return value
 */
function _monotonicTs() {
  var now = Date.now();
  _lastTs = now > _lastTs ? now : _lastTs + 1;
  return _lastTs;
}
400
+
401
/**
 * Loads one rule by id, archived or not.
 *
 * @param {string} ruleId rule id to look up
 * @returns {Promise<object|null>} hydrated rule, or `null` when no row
 *   matches
 */
async function _getRule(ruleId) {
  var sql = "SELECT * FROM robots_rules WHERE id = ?1 LIMIT 1";
  var result = await query(sql, [ruleId]);
  var firstRow = result.rows[0] || null;
  return _hydrateRule(firstRow);
}
408
+
409
+ // ---- defineRule -----------------------------------------------------
410
+
411
/**
 * Persists a new active rule and returns it as stored.
 *
 * Fields are validated in the order user_agent, allow, disallow,
 * crawl_delay, priority, so the first invalid field decides which
 * TypeError is raised. The row is inserted with `archived_at` NULL and
 * `created_at` / `updated_at` set to the same timestamp.
 *
 * @param {object} input `{ user_agent, allow?, disallow?, crawl_delay?, priority? }`
 * @returns {Promise<object|null>} the freshly inserted rule, re-read by id
 * @throws {TypeError} when `input` is not an object or a field is invalid
 */
async function defineRule(input) {
  if (!input || typeof input !== "object") {
    throw new TypeError("robotsConfig.defineRule: input object required");
  }
  var validUserAgent = _userAgent(input.user_agent);
  var allowPaths = _pathArray(input.allow, "allow");
  var disallowPaths = _pathArray(input.disallow, "disallow");
  // Absent optional fields are normalized to null before validation.
  var rawDelay = input.crawl_delay == null ? null : input.crawl_delay;
  var delay = _crawlDelay(rawDelay);
  var rawPriority = input.priority == null ? null : input.priority;
  var rank = _priority(rawPriority);

  var newId = _b().uuid.v7();
  var now = _monotonicTs();
  var insertSql =
    "INSERT INTO robots_rules " +
    "(id, user_agent, allow_json, disallow_json, crawl_delay, priority, " +
    " archived_at, created_at, updated_at) " +
    "VALUES (?1, ?2, ?3, ?4, ?5, ?6, NULL, ?7, ?7)";
  var insertParams = [
    newId,
    validUserAgent,
    JSON.stringify(allowPaths),
    JSON.stringify(disallowPaths),
    delay,
    rank,
    now,
  ];
  await query(insertSql, insertParams);
  return await _getRule(newId);
}
439
+
440
+ // ---- listRules ------------------------------------------------------
441
+
442
/**
 * Lists active (non-archived) rules, optionally limited to one
 * user-agent, ordered by priority, then user_agent, then created_at
 * (all ascending).
 *
 * @param {object} [input] optional `{ user_agent }` filter
 * @returns {Promise<object[]>} hydrated rules in render order
 * @throws {TypeError} when a supplied `user_agent` fails validation
 */
async function listRules(input) {
  var filter = input || {};
  var where = "archived_at IS NULL";
  var params = [];
  if (filter.user_agent != null) {
    params = [_userAgent(filter.user_agent)];
    where += " AND user_agent = ?1";
  }
  var sql =
    "SELECT * FROM robots_rules WHERE " + where +
    " ORDER BY priority ASC, user_agent ASC, created_at ASC";
  var result = await query(sql, params);
  return Array.from(result.rows, function (row) {
    return _hydrateRule(row);
  });
}
460
+
461
+ // ---- archiveRule ----------------------------------------------------
462
+
463
/**
 * Marks a rule as archived so it no longer appears in `listRules`.
 *
 * Archiving an already-archived rule is a no-op that returns the rule as
 * stored, without issuing an UPDATE.
 *
 * @param {string} ruleId id of the rule to archive
 * @returns {Promise<object|null>} the rule after archiving
 * @throws {TypeError} when `ruleId` is invalid or names no rule
 */
async function archiveRule(ruleId) {
  _ruleId(ruleId);
  var existing = await _getRule(ruleId);
  if (!existing) {
    throw new TypeError(
      "robotsConfig.archiveRule: rule_id " + JSON.stringify(ruleId) + " not found"
    );
  }
  var alreadyArchived = existing.archived_at != null;
  if (alreadyArchived) {
    return existing;
  }
  var now = _monotonicTs();
  var sql = "UPDATE robots_rules SET archived_at = ?1, updated_at = ?1 WHERE id = ?2";
  await query(sql, [now, ruleId]);
  return await _getRule(ruleId);
}
479
+
480
+ // ---- updateRule -----------------------------------------------------
481
+
482
/**
 * Applies a partial update to an active rule.
 *
 * For user_agent, allow, disallow and priority a `null` / `undefined`
 * patch value means "keep the stored value". `crawl_delay` is the one
 * nullable column: it is touched whenever the patch has its own
 * `crawl_delay` key, so an explicit `null` clears it.
 *
 * @param {string} ruleId id of the rule to update
 * @param {object} patch  subset of `{ user_agent, allow, disallow, crawl_delay, priority }`
 * @returns {Promise<object|null>} the rule after the update, re-read by id
 * @throws {TypeError} when `ruleId` / `patch` are invalid, the rule does
 *   not exist, the rule is archived, or a patched field fails validation
 */
async function updateRule(ruleId, patch) {
  _ruleId(ruleId);
  if (!patch || typeof patch !== "object") {
    throw new TypeError("robotsConfig.updateRule: patch object required");
  }
  var existing = await _getRule(ruleId);
  if (!existing) {
    throw new TypeError(
      "robotsConfig.updateRule: rule_id " + JSON.stringify(ruleId) + " not found"
    );
  }
  if (existing.archived_at != null) {
    throw new TypeError(
      "robotsConfig.updateRule: rule_id " + JSON.stringify(ruleId) +
      " is archived; defineRule a fresh rule instead"
    );
  }

  // Resolve each column: the validated patch value when supplied,
  // otherwise the stored value.
  var userAgent = patch.user_agent != null
    ? _userAgent(patch.user_agent)
    : existing.user_agent;
  var allowPaths = patch.allow != null
    ? _pathArray(patch.allow, "allow")
    : existing.allow;
  var disallowPaths = patch.disallow != null
    ? _pathArray(patch.disallow, "disallow")
    : existing.disallow;
  var patchesDelay = Object.prototype.hasOwnProperty.call(patch, "crawl_delay");
  var delay = patchesDelay
    ? _crawlDelay(patch.crawl_delay)
    : existing.crawl_delay;
  var rank = patch.priority != null
    ? _priority(patch.priority)
    : existing.priority;

  var now = _monotonicTs();
  var updateSql =
    "UPDATE robots_rules SET " +
    "user_agent = ?1, allow_json = ?2, disallow_json = ?3, " +
    "crawl_delay = ?4, priority = ?5, updated_at = ?6 " +
    "WHERE id = ?7";
  var updateParams = [
    userAgent,
    JSON.stringify(allowPaths),
    JSON.stringify(disallowPaths),
    delay,
    rank,
    now,
    ruleId,
  ];
  await query(updateSql, updateParams);
  return await _getRule(ruleId);
}
535
+
536
+ // ---- sitemaps -------------------------------------------------------
537
+
538
/**
 * Declares a sitemap URL; adding the same URL again is a no-op.
 *
 * The insert uses INSERT OR IGNORE, so a repeat add leaves the stored
 * `added_at` untouched. The row is then re-read so the returned
 * timestamp is the stored one, not the one generated for this call.
 *
 * @param {string} url absolute https:// sitemap URL
 * @returns {Promise<{url: string, added_at: number}>} the stored row
 * @throws {TypeError} when `url` fails `_sitemapUrl`
 */
async function addSitemap(url) {
  var validUrl = _sitemapUrl(url);
  var candidateTs = _monotonicTs();
  await query(
    "INSERT OR IGNORE INTO robots_sitemaps (url, added_at) VALUES (?1, ?2)",
    [validUrl, candidateTs],
  );
  var lookup = await query(
    "SELECT url, added_at FROM robots_sitemaps WHERE url = ?1 LIMIT 1",
    [validUrl],
  );
  var stored = lookup.rows[0];
  return { url: stored.url, added_at: Number(stored.added_at) };
}
556
+
557
/**
 * Deletes a declared sitemap URL.
 *
 * @param {string} url sitemap URL to remove (validated like an add)
 * @returns {Promise<{removed: boolean}>} `removed` is true when the
 *   query result reports a positive `rowCount`
 * @throws {TypeError} when `url` fails `_sitemapUrl`
 */
async function removeSitemap(url) {
  var validUrl = _sitemapUrl(url);
  var result = await query("DELETE FROM robots_sitemaps WHERE url = ?1", [validUrl]);
  // A missing rowCount is treated as zero rows deleted.
  var deletedCount = Number(result.rowCount || 0);
  return { removed: deletedCount > 0 };
}
565
+
566
/**
 * Lists every declared sitemap URL, oldest first (ties broken by URL).
 *
 * @returns {Promise<Array<{url: string, added_at: number}>>} stored rows
 *   with `added_at` coerced to a number
 */
async function listSitemaps() {
  var result = await query(
    "SELECT url, added_at FROM robots_sitemaps ORDER BY added_at ASC, url ASC",
    [],
  );
  return Array.from(result.rows, function (row) {
    return { url: row.url, added_at: Number(row.added_at) };
  });
}
577
+
578
+ // ---- host directive -------------------------------------------------
579
+
580
/**
 * Sets or replaces the canonical-host hint.
 *
 * Writes the singleton row (id = 1) with an upsert, so a repeat call
 * overwrites the previous host and timestamp.
 *
 * @param {string} host bare hostname, optionally `host:port`
 * @returns {Promise<{host: string, updated_at: number}>} the values
 *   written (not re-read from the table)
 * @throws {TypeError} when `host` fails `_hostDirective`
 */
async function setHostDirective(host) {
  var validHost = _hostDirective(host);
  var now = _monotonicTs();
  var upsertSql =
    "INSERT INTO robots_host_directive (id, host, updated_at) VALUES (1, ?1, ?2) " +
    "ON CONFLICT(id) DO UPDATE SET host = excluded.host, updated_at = excluded.updated_at";
  await query(upsertSql, [validHost, now]);
  return { host: validHost, updated_at: now };
}
593
+
594
/**
 * Reads the canonical-host hint from the singleton row (id = 1).
 *
 * @returns {Promise<{host: string, updated_at: number}|null>} the stored
 *   hint, or `null` when no host has been set
 */
async function getHostDirective() {
  var result = await query(
    "SELECT host, updated_at FROM robots_host_directive WHERE id = 1 LIMIT 1",
    [],
  );
  var stored = result.rows[0];
  return stored
    ? { host: stored.host, updated_at: Number(stored.updated_at) }
    : null;
}
603
+
604
+ // ---- render ---------------------------------------------------------
605
+
606
/**
 * Produces the robots.txt text for the current configuration.
 *
 * Layout: one section per active rule (in `listRules` order), then the
 * `Host:` line when a host is set, then all `Sitemap:` lines as one
 * section. Sections are separated by a single blank line and the output
 * ends with one "\n". When there are no active rules, an open-all stanza
 * (`User-agent: *` / `Allow: /`) is emitted in their place.
 *
 * `origin_url` is required and must pass the framework's https-only
 * `safeUrl.parse`, but it is not embedded in the output.
 *
 * @param {object} input `{ origin_url }`
 * @returns {Promise<string>} the rendered robots.txt text
 * @throws {TypeError} when `input` is not an object or `origin_url` is
 *   missing, empty, or rejected by `safeUrl.parse`
 */
async function render(input) {
  if (!input || typeof input !== "object") {
    throw new TypeError("robotsConfig.render: input object required");
  }
  var origin = input.origin_url;
  if (typeof origin !== "string" || !origin.length) {
    throw new TypeError("robotsConfig.render: origin_url must be a non-empty string");
  }
  try {
    _b().safeUrl.parse(origin, { allowedProtocols: ["https:"] });
  } catch (err) {
    var reason = (err && err.message) || "must be a valid https:// URL";
    throw new TypeError("robotsConfig.render: origin_url — " + reason);
  }

  var activeRules = await listRules({});
  var declaredSitemaps = await listSitemaps();
  var hostHint = await getHostDirective();

  // "Table empty" fallback: still serve a well-formed, open robots.txt.
  var sections = activeRules.length === 0
    ? ["User-agent: *\nAllow: /"]
    : activeRules.map(function (rule) { return _renderRule(rule); });

  if (hostHint) {
    sections.push("Host: " + hostHint.host);
  }

  if (declaredSitemaps.length > 0) {
    var sitemapBlock = declaredSitemaps
      .map(function (entry) { return "Sitemap: " + entry.url; })
      .join("\n");
    sections.push(sitemapBlock);
  }

  return sections.join("\n\n") + "\n";
}
663
+
664
+ // ---- templates ------------------------------------------------------
665
+
666
/**
 * Returns a mutable copy of the template catalog, keyed by slug.
 *
 * Each template, its rules array, each stanza and each stanza's allow /
 * disallow arrays are fresh objects, so callers can edit or serialize
 * the result without touching the module-level catalog.
 *
 * @returns {object} `{ [slug]: { slug, label, description, rules[] } }`
 */
function predefinedTemplates() {
  var snapshot = {};
  TEMPLATE_SLUGS.forEach(function (slug) {
    var source = PREDEFINED_TEMPLATES[slug];
    var copiedRules = source.rules.map(function (stanza) {
      return {
        user_agent: stanza.user_agent,
        allow: stanza.allow.slice(),
        disallow: stanza.disallow.slice(),
        priority: stanza.priority,
      };
    });
    snapshot[slug] = {
      slug: source.slug,
      label: source.label,
      description: source.description,
      rules: copiedRules,
    };
  });
  return snapshot;
}
694
+
695
/**
 * Replaces the active rule set with a predefined template.
 *
 * Every currently active rule is archived one at a time, then each
 * template stanza is written through `defineRule` in catalog order.
 * Archived rows stay in the table; only the new rules are active.
 * NOTE(review): the archive and write steps are separate awaited calls
 * with no transaction visible here — a failure part-way through would
 * leave a partially applied template; confirm whether callers guard
 * against that.
 *
 * @param {object} input `{ template_slug }`
 * @returns {Promise<{template_slug: string, rules: object[]}>} the slug
 *   applied and the rules written, in write order
 * @throws {TypeError} when `input` is not an object or the slug is unknown
 */
async function applyTemplate(input) {
  if (!input || typeof input !== "object") {
    throw new TypeError("robotsConfig.applyTemplate: input object required");
  }
  var slug = _templateSlug(input.template_slug);
  var template = PREDEFINED_TEMPLATES[slug];

  // Retire the current baseline first.
  var previouslyActive = await listRules({});
  for (var retired of previouslyActive) {
    await archiveRule(retired.id);
  }

  // Then write the template's stanzas, keeping their priorities.
  var written = [];
  for (var stanza of template.rules) {
    written.push(await defineRule({
      user_agent: stanza.user_agent,
      allow: stanza.allow,
      disallow: stanza.disallow,
      priority: stanza.priority,
    }));
  }
  return { template_slug: slug, rules: written };
}
726
+
727
+ return {
728
+ defineRule: defineRule,
729
+ listRules: listRules,
730
+ archiveRule: archiveRule,
731
+ updateRule: updateRule,
732
+ addSitemap: addSitemap,
733
+ removeSitemap: removeSitemap,
734
+ listSitemaps: listSitemaps,
735
+ setHostDirective: setHostDirective,
736
+ getHostDirective: getHostDirective,
737
+ render: render,
738
+ predefinedTemplates: predefinedTemplates,
739
+ applyTemplate: applyTemplate,
740
+ TEMPLATE_SLUGS: TEMPLATE_SLUGS,
741
+ };
742
+ }
743
+
744
+ module.exports = {
745
+ create: create,
746
+ TEMPLATE_SLUGS: TEMPLATE_SLUGS,
747
+ MAX_USER_AGENT_LEN: MAX_USER_AGENT_LEN,
748
+ MAX_PATH_LEN: MAX_PATH_LEN,
749
+ MAX_PATHS_PER_RULE: MAX_PATHS_PER_RULE,
750
+ MAX_SITEMAP_URL_LEN: MAX_SITEMAP_URL_LEN,
751
+ MAX_HOST_LEN: MAX_HOST_LEN,
752
+ MAX_CRAWL_DELAY: MAX_CRAWL_DELAY,
753
+ };