@blamejs/blamejs-shop 0.0.66 → 0.0.70

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. package/CHANGELOG.md +8 -0
  2. package/lib/assembly-instructions.js +777 -0
  3. package/lib/auto-replenish.js +933 -0
  4. package/lib/click-and-collect.js +711 -0
  5. package/lib/clickstream.js +713 -0
  6. package/lib/customer-activity.js +862 -0
  7. package/lib/customer-notes.js +712 -0
  8. package/lib/customer-risk-profile.js +593 -0
  9. package/lib/customer-surveys.js +1012 -0
  10. package/lib/damage-photos.js +473 -0
  11. package/lib/dropship-forwarding.js +645 -0
  12. package/lib/email-templates.js +817 -0
  13. package/lib/index.js +35 -0
  14. package/lib/inventory-allocations.js +559 -0
  15. package/lib/inventory-writeoffs.js +636 -0
  16. package/lib/knowledge-base.js +1104 -0
  17. package/lib/locale-router.js +1077 -0
  18. package/lib/operator-roles.js +768 -0
  19. package/lib/order-escalation.js +951 -0
  20. package/lib/order-ratings.js +495 -0
  21. package/lib/order-tags.js +944 -0
  22. package/lib/packing-slips.js +810 -0
  23. package/lib/pixel-events.js +995 -0
  24. package/lib/print-queue.js +681 -0
  25. package/lib/product-qa.js +749 -0
  26. package/lib/promo-bundles.js +835 -0
  27. package/lib/push-notifications.js +937 -0
  28. package/lib/refund-automation.js +853 -0
  29. package/lib/reorder-reminders.js +798 -0
  30. package/lib/robots-config.js +753 -0
  31. package/lib/seller-signup.js +1052 -0
  32. package/lib/sitemap-generator.js +717 -0
  33. package/lib/subscription-gifts.js +710 -0
  34. package/lib/tax-cert-renewals.js +632 -0
  35. package/lib/tier-benefits.js +776 -0
  36. package/lib/vendor/MANIFEST.json +2 -2
  37. package/lib/vendor/blamejs/CHANGELOG.md +2 -0
  38. package/lib/vendor/blamejs/api-snapshot.json +2 -2
  39. package/lib/vendor/blamejs/lib/metrics.js +68 -4
  40. package/lib/vendor/blamejs/package.json +1 -1
  41. package/lib/vendor/blamejs/release-notes/v0.12.5.json +40 -0
  42. package/lib/wishlist-alerts.js +842 -0
  43. package/lib/wishlist-sharing.js +718 -0
  44. package/package.json +1 -1
@@ -0,0 +1,713 @@
1
+ "use strict";
2
+ /**
3
+ * @module shop.clickstream
4
+ * @title Clickstream — server-side page-event capture for storefront
5
+ *
6
+ * @intro
7
+ * Captures page views + button clicks + form submits + scroll depth
8
+ * + video engagement + dwell time without a third-party JS tag.
9
+ * Distinct from `analytics` (which carries the operator-relevant
10
+ * funnel: PDP view, cart add, checkout) — this is the explicit
11
+ * page-event layer underneath. Operators wire `recordPageView` into
12
+ * the page handler and `recordEvent` into the form/fetch beacon
13
+ * endpoint; the same `query` adapter the rest of the data layer
14
+ * uses persists each row to D1 (or any sqlite-shaped backend during
15
+ * tests).
16
+ *
17
+ * Three-tier validation: this primitive is a HOT-PATH observability
18
+ * sink. The write entry points (`recordPageView`, `recordEvent`)
19
+ * drop-silent on malformed input — a throw here would crash the
20
+ * request that triggered the event and lose the page render that
21
+ * the operator was trying to instrument. The READ entry points
22
+ * (`sessionPath`, `topPages`, `topClicks`, `funnelAnalysis`,
23
+ * `bouncerate`, `dwellByPage`, `cleanupOlderThan`) throw on bad
24
+ * input because they're operator-driven dashboard / cron call sites
25
+ * where a typo SHOULD surface loudly.
26
+ *
27
+ * Surface:
28
+ *
29
+ * clickstream.recordPageView({ session_id, path, referrer?,
30
+ * ua_class?, customer_id?, occurred_at? })
31
+ * → { id, occurred_at } | null (null on drop-silent rejection)
32
+ *
33
+ * clickstream.recordEvent({ session_id, kind, element?, payload?,
34
+ * page_path, occurred_at? })
35
+ * → { id, occurred_at } | null
36
+ * kind ∈ { click, form_submit, form_abandon,
37
+ * video_play, video_complete, scroll_depth, dwell }
38
+ *
39
+ * clickstream.sessionPath({ session_id })
40
+ * → [{ kind: 'pageview'|<event_kind>, path (pageviews) | page_path (events), ...row, occurred_at }]
41
+ * chronologically sorted; merges pageviews + events into one
42
+ * timeline the operator can replay.
43
+ *
44
+ * clickstream.topPages({ from, to, limit })
45
+ * → [{ path, views, unique_sessions }]
46
+ *
47
+ * clickstream.topClicks({ from, to, limit, page_path? })
48
+ * → [{ element, page_path, clicks }]
49
+ *
50
+ * clickstream.funnelAnalysis({ steps, from, to })
51
+ * → { steps: [{ path_pattern, sessions, retained_pct }], total_sessions }
52
+ * steps[] are glob-ish path patterns (literal path OR a path
53
+ * ending in `/*` for prefix match). Retention is computed per
54
+ * session: a session counts for step N if it visited a path
55
+ * matching step N AT OR AFTER its earliest visit to step N-1.
56
+ *
57
+ * clickstream.bouncerate({ from, to })
58
+ * → { single_page_sessions, total_sessions, bouncerate }
59
+ * bouncerate = single_page_sessions / total_sessions, clamped
60
+ * to [0, 1]. A session counts as single-page if all its
61
+ * pageviews share the same path.
62
+ *
63
+ * clickstream.dwellByPage({ from, to })
64
+ * → [{ page_path, samples, avg_ms, p95_ms }]
65
+ * derived from `dwell` events (payload.ms). p95 is a sorted-
66
+ * nearest-rank percentile (no interpolation) — the cheapest
67
+ * shape that's still operator-meaningful for this volume.
68
+ *
69
+ * clickstream.cleanupOlderThan(days)
70
+ * → { pageviews_deleted, events_deleted }
71
+ *
72
+ * Privacy posture: `session_id` and `customer_id` are SHA3-512
73
+ * namespace-hashed (`clickstream-session` / `clickstream-customer`)
74
+ * before the row reaches the database. Raw values never persist;
75
+ * `sessionPath({ session_id })` re-hashes its argument under the
76
+ * same namespace so the operator hands in the raw cookie value
77
+ * they have on hand. The path columns are stripped of any query
78
+ * string at the write site so a `?email=alice@example.com` in the
79
+ * URL never reaches the table.
80
+ *
81
+ * Composes:
82
+ * - `b.crypto.namespaceHash` — SHA3-512 hash of session + customer
83
+ * identifiers under the clickstream
84
+ * namespaces.
85
+ * - `b.uuid.v7` — monotonic-lex row ids so the
86
+ * default `ORDER BY id` matches
87
+ * `ORDER BY occurred_at` without an
88
+ * explicit tiebreaker.
89
+ *
90
+ * Storage: `migrations-d1/0161_clickstream.sql`.
91
+ *
92
+ * @primitive clickstream
93
+ * @related shop.analytics, b.crypto, b.uuid
94
+ */
95
+
96
// Cached reference to the package root module (`./index`); stays undefined
// until `_b()` is called for the first time.
var bShop;

/**
 * Return the `framework` object exposed by the package root.
 *
 * `./index` is required on the first call only; later calls reuse the
 * cached module reference.
 */
function _b() {
  if (bShop) {
    return bShop.framework;
  }
  bShop = require("./index");
  return bShop.framework;
}
101
+
102
+ // ---- constants ----------------------------------------------------------
103
+
104
// Namespaces handed to `namespaceHash` for the two hashed identifier columns.
var SESSION_NAMESPACE = "clickstream-session";
var CUSTOMER_NAMESPACE = "clickstream-customer";

// Accepted `ua_class` values for page views.
var UA_CLASSES = Object.freeze([
  "desktop",
  "mobile",
  "tablet",
  "bot",
  "other",
]);

// Accepted `kind` values for intra-page events.
var EVENT_KINDS = Object.freeze([
  "click",
  "form_submit",
  "form_abandon",
  "video_play",
  "video_complete",
  "scroll_depth",
  "dwell",
]);

// Upper bounds on caller-supplied string lengths (characters) and on the
// JSON-encoded event payload (bytes).
var MAX_PATH_LEN = 2048;
var MAX_REFERRER_LEN = 2048;
var MAX_ELEMENT_LEN = 256;
var MAX_SESSION_ID_LEN = 512;
var MAX_CUSTOMER_ID_LEN = 512;
var MAX_PAYLOAD_BYTES = 4096;

// Bounds for the read-side arguments.
var DEFAULT_TOP_LIMIT = 10;
var MAX_TOP_LIMIT = 100;
var MAX_FUNNEL_STEPS = 16;
var MAX_CLEANUP_DAYS = 5 * 365;
var MIN_CLEANUP_DAYS = 1;

// Query-window sizes in milliseconds: the widest window a read accepts
// (365 days) and the window used when `from` is omitted (30 days).
var ONE_YEAR_MS = 365 * 86400 * 1000;
var DEFAULT_WINDOW_MS = 30 * 86400 * 1000;

// Shapes that look like raw PII (email address, dotted-quad IPv4,
// colon-separated hex groups). Values matching any of them are treated as
// "the caller handed us an un-hashed identifier" and refused.
var RAW_EMAIL_RE = /[^\s@]+@[^\s@]+\.[^\s@]+/;
var RAW_IPV4_RE = /\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b/;
var RAW_IPV6_RE = /(?:[0-9a-fA-F]{1,4}:){2,}[0-9a-fA-F]{0,4}/;
137
+
138
// ---- monotonic clock ----------------------------------------------------
//
// Default timestamps for page views and events come from `_now()`. The
// value is wall-clock epoch milliseconds, nudged forward by one whenever
// the wall clock has not advanced past the previously issued value, so
// successive calls within this module instance always return strictly
// increasing integers. A burst of same-millisecond writes therefore still
// sorts deterministically by `occurred_at`.
var _lastTs = 0;

/**
 * Issue the next timestamp.
 *
 * @returns {number} epoch milliseconds, strictly greater than the value
 *   returned by the previous call.
 */
function _now() {
  var wall = Date.now();
  _lastTs = wall > _lastTs ? wall : _lastTs + 1;
  return _lastTs;
}
153
+
154
// ---- drop-silent input checks (hot-path writes) ------------------------
//
// recordPageView / recordEvent run inside the request lifecycle, so bad
// input must not raise. The predicates below (`_badString`,
// `_looksLikeRawPii`) return true when a value is unusable; the write
// paths respond by returning null instead of throwing.

/**
 * Report whether `value` is unusable as a bounded string field.
 *
 * @param {*} value - candidate field value
 * @param {number} maxLen - maximum accepted length
 * @returns {boolean} true when `value` is not a string, is empty, or is
 *   longer than `maxLen`; false otherwise.
 */
function _badString(value, maxLen) {
  return typeof value !== "string" ||
    value.length === 0 ||
    value.length > maxLen;
}
168
+
169
/**
 * Report whether `value` contains something shaped like raw PII.
 *
 * Non-strings and the empty string are never flagged. Any other string is
 * flagged when it matches the email, IPv4, or IPv6 pattern.
 *
 * @param {*} value - candidate field value
 * @returns {boolean} true when one of the raw-PII patterns matches.
 */
function _looksLikeRawPii(value) {
  if (typeof value !== "string" || value === "") {
    return false;
  }
  return RAW_EMAIL_RE.test(value) ||
    RAW_IPV4_RE.test(value) ||
    RAW_IPV6_RE.test(value);
}
176
+
177
/**
 * Reduce a request path to its path-only prefix.
 *
 * Everything from the first `?` or `#` onwards is discarded, so query
 * parameters and fragments (which may carry PII such as `?email=`) never
 * reach the table and aggregates key on the bare path.
 *
 * @param {*} p - raw path as seen on the request
 * @returns {string|null} the stripped path, or null when `p` is not a
 *   string, nothing is left after stripping, or the stripped path is
 *   longer than MAX_PATH_LEN.
 */
function _normalizePath(p) {
  if (typeof p !== "string") {
    return null;
  }
  // Cutting at the first `?` and then at the first `#` of what remains
  // yields the text before whichever of the two appears first.
  var stop = p.search(/[?#]/);
  var bare = stop === -1 ? p : p.slice(0, stop);
  if (bare.length === 0 || bare.length > MAX_PATH_LEN) {
    return null;
  }
  return bare;
}
191
+
192
+ // ---- throw-on-bad-input checks (operator-driven reads) -----------------
193
+
194
/**
 * Assert that `n` is usable as an epoch-millisecond timestamp.
 *
 * @param {*} n - candidate timestamp
 * @param {string} label - argument name used in the error message
 * @throws {TypeError} when `n` is not a non-negative integer.
 */
function _epochMs(n, label) {
  if (Number.isInteger(n) && n >= 0) {
    return;
  }
  throw new TypeError("clickstream: " + label + " must be a non-negative integer (epoch ms)");
}
199
+
200
/**
 * Validate and default the `{ from, to }` query window of a read call.
 *
 * A missing `to` defaults to the current time; a missing `from` defaults
 * to DEFAULT_WINDOW_MS before the current time.
 *
 * @param {object} opts - caller options; `from` / `to` are epoch ms
 * @param {string} label - method name used in error messages
 * @returns {{from: number, to: number}} the resolved window
 * @throws {TypeError} when `opts` is not an object, a bound is not a
 *   non-negative integer, `from` is not strictly before `to`, or the
 *   window is wider than ONE_YEAR_MS.
 */
function _resolveWindow(opts, label) {
  if (opts === null || opts === undefined || typeof opts !== "object") {
    throw new TypeError("clickstream." + label + ": input object required");
  }

  var nowMs = Date.now();
  var from = opts.from;
  if (from == null) {
    from = nowMs - DEFAULT_WINDOW_MS;
  }
  var to = opts.to;
  if (to == null) {
    to = nowMs;
  }

  _epochMs(from, "from");
  _epochMs(to, "to");

  if (!(from < to)) {
    throw new TypeError("clickstream." + label + ": from must be strictly less than to");
  }
  var span = to - from;
  if (span > ONE_YEAR_MS) {
    throw new TypeError("clickstream." + label + ": window (to - from) must be ≤ 1 year");
  }
  return { from: from, to: to };
}
217
+
218
/**
 * Assert that a result-size limit is an integer within `[1, max]`.
 *
 * @param {*} n - candidate limit
 * @param {string} label - method name used in the error message
 * @param {number} [max] - upper bound; a falsy value falls back to
 *   MAX_TOP_LIMIT
 * @throws {TypeError} when `n` is not an integer in range.
 */
function _limit(n, label, max) {
  var ceiling = max || MAX_TOP_LIMIT;
  var inRange = Number.isInteger(n) && n >= 1 && n <= ceiling;
  if (!inRange) {
    throw new TypeError("clickstream." + label + ": limit must be an integer in [1, " + ceiling + "]");
  }
}
224
+
225
+ // ---- factory ------------------------------------------------------------
226
+
227
+ function create(opts) {
228
+ opts = opts || {};
229
+ var query = opts.query;
230
+ if (!query) {
231
+ query = function (sql, params) { return _b().externalDb.query(sql, params); };
232
+ }
233
+
234
// Hash helpers. The framework handle is looked up on every call, so
// `_b()` is first invoked when a hash is actually needed rather than
// when this module is loaded.

// Hash a raw session identifier under the clickstream session namespace.
function _sessionHash(raw) {
  var hasher = _b().crypto;
  return hasher.namespaceHash(SESSION_NAMESPACE, raw);
}
237
// Hash a raw customer identifier under the clickstream customer namespace.
function _customerHash(raw) {
  var hasher = _b().crypto;
  return hasher.namespaceHash(CUSTOMER_NAMESPACE, raw);
}
238
+
239
+ return {
240
+
241
+ // ---- write paths (drop-silent on bad input) ------------------------
242
+
243
+ // Record a single server-rendered page view. The session_id and
244
+ // customer_id are hashed at the write site; raw email / IP
245
+ // shapes are refused by dropping the write. Query string is
246
+ // stripped from `path` before persistence.
247
+ recordPageView: async function (input) {
248
+ if (!input || typeof input !== "object") return null;
249
+ if (_badString(input.session_id, MAX_SESSION_ID_LEN)) return null;
250
+ if (_looksLikeRawPii(input.session_id)) return null;
251
+
252
+ var path = _normalizePath(input.path);
253
+ if (path == null) return null;
254
+
255
+ var referrer = null;
256
+ if (input.referrer != null) {
257
+ if (_badString(input.referrer, MAX_REFERRER_LEN)) return null;
258
+ if (_looksLikeRawPii(input.referrer)) return null;
259
+ referrer = input.referrer;
260
+ }
261
+
262
+ var uaClass = input.ua_class == null ? "other" : input.ua_class;
263
+ if (UA_CLASSES.indexOf(uaClass) === -1) return null;
264
+
265
+ var customerIdHash = null;
266
+ if (input.customer_id != null) {
267
+ if (_badString(input.customer_id, MAX_CUSTOMER_ID_LEN)) return null;
268
+ if (_looksLikeRawPii(input.customer_id)) return null;
269
+ customerIdHash = _customerHash(input.customer_id);
270
+ }
271
+
272
+ var occurredAt;
273
+ if (input.occurred_at == null) {
274
+ occurredAt = _now();
275
+ } else if (Number.isInteger(input.occurred_at) && input.occurred_at >= 0) {
276
+ occurredAt = input.occurred_at;
277
+ } else {
278
+ return null;
279
+ }
280
+
281
+ var sessionHash = _sessionHash(input.session_id);
282
+ var id = _b().uuid.v7();
283
+ try {
284
+ await query(
285
+ "INSERT INTO clickstream_pageviews " +
286
+ "(id, session_id_hash, path, referrer, ua_class, customer_id_hash, occurred_at) " +
287
+ "VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)",
288
+ [id, sessionHash, path, referrer, uaClass, customerIdHash, occurredAt],
289
+ );
290
+ } catch (_e) { return null; /* drop-silent — by design; write-site sink swallow */ }
291
+ return { id: id, occurred_at: occurredAt };
292
+ },
293
+
294
+ // Record a single intra-page event. Same drop-silent posture as
295
+ // recordPageView — a malformed beacon never crashes the request.
296
+ recordEvent: async function (input) {
297
+ if (!input || typeof input !== "object") return null;
298
+ if (_badString(input.session_id, MAX_SESSION_ID_LEN)) return null;
299
+ if (_looksLikeRawPii(input.session_id)) return null;
300
+
301
+ if (typeof input.kind !== "string") return null;
302
+ if (EVENT_KINDS.indexOf(input.kind) === -1) return null;
303
+
304
+ var pagePath = _normalizePath(input.page_path);
305
+ if (pagePath == null) return null;
306
+
307
+ var element = null;
308
+ if (input.element != null) {
309
+ if (_badString(input.element, MAX_ELEMENT_LEN)) return null;
310
+ if (_looksLikeRawPii(input.element)) return null;
311
+ element = input.element;
312
+ }
313
+
314
+ // payload — JSON-encode in-process so the size bound checks the
315
+ // encoded bytes. `null` / `undefined` defaults to `{}`.
316
+ var payloadInput = input.payload == null ? {} : input.payload;
317
+ if (typeof payloadInput !== "object" || Array.isArray(payloadInput)) return null;
318
+ var payloadJson;
319
+ try { payloadJson = JSON.stringify(payloadInput); }
320
+ catch (_e) { return null; /* drop-silent — by design; unserialisable payload from the wire */ }
321
+ if (Buffer.byteLength(payloadJson, "utf8") > MAX_PAYLOAD_BYTES) return null;
322
+
323
+ var occurredAt;
324
+ if (input.occurred_at == null) {
325
+ occurredAt = _now();
326
+ } else if (Number.isInteger(input.occurred_at) && input.occurred_at >= 0) {
327
+ occurredAt = input.occurred_at;
328
+ } else {
329
+ return null;
330
+ }
331
+
332
+ var sessionHash = _sessionHash(input.session_id);
333
+ var id = _b().uuid.v7();
334
+ try {
335
+ await query(
336
+ "INSERT INTO clickstream_events " +
337
+ "(id, session_id_hash, kind, element, payload_json, page_path, occurred_at) " +
338
+ "VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)",
339
+ [id, sessionHash, input.kind, element, payloadJson, pagePath, occurredAt],
340
+ );
341
+ } catch (_e) { return null; /* drop-silent — by design; write-site sink swallow */ }
342
+ return { id: id, occurred_at: occurredAt };
343
+ },
344
+
345
+ // ---- read paths (throw on bad input) -------------------------------
346
+
347
+ // Full per-session timeline. Operator hands in the raw session_id
348
+ // they have on the request; the primitive hashes it under the
349
+ // same namespace as the write site and returns the merged
350
+ // pageview + event stream sorted ascending by occurred_at.
351
+ sessionPath: async function (input) {
352
+ if (!input || typeof input !== "object") {
353
+ throw new TypeError("clickstream.sessionPath: input object required");
354
+ }
355
+ if (typeof input.session_id !== "string" || input.session_id.length === 0) {
356
+ throw new TypeError("clickstream.sessionPath: session_id required");
357
+ }
358
+ if (input.session_id.length > MAX_SESSION_ID_LEN) {
359
+ throw new TypeError("clickstream.sessionPath: session_id exceeds " + MAX_SESSION_ID_LEN + " chars");
360
+ }
361
+ if (_looksLikeRawPii(input.session_id)) {
362
+ throw new TypeError("clickstream.sessionPath: session_id looks like raw PII");
363
+ }
364
+ var hash = _sessionHash(input.session_id);
365
+
366
+ var pv = await query(
367
+ "SELECT id, path, referrer, ua_class, customer_id_hash, occurred_at " +
368
+ " FROM clickstream_pageviews " +
369
+ " WHERE session_id_hash = ?1 " +
370
+ " ORDER BY occurred_at ASC, id ASC",
371
+ [hash],
372
+ );
373
+ var ev = await query(
374
+ "SELECT id, kind, element, payload_json, page_path, occurred_at " +
375
+ " FROM clickstream_events " +
376
+ " WHERE session_id_hash = ?1 " +
377
+ " ORDER BY occurred_at ASC, id ASC",
378
+ [hash],
379
+ );
380
+
381
+ var rows = [];
382
+ for (var i = 0; i < pv.rows.length; i += 1) {
383
+ var p = pv.rows[i];
384
+ rows.push({
385
+ kind: "pageview",
386
+ id: p.id,
387
+ path: p.path,
388
+ referrer: p.referrer,
389
+ ua_class: p.ua_class,
390
+ customer_id_hash: p.customer_id_hash,
391
+ occurred_at: Number(p.occurred_at) || 0,
392
+ });
393
+ }
394
+ for (var j = 0; j < ev.rows.length; j += 1) {
395
+ var e = ev.rows[j];
396
+ var payload;
397
+ try { payload = JSON.parse(e.payload_json || "{}"); }
398
+ catch (_e) { payload = {}; /* drop-silent — by design; stored shape is primitive-owned */ }
399
+ rows.push({
400
+ kind: e.kind,
401
+ id: e.id,
402
+ element: e.element,
403
+ payload: payload,
404
+ page_path: e.page_path,
405
+ occurred_at: Number(e.occurred_at) || 0,
406
+ });
407
+ }
408
+ rows.sort(function (a, b) {
409
+ if (a.occurred_at !== b.occurred_at) return a.occurred_at - b.occurred_at;
410
+ if (a.id < b.id) return -1;
411
+ if (a.id > b.id) return 1;
412
+ return 0;
413
+ });
414
+ return rows;
415
+ },
416
+
417
+ // Top-N visited paths in the window. Reports both total view
418
+ // count and unique-session count (the second is the operator-
419
+ // useful "reach" metric — a single bot hammering /home doesn't
420
+ // skew it).
421
+ topPages: async function (windowOpts) {
422
+ var w = _resolveWindow(windowOpts, "topPages");
423
+ var limit = (windowOpts && windowOpts.limit) == null ? DEFAULT_TOP_LIMIT : windowOpts.limit;
424
+ _limit(limit, "topPages");
425
+ var r = await query(
426
+ "SELECT path, COUNT(*) AS views, COUNT(DISTINCT session_id_hash) AS unique_sessions " +
427
+ " FROM clickstream_pageviews " +
428
+ " WHERE occurred_at >= ?1 AND occurred_at < ?2 " +
429
+ " GROUP BY path " +
430
+ " ORDER BY views DESC, path ASC " +
431
+ " LIMIT ?3",
432
+ [w.from, w.to, limit],
433
+ );
434
+ return r.rows.map(function (row) {
435
+ return {
436
+ path: row.path,
437
+ views: Number(row.views) || 0,
438
+ unique_sessions: Number(row.unique_sessions) || 0,
439
+ };
440
+ });
441
+ },
442
+
443
+ // Top-N click elements in the window. Optional `page_path`
444
+ // narrows the aggregate to a single page (operator inspecting
445
+ // a specific landing-page CTA mix).
446
+ topClicks: async function (windowOpts) {
447
+ var w = _resolveWindow(windowOpts, "topClicks");
448
+ var limit = (windowOpts && windowOpts.limit) == null ? DEFAULT_TOP_LIMIT : windowOpts.limit;
449
+ _limit(limit, "topClicks");
450
+ var sql, params;
451
+ if (windowOpts && windowOpts.page_path != null) {
452
+ if (typeof windowOpts.page_path !== "string" || windowOpts.page_path.length === 0) {
453
+ throw new TypeError("clickstream.topClicks: page_path must be a non-empty string when provided");
454
+ }
455
+ if (windowOpts.page_path.length > MAX_PATH_LEN) {
456
+ throw new TypeError("clickstream.topClicks: page_path exceeds " + MAX_PATH_LEN + " chars");
457
+ }
458
+ sql =
459
+ "SELECT element, page_path, COUNT(*) AS clicks " +
460
+ " FROM clickstream_events " +
461
+ " WHERE kind = 'click' " +
462
+ " AND element IS NOT NULL " +
463
+ " AND occurred_at >= ?1 AND occurred_at < ?2 " +
464
+ " AND page_path = ?3 " +
465
+ " GROUP BY element, page_path " +
466
+ " ORDER BY clicks DESC, element ASC " +
467
+ " LIMIT ?4";
468
+ params = [w.from, w.to, windowOpts.page_path, limit];
469
+ } else {
470
+ sql =
471
+ "SELECT element, page_path, COUNT(*) AS clicks " +
472
+ " FROM clickstream_events " +
473
+ " WHERE kind = 'click' " +
474
+ " AND element IS NOT NULL " +
475
+ " AND occurred_at >= ?1 AND occurred_at < ?2 " +
476
+ " GROUP BY element, page_path " +
477
+ " ORDER BY clicks DESC, element ASC " +
478
+ " LIMIT ?3";
479
+ params = [w.from, w.to, limit];
480
+ }
481
+ var r = await query(sql, params);
482
+ return r.rows.map(function (row) {
483
+ return {
484
+ element: row.element,
485
+ page_path: row.page_path,
486
+ clicks: Number(row.clicks) || 0,
487
+ };
488
+ });
489
+ },
490
+
491
+ // Multi-step funnel retention. Steps are path patterns — either
492
+ // a literal path or a path ending in `/*` for prefix match. A
493
+ // session is retained for step N when it visited a matching
494
+ // path at OR AFTER its earliest visit to step N-1. The first
495
+ // step seeds `total_sessions` (the denominator every retained
496
+ // percentage uses).
497
+ funnelAnalysis: async function (input) {
498
+ if (!input || typeof input !== "object") {
499
+ throw new TypeError("clickstream.funnelAnalysis: input object required");
500
+ }
501
+ if (!Array.isArray(input.steps) || input.steps.length < 2) {
502
+ throw new TypeError("clickstream.funnelAnalysis: steps must be an array of at least 2 patterns");
503
+ }
504
+ if (input.steps.length > MAX_FUNNEL_STEPS) {
505
+ throw new TypeError("clickstream.funnelAnalysis: steps exceeds " + MAX_FUNNEL_STEPS);
506
+ }
507
+ for (var s = 0; s < input.steps.length; s += 1) {
508
+ var p = input.steps[s];
509
+ if (typeof p !== "string" || p.length === 0 || p.length > MAX_PATH_LEN) {
510
+ throw new TypeError("clickstream.funnelAnalysis: steps[" + s + "] must be a non-empty path pattern <= " + MAX_PATH_LEN + " chars");
511
+ }
512
+ }
513
+ var w = _resolveWindow(input, "funnelAnalysis");
514
+
515
+ // Translate each pattern to a SQL predicate (literal `path = ?`
516
+ // or `path LIKE ?` for the `/foo/*` prefix shape). Capture
517
+ // (session_id_hash, earliest occurred_at) per step then walk
518
+ // the steps in order, retaining a session at step N only if
519
+ // its earliest-match for step N happened at-or-after its
520
+ // step (N-1) earliest-match.
521
+ var stepHits = [];
522
+ for (var i = 0; i < input.steps.length; i += 1) {
523
+ var pat = input.steps[i];
524
+ var sql, params;
525
+ if (pat.length > 2 && pat.slice(-2) === "/*") {
526
+ var prefix = pat.slice(0, -1); // keep trailing slash
527
+ sql =
528
+ "SELECT session_id_hash, MIN(occurred_at) AS earliest " +
529
+ " FROM clickstream_pageviews " +
530
+ " WHERE occurred_at >= ?1 AND occurred_at < ?2 " +
531
+ " AND path LIKE ?3 " +
532
+ " GROUP BY session_id_hash";
533
+ params = [w.from, w.to, prefix + "%"];
534
+ } else {
535
+ sql =
536
+ "SELECT session_id_hash, MIN(occurred_at) AS earliest " +
537
+ " FROM clickstream_pageviews " +
538
+ " WHERE occurred_at >= ?1 AND occurred_at < ?2 " +
539
+ " AND path = ?3 " +
540
+ " GROUP BY session_id_hash";
541
+ params = [w.from, w.to, pat];
542
+ }
543
+ var r = await query(sql, params);
544
+ var byHash = new Map();
545
+ for (var j = 0; j < r.rows.length; j += 1) {
546
+ byHash.set(r.rows[j].session_id_hash, Number(r.rows[j].earliest) || 0);
547
+ }
548
+ stepHits.push(byHash);
549
+ }
550
+
551
+ // Walk forwards: retained[i] is the set of session_id_hashes
552
+ // that reached step i, anchored by the earliest-occurred-at
553
+ // recorded for step i-1.
554
+ var totalSessions = stepHits[0].size;
555
+ var retainedSet = new Set(stepHits[0].keys());
556
+ var stepsOut = [];
557
+ stepsOut.push({
558
+ path_pattern: input.steps[0],
559
+ sessions: retainedSet.size,
560
+ retained_pct: totalSessions > 0 ? 1 : 0,
561
+ });
562
+ var prevHits = stepHits[0];
563
+ for (var k = 1; k < input.steps.length; k += 1) {
564
+ var thisHits = stepHits[k];
565
+ var next = new Set();
566
+ retainedSet.forEach(function (hash) {
567
+ if (!thisHits.has(hash)) return;
568
+ var prev = prevHits.get(hash);
569
+ var curr = thisHits.get(hash);
570
+ if (prev == null || curr == null) return;
571
+ if (curr >= prev) next.add(hash);
572
+ });
573
+ var pct = totalSessions > 0 ? (next.size / totalSessions) : 0;
574
+ if (pct < 0) pct = 0;
575
+ if (pct > 1) pct = 1;
576
+ stepsOut.push({
577
+ path_pattern: input.steps[k],
578
+ sessions: next.size,
579
+ retained_pct: pct,
580
+ });
581
+ retainedSet = next;
582
+ // Anchor advances to the latest cursor at each step — the
583
+ // monotonic "step N at-or-after step N-1" semantics.
584
+ var advanced = new Map();
585
+ next.forEach(function (h) { advanced.set(h, thisHits.get(h)); });
586
+ prevHits = advanced;
587
+ }
588
+ return {
589
+ steps: stepsOut,
590
+ total_sessions: totalSessions,
591
+ };
592
+ },
593
+
594
+ // Single-page-session rate. A session counts as single-page
595
+ // when every pageview it landed in the window shares the same
596
+ // path. Empty windows return 0 (no sessions, no bounce).
597
+ bouncerate: async function (windowOpts) {
598
+ var w = _resolveWindow(windowOpts, "bouncerate");
599
+ var r = await query(
600
+ "SELECT session_id_hash, COUNT(DISTINCT path) AS distinct_paths " +
601
+ " FROM clickstream_pageviews " +
602
+ " WHERE occurred_at >= ?1 AND occurred_at < ?2 " +
603
+ " GROUP BY session_id_hash",
604
+ [w.from, w.to],
605
+ );
606
+ var total = r.rows.length;
607
+ var single = 0;
608
+ for (var i = 0; i < r.rows.length; i += 1) {
609
+ if (Number(r.rows[i].distinct_paths) === 1) single += 1;
610
+ }
611
+ var rate = total > 0 ? (single / total) : 0;
612
+ if (rate < 0) rate = 0;
613
+ if (rate > 1) rate = 1;
614
+ return {
615
+ single_page_sessions: single,
616
+ total_sessions: total,
617
+ bouncerate: rate,
618
+ };
619
+ },
620
+
621
+ // Average + p95 dwell time per page from `dwell` events. p95 is
622
+ // sorted-nearest-rank (no interpolation) — cheap, monotonic,
623
+ // and operator-meaningful when the sample count crosses ~20.
624
+ dwellByPage: async function (windowOpts) {
625
+ var w = _resolveWindow(windowOpts, "dwellByPage");
626
+ var r = await query(
627
+ "SELECT page_path, payload_json, occurred_at " +
628
+ " FROM clickstream_events " +
629
+ " WHERE kind = 'dwell' " +
630
+ " AND occurred_at >= ?1 AND occurred_at < ?2 " +
631
+ " ORDER BY page_path ASC",
632
+ [w.from, w.to],
633
+ );
634
+ var buckets = new Map();
635
+ for (var i = 0; i < r.rows.length; i += 1) {
636
+ var row = r.rows[i];
637
+ var payload;
638
+ try { payload = JSON.parse(row.payload_json || "{}"); }
639
+ catch (_e) { payload = {}; /* drop-silent — by design; stored shape is primitive-owned */ }
640
+ var ms = payload && typeof payload.ms === "number" && isFinite(payload.ms) && payload.ms >= 0 ? payload.ms : null;
641
+ if (ms == null) continue;
642
+ if (!buckets.has(row.page_path)) buckets.set(row.page_path, []);
643
+ buckets.get(row.page_path).push(ms);
644
+ }
645
+ var out = [];
646
+ buckets.forEach(function (samples, page_path) {
647
+ samples.sort(function (a, b) { return a - b; });
648
+ var n = samples.length;
649
+ var sum = 0;
650
+ for (var k = 0; k < n; k += 1) sum += samples[k];
651
+ var avg = n > 0 ? (sum / n) : 0;
652
+ // Sorted-nearest-rank p95: index = ceil(0.95 * n) - 1, clamped.
653
+ var idx = Math.ceil(0.95 * n) - 1;
654
+ if (idx < 0) idx = 0;
655
+ if (idx > n - 1) idx = n - 1;
656
+ var p95 = n > 0 ? samples[idx] : 0;
657
+ out.push({
658
+ page_path: page_path,
659
+ samples: n,
660
+ avg_ms: avg,
661
+ p95_ms: p95,
662
+ });
663
+ });
664
+ out.sort(function (a, b) {
665
+ if (b.samples !== a.samples) return b.samples - a.samples;
666
+ if (a.page_path < b.page_path) return -1;
667
+ if (a.page_path > b.page_path) return 1;
668
+ return 0;
669
+ });
670
+ return out;
671
+ },
672
+
673
+ // Retention sweep. Operators schedule this against the same cron
674
+ // they use for the rest of the data-minimisation stack — returns
675
+ // the per-table delete count so a long-running sweep can log
676
+ // its impact.
677
+ cleanupOlderThan: async function (days) {
678
+ if (!Number.isInteger(days) || days < MIN_CLEANUP_DAYS || days > MAX_CLEANUP_DAYS) {
679
+ throw new TypeError("clickstream.cleanupOlderThan: days must be an integer in [" +
680
+ MIN_CLEANUP_DAYS + ", " + MAX_CLEANUP_DAYS + "]");
681
+ }
682
+ var cutoff = Date.now() - (days * 24 * 60 * 60 * 1000);
683
+ var pv = await query(
684
+ "DELETE FROM clickstream_pageviews WHERE occurred_at < ?1",
685
+ [cutoff],
686
+ );
687
+ var ev = await query(
688
+ "DELETE FROM clickstream_events WHERE occurred_at < ?1",
689
+ [cutoff],
690
+ );
691
+ return {
692
+ pageviews_deleted: Number(pv.rowCount) || 0,
693
+ events_deleted: Number(ev.rowCount) || 0,
694
+ };
695
+ },
696
+
697
+ // Exposed enums so the operator-facing dashboard can render the
698
+ // canonical kind / ua_class list without duplicating the source
699
+ // of truth.
700
+ UA_CLASSES: UA_CLASSES,
701
+ EVENT_KINDS: EVENT_KINDS,
702
+ };
703
+ }
704
+
705
+ module.exports = {
706
+ create: create,
707
+ UA_CLASSES: UA_CLASSES,
708
+ EVENT_KINDS: EVENT_KINDS,
709
+ SESSION_NAMESPACE: SESSION_NAMESPACE,
710
+ CUSTOMER_NAMESPACE:CUSTOMER_NAMESPACE,
711
+ ONE_YEAR_MS: ONE_YEAR_MS,
712
+ DEFAULT_WINDOW_MS: DEFAULT_WINDOW_MS,
713
+ };