@blamejs/blamejs-shop 0.0.53 → 0.0.56

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/analytics.js CHANGED
@@ -46,6 +46,45 @@
46
46
  * refunded). Every other status (pending / paid / fulfilling /
47
47
  * shipped / delivered) counts at face value because the operator
48
48
  * has either captured the funds or is committed to capturing them.
49
+ *
50
+ * Event-stream surface (writes against `analytics_events` from
51
+ * migration `0019_analytics_events.sql`):
52
+ *
53
+ * analytics.recordEvent({ event_type, session_id?, customer_id?,
54
+ * product_id?, search_q?, page_url?,
55
+ * user_agent_class?, payload? })
56
+ * → { id, occurred_at }
57
+ *
58
+ * analytics.topSearchTerms({ from?, to?, limit? })
59
+ * → [{ search_q, count }]
60
+ *
61
+ * analytics.topViewedProducts({ from?, to?, limit? })
62
+ * → [{ product_id, count }]
63
+ *
64
+ * analytics.funnel({ from?, to? })
65
+ * → { pdp_views, cart_adds, checkout_starts, checkout_completes,
66
+ * conversion_rate }
67
+ *
68
+ * analytics.sessionFlow(session_id, { limit? })
69
+ * → [{ id, event_type, session_id_hash, customer_id_hash,
70
+ * product_id, search_q, page_url, user_agent_class,
71
+ * payload, occurred_at }]
72
+ *
73
+ * analytics.dropAfter(ts)
74
+ * → { deleted }
75
+ *
76
+ * Privacy posture: `session_id` and `customer_id` are hashed via
77
+ * `b.crypto.namespaceHash` (namespaces `"analytics-session"` /
78
+ * `"analytics-customer"`) before the row reaches the database. The
79
+ * primitive REFUSES — with a TypeError, at every write entry point
80
+ * — to accept a value that looks like a raw email (contains `@`
81
+ * between two non-whitespace runs) or a raw IP (dotted-quad or
82
+ * colon-delimited hextet). The same refusal applies to `search_q`
83
+ * and `page_url` so an operator who accidentally pipes a logged-in
84
+ * user's identifier into a search term hits a loud error instead
85
+ * of a quiet PII leak. `payload` is JSON-encoded and bounded to
86
+ * 4 KiB; the primitive does not introspect its contents — operator
87
+ * discipline owns what goes inside.
49
88
  */
50
89
 
51
90
  var bShop;
@@ -88,6 +127,103 @@ function _limit(n, label, max) {
88
127
  }
89
128
  }
90
129
 
130
+ // ---- event-stream validators -------------------------------------------
131
+ //
132
+ // Event-stream writes go through `recordEvent`; every other event-
133
+ // stream surface is read-only and reuses `_resolveWindow` /
134
+ // `_limit`. The validators below are the write-site gates.
135
+
136
// Closed set of event kinds the event stream accepts. `recordEvent`
// rejects any `event_type` that is not in this list.
var EVENT_TYPES = [
  "pdp_view",
  "collection_view",
  "search_query",
  "wishlist_add",
  "wishlist_remove",
  "cart_add",
  "cart_remove",
  "checkout_start",
  "checkout_complete",
  "newsletter_signup",
];

// Allowed values for the optional `user_agent_class` field.
var UA_CLASSES = ["desktop", "mobile", "bot", "other"];

// Namespaces handed to `b.crypto.namespaceHash` when hashing the
// session / customer identifiers.
var SESSION_NAMESPACE = "analytics-session";
var CUSTOMER_NAMESPACE = "analytics-customer";

// Ceiling on the JSON-encoded payload, in bytes (4 KiB). The payload
// is meant for small extras (refinement filters, source attribution,
// variant ids); anything larger belongs in a purpose-built table.
var MAX_PAYLOAD_BYTES = 4 * 1024;

// Length ceilings for the denormalised string columns. Oversized
// values are refused with a TypeError instead of being truncated,
// because a truncated identifier no longer joins to anything.
var MAX_SEARCH_Q = 256;
var MAX_PAGE_URL = 2048;
var MAX_PRODUCT_ID = 128;
var MAX_SESSION_ID = 512;
var MAX_CUSTOMER_ID = 512;
160
+
161
// Build the `{ from, to }` epoch-ms window used by the event-stream
// read queries. A missing `to` becomes "now"; a missing `from`
// becomes "now minus DEFAULT_WINDOW_MS". Both bounds are checked with
// `_epochMs` under the labels "from" / "to", so validation errors
// name the same keys the caller passed.
//
// Throws TypeError when `from` is not strictly before `to`, or when
// the span is longer than ONE_YEAR_MS.
function _resolveEventWindow(opts) {
  var source = opts || {};
  var nowMs = Date.now();

  var from = source.from;
  if (from == null) {
    from = nowMs - DEFAULT_WINDOW_MS;
  }
  var to = source.to;
  if (to == null) {
    to = nowMs;
  }

  _epochMs(from, "from");
  _epochMs(to, "to");

  if (from >= to) {
    throw new TypeError("analytics: from must be strictly less than to");
  }
  var span = to - from;
  if (span > ONE_YEAR_MS) {
    throw new TypeError("analytics: window (to - from) must be ≤ 1 year");
  }
  return { from: from, to: to };
}
180
+
181
// PII guard — refuse a value that looks like a raw email or IP.
// Hashed identifiers (hex) never satisfy either shape, so the gate
// is a one-way "operator handed us raw PII" detector, not a typing
// constraint. The patterns are unanchored on purpose: they fire when
// the shape appears anywhere inside the value, because rejecting a
// borderline string is preferable to ingesting a real address.
var RAW_EMAIL_RE = /[^\s@]+@[^\s@]+\.[^\s@]+/;
// IPv4 dotted-quad.
var RAW_IPV4_RE = /\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b/;
// IPv6 with two or more consecutive `hex:` groups ("2001:db8::1",
// full eight-group form). Not RFC-precise by design.
var RAW_IPV6_RE = /(?:[0-9a-fA-F]{1,4}:){2,}[0-9a-fA-F]{0,4}/;
// `::`-compressed IPv6 forms that carry fewer than two `hex:` groups
// and therefore slip past RAW_IPV6_RE: "::1", "fe80::1", "[::1]".
// The leading alternation requires the address to start at the
// beginning of the value or after a non-word character, so a `::`
// embedded in an ordinary token ("std::dev") is not flagged.
var RAW_IPV6_COMPRESSED_RE = /(?:^|[^0-9A-Za-z_])(?:[0-9a-fA-F]{1,4})?::[0-9a-fA-F]{1,4}/;

// Throw a TypeError when `value` contains an email-shaped or
// IP-shaped substring. `label` names the offending field in the
// message. Non-string and empty values are ignored here — type and
// emptiness checks are the job of the caller's other validators.
function _refuseRawPii(label, value) {
  if (typeof value !== "string" || value.length === 0) return;
  if (RAW_EMAIL_RE.test(value)) {
    throw new TypeError(
      "analytics: " + label + " looks like a raw email — hash via " +
      "b.crypto.namespaceHash before passing to recordEvent"
    );
  }
  if (RAW_IPV4_RE.test(value) ||
      RAW_IPV6_RE.test(value) ||
      RAW_IPV6_COMPRESSED_RE.test(value)) {
    throw new TypeError(
      "analytics: " + label + " looks like a raw IP — IPs must not " +
      "reach the analytics_events table; hash or drop at the caller"
    );
  }
}
212
+
213
// Validate an optional string field. `null` / `undefined` normalise
// to `null` (field absent); any other value must be a non-empty
// string of at most `max` UTF-16 code units, and is returned as-is.
// Throws TypeError, naming the field via `label`, on any violation.
function _optString(value, label, max) {
  if (value === null || value === undefined) {
    return null;
  }

  var problem = null;
  if (typeof value !== "string") {
    problem = " must be a string when provided";
  } else if (value.length === 0) {
    problem = " must be non-empty when provided";
  } else if (value.length > max) {
    problem = " exceeds " + max + " chars";
  }

  if (problem !== null) {
    throw new TypeError("analytics: " + label + problem);
  }
  return value;
}
226
+
91
227
  // ---- factory ------------------------------------------------------------
92
228
 
93
229
  function create(opts) {
@@ -235,6 +371,270 @@ function create(opts) {
235
371
  });
236
372
  },
237
373
 
374
+ // Record a single event-stream row. `session_id` and
375
+ // `customer_id` are hashed via `b.crypto.namespaceHash` before
376
+ // the row reaches the database — raw identifiers never persist.
377
+ // At least one of the two MUST be supplied; an anonymous row
378
+ // with no join key is useless for funnel debugging and a sign
379
+ // the caller forgot to wire the session middleware.
380
+ //
381
+ // Refusals (TypeError, before any I/O):
382
+ // - bad `event_type` (not in the allowed enum)
383
+ // - missing both `session_id` and `customer_id`
384
+ // - oversized `payload` (> 4 KiB after JSON-encode)
385
+ // - bad `occurred_at` (not a non-negative integer epoch-ms)
386
+ // - any string-typed field that looks like a raw email or IP
387
+ // - any string-typed field that exceeds its length bound
388
+ recordEvent: async function (input) {
389
+ if (!input || typeof input !== "object") {
390
+ throw new TypeError("analytics.recordEvent: input object required");
391
+ }
392
+ var eventType = input.event_type;
393
+ if (EVENT_TYPES.indexOf(eventType) === -1) {
394
+ throw new TypeError(
395
+ "analytics.recordEvent: event_type must be one of " +
396
+ EVENT_TYPES.join(", ")
397
+ );
398
+ }
399
+ // Both raw identifiers run through the PII guard first — an
400
+ // email-shaped session_id is a tell that the caller wired the
401
+ // wrong field. The guard runs before hashing because a
402
+ // namespaceHash output is hex (no `@`, no dotted-quad) and
403
+ // would slip past trivially.
404
+ _refuseRawPii("session_id", input.session_id);
405
+ _refuseRawPii("customer_id", input.customer_id);
406
+
407
+ var sessionId = _optString(input.session_id, "session_id", MAX_SESSION_ID);
408
+ var customerId = _optString(input.customer_id, "customer_id", MAX_CUSTOMER_ID);
409
+ if (sessionId == null && customerId == null) {
410
+ throw new TypeError(
411
+ "analytics.recordEvent: at least one of session_id / " +
412
+ "customer_id is required"
413
+ );
414
+ }
415
+
416
+ // Denormalised columns — each is optional and bounded. The
417
+ // PII guard runs on every string-typed value, so an operator
418
+ // who pipes `?q=alice@example.com` straight into `search_q`
419
+ // hits a loud refusal at the write site instead of leaking
420
+ // the address into the aggregate table.
421
+ _refuseRawPii("product_id", input.product_id);
422
+ _refuseRawPii("search_q", input.search_q);
423
+ _refuseRawPii("page_url", input.page_url);
424
+ var productId = _optString(input.product_id, "product_id", MAX_PRODUCT_ID);
425
+ var searchQ = _optString(input.search_q, "search_q", MAX_SEARCH_Q);
426
+ var pageUrl = _optString(input.page_url, "page_url", MAX_PAGE_URL);
427
+ var uaClass = input.user_agent_class;
428
+ if (uaClass != null && UA_CLASSES.indexOf(uaClass) === -1) {
429
+ throw new TypeError(
430
+ "analytics.recordEvent: user_agent_class must be one of " +
431
+ UA_CLASSES.join(", ")
432
+ );
433
+ }
434
+
435
+ // Payload — JSON-encode in-process so the size bound is
436
+ // enforced on the encoded bytes (matches what the row will
437
+ // hold). `undefined` → "{}" so the column NOT NULL default
438
+ // covers operators that don't pass a payload.
439
+ var payloadInput = input.payload == null ? {} : input.payload;
440
+ if (typeof payloadInput !== "object" || Array.isArray(payloadInput)) {
441
+ throw new TypeError(
442
+ "analytics.recordEvent: payload must be a plain object when provided"
443
+ );
444
+ }
445
+ var payloadJson;
446
+ try {
447
+ payloadJson = JSON.stringify(payloadInput);
448
+ } catch (e) {
449
+ throw new TypeError(
450
+ "analytics.recordEvent: payload not JSON-serialisable (" +
451
+ (e && e.message ? e.message : "unknown") + ")"
452
+ );
453
+ }
454
+ if (Buffer.byteLength(payloadJson, "utf8") > MAX_PAYLOAD_BYTES) {
455
+ throw new TypeError(
456
+ "analytics.recordEvent: payload exceeds " + MAX_PAYLOAD_BYTES +
457
+ " bytes (JSON-encoded)"
458
+ );
459
+ }
460
+
461
+ // `occurred_at` — defaults to now. Operators can pin it for
462
+ // backfills, but a NaN / float / string here is a typo and
463
+ // throws.
464
+ var occurredAt;
465
+ if (input.occurred_at == null) {
466
+ occurredAt = Date.now();
467
+ } else {
468
+ _epochMs(input.occurred_at, "occurred_at");
469
+ occurredAt = input.occurred_at;
470
+ }
471
+
472
+ var b = _b();
473
+ var sessionHash = sessionId == null ? null : b.crypto.namespaceHash(SESSION_NAMESPACE, sessionId);
474
+ var customerHash = customerId == null ? null : b.crypto.namespaceHash(CUSTOMER_NAMESPACE, customerId);
475
+ // `session_id_hash` is NOT NULL in the schema — fall back to
476
+ // a customer-scoped hash when only the customer is supplied
477
+ // so the join key column always has a value. The customer
478
+ // hash uses its own namespace so a session_id_hash composed
479
+ // this way can't accidentally collide with a real session.
480
+ if (sessionHash == null) sessionHash = customerHash;
481
+
482
+ var id = b.uuid.v7();
483
+ await query(
484
+ "INSERT INTO analytics_events " +
485
+ "(id, event_type, session_id_hash, customer_id_hash, payload_json, " +
486
+ " product_id, search_q, page_url, user_agent_class, occurred_at) " +
487
+ "VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10)",
488
+ [id, eventType, sessionHash, customerHash, payloadJson,
489
+ productId, searchQ, pageUrl, uaClass == null ? null : uaClass, occurredAt],
490
+ );
491
+ return { id: id, occurred_at: occurredAt };
492
+ },
493
+
494
+ // Top-N search terms by event count across the window. Filters
495
+ // out NULL `search_q` (other event types share the table) and
496
+ // GROUPs by the denormalised column so the query never decodes
497
+ // the JSON payload. Default limit 10; max 100 (same envelope as
498
+ // `topSKUs`).
499
+ topSearchTerms: async function (windowOpts) {
500
+ var w = _resolveEventWindow(windowOpts);
501
+ var limit = (windowOpts && windowOpts.limit) == null ? 10 : windowOpts.limit;
502
+ _limit(limit, "limit");
503
+ var r = await query(
504
+ "SELECT search_q AS search_q, COUNT(*) AS count " +
505
+ " FROM analytics_events " +
506
+ " WHERE event_type = 'search_query' " +
507
+ " AND search_q IS NOT NULL " +
508
+ " AND occurred_at >= ?1 AND occurred_at < ?2 " +
509
+ " GROUP BY search_q " +
510
+ " ORDER BY count DESC, search_q ASC " +
511
+ " LIMIT ?3",
512
+ [w.from, w.to, limit],
513
+ );
514
+ return r.rows.map(function (row) {
515
+ return { search_q: row.search_q, count: Number(row.count) || 0 };
516
+ });
517
+ },
518
+
519
+ // Top-N viewed products by PDP-view count across the window.
520
+ // Same shape as `topSearchTerms` — different event_type +
521
+ // group-by column.
522
+ topViewedProducts: async function (windowOpts) {
523
+ var w = _resolveEventWindow(windowOpts);
524
+ var limit = (windowOpts && windowOpts.limit) == null ? 10 : windowOpts.limit;
525
+ _limit(limit, "limit");
526
+ var r = await query(
527
+ "SELECT product_id AS product_id, COUNT(*) AS count " +
528
+ " FROM analytics_events " +
529
+ " WHERE event_type = 'pdp_view' " +
530
+ " AND product_id IS NOT NULL " +
531
+ " AND occurred_at >= ?1 AND occurred_at < ?2 " +
532
+ " GROUP BY product_id " +
533
+ " ORDER BY count DESC, product_id ASC " +
534
+ " LIMIT ?3",
535
+ [w.from, w.to, limit],
536
+ );
537
+ return r.rows.map(function (row) {
538
+ return { product_id: row.product_id, count: Number(row.count) || 0 };
539
+ });
540
+ },
541
+
542
+ // PDP → cart → checkout-start → checkout-complete funnel for
543
+ // the window. Conversion rate is `completes / pdp_views`,
544
+ // clamped to [0, 1] — when there are zero PDP views the rate
545
+ // is reported as 0 (operators read the absolute counts to
546
+ // distinguish "no traffic" from "no conversion").
547
+ funnel: async function (windowOpts) {
548
+ var w = _resolveEventWindow(windowOpts);
549
+ var r = await query(
550
+ "SELECT event_type, COUNT(*) AS count " +
551
+ " FROM analytics_events " +
552
+ " WHERE occurred_at >= ?1 AND occurred_at < ?2 " +
553
+ " AND event_type IN ('pdp_view','cart_add','checkout_start','checkout_complete') " +
554
+ " GROUP BY event_type",
555
+ [w.from, w.to],
556
+ );
557
+ var counts = { pdp_view: 0, cart_add: 0, checkout_start: 0, checkout_complete: 0 };
558
+ for (var i = 0; i < r.rows.length; i += 1) {
559
+ counts[r.rows[i].event_type] = Number(r.rows[i].count) || 0;
560
+ }
561
+ var pdp = counts.pdp_view;
562
+ var carts = counts.cart_add;
563
+ var starts = counts.checkout_start;
564
+ var completes = counts.checkout_complete;
565
+ var rate = pdp > 0 ? (completes / pdp) : 0;
566
+ if (rate < 0) rate = 0;
567
+ if (rate > 1) rate = 1;
568
+ return {
569
+ pdp_views: pdp,
570
+ cart_adds: carts,
571
+ checkout_starts: starts,
572
+ checkout_completes: completes,
573
+ conversion_rate: rate,
574
+ };
575
+ },
576
+
577
+ // Per-session event sequence — operator hands the raw
578
+ // session_id, the primitive hashes it (same namespace as on
579
+ // write) and returns the chronological event list. Returning
580
+ // the `session_id_hash` alongside lets the operator confirm
581
+ // they're looking at the right session without ever seeing the
582
+ // raw id again. Default limit 100; max 500 (sessions don't
583
+ // legitimately emit more events than that in a single
584
+ // debugging window).
585
+ sessionFlow: async function (sessionId, opts) {
586
+ if (typeof sessionId !== "string" || sessionId.length === 0) {
587
+ throw new TypeError("analytics.sessionFlow: session_id required");
588
+ }
589
+ if (sessionId.length > MAX_SESSION_ID) {
590
+ throw new TypeError("analytics.sessionFlow: session_id exceeds " + MAX_SESSION_ID + " chars");
591
+ }
592
+ _refuseRawPii("session_id", sessionId);
593
+ var limit = (opts && opts.limit) == null ? 100 : opts.limit;
594
+ _limit(limit, "limit", 500);
595
+ var hash = _b().crypto.namespaceHash(SESSION_NAMESPACE, sessionId);
596
+ var r = await query(
597
+ "SELECT id, event_type, session_id_hash, customer_id_hash, " +
598
+ " payload_json, product_id, search_q, page_url, " +
599
+ " user_agent_class, occurred_at " +
600
+ " FROM analytics_events " +
601
+ " WHERE session_id_hash = ?1 " +
602
+ " ORDER BY occurred_at ASC, id ASC " +
603
+ " LIMIT ?2",
604
+ [hash, limit],
605
+ );
606
+ return r.rows.map(function (row) {
607
+ var payload;
608
+ try { payload = JSON.parse(row.payload_json || "{}"); }
609
+ catch (_e) { payload = {}; }
610
+ return {
611
+ id: row.id,
612
+ event_type: row.event_type,
613
+ session_id_hash: row.session_id_hash,
614
+ customer_id_hash: row.customer_id_hash,
615
+ product_id: row.product_id,
616
+ search_q: row.search_q,
617
+ page_url: row.page_url,
618
+ user_agent_class: row.user_agent_class,
619
+ payload: payload,
620
+ occurred_at: Number(row.occurred_at) || 0,
621
+ };
622
+ });
623
+ },
624
+
625
+ // Retention sweep — DELETE every event older than `ts`.
626
+ // Operators run this on a schedule (cron, queue worker) to
627
+ // satisfy data-minimisation obligations. The primitive returns
628
+ // the row count so the caller can log the size of each sweep.
629
+ dropAfter: async function (ts) {
630
+ _epochMs(ts, "ts");
631
+ var r = await query(
632
+ "DELETE FROM analytics_events WHERE occurred_at < ?1",
633
+ [ts],
634
+ );
635
+ return { deleted: Number(r.rowCount) || 0 };
636
+ },
637
+
238
638
  // Most-recent orders. No window — strictly most-recent-N. Used
239
639
  // by the dashboard's "Recent activity" sidebar.
240
640
  recentOrders: async function (recentOpts) {