@littlebearapps/platform-admin-sdk 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. package/README.md +112 -0
  2. package/dist/index.d.ts +16 -0
  3. package/dist/index.js +89 -0
  4. package/dist/prompts.d.ts +27 -0
  5. package/dist/prompts.js +80 -0
  6. package/dist/scaffold.d.ts +5 -0
  7. package/dist/scaffold.js +65 -0
  8. package/dist/templates.d.ts +16 -0
  9. package/dist/templates.js +131 -0
  10. package/package.json +46 -0
  11. package/templates/full/migrations/006_pattern_discovery.sql +199 -0
  12. package/templates/full/migrations/007_notifications_search.sql +127 -0
  13. package/templates/full/workers/lib/pattern-discovery/ai-prompt.ts +644 -0
  14. package/templates/full/workers/lib/pattern-discovery/clustering.ts +278 -0
  15. package/templates/full/workers/lib/pattern-discovery/shadow-evaluation.ts +603 -0
  16. package/templates/full/workers/lib/pattern-discovery/storage.ts +806 -0
  17. package/templates/full/workers/lib/pattern-discovery/types.ts +159 -0
  18. package/templates/full/workers/lib/pattern-discovery/validation.ts +278 -0
  19. package/templates/full/workers/pattern-discovery.ts +661 -0
  20. package/templates/full/workers/platform-alert-router.ts +1809 -0
  21. package/templates/full/workers/platform-notifications.ts +424 -0
  22. package/templates/full/workers/platform-search.ts +480 -0
  23. package/templates/full/workers/platform-settings.ts +436 -0
  24. package/templates/full/wrangler.alert-router.jsonc.hbs +34 -0
  25. package/templates/full/wrangler.notifications.jsonc.hbs +23 -0
  26. package/templates/full/wrangler.pattern-discovery.jsonc.hbs +33 -0
  27. package/templates/full/wrangler.search.jsonc.hbs +16 -0
  28. package/templates/full/wrangler.settings.jsonc.hbs +23 -0
  29. package/templates/shared/README.md.hbs +69 -0
  30. package/templates/shared/config/budgets.yaml.hbs +72 -0
  31. package/templates/shared/config/services.yaml.hbs +45 -0
  32. package/templates/shared/migrations/001_core_tables.sql +117 -0
  33. package/templates/shared/migrations/002_usage_warehouse.sql +830 -0
  34. package/templates/shared/migrations/003_feature_tracking.sql +250 -0
  35. package/templates/shared/migrations/004_settings_alerts.sql +452 -0
  36. package/templates/shared/migrations/seed.sql.hbs +4 -0
  37. package/templates/shared/package.json.hbs +21 -0
  38. package/templates/shared/scripts/sync-config.ts +242 -0
  39. package/templates/shared/tsconfig.json +12 -0
  40. package/templates/shared/workers/lib/analytics-engine.ts +357 -0
  41. package/templates/shared/workers/lib/billing.ts +293 -0
  42. package/templates/shared/workers/lib/circuit-breaker-middleware.ts +25 -0
  43. package/templates/shared/workers/lib/control.ts +292 -0
  44. package/templates/shared/workers/lib/economics.ts +368 -0
  45. package/templates/shared/workers/lib/metrics.ts +103 -0
  46. package/templates/shared/workers/lib/platform-settings.ts +407 -0
  47. package/templates/shared/workers/lib/shared/allowances.ts +333 -0
  48. package/templates/shared/workers/lib/shared/cloudflare.ts +1362 -0
  49. package/templates/shared/workers/lib/shared/types.ts +58 -0
  50. package/templates/shared/workers/lib/telemetry-sampling.ts +360 -0
  51. package/templates/shared/workers/lib/usage/collectors/example.ts +96 -0
  52. package/templates/shared/workers/lib/usage/collectors/index.ts +128 -0
  53. package/templates/shared/workers/lib/usage/handlers/audit.ts +306 -0
  54. package/templates/shared/workers/lib/usage/handlers/backfill.ts +845 -0
  55. package/templates/shared/workers/lib/usage/handlers/behavioral.ts +429 -0
  56. package/templates/shared/workers/lib/usage/handlers/data-queries.ts +507 -0
  57. package/templates/shared/workers/lib/usage/handlers/dlq-admin.ts +364 -0
  58. package/templates/shared/workers/lib/usage/handlers/health-trends.ts +222 -0
  59. package/templates/shared/workers/lib/usage/handlers/index.ts +35 -0
  60. package/templates/shared/workers/lib/usage/handlers/usage-admin.ts +421 -0
  61. package/templates/shared/workers/lib/usage/handlers/usage-features.ts +1262 -0
  62. package/templates/shared/workers/lib/usage/handlers/usage-metrics.ts +2420 -0
  63. package/templates/shared/workers/lib/usage/handlers/usage-settings.ts +610 -0
  64. package/templates/shared/workers/lib/usage/queue/budget-enforcement.ts +1032 -0
  65. package/templates/shared/workers/lib/usage/queue/cost-budget-enforcement.ts +128 -0
  66. package/templates/shared/workers/lib/usage/queue/cost-calculator.ts +77 -0
  67. package/templates/shared/workers/lib/usage/queue/dlq-handler.ts +161 -0
  68. package/templates/shared/workers/lib/usage/queue/index.ts +19 -0
  69. package/templates/shared/workers/lib/usage/queue/telemetry-processor.ts +790 -0
  70. package/templates/shared/workers/lib/usage/scheduled/anomaly-detection.ts +732 -0
  71. package/templates/shared/workers/lib/usage/scheduled/data-collection.ts +956 -0
  72. package/templates/shared/workers/lib/usage/scheduled/error-digest.ts +343 -0
  73. package/templates/shared/workers/lib/usage/scheduled/index.ts +18 -0
  74. package/templates/shared/workers/lib/usage/scheduled/rollups.ts +1561 -0
  75. package/templates/shared/workers/lib/usage/shared/constants.ts +362 -0
  76. package/templates/shared/workers/lib/usage/shared/index.ts +14 -0
  77. package/templates/shared/workers/lib/usage/shared/types.ts +1066 -0
  78. package/templates/shared/workers/lib/usage/shared/utils.ts +795 -0
  79. package/templates/shared/workers/platform-usage.ts +1915 -0
  80. package/templates/shared/wrangler.usage.jsonc.hbs +58 -0
  81. package/templates/standard/migrations/005_error_collection.sql +162 -0
  82. package/templates/standard/workers/error-collector.ts +2670 -0
  83. package/templates/standard/workers/lib/error-collector/capture.ts +213 -0
  84. package/templates/standard/workers/lib/error-collector/digest.ts +448 -0
  85. package/templates/standard/workers/lib/error-collector/email-health-alerts.ts +262 -0
  86. package/templates/standard/workers/lib/error-collector/fingerprint.ts +258 -0
  87. package/templates/standard/workers/lib/error-collector/gap-alerts.ts +293 -0
  88. package/templates/standard/workers/lib/error-collector/github.ts +329 -0
  89. package/templates/standard/workers/lib/error-collector/types.ts +262 -0
  90. package/templates/standard/workers/lib/sentinel/gap-detection.ts +734 -0
  91. package/templates/standard/workers/lib/shared/slack-alerts.ts +585 -0
  92. package/templates/standard/workers/platform-sentinel.ts +1744 -0
  93. package/templates/standard/wrangler.error-collector.jsonc.hbs +44 -0
  94. package/templates/standard/wrangler.sentinel.jsonc.hbs +45 -0
@@ -0,0 +1,732 @@
1
+ /**
2
+ * Anomaly Detection Module
3
+ *
4
+ * Functions for detecting usage anomalies using rolling statistics,
5
+ * dataset drift detection, and alerting via Slack.
6
+ * Extracted from platform-usage.ts as part of scheduled task modularisation.
7
+ */
8
+
9
+ import type { Env, RollingStats } from '../shared';
10
+ import { KNOWN_DATASETS, QUERIED_DATASETS, generateId, fetchWithRetry } from '../shared';
11
+ import { createLoggerFromEnv } from '@littlebearapps/platform-consumer-sdk';
12
+
13
+ // =============================================================================
14
+ // SLACK ALERTING
15
+ // =============================================================================
16
+
/** A single titled value shown inside a Slack attachment. */
type SlackAlertField = { title: string; value: string; short?: boolean };

/**
 * Body sent to the Slack webhook: a headline `text` plus optional coloured
 * attachments, each carrying a list of fields.
 */
interface SlackAlertPayload {
  text: string;
  attachments?: Array<{ color: string; fields: SlackAlertField[] }>;
}
27
+
/**
 * Post an alert payload to the configured Slack webhook.
 *
 * Best-effort: returns immediately when `env.SLACK_WEBHOOK_URL` is not set and
 * never throws — delivery problems are logged instead of propagated.
 *
 * @param env - Worker environment; reads `SLACK_WEBHOOK_URL`.
 * @param payload - Message body, serialised as JSON.
 */
async function sendSlackAlert(env: Env, payload: SlackAlertPayload): Promise<void> {
  if (!env.SLACK_WEBHOOK_URL) return;

  try {
    const response = await fetchWithRetry(env.SLACK_WEBHOOK_URL, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(payload),
    });

    // A rejected webhook call resolves normally with a non-2xx status rather
    // than throwing, so report it instead of silently dropping the alert.
    if (!response.ok) {
      const log = createLoggerFromEnv(env, 'platform-usage', 'platform:usage:slack');
      log.warn('Slack webhook returned non-OK status', undefined, {
        tag: 'SLACK_ERROR',
        status: response.status,
      });
    }
  } catch (error) {
    const log = createLoggerFromEnv(env, 'platform-usage', 'platform:usage:slack');
    log.error('Failed to send Slack alert', error instanceof Error ? error : undefined, {
      tag: 'SLACK_ERROR',
    });
  }
}
47
+
48
+ // =============================================================================
49
+ // ROLLING STATISTICS
50
+ // =============================================================================
51
+
/**
 * Allow-list of metric column names accepted by the rolling-stats helpers.
 * Only names in this list are ever interpolated into SQL text.
 */
const ALLOWED_ROLLING_METRICS = [
  // Workers
  'workers_requests',
  'workers_errors',
  'workers_cost_usd',
  // D1
  'd1_rows_read',
  'd1_rows_written',
  'd1_cost_usd',
  // KV
  'kv_reads',
  'kv_writes',
  'kv_cost_usd',
  // R2
  'r2_class_a_ops',
  'r2_class_b_ops',
  'r2_cost_usd',
  // AI Gateway
  'aigateway_requests',
  'aigateway_cost_usd',
  // Workers AI
  'workersai_requests',
  'workersai_neurons',
  'workersai_cost_usd',
  // Aggregate
  'total_cost_usd',
] as const;

/** Union of the literal metric names in `ALLOWED_ROLLING_METRICS`. */
type AllowedMetric = (typeof ALLOWED_ROLLING_METRICS)[number];

/**
 * Type guard: true when `metric` exactly matches an allow-listed name.
 */
function isAllowedMetric(metric: string): metric is AllowedMetric {
  return ALLOWED_ROLLING_METRICS.some((allowed) => allowed === metric);
}
85
+
/**
 * Compute the mean and population standard deviation of a metric from
 * `daily_usage_rollups` over the window `[today - 7 days, today)`.
 *
 * @param env - Worker environment; uses the `PLATFORM_DB` D1 binding.
 * @param metric - Column name; must be one of `ALLOWED_ROLLING_METRICS`.
 * @param project - Value matched against the `project` column.
 * @returns The rolling stats, or `null` when the metric is not allowed, the
 *   window holds no non-NULL samples, or the query fails (failure is logged).
 */
export async function calculate7DayRollingStats(
  env: Env,
  metric: string,
  project: string
): Promise<RollingStats | null> {
  if (!isAllowedMetric(metric)) {
    const log = createLoggerFromEnv(env, 'platform-usage', 'platform:usage:anomaly');
    log.warn('Invalid metric for rolling stats', undefined, {
      tag: 'INVALID_METRIC',
      metric,
    });
    return null;
  }

  try {
    // SQLite doesn't have native STDDEV, so calculate manually using sum and sum of squares.
    // COUNT(column) is used rather than COUNT(*): SUM and AVG skip NULL values,
    // so the sample count has to skip them too or the variance below is
    // computed against the wrong n (and an all-NULL window would yield a null avg).
    const result = await env.PLATFORM_DB.prepare(
      `
      SELECT
        COUNT(${metric}) as sample_count,
        SUM(${metric}) as sum_value,
        SUM(${metric} * ${metric}) as sum_squared,
        AVG(${metric}) as avg_value
      FROM daily_usage_rollups
      WHERE project = ?
        AND snapshot_date >= date('now', '-7 days')
        AND snapshot_date < date('now')
      `
    )
      .bind(project)
      .first<{
        sample_count: number;
        sum_value: number;
        sum_squared: number;
        avg_value: number;
      }>();

    if (!result || result.sample_count === 0) {
      return null;
    }

    const n = result.sample_count;
    const avg = result.avg_value;
    // Variance = (sum of squares - n * mean^2) / n
    const variance = (result.sum_squared - n * avg * avg) / n;
    // Floating-point cancellation can push the variance slightly below zero.
    const stddev = Math.sqrt(Math.max(0, variance));

    return {
      avg,
      stddev,
      samples: n,
    };
  } catch (error) {
    const log = createLoggerFromEnv(env, 'platform-usage', 'platform:usage:anomaly');
    log.error('Error calculating rolling stats', error instanceof Error ? error : undefined, {
      tag: 'ROLLING_STATS_ERROR',
      metric,
      project,
    });
    return null;
  }
}
152
+
153
+ // =============================================================================
154
+ // TODAY'S METRIC VALUE
155
+ // =============================================================================
156
+
/**
 * Sum a metric over today's rows in `hourly_usage_snapshots`.
 *
 * @param env - Worker environment; uses the `PLATFORM_DB` D1 binding.
 * @param metric - Column name; must be one of `ALLOWED_ROLLING_METRICS`.
 * @param project - Value matched against the `project` column (default `'all'`).
 * @returns The summed value, or `0` when the metric is not allowed, no rows
 *   match, or the query fails (failure is logged).
 */
export async function getTodayMetricValue(
  env: Env,
  metric: string,
  project: string = 'all'
): Promise<number> {
  if (!isAllowedMetric(metric)) {
    return 0;
  }

  // `metric` has passed the allow-list check, so it is safe to use as a column name.
  const sql = `
    SELECT SUM(${metric}) as total
    FROM hourly_usage_snapshots
    WHERE project = ?
      AND snapshot_hour >= datetime('now', 'start of day')
      AND snapshot_hour < datetime('now', '+1 day', 'start of day')
    `;

  try {
    const row = await env.PLATFORM_DB.prepare(sql).bind(project).first<{ total: number }>();
    if (row === null || row === undefined) {
      return 0;
    }
    // SUM over zero rows produces NULL; report that as zero.
    return row.total ?? 0;
  } catch (error) {
    const log = createLoggerFromEnv(env, 'platform-usage', 'platform:usage:anomaly');
    log.error('Error getting today metric', error instanceof Error ? error : undefined, {
      tag: 'TODAY_METRIC_ERROR',
      metric,
      project,
    });
    return 0;
  }
}
193
+
194
+ // =============================================================================
195
+ // ANOMALY RECORDING
196
+ // =============================================================================
197
+
/**
 * Insert one row into `usage_anomalies` describing a detected anomaly.
 *
 * Best-effort: an insert failure is logged and not rethrown.
 *
 * @param env - Worker environment; uses the `PLATFORM_DB` D1 binding.
 * @param metric - Metric name stored in `metric_name`.
 * @param currentValue - Observed value that triggered the anomaly.
 * @param stats - Baseline whose `avg` and `stddev` are stored with the row.
 * @param deviation - Stored as `deviation_factor`.
 * @param project - Project the anomaly belongs to (default `'all'`).
 */
export async function recordAnomaly(
  env: Env,
  metric: string,
  currentValue: number,
  stats: RollingStats,
  deviation: number,
  project: string = 'all'
): Promise<void> {
  try {
    const statement = env.PLATFORM_DB.prepare(
      `
      INSERT INTO usage_anomalies (
        id, detected_at, metric_name, project,
        current_value, rolling_avg, rolling_stddev, deviation_factor
      ) VALUES (?, ?, ?, ?, ?, ?, ?, ?)
      `
    );

    // detected_at is stored as whole seconds since the Unix epoch.
    const bound = statement.bind(
      generateId(),
      Math.floor(Date.now() / 1000),
      metric,
      project,
      currentValue,
      stats.avg,
      stats.stddev,
      deviation
    );

    await bound.run();
  } catch (error) {
    const log = createLoggerFromEnv(env, 'platform-usage', 'platform:usage:anomaly');
    log.error('Error recording anomaly', error instanceof Error ? error : undefined, {
      tag: 'RECORD_ANOMALY_ERROR',
      metric,
      project,
    });
  }
}
238
+
239
+ // =============================================================================
240
+ // ANOMALY ALERTING
241
+ // =============================================================================
242
+
/**
 * Build and send the Slack message for a detected anomaly.
 *
 * @param env - Worker environment, passed through to `sendSlackAlert`.
 * @param metric - Metric name; shown title-cased with underscores as spaces.
 * @param currentValue - Observed value.
 * @param stats - Baseline average, standard deviation and sample count.
 * @param deviation - Number of standard deviations above the average.
 */
export async function sendAnomalySlackAlert(
  env: Env,
  metric: string,
  currentValue: number,
  stats: RollingStats,
  deviation: number
): Promise<void> {
  // Cost metrics render as dollars; everything else is abbreviated with K/M.
  function formatValue(val: number): string {
    if (metric.includes('cost')) {
      return `$${val.toFixed(4)}`;
    }
    if (val >= 1_000_000) {
      return `${(val / 1_000_000).toFixed(2)}M`;
    }
    return val >= 1_000 ? `${(val / 1_000).toFixed(2)}K` : val.toFixed(2);
  }

  // e.g. "d1_rows_written" -> "D1 Rows Written"
  const spaced = metric.replace(/_/g, ' ');
  const metricDisplay = spaced.replace(/\b\w/g, (c) => c.toUpperCase());

  // More than five standard deviations is shown as danger, otherwise warning.
  let color = 'warning';
  if (deviation > 5) {
    color = 'danger';
  }

  const fields = [
    { title: 'Metric', value: metricDisplay, short: true },
    { title: 'Deviation', value: `${deviation.toFixed(1)} stddev`, short: true },
    { title: 'Current Value', value: formatValue(currentValue), short: true },
    { title: '7-Day Avg', value: formatValue(stats.avg), short: true },
    { title: 'Stddev', value: formatValue(stats.stddev), short: true },
    { title: 'Samples', value: `${stats.samples} days`, short: true },
  ];

  const payload: SlackAlertPayload = {
    text: `:warning: Usage Anomaly Detected`,
    attachments: [{ color, fields }],
  };

  await sendSlackAlert(env, payload);
}
292
+
293
+ // =============================================================================
294
+ // ALERT ROUTER INTEGRATION
295
+ // =============================================================================
296
+
/**
 * Deliver an anomaly alert via the `ALERT_ROUTER` service binding.
 *
 * When the binding is absent, the router answers with a non-OK status, or the
 * call throws, the alert is sent through `sendAnomalySlackAlert` instead.
 *
 * @param env - Worker environment; reads the optional `ALERT_ROUTER` binding.
 * @param metric - Metric name.
 * @param currentValue - Observed value.
 * @param stats - Baseline average, standard deviation and sample count.
 * @param deviation - Number of standard deviations above the average.
 * @param project - Project the anomaly belongs to (default `'all'`).
 */
async function sendAnomalyToAlertRouter(
  env: Env,
  metric: string,
  currentValue: number,
  stats: RollingStats,
  deviation: number,
  project: string = 'all'
): Promise<void> {
  // No router bound: go straight to the direct Slack path.
  if (!env.ALERT_ROUTER) {
    await sendAnomalySlackAlert(env, metric, currentValue, stats, deviation);
    return;
  }

  const metricDisplay = metric.replace(/_/g, ' ').replace(/\b\w/g, (c) => c.toUpperCase());

  // >5 stddev -> p0, >3 stddev -> p1, anything else -> p2.
  let severity: 'p0' | 'p1' | 'p2' = 'p2';
  if (deviation > 5) {
    severity = 'p0';
  } else if (deviation > 3) {
    severity = 'p1';
  }

  // Cost metrics render as dollars; everything else is abbreviated with K/M.
  const formatVal = (val: number): string => {
    if (metric.includes('cost')) {
      return `$${val.toFixed(4)}`;
    }
    if (val >= 1_000_000) {
      return `${(val / 1_000_000).toFixed(2)}M`;
    }
    if (val >= 1_000) {
      return `${(val / 1_000).toFixed(2)}K`;
    }
    return val.toFixed(2);
  };

  const summary = `Usage Anomaly: ${metricDisplay} (${deviation.toFixed(1)} stddev)`;
  const message = `Current: ${formatVal(currentValue)}, 7-day avg: ${formatVal(stats.avg)}, StdDev: ${formatVal(stats.stddev)}, Project: ${project}`;

  const payload = {
    source: 'anomaly-detection',
    severity,
    status: 'firing',
    service_id: 'platform-usage',
    summary,
    message,
    timestamp: new Date().toISOString(),
    metadata: {
      metric,
      project,
      currentValue,
      rollingAvg: stats.avg,
      rollingStddev: stats.stddev,
      deviationFactor: deviation,
      samples: stats.samples,
    },
  };

  try {
    // Service binding URL — the hostname is ignored; only the path matters
    const routerUrl = 'https://platform-alert-router.internal/custom';
    const response = await env.ALERT_ROUTER.fetch(routerUrl, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(payload),
    });

    if (response.ok) {
      return;
    }

    const log = createLoggerFromEnv(env, 'platform-usage', 'platform:usage:anomaly');
    log.warn('Alert router returned non-OK, falling back to direct Slack', undefined, {
      tag: 'ALERT_ROUTER_FALLBACK',
      status: response.status,
    });
    await sendAnomalySlackAlert(env, metric, currentValue, stats, deviation);
  } catch (error) {
    const log = createLoggerFromEnv(env, 'platform-usage', 'platform:usage:anomaly');
    log.error('Alert router failed, falling back to direct Slack', error instanceof Error ? error : undefined, {
      tag: 'ALERT_ROUTER_ERROR',
    });
    await sendAnomalySlackAlert(env, metric, currentValue, stats, deviation);
  }
}
371
+
372
+ // =============================================================================
373
+ // MAIN ANOMALY DETECTION
374
+ // =============================================================================
375
+
/**
 * Metric columns checked by `detectAnomalies` on each run.
 */
const MONITORED_METRICS = ['workers_requests', 'd1_rows_written', 'total_cost_usd', 'aigateway_requests', 'workersai_neurons'] as const;

/**
 * Project values checked by `detectAnomalies`: the 'all' aggregate plus
 * individual projects.
 */
// TODO: Add your project IDs here (must match project_registry in D1)
const MONITORED_PROJECTS = [
  'all',
  'platform',
] as const;
393
+
/**
 * Run anomaly detection for every monitored metric across every monitored project.
 * Called during scheduled runs (typically at midnight).
 *
 * For each (project, metric) pair, today's value is compared with the 7-day
 * rolling baseline. A value more than 3 standard deviations above the average
 * is recorded and alerted. Only upward spikes are flagged. A failure on one
 * pair is logged and does not stop the remaining checks.
 *
 * @param env - Worker environment.
 * @returns Number of anomalies detected
 */
export async function detectAnomalies(env: Env): Promise<number> {
  // One logger for the whole run instead of one per logged event.
  const log = createLoggerFromEnv(env, 'platform-usage', 'platform:usage:anomaly');
  let anomaliesDetected = 0;

  for (const project of MONITORED_PROJECTS) {
    for (const metric of MONITORED_METRICS) {
      try {
        const stats = await calculate7DayRollingStats(env, metric, project);

        // Need at least 7 days of data for reliable anomaly detection
        if (!stats || stats.samples < 7) {
          continue;
        }

        // Skip if stddev is 0 (no variation in data). Checked before fetching
        // today's value so a flat baseline does not cost an extra D1 query.
        if (stats.stddev === 0) {
          continue;
        }

        const todayValue = await getTodayMetricValue(env, metric, project);
        const deviation = (todayValue - stats.avg) / stats.stddev;

        // Detect anomaly if deviation > 3 standard deviations
        if (deviation > 3) {
          log.info('Anomaly detected', {
            tag: 'ANOMALY_DETECTED',
            metric,
            project,
            todayValue,
            deviation: deviation.toFixed(1),
            avg: stats.avg.toFixed(2),
          });

          await recordAnomaly(env, metric, todayValue, stats, deviation, project);
          await sendAnomalyToAlertRouter(env, metric, todayValue, stats, deviation, project);
          anomaliesDetected++;
        }
      } catch (error) {
        log.error('Error checking metric for anomaly', error instanceof Error ? error : undefined, {
          tag: 'CHECK_ANOMALY_ERROR',
          metric,
          project,
        });
      }
    }
  }

  return anomaliesDetected;
}
451
+
452
+ // =============================================================================
453
+ // HOURLY D1 WRITE ANOMALY DETECTION
454
+ // =============================================================================
455
+
/**
 * Calculate rolling stats from hourly snapshots (168 hours = 7 days).
 * Used for hourly anomaly detection where daily rollups are too coarse.
 *
 * The window is `[now - 7 days, now - 1 hour)` over `hourly_usage_snapshots`.
 *
 * @param env - Worker environment; uses the `PLATFORM_DB` D1 binding.
 * @param metric - Column name; must be one of `ALLOWED_ROLLING_METRICS`.
 * @param project - Value matched against the `project` column.
 * @returns The rolling stats, or `null` when the metric is not allowed, fewer
 *   than 48 non-NULL samples exist, or the query fails (failure is logged).
 */
export async function calculateHourlyRollingStats(
  env: Env,
  metric: string,
  project: string
): Promise<RollingStats | null> {
  if (!isAllowedMetric(metric)) {
    return null;
  }

  try {
    // COUNT(column) is used rather than COUNT(*): SUM and AVG skip NULL values,
    // so the sample count has to skip them too or the variance below is
    // computed against the wrong n.
    const result = await env.PLATFORM_DB.prepare(
      `
      SELECT
        COUNT(${metric}) as sample_count,
        SUM(${metric}) as sum_value,
        SUM(${metric} * ${metric}) as sum_squared,
        AVG(${metric}) as avg_value
      FROM hourly_usage_snapshots
      WHERE project = ?
        AND snapshot_hour >= datetime('now', '-7 days')
        AND snapshot_hour < datetime('now', '-1 hour')
      `
    )
      .bind(project)
      .first<{
        sample_count: number;
        sum_value: number;
        sum_squared: number;
        avg_value: number;
      }>();

    if (!result || result.sample_count < 48) {
      return null; // Need at least 2 days of hourly data
    }

    const n = result.sample_count;
    const avg = result.avg_value;
    // Population variance = (sum of squares - n * mean^2) / n
    const variance = (result.sum_squared - n * avg * avg) / n;
    // Floating-point cancellation can push the variance slightly below zero.
    const stddev = Math.sqrt(Math.max(0, variance));

    return { avg, stddev, samples: n };
  } catch (error) {
    const log = createLoggerFromEnv(env, 'platform-usage', 'platform:usage:anomaly');
    log.error('Error calculating hourly rolling stats', error instanceof Error ? error : undefined, {
      tag: 'HOURLY_ROLLING_STATS_ERROR',
      metric,
      project,
    });
    return null;
  }
}
511
+
/**
 * Hourly check for D1 write spikes on the 'all' project.
 *
 * Reads the newest `d1_rows_written` snapshot in `[now - 2 hours, now - 1 hour)`
 * and compares it with the hourly rolling baseline. A value more than 3
 * standard deviations above the average is recorded and alerted.
 *
 * NOTE(review): the baseline window ends at `now - 1 hour`, so it appears to
 * include the snapshot being tested — confirm this is intended.
 *
 * @param env - Worker environment; uses the `PLATFORM_DB` D1 binding.
 * @returns Number of anomalies detected (0 or 1)
 */
export async function detectHourlyD1WriteAnomalies(env: Env): Promise<number> {
  const log = createLoggerFromEnv(env, 'platform-usage', 'platform:usage:anomaly');
  const project = 'all';
  const metric = 'd1_rows_written';

  try {
    // Newest snapshot inside the last completed hour.
    const latest = await env.PLATFORM_DB.prepare(
      `
      SELECT ${metric} as value
      FROM hourly_usage_snapshots
      WHERE project = ?
        AND snapshot_hour >= datetime('now', '-2 hours')
        AND snapshot_hour < datetime('now', '-1 hour')
      ORDER BY snapshot_hour DESC
      LIMIT 1
      `
    )
      .bind(project)
      .first<{ value: number }>();

    // No data or zero writes — nothing to flag
    if (!latest || latest.value === 0) {
      return 0;
    }
    const observed = latest.value;

    const stats = await calculateHourlyRollingStats(env, metric, project);
    // A missing baseline or one with no variation cannot yield a deviation.
    if (!stats || stats.stddev === 0) {
      return 0;
    }

    const deviation = (observed - stats.avg) / stats.stddev;
    // Written as a negated ">" so a NaN deviation is treated as "no anomaly".
    if (!(deviation > 3)) {
      return 0;
    }

    log.info('Hourly D1 write anomaly detected', {
      tag: 'HOURLY_D1_ANOMALY',
      value: observed,
      deviation: deviation.toFixed(1),
      avg: stats.avg.toFixed(2),
      stddev: stats.stddev.toFixed(2),
    });

    await recordAnomaly(env, metric, observed, stats, deviation, project);
    await sendAnomalyToAlertRouter(env, metric, observed, stats, deviation, project);
    return 1;
  } catch (error) {
    log.error('Error in hourly D1 write anomaly check', error instanceof Error ? error : undefined, {
      tag: 'HOURLY_D1_CHECK_ERROR',
    });
    return 0;
  }
}
573
+
574
+ // =============================================================================
575
+ // DATASET REGISTRY - Drift Detection for Cloudflare GraphQL Datasets
576
+ // =============================================================================
577
+
/**
 * Probe a single Cloudflare GraphQL dataset to check whether it is queryable.
 *
 * Sends a one-row query covering yesterday 00:00Z to today 00:00Z. The dataset
 * is reported as unavailable when the name is not a valid GraphQL name, the
 * HTTP response is not OK, the request throws, or the GraphQL errors mention
 * an unknown or disabled field. Any other GraphQL error still counts as
 * available.
 *
 * @param env - Worker environment; reads `CLOUDFLARE_API_TOKEN` and `CLOUDFLARE_ACCOUNT_ID`.
 * @param datasetName - GraphQL field name of the dataset under `accounts`.
 * @returns `true` if the dataset looks available, otherwise `false`.
 */
export async function probeDataset(env: Env, datasetName: string): Promise<boolean> {
  // The name is spliced into the query text as a field name, so accept only a
  // syntactically valid GraphQL name. Anything else cannot be a dataset and
  // could otherwise alter the structure of the query.
  if (!/^[_A-Za-z][_0-9A-Za-z]*$/.test(datasetName)) {
    return false;
  }

  const GRAPHQL_ENDPOINT = 'https://api.cloudflare.com/client/v4/graphql';
  const now = new Date();
  const yesterday = new Date(now);
  yesterday.setUTCDate(yesterday.getUTCDate() - 1);

  // Build a minimal probe query
  const query = `
    query ProbeDataset($accountTag: String!, $limit: Int!) {
      viewer {
        accounts(filter: { accountTag: $accountTag }) {
          ${datasetName}(limit: $limit, filter: {
            datetime_geq: "${yesterday.toISOString().split('T')[0]}T00:00:00Z",
            datetime_leq: "${now.toISOString().split('T')[0]}T00:00:00Z"
          }) {
            dimensions {
              datetime
            }
          }
        }
      }
    }
  `;

  try {
    const response = await fetchWithRetry(GRAPHQL_ENDPOINT, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        Authorization: `Bearer ${env.CLOUDFLARE_API_TOKEN}`,
      },
      body: JSON.stringify({
        query,
        variables: {
          accountTag: env.CLOUDFLARE_ACCOUNT_ID,
          limit: 1,
        },
      }),
    });

    if (!response.ok) {
      return false;
    }

    const result = (await response.json()) as { errors?: Array<{ message: string }> };

    // Check for GraphQL errors indicating dataset doesn't exist
    if (result.errors) {
      const errorStr = JSON.stringify(result.errors);
      if (
        errorStr.includes('Cannot query field') ||
        errorStr.includes('Unknown field') ||
        errorStr.includes('not enabled') ||
        errorStr.includes('not available')
      ) {
        return false;
      }
    }

    return true;
  } catch {
    // Network errors or other issues - assume unavailable
    return false;
  }
}
647
+
/**
 * Probe every entry in `KNOWN_DATASETS` and refresh `dataset_registry`.
 *
 * Each available dataset is upserted: a new row is inserted, or `last_seen`
 * and `updated_at` are refreshed on an existing one. On every run, each
 * available billable dataset that is not in `QUERIED_DATASETS` is counted and,
 * when a Slack webhook is configured, reported to Slack. A failure on one
 * dataset is logged and does not stop the others.
 *
 * @param env - Worker environment; uses `PLATFORM_DB` and `SLACK_WEBHOOK_URL`.
 * @returns Object with counts of datasets checked, alerts generated and D1 writes
 */
export async function discoverAndUpdateDatasetRegistry(
  env: Env
): Promise<{ datasetsChecked: number; newBillableAlerts: number; d1Writes: number }> {
  const log = createLoggerFromEnv(env, 'platform-usage', 'platform:usage:dataset-registry');
  log.info('Starting weekly dataset discovery');

  // One timestamp shared by every row written during this run.
  const now = new Date().toISOString();
  const counts = { datasetsChecked: 0, newBillableAlerts: 0, d1Writes: 0 };

  for (const dataset of KNOWN_DATASETS) {
    const available = await probeDataset(env, dataset.name);
    counts.datasetsChecked += 1;

    if (available) {
      try {
        // Insert the dataset, or refresh last_seen / updated_at if it already exists.
        const upsert = env.PLATFORM_DB.prepare(
          `
          INSERT INTO dataset_registry (dataset_name, first_seen, last_seen, is_queried, is_billable, category, created_at, updated_at)
          VALUES (?, ?, ?, ?, ?, ?, ?, ?)
          ON CONFLICT (dataset_name) DO UPDATE SET
            last_seen = excluded.last_seen,
            updated_at = excluded.updated_at
          `
        );
        const queriedFlag = QUERIED_DATASETS.has(dataset.name) ? 1 : 0;
        const billableFlag = dataset.billable ? 1 : 0;
        await upsert
          .bind(dataset.name, now, now, queriedFlag, billableFlag, dataset.category, now, now)
          .run();
        counts.d1Writes += 1;

        // Alert if this is a billable dataset we're not querying
        const unqueriedBillable = dataset.billable && !QUERIED_DATASETS.has(dataset.name);
        if (unqueriedBillable) {
          log.info('Available billable dataset not queried', { dataset: dataset.name });
          counts.newBillableAlerts += 1;

          if (env.SLACK_WEBHOOK_URL) {
            const fields = [
              { title: 'Dataset', value: dataset.name, short: true },
              { title: 'Category', value: dataset.category, short: true },
              {
                title: 'Action Required',
                value: 'Consider adding query for accurate cost tracking',
                short: false,
              },
            ];
            await sendSlackAlert(env, {
              text: ':warning: Billable Dataset Not Queried',
              attachments: [{ color: 'warning', fields }],
            });
          }
        }
      } catch (error) {
        log.error(`Error updating ${dataset.name}`, error instanceof Error ? error : undefined);
      }
    }

    // Small delay between probes to avoid rate limiting
    await new Promise((resolve) => setTimeout(resolve, 50));
  }

  const { datasetsChecked, newBillableAlerts, d1Writes } = counts;
  log.info('Discovery complete', { datasetsChecked, newBillableAlerts, d1Writes });

  return { datasetsChecked, newBillableAlerts, d1Writes };
}