@littlebearapps/platform-admin-sdk 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +112 -0
- package/dist/index.d.ts +16 -0
- package/dist/index.js +89 -0
- package/dist/prompts.d.ts +27 -0
- package/dist/prompts.js +80 -0
- package/dist/scaffold.d.ts +5 -0
- package/dist/scaffold.js +65 -0
- package/dist/templates.d.ts +16 -0
- package/dist/templates.js +131 -0
- package/package.json +46 -0
- package/templates/full/migrations/006_pattern_discovery.sql +199 -0
- package/templates/full/migrations/007_notifications_search.sql +127 -0
- package/templates/full/workers/lib/pattern-discovery/ai-prompt.ts +644 -0
- package/templates/full/workers/lib/pattern-discovery/clustering.ts +278 -0
- package/templates/full/workers/lib/pattern-discovery/shadow-evaluation.ts +603 -0
- package/templates/full/workers/lib/pattern-discovery/storage.ts +806 -0
- package/templates/full/workers/lib/pattern-discovery/types.ts +159 -0
- package/templates/full/workers/lib/pattern-discovery/validation.ts +278 -0
- package/templates/full/workers/pattern-discovery.ts +661 -0
- package/templates/full/workers/platform-alert-router.ts +1809 -0
- package/templates/full/workers/platform-notifications.ts +424 -0
- package/templates/full/workers/platform-search.ts +480 -0
- package/templates/full/workers/platform-settings.ts +436 -0
- package/templates/full/wrangler.alert-router.jsonc.hbs +34 -0
- package/templates/full/wrangler.notifications.jsonc.hbs +23 -0
- package/templates/full/wrangler.pattern-discovery.jsonc.hbs +33 -0
- package/templates/full/wrangler.search.jsonc.hbs +16 -0
- package/templates/full/wrangler.settings.jsonc.hbs +23 -0
- package/templates/shared/README.md.hbs +69 -0
- package/templates/shared/config/budgets.yaml.hbs +72 -0
- package/templates/shared/config/services.yaml.hbs +45 -0
- package/templates/shared/migrations/001_core_tables.sql +117 -0
- package/templates/shared/migrations/002_usage_warehouse.sql +830 -0
- package/templates/shared/migrations/003_feature_tracking.sql +250 -0
- package/templates/shared/migrations/004_settings_alerts.sql +452 -0
- package/templates/shared/migrations/seed.sql.hbs +4 -0
- package/templates/shared/package.json.hbs +21 -0
- package/templates/shared/scripts/sync-config.ts +242 -0
- package/templates/shared/tsconfig.json +12 -0
- package/templates/shared/workers/lib/analytics-engine.ts +357 -0
- package/templates/shared/workers/lib/billing.ts +293 -0
- package/templates/shared/workers/lib/circuit-breaker-middleware.ts +25 -0
- package/templates/shared/workers/lib/control.ts +292 -0
- package/templates/shared/workers/lib/economics.ts +368 -0
- package/templates/shared/workers/lib/metrics.ts +103 -0
- package/templates/shared/workers/lib/platform-settings.ts +407 -0
- package/templates/shared/workers/lib/shared/allowances.ts +333 -0
- package/templates/shared/workers/lib/shared/cloudflare.ts +1362 -0
- package/templates/shared/workers/lib/shared/types.ts +58 -0
- package/templates/shared/workers/lib/telemetry-sampling.ts +360 -0
- package/templates/shared/workers/lib/usage/collectors/example.ts +96 -0
- package/templates/shared/workers/lib/usage/collectors/index.ts +128 -0
- package/templates/shared/workers/lib/usage/handlers/audit.ts +306 -0
- package/templates/shared/workers/lib/usage/handlers/backfill.ts +845 -0
- package/templates/shared/workers/lib/usage/handlers/behavioral.ts +429 -0
- package/templates/shared/workers/lib/usage/handlers/data-queries.ts +507 -0
- package/templates/shared/workers/lib/usage/handlers/dlq-admin.ts +364 -0
- package/templates/shared/workers/lib/usage/handlers/health-trends.ts +222 -0
- package/templates/shared/workers/lib/usage/handlers/index.ts +35 -0
- package/templates/shared/workers/lib/usage/handlers/usage-admin.ts +421 -0
- package/templates/shared/workers/lib/usage/handlers/usage-features.ts +1262 -0
- package/templates/shared/workers/lib/usage/handlers/usage-metrics.ts +2420 -0
- package/templates/shared/workers/lib/usage/handlers/usage-settings.ts +610 -0
- package/templates/shared/workers/lib/usage/queue/budget-enforcement.ts +1032 -0
- package/templates/shared/workers/lib/usage/queue/cost-budget-enforcement.ts +128 -0
- package/templates/shared/workers/lib/usage/queue/cost-calculator.ts +77 -0
- package/templates/shared/workers/lib/usage/queue/dlq-handler.ts +161 -0
- package/templates/shared/workers/lib/usage/queue/index.ts +19 -0
- package/templates/shared/workers/lib/usage/queue/telemetry-processor.ts +790 -0
- package/templates/shared/workers/lib/usage/scheduled/anomaly-detection.ts +732 -0
- package/templates/shared/workers/lib/usage/scheduled/data-collection.ts +956 -0
- package/templates/shared/workers/lib/usage/scheduled/error-digest.ts +343 -0
- package/templates/shared/workers/lib/usage/scheduled/index.ts +18 -0
- package/templates/shared/workers/lib/usage/scheduled/rollups.ts +1561 -0
- package/templates/shared/workers/lib/usage/shared/constants.ts +362 -0
- package/templates/shared/workers/lib/usage/shared/index.ts +14 -0
- package/templates/shared/workers/lib/usage/shared/types.ts +1066 -0
- package/templates/shared/workers/lib/usage/shared/utils.ts +795 -0
- package/templates/shared/workers/platform-usage.ts +1915 -0
- package/templates/shared/wrangler.usage.jsonc.hbs +58 -0
- package/templates/standard/migrations/005_error_collection.sql +162 -0
- package/templates/standard/workers/error-collector.ts +2670 -0
- package/templates/standard/workers/lib/error-collector/capture.ts +213 -0
- package/templates/standard/workers/lib/error-collector/digest.ts +448 -0
- package/templates/standard/workers/lib/error-collector/email-health-alerts.ts +262 -0
- package/templates/standard/workers/lib/error-collector/fingerprint.ts +258 -0
- package/templates/standard/workers/lib/error-collector/gap-alerts.ts +293 -0
- package/templates/standard/workers/lib/error-collector/github.ts +329 -0
- package/templates/standard/workers/lib/error-collector/types.ts +262 -0
- package/templates/standard/workers/lib/sentinel/gap-detection.ts +734 -0
- package/templates/standard/workers/lib/shared/slack-alerts.ts +585 -0
- package/templates/standard/workers/platform-sentinel.ts +1744 -0
- package/templates/standard/wrangler.error-collector.jsonc.hbs +44 -0
- package/templates/standard/wrangler.sentinel.jsonc.hbs +45 -0
|
@@ -0,0 +1,734 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Gap Detection Module for Platform Sentinel
|
|
3
|
+
*
|
|
4
|
+
* Detects missing hourly usage snapshots and stale projects.
|
|
5
|
+
* Runs every 15 minutes as part of platform-sentinel's scheduled handler.
|
|
6
|
+
*
|
|
7
|
+
* @module workers/lib/sentinel/gap-detection
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import type { D1Database, KVNamespace } from '@cloudflare/workers-types';
|
|
11
|
+
import type { Logger } from '@littlebearapps/platform-consumer-sdk';
|
|
12
|
+
|
|
13
|
+
// TODO: Set your dashboard URL and alert email
// Base URL for the "/usage/unified" dashboard links embedded in the Slack and email alerts.
const DASHBOARD_URL = 'https://your-dashboard.example.com';
// Base URL used in the backfill curl command shown in the Slack alert.
const PLATFORM_USAGE_URL = 'https://platform-usage.your-subdomain.workers.dev';

// TODO: Set the email "from" address for your Resend domain
// Sent as the `from` field of the Resend email request.
const ALERT_FROM_EMAIL = 'Platform Alerts <alerts@mail.your-domain.com>';
|
|
19
|
+
|
|
20
|
+
/**
 * Environment bindings required for gap detection
 */
export interface GapDetectionEnv {
  /** D1 database that is queried for snapshots and receives gap_detection_log rows. */
  PLATFORM_DB: D1Database;
  /** KV namespace; not read or written by the functions in this module. */
  PLATFORM_CACHE: KVNamespace;
  /** KV namespace holding the alert rate-limit keys ('gap-detection:alert', 'gap-detection:email'). */
  PLATFORM_ALERTS: KVNamespace;
  /** Slack webhook URL; when unset, alertGaps logs a warning and sends nothing. */
  SLACK_WEBHOOK_URL?: string;
  /** Resend API key; alertGapsEmail requires both this and ALERT_EMAIL_TO. */
  RESEND_API_KEY?: string;
  /** Recipient address for the critical-gap email. */
  ALERT_EMAIL_TO?: string;
}
|
|
31
|
+
|
|
32
|
+
/**
 * Missing hour entry
 */
export interface MissingHour {
  /** Project the snapshot was expected for. */
  project: string;
  hour: string; // ISO datetime (YYYY-MM-DDTHH:00:00Z)
  expectedAt: string; // When it should have been collected (computed as `hour` plus one hour)
}
|
|
40
|
+
|
|
41
|
+
/**
 * Stale project entry
 */
export interface StaleProject {
  /** Project whose most recent snapshot is older than the stale threshold. */
  project: string;
  /** MAX(snapshot_hour) for the project, as stored in hourly_usage_snapshots. */
  lastSnapshot: string;
  /** Age of the last snapshot in whole hours (the query casts the value to INTEGER). */
  hoursSinceLastSnapshot: number;
}
|
|
49
|
+
|
|
50
|
+
/**
 * Gap detection report
 */
export interface GapReport {
  /** ISO timestamp taken when detectGaps started. */
  checkTime: string;
  /** Missing hourly snapshots, most recent first. */
  missingHours: MissingHour[];
  /** Projects whose latest snapshot is older than the stale threshold. */
  staleProjects: StaleProject[];
  /** Same as missingHours.length. */
  totalMissingHours: number;
  /** Same as staleProjects.length. */
  totalStaleProjects: number;
  /**
   * 'critical' when there are more than 3 missing hours, more than 1 stale
   * project, or any project more than 6 hours stale; 'warning' when anything
   * at all is missing or stale; otherwise 'ok'.
   */
  severity: 'ok' | 'warning' | 'critical';
}
|
|
61
|
+
|
|
62
|
+
/**
 * Rate limit for gap alerts: 1 alert per hour.
 * Used as the expirationTtl of the Slack alert rate-limit key.
 */
const GAP_ALERT_RATE_LIMIT_TTL = 3600;

/**
 * Projects we expect to have hourly snapshots for.
 *
 * Note: Only 'all' project gets hourly snapshots from GraphQL collection.
 * Per-project data exists in resource_usage_snapshots table (per-resource granularity).
 */
const EXPECTED_PROJECTS = ['all'];

/**
 * How many hours back to check for gaps (24 hours)
 */
const LOOKBACK_HOURS = 24;

/**
 * Stale threshold in hours.
 *
 * The stale-project query truncates staleness to whole hours and keeps rows
 * strictly greater than this value, so with a threshold of 2 a project is
 * flagged once its latest snapshot is 3 or more hours old.
 */
const STALE_THRESHOLD_HOURS = 2;
|
|
84
|
+
|
|
85
|
+
/**
 * Run the missing-hour and stale-project checks and summarise them in one report.
 *
 * Severity rules:
 * - `critical`: more than 3 missing hours, more than 1 stale project, or any
 *   stale project more than 6 hours behind.
 * - `warning`: at least one missing hour or stale project.
 * - `ok`: nothing missing and nothing stale.
 *
 * Failures inside the checks are logged rather than thrown, so a failed check
 * simply contributes no entries to the report.
 *
 * @param env - Bindings providing the D1 database
 * @param log - Logger for errors and the completion summary
 * @returns The assembled gap report
 */
export async function detectGaps(env: GapDetectionEnv, log: Logger): Promise<GapReport> {
  const checkTime = new Date().toISOString();
  let missingHours: MissingHour[] = [];
  let staleProjects: StaleProject[] = [];

  try {
    // 1. Missing hours in the lookback window, then 2. stale projects.
    missingHours = await findMissingHours(env, log);
    staleProjects = await findStaleProjects(env, log);
  } catch (error) {
    log.error('Gap detection query failed', error);
  }

  const isCritical =
    missingHours.length > 3 ||
    staleProjects.length > 1 ||
    staleProjects.some((entry) => entry.hoursSinceLastSnapshot > 6);
  const hasAnyGap = missingHours.length > 0 || staleProjects.length > 0;

  let severity: GapReport['severity'];
  if (isCritical) {
    severity = 'critical';
  } else if (hasAnyGap) {
    severity = 'warning';
  } else {
    severity = 'ok';
  }

  const report: GapReport = {
    checkTime,
    missingHours,
    staleProjects,
    totalMissingHours: missingHours.length,
    totalStaleProjects: staleProjects.length,
    severity,
  };

  log.info('Gap detection complete', {
    missingHours: report.totalMissingHours,
    staleProjects: report.totalStaleProjects,
    severity: report.severity,
  });

  return report;
}
|
|
135
|
+
|
|
136
|
+
/**
 * Find missing hourly snapshots in the last LOOKBACK_HOURS complete UTC hours.
 *
 * For every project in EXPECTED_PROJECTS, the stored `snapshot_hour` values
 * are compared against the list of expected hours (the current, still
 * incomplete hour is not expected yet). Stored values are matched in either
 * `YYYY-MM-DDTHH:00:00Z` or `YYYY-MM-DDTHH:00:00.000Z` form.
 *
 * A query failure for one project is logged and does not stop the remaining
 * projects from being checked.
 *
 * @param env - Bindings providing the D1 database
 * @param log - Logger for per-project query failures
 * @returns Missing hours across all expected projects, most recent first
 */
async function findMissingHours(env: GapDetectionEnv, log: Logger): Promise<MissingHour[]> {
  const HOUR_MS = 60 * 60 * 1000;
  const missing: MissingHour[] = [];

  // Start of the current UTC hour; expected hours are the full hours before it.
  const currentHourStart = new Date();
  currentHourStart.setUTCMinutes(0, 0, 0);

  const expectedHours: string[] = [];
  for (let i = 1; i <= LOOKBACK_HOURS; i++) {
    const hourStart = new Date(currentHourStart.getTime() - i * HOUR_MS);
    expectedHours.push(hourStart.toISOString().replace('.000Z', 'Z'));
  }

  // The oldest expected hour starts up to LOOKBACK_HOURS + 1 hours before
  // "now" (e.g. at 10:59 the 24th hour back began 24h59m ago), so the query
  // window is one hour wider than the lookback. Deriving it from
  // LOOKBACK_HOURS keeps the query and the expected list in step.
  const windowModifier = `-${LOOKBACK_HOURS + 1} hours`;

  for (const project of EXPECTED_PROJECTS) {
    try {
      const result = await env.PLATFORM_DB.prepare(
        `
        SELECT snapshot_hour
        FROM hourly_usage_snapshots
        WHERE project = ?
          AND snapshot_hour >= datetime('now', ?)
        ORDER BY snapshot_hour DESC
      `
      )
        .bind(project, windowModifier)
        .all<{ snapshot_hour: string }>();

      // Normalise stored values once (drop a ".000" millisecond part) so a
      // single Set lookup covers both accepted formats.
      const foundHours = new Set(
        (result.results ?? []).map((row) => String(row.snapshot_hour).replace('.000Z', 'Z'))
      );

      for (const hour of expectedHours) {
        if (foundHours.has(hour)) continue;

        missing.push({
          project,
          hour,
          // The snapshot for an hour is expected once that hour has ended.
          expectedAt: new Date(new Date(hour).getTime() + HOUR_MS).toISOString(),
        });
      }
    } catch (error) {
      log.error('Failed to check missing hours for project', error, { project });
    }
  }

  // Sort by hour descending (most recent first)
  missing.sort((a, b) => b.hour.localeCompare(a.hour));

  return missing;
}
|
|
200
|
+
|
|
201
|
+
/**
 * List expected projects whose newest hourly snapshot is too old.
 *
 * Only projects in EXPECTED_PROJECTS (currently just 'all') are considered.
 * Staleness is computed in SQL as whole hours between now and the newest
 * `snapshot_hour`; rows are returned when that value exceeds
 * STALE_THRESHOLD_HOURS.
 *
 * NOTE(review): the query groups existing rows, so a project with no rows at
 * all in hourly_usage_snapshots does not appear in the result.
 *
 * @param env - Bindings providing the D1 database
 * @param log - Logger for query failures
 * @returns Stale projects, or an empty array when none are stale or the query fails
 */
async function findStaleProjects(env: GapDetectionEnv, log: Logger): Promise<StaleProject[]> {
  // Nothing to check when no projects are expected to have hourly snapshots.
  if (EXPECTED_PROJECTS.length === 0) {
    return [];
  }

  // One "?" per expected project for the SQL IN clause.
  const inClause = Array.from(EXPECTED_PROJECTS, () => '?').join(', ');
  const staleSql = `
      SELECT
        project,
        MAX(snapshot_hour) as last_snapshot,
        CAST((julianday('now') - julianday(MAX(snapshot_hour))) * 24 AS INTEGER) as hours_stale
      FROM hourly_usage_snapshots
      WHERE project IN (${inClause})
      GROUP BY project
      HAVING hours_stale > ?
    `;

  try {
    const { results } = await env.PLATFORM_DB.prepare(staleSql)
      .bind(...EXPECTED_PROJECTS, STALE_THRESHOLD_HOURS)
      .all<{ project: string; last_snapshot: string; hours_stale: number }>();

    return (results ?? []).map((row) => ({
      project: row.project,
      lastSnapshot: row.last_snapshot,
      hoursSinceLastSnapshot: row.hours_stale,
    }));
  } catch (error) {
    log.error('Failed to check stale projects', error);
    return [];
  }
}
|
|
244
|
+
|
|
245
|
+
/**
 * Persist a gap report as one row in the gap_detection_log table.
 *
 * The row gets a fresh random UUID and stores the headline counts, the
 * severity, and the full report serialised as JSON. Failures are logged and
 * not rethrown.
 *
 * @param env - Bindings providing the D1 database
 * @param report - Report to store
 * @param log - Logger for the success/failure message
 */
export async function storeGapReport(
  env: GapDetectionEnv,
  report: GapReport,
  log: Logger
): Promise<void> {
  const insertSql = `
      INSERT INTO gap_detection_log (id, detection_time, missing_hours_count, stale_projects_count, severity, report_json)
      VALUES (?, ?, ?, ?, ?, ?)
    `;

  try {
    const id = crypto.randomUUID();
    const { checkTime, totalMissingHours, totalStaleProjects, severity } = report;

    const statement = env.PLATFORM_DB.prepare(insertSql).bind(
      id,
      checkTime,
      totalMissingHours,
      totalStaleProjects,
      severity,
      JSON.stringify(report)
    );
    await statement.run();

    log.debug('Stored gap report', { id, severity });
  } catch (error) {
    log.error('Failed to store gap report', error);
  }
}
|
|
277
|
+
|
|
278
|
+
/**
 * Post a Slack alert describing the detected gaps.
 *
 * Nothing is sent when the report severity is 'ok', when the rate-limit key
 * is still present in PLATFORM_ALERTS, or when SLACK_WEBHOOK_URL is not set.
 * After a successful post the rate-limit key is written with a TTL of
 * GAP_ALERT_RATE_LIMIT_TTL. Webhook failures are logged, not thrown.
 *
 * @param env - Bindings providing the alerts KV namespace and the webhook URL
 * @param report - Gap report to announce
 * @param log - Logger for skip reasons and delivery results
 */
export async function alertGaps(
  env: GapDetectionEnv,
  report: GapReport,
  log: Logger
): Promise<void> {
  // Only warning or critical reports are worth an alert.
  if (report.severity === 'ok') return;

  // Rate limit: skip while the marker key from the previous alert is alive.
  const alertKey = 'gap-detection:alert';
  const previousAlert = await env.PLATFORM_ALERTS.get(alertKey);
  if (previousAlert) {
    log.debug('Gap alert rate limited');
    return;
  }

  const webhookUrl = env.SLACK_WEBHOOK_URL;
  if (!webhookUrl) {
    log.warn('No SLACK_WEBHOOK_URL configured, skipping gap alert');
    return;
  }

  const isCritical = report.severity === 'critical';
  const emoji = isCritical ? ':rotating_light:' : ':warning:';
  const colour = isCritical ? '#dc3545' : '#ffc107';
  const severityLabel = report.severity.toUpperCase();

  // "project: Nh" for every project that has missing hours.
  const missingCounts = new Map<string, number>();
  report.missingHours.forEach(({ project }) => {
    missingCounts.set(project, (missingCounts.get(project) ?? 0) + 1);
  });
  const missingSummary = Array.from(
    missingCounts,
    ([project, count]) => `${project}: ${count}h`
  ).join(', ');

  // "project (Nh stale)" for every stale project.
  const staleSummary = report.staleProjects
    .map((entry) => `${entry.project} (${entry.hoursSinceLastSnapshot}h stale)`)
    .join(', ');

  // Attachment fields are only included when they have something to show.
  const attachmentFields: Array<{ title: string; value: string; short: boolean }> = [];
  if (missingSummary) {
    attachmentFields.push({
      title: 'Missing Hours by Project',
      value: missingSummary,
      short: false,
    });
  }
  if (staleSummary) {
    attachmentFields.push({ title: 'Stale Projects', value: staleSummary, short: false });
  }

  const investigationText = `*Investigation Commands:*\n\`\`\`# Check recent hourly snapshots
npx wrangler d1 execute platform-metrics --remote --command "SELECT project, snapshot_hour FROM hourly_usage_snapshots WHERE snapshot_hour >= datetime('now', '-6 hours') ORDER BY snapshot_hour DESC"

# Check gap detection history
npx wrangler d1 execute platform-metrics --remote --command "SELECT * FROM gap_detection_log ORDER BY detection_time DESC LIMIT 5"

# Trigger backfill (if needed)
curl -X POST ${PLATFORM_USAGE_URL}/usage/gaps/backfill -H 'Content-Type: application/json' -d '{"startDate":"YYYY-MM-DD","endDate":"YYYY-MM-DD"}'\`\`\``;

  const blocks: Array<Record<string, unknown>> = [
    {
      type: 'header',
      text: {
        type: 'plain_text',
        text: `${emoji} Usage Data Gap Detected`,
      },
    },
    {
      type: 'section',
      fields: [
        { type: 'mrkdwn', text: `*Severity:*\n${severityLabel}` },
        { type: 'mrkdwn', text: `*Missing Hours:*\n${report.totalMissingHours}` },
        { type: 'mrkdwn', text: `*Stale Projects:*\n${report.totalStaleProjects}` },
        { type: 'mrkdwn', text: `*Check Time:*\n${report.checkTime}` },
      ],
    },
    // Investigation commands
    {
      type: 'section',
      text: {
        type: 'mrkdwn',
        text: investigationText,
      },
    },
    // Dashboard link
    {
      type: 'actions',
      elements: [
        {
          type: 'button',
          text: {
            type: 'plain_text',
            text: 'Usage Dashboard',
            emoji: true,
          },
          url: `${DASHBOARD_URL}/usage/unified`,
        },
      ],
    },
  ];

  const message = {
    text: `[${severityLabel}] Usage data gaps detected - ${report.totalMissingHours} missing hours`,
    blocks,
    attachments: [
      {
        color: colour,
        fields: attachmentFields,
      },
    ],
  };

  try {
    const response = await fetch(webhookUrl, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(message),
    });

    if (!response.ok) {
      const text = await response.text();
      log.error('Failed to send gap detection Slack alert', {
        status: response.status,
        error: text,
      });
      return;
    }

    // Delivered: start the rate-limit window.
    await env.PLATFORM_ALERTS.put(alertKey, new Date().toISOString(), {
      expirationTtl: GAP_ALERT_RATE_LIMIT_TTL,
    });
    log.info('Sent gap detection Slack alert', { severity: report.severity });
  } catch (error) {
    log.error('Error sending gap detection Slack alert', error);
  }
}
|
|
422
|
+
|
|
423
|
+
/**
|
|
424
|
+
* NOTE: the email alert for critical gaps is implemented by alertGapsEmail, which is defined after detectProjectGaps below.
|
|
425
|
+
*/
|
|
426
|
+
// =============================================================================
|
|
427
|
+
// PER-PROJECT GAP DETECTION
|
|
428
|
+
// =============================================================================
|
|
429
|
+
|
|
430
|
+
/**
 * Threshold for per-project coverage alerts (percentage).
 * Projects whose coverage percentage is strictly below this value are reported.
 */
const PROJECT_COVERAGE_THRESHOLD = 90;

/**
 * On-demand resources excluded from coverage "expected" denominator.
 * These only appear in GraphQL when they receive traffic, so they create
 * false-positive gaps on quiet days. Format: "resource_type:resource_id"
 *
 * TODO: Customise this list for your projects' on-demand resources
 */
const ON_DEMAND_RESOURCE_EXCLUSIONS = new Set([
  'worker:platform-settings', // Admin-only, on-demand API
  'worker:platform-search', // Admin-only, on-demand API
  'worker:platform-alert-router', // Only invoked by Gatus/GitHub webhooks
  'worker:platform-ingest-tester', // Manual testing tool
  'worker:platform-query-tester', // Manual testing tool
  'worker:sdk-test-client', // Manual testing tool
]);
|
|
450
|
+
|
|
451
|
+
/**
 * Resource-level coverage breakdown (one entry per resource type)
 */
export interface ResourceCoverage {
  /** Resource type the figures apply to. */
  resourceType: string;
  /**
   * Despite the name, detectProjectGaps fills this with the number of distinct
   * resources of this type that have data in the last 24 hours.
   */
  hoursWithData: number;
  /** Active resources as a percentage of known resources of this type (one decimal place). */
  coveragePct: number;
}
|
|
459
|
+
|
|
460
|
+
/**
 * Per-project gap detection result.
 *
 * NOTE: several field names mention hours, but detectProjectGaps measures
 * coverage in distinct resources and fills the fields as described below.
 */
export interface ProjectGap {
  project: string;
  hoursWithData: number; // Count of distinct resources with data in the last 24 hours
  expectedHours: number; // Count of distinct known resources for the project
  coveragePct: number; // Active resources as a percentage of known resources
  missingHours: string[]; // "resource_type:resource_id" keys of resources with no recent data
  repository?: string; // GitHub repo from project_registry
  resourceBreakdown?: ResourceCoverage[]; // Per-resource type coverage
  lastDataHour?: string; // Most recent hour with data
}
|
|
473
|
+
|
|
474
|
+
/**
 * Detect gaps in per-project data coverage.
 *
 * Coverage is measured in distinct resources: a project's "known" resources
 * are every resource_type/resource_id pair ever recorded for it in
 * resource_usage_snapshots (minus ON_DEMAND_RESOURCE_EXCLUSIONS), and its
 * "active" resources are the known ones with a snapshot in the last 24 hours.
 * Projects whose active/known percentage is below PROJECT_COVERAGE_THRESHOLD
 * are returned. Projects named 'unknown' or 'all' are ignored.
 *
 * The on-demand exclusions are applied to the coverage figures, the list of
 * missing resources, and the per-type breakdown alike, so the three always
 * describe the same set of resources.
 *
 * Field mapping on the returned ProjectGap (names are historical):
 * - hoursWithData  -> number of active resources
 * - expectedHours  -> number of known resources
 * - missingHours   -> "resource_type:resource_id" keys with no recent data
 *
 * Errors are logged and never thrown; on failure the gaps collected so far
 * are returned.
 *
 * @param env - Bindings providing the D1 database
 * @param log - Logger for results and failures
 * @returns Array of projects with low coverage, including their repo mapping
 */
export async function detectProjectGaps(
  env: GapDetectionEnv,
  log: Logger
): Promise<ProjectGap[]> {
  const gaps: ProjectGap[] = [];
  const describeError = (error: unknown): string =>
    error instanceof Error ? error.message : String(error);

  try {
    // Build exclusion list for SQL (on-demand resources that create false-positive gaps)
    const exclusionKeys = Array.from(ON_DEMAND_RESOURCE_EXCLUSIONS);
    const exclusionPlaceholders = exclusionKeys.map(() => '?').join(', ');

    // Query resource-based coverage per project from resource_usage_snapshots.
    const coverageResult = await env.PLATFORM_DB.prepare(
      `
      WITH recent AS (
        SELECT project, resource_type, resource_id, snapshot_hour
        FROM resource_usage_snapshots
        WHERE snapshot_hour >= datetime('now', '-24 hours')
          AND project IS NOT NULL
          AND project NOT IN ('unknown', 'all')
      ),
      known AS (
        SELECT project, resource_type, resource_id
        FROM resource_usage_snapshots
        WHERE project IS NOT NULL
          AND project NOT IN ('unknown', 'all')
          AND (resource_type || ':' || resource_id) NOT IN (${exclusionPlaceholders})
      )
      SELECT
        k.project,
        COUNT(DISTINCT k.resource_type || ':' || k.resource_id) as expected_resources,
        COUNT(DISTINCT r.resource_type || ':' || r.resource_id) as active_resources,
        ROUND(
          COUNT(DISTINCT r.resource_type || ':' || r.resource_id) * 100.0 /
          MAX(COUNT(DISTINCT k.resource_type || ':' || k.resource_id), 1),
          1
        ) as coverage_pct,
        MAX(r.snapshot_hour) as last_data_hour
      FROM known k
      LEFT JOIN recent r
        ON k.project = r.project
        AND k.resource_type = r.resource_type
        AND k.resource_id = r.resource_id
      GROUP BY k.project
      HAVING coverage_pct < ?
    `
    )
      .bind(...exclusionKeys, PROJECT_COVERAGE_THRESHOLD)
      .all<{
        project: string;
        expected_resources: number;
        active_resources: number;
        coverage_pct: number;
        last_data_hour: string | null;
      }>();

    if (!coverageResult.results || coverageResult.results.length === 0) {
      log.debug('All projects have adequate coverage');
      return gaps;
    }

    // For each project with low coverage, get details
    for (const row of coverageResult.results) {
      // Known (non-excluded) resources that have no data in the last 24 hours.
      const missingResult = await env.PLATFORM_DB.prepare(
        `
        SELECT DISTINCT resource_type || ':' || resource_id as resource_key
        FROM resource_usage_snapshots
        WHERE project = ?
          AND (resource_type || ':' || resource_id) NOT IN (${exclusionPlaceholders})
          AND resource_type || ':' || resource_id NOT IN (
            SELECT DISTINCT resource_type || ':' || resource_id
            FROM resource_usage_snapshots
            WHERE project = ?
              AND snapshot_hour >= datetime('now', '-24 hours')
          )
      `
      )
        .bind(row.project, ...exclusionKeys, row.project)
        .all<{ resource_key: string }>();

      const missingResources = missingResult.results?.map((r) => r.resource_key) ?? [];

      // Look up GitHub repo from project_registry (best effort)
      let repository: string | undefined;
      try {
        const repoResult = await env.PLATFORM_DB.prepare(
          `SELECT repo_path FROM project_registry WHERE project_id = ? LIMIT 1`
        )
          .bind(row.project)
          .first<{ repo_path: string | null }>();
        repository = repoResult?.repo_path ?? undefined;
      } catch (error) {
        log.warn('Could not look up repository for project', {
          project: row.project,
          error: describeError(error),
        });
      }

      // Get resource-level breakdown: distinct resources per type (best effort)
      let resourceBreakdown: ResourceCoverage[] | undefined;
      try {
        const resourceResult = await env.PLATFORM_DB.prepare(
          `
          WITH recent AS (
            SELECT resource_type, resource_id
            FROM resource_usage_snapshots
            WHERE snapshot_hour >= datetime('now', '-24 hours')
              AND project = ?
          ),
          known AS (
            SELECT resource_type, resource_id
            FROM resource_usage_snapshots
            WHERE project = ?
              AND (resource_type || ':' || resource_id) NOT IN (${exclusionPlaceholders})
          )
          SELECT
            k.resource_type,
            COUNT(DISTINCT k.resource_id) as total_resources,
            COUNT(DISTINCT r.resource_id) as active_resources,
            ROUND(
              COUNT(DISTINCT r.resource_id) * 100.0 /
              MAX(COUNT(DISTINCT k.resource_id), 1),
              1
            ) as coverage_pct
          FROM known k
          LEFT JOIN recent r
            ON k.resource_type = r.resource_type
            AND k.resource_id = r.resource_id
          GROUP BY k.resource_type
          ORDER BY coverage_pct ASC
        `
        )
          .bind(row.project, row.project, ...exclusionKeys)
          .all<{ resource_type: string; total_resources: number; active_resources: number; coverage_pct: number }>();

        if (resourceResult.results && resourceResult.results.length > 0) {
          resourceBreakdown = resourceResult.results.map((r) => ({
            resourceType: r.resource_type,
            hoursWithData: r.active_resources,
            coveragePct: r.coverage_pct,
          }));
        }
      } catch (error) {
        log.warn('Could not get resource breakdown for project', {
          project: row.project,
          error: describeError(error),
        });
      }

      gaps.push({
        project: row.project,
        hoursWithData: row.active_resources,
        expectedHours: row.expected_resources,
        coveragePct: row.coverage_pct,
        missingHours: missingResources,
        repository,
        resourceBreakdown,
        lastDataHour: row.last_data_hour ?? undefined,
      });
    }

    log.info('Detected project gaps', {
      projectCount: gaps.length,
      projects: gaps.map((g) => `${g.project}:${g.coveragePct}%`),
    });
  } catch (error) {
    log.error('Failed to detect project gaps', error);
  }

  return gaps;
}
|
|
645
|
+
|
|
646
|
+
/**
 * Escape a value for safe interpolation into HTML text content.
 */
function escapeHtmlText(value: unknown): string {
  return String(value)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
}

/**
 * Send an email alert (via the Resend API) for critical gaps.
 *
 * Nothing is sent when the report is not 'critical', when the email
 * rate-limit key is still present in PLATFORM_ALERTS, or when RESEND_API_KEY
 * or ALERT_EMAIL_TO is not configured. After a successful send the rate-limit
 * key is written with a 4-hour TTL. Delivery failures are logged, not thrown.
 *
 * @param env - Bindings providing the alerts KV namespace and Resend settings
 * @param report - Gap report to announce
 * @param log - Logger for skip reasons and delivery results
 */
export async function alertGapsEmail(
  env: GapDetectionEnv,
  report: GapReport,
  log: Logger
): Promise<void> {
  // Rate-limit window for email alerts, in seconds (4 hours).
  const EMAIL_RATE_LIMIT_TTL_SECONDS = 4 * 60 * 60;

  // Only email for critical
  if (report.severity !== 'critical') {
    return;
  }

  // Check rate limit (4 hours for email)
  const alertKey = 'gap-detection:email';
  const alreadySent = await env.PLATFORM_ALERTS.get(alertKey);

  if (alreadySent) {
    log.debug('Gap email alert rate limited');
    return;
  }

  if (!env.RESEND_API_KEY || !env.ALERT_EMAIL_TO) {
    log.warn('Resend not configured, skipping gap email alert');
    return;
  }

  // Project names come from the database, so escape them before they are
  // placed in the HTML body.
  const affectedProjects =
    report.staleProjects.map((p) => escapeHtmlText(p.project)).join(', ') || 'None stale';

  const html = `
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>Critical: Usage Data Gaps Detected</title>
</head>
<body style="font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; margin: 0; padding: 20px; background-color: #f5f5f5;">
<div style="max-width: 600px; margin: 0 auto; background: white; border-radius: 8px; overflow: hidden; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
<div style="background-color: #dc3545; color: white; padding: 20px;">
<h1 style="margin: 0; font-size: 20px;">Critical: Usage Data Gaps Detected</h1>
</div>
<div style="padding: 20px;">
<table style="width: 100%; border-collapse: collapse;">
<tr><td style="padding: 10px 0; border-bottom: 1px solid #eee;"><strong>Missing Hours</strong></td><td style="padding: 10px 0; border-bottom: 1px solid #eee;">${escapeHtmlText(report.totalMissingHours)}</td></tr>
<tr><td style="padding: 10px 0; border-bottom: 1px solid #eee;"><strong>Stale Projects</strong></td><td style="padding: 10px 0; border-bottom: 1px solid #eee;">${escapeHtmlText(report.totalStaleProjects)}</td></tr>
<tr><td style="padding: 10px 0;"><strong>Detection Time</strong></td><td style="padding: 10px 0;">${escapeHtmlText(report.checkTime)}</td></tr>
</table>
<div style="margin-top: 20px; padding: 15px; background: #f8f9fa; border-radius: 4px;">
<strong>Affected Projects:</strong>
<p style="margin: 10px 0 0 0; color: #666;">${affectedProjects}</p>
</div>
<div style="margin-top: 20px;">
<a href="${DASHBOARD_URL}/usage/unified" style="display: inline-block; padding: 10px 20px; background: #007bff; color: white; text-decoration: none; border-radius: 4px;">View Dashboard</a>
</div>
</div>
<div style="background: #f8f9fa; padding: 15px 20px; font-size: 12px; color: #666;">
<p style="margin: 0;">Platform Sentinel | Gap Detection</p>
</div>
</div>
</body>
</html>`;

  try {
    const response = await fetch('https://api.resend.com/emails', {
      method: 'POST',
      headers: {
        Authorization: `Bearer ${env.RESEND_API_KEY}`,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        from: ALERT_FROM_EMAIL,
        to: env.ALERT_EMAIL_TO,
        subject: `[CRITICAL] Usage Data Gaps: ${report.totalMissingHours} missing hours`,
        html,
      }),
    });

    if (response.ok) {
      // Delivered: start the email rate-limit window.
      await env.PLATFORM_ALERTS.put(alertKey, new Date().toISOString(), {
        expirationTtl: EMAIL_RATE_LIMIT_TTL_SECONDS,
      });
      log.info('Sent gap detection email alert');
    } else {
      const text = await response.text();
      log.error('Failed to send gap detection email alert', {
        status: response.status,
        error: text,
      });
    }
  } catch (error) {
    log.error('Error sending gap detection email alert', error);
  }
}
|