@littlebearapps/platform-admin-sdk 1.5.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/templates.d.ts +1 -1
- package/dist/templates.js +112 -1
- package/package.json +1 -1
- package/templates/full/dashboard/src/components/patterns/ActivePatterns.tsx +62 -0
- package/templates/full/dashboard/src/components/patterns/PatternTabs.tsx +116 -0
- package/templates/full/dashboard/src/components/patterns/SystemPatterns.tsx +52 -0
- package/templates/full/dashboard/src/components/patterns/index.ts +3 -0
- package/templates/full/dashboard/src/components/reports/GapDetectionReport.tsx +69 -0
- package/templates/full/dashboard/src/components/reports/SdkAuditReport.tsx +72 -0
- package/templates/full/dashboard/src/components/reports/index.ts +2 -0
- package/templates/full/dashboard/src/pages/api/notifications/[id]/read.ts +37 -0
- package/templates/full/dashboard/src/pages/api/notifications/read-all.ts +28 -0
- package/templates/full/dashboard/src/pages/api/patterns/cache-refresh.ts +38 -0
- package/templates/full/dashboard/src/pages/api/patterns/discover.ts +36 -0
- package/templates/full/dashboard/src/pages/api/patterns/ready-for-review.ts +39 -0
- package/templates/full/dashboard/src/pages/api/patterns/stats.ts +39 -0
- package/templates/full/dashboard/src/pages/api/patterns/suggestions.ts +43 -0
- package/templates/full/dashboard/src/pages/api/reports/audit.ts +45 -0
- package/templates/full/dashboard/src/pages/api/reports/usage.ts +52 -0
- package/templates/full/dashboard/src/pages/api/search/reindex.ts +28 -0
- package/templates/full/dashboard/src/pages/api/search/stats.ts +27 -0
- package/templates/full/dashboard/src/pages/api/settings/index.ts +37 -0
- package/templates/full/dashboard/src/pages/api/settings/update.ts +41 -0
- package/templates/full/dashboard/src/pages/api/topology/index.ts +56 -0
- package/templates/full/scripts/ops/universal-backfill.ts +147 -0
- package/templates/shared/.github/workflows/contract-check.yml.hbs +42 -0
- package/templates/shared/.github/workflows/dashboard-deploy.yml.hbs +39 -0
- package/templates/shared/.github/workflows/security.yml +33 -0
- package/templates/shared/dashboard/src/components/Nav.astro.hbs +2 -0
- package/templates/shared/dashboard/src/components/infrastructure/AlertHistory.tsx +57 -0
- package/templates/shared/dashboard/src/components/infrastructure/InfrastructureStats.tsx +73 -0
- package/templates/shared/dashboard/src/components/infrastructure/ServiceRegistry.tsx +55 -0
- package/templates/shared/dashboard/src/components/infrastructure/UptimeStatus.tsx +56 -0
- package/templates/shared/dashboard/src/components/infrastructure/index.ts +4 -0
- package/templates/shared/dashboard/src/components/ui/Breadcrumbs.tsx +27 -0
- package/templates/shared/dashboard/src/components/ui/EmptyState.tsx +26 -0
- package/templates/shared/dashboard/src/components/ui/ErrorBoundary.tsx +42 -0
- package/templates/shared/dashboard/src/components/ui/LoadingSkeleton.tsx +18 -0
- package/templates/shared/dashboard/src/components/ui/PageShell.tsx +26 -0
- package/templates/shared/dashboard/src/components/ui/Toast.tsx +44 -0
- package/templates/shared/dashboard/src/components/ui/index.ts +6 -0
- package/templates/shared/dashboard/src/components/usage/AnomaliesWidget.tsx +68 -0
- package/templates/shared/dashboard/src/components/usage/HourlyUsageChart.tsx +55 -0
- package/templates/shared/dashboard/src/components/usage/PlanAllowanceDashboard.tsx +67 -0
- package/templates/shared/dashboard/src/components/usage/ProjectCostBreakdown.tsx +55 -0
- package/templates/shared/dashboard/src/components/usage/index.ts +4 -0
- package/templates/shared/dashboard/src/lib/cloudflare/costs.ts +21 -0
- package/templates/shared/dashboard/src/pages/api/costs/overview.ts +65 -0
- package/templates/shared/dashboard/src/pages/api/costs/providers.ts +47 -0
- package/templates/shared/dashboard/src/pages/api/infrastructure/services.ts +55 -0
- package/templates/shared/dashboard/src/pages/api/infrastructure/stats.ts +99 -0
- package/templates/shared/dashboard/src/pages/api/usage/allowances.ts +56 -0
- package/templates/shared/dashboard/src/pages/api/usage/anomalies.ts +45 -0
- package/templates/shared/dashboard/src/pages/api/usage/billing.ts +53 -0
- package/templates/shared/dashboard/src/pages/api/usage/granular.ts +50 -0
- package/templates/shared/dashboard/src/pages/api/usage/hourly.ts +45 -0
- package/templates/shared/dashboard/src/pages/api/usage/projects.ts +51 -0
- package/templates/shared/dashboard/src/pages/api/user/identity.ts +11 -0
- package/templates/shared/dashboard/src/pages/settings/notifications.astro +34 -0
- package/templates/shared/dashboard/src/pages/settings/thresholds.astro +39 -0
- package/templates/shared/dashboard/src/pages/settings/usage.astro +28 -0
- package/templates/shared/docs/architecture.md +89 -0
- package/templates/shared/docs/post-deploy-runbook.md +126 -0
- package/templates/shared/docs/troubleshooting.md +91 -0
- package/templates/shared/package.json.hbs +5 -0
- package/templates/shared/scripts/ops/backfill-cloudflare-daily.ts +145 -0
- package/templates/shared/scripts/ops/backfill-monthly-rollups.ts +125 -0
- package/templates/shared/scripts/ops/validate-controls.js +141 -0
- package/templates/shared/tests/contract/validate-schemas.test.ts +130 -0
- package/templates/shared/tests/fixtures/telemetry-envelope-invalid.json +9 -0
- package/templates/shared/tests/fixtures/telemetry-envelope-valid.json +27 -0
- package/templates/shared/tests/helpers/mock-d1.ts +61 -0
- package/templates/shared/tests/helpers/mock-kv.ts +37 -0
- package/templates/shared/tests/unit/workers/batch-persistence.test.ts +133 -0
- package/templates/shared/tests/unit/workers/budget-enforcement.test.ts +214 -0
- package/templates/shared/vitest.config.ts +18 -0
- package/templates/standard/dashboard/src/components/health/CircuitBreakerEvents.tsx +69 -0
- package/templates/standard/dashboard/src/components/health/CircuitBreakerPanel.tsx +97 -0
- package/templates/standard/dashboard/src/components/health/index.ts +2 -0
- package/templates/standard/dashboard/src/pages/api/errors/[fingerprint]/mute.ts +49 -0
- package/templates/standard/dashboard/src/pages/api/errors/[fingerprint]/resolve.ts +36 -0
- package/templates/standard/dashboard/src/pages/api/errors/[fingerprint].ts +55 -0
- package/templates/standard/dashboard/src/pages/api/health/audit-history.ts +37 -0
- package/templates/standard/dashboard/src/pages/circuit-breakers.astro +13 -0
- package/templates/standard/tests/unit/error-collector/capture.test.ts +106 -0
- package/templates/standard/tests/unit/error-collector/fingerprint.test.ts +155 -0
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
import type { APIRoute } from 'astro';
|
|
2
|
+
|
|
3
|
+
/** Cloudflare bindings this route reads from `locals.runtime.env`. */
interface Env {
  /** D1 database holding `daily_usage_rollups`; when absent the route returns an empty list. */
  PLATFORM_DB?: D1Database;
}

/**
 * GET handler: month-to-date usage totals per project.
 *
 * Sums `daily_usage_rollups` rows whose `snapshot_date` is on or after the
 * first day of the current UTC month, grouped by project (the aggregate
 * `'all'` row is excluded), ordered by total cost descending, capped at 20.
 *
 * Always responds with JSON `{ projects: [...] }`. If the D1 binding is
 * missing or the query throws, `projects` is an empty array.
 */
export const GET: APIRoute = async ({ locals }) => {
  const env = locals.runtime?.env as Env | undefined;
  const db = env?.PLATFORM_DB;

  const projects: Array<{
    project: string;
    total_cost: number;
    d1_writes: number;
    worker_requests: number;
    latest_date: string;
  }> = [];

  if (db) {
    try {
      // Build the cutoff from UTC components. toISOString() always renders UTC,
      // so pairing it with the local-time setDate(1) can produce the wrong day
      // (or even the wrong month) when the runtime's timezone is not UTC.
      const now = new Date();
      const cutoff = new Date(Date.UTC(now.getUTCFullYear(), now.getUTCMonth(), 1))
        .toISOString()
        .slice(0, 10);

      const result = await db
        .prepare(
          `SELECT project,
                  SUM(total_cost_usd) as total_cost,
                  SUM(d1_writes) as d1_writes,
                  SUM(worker_requests) as worker_requests,
                  MAX(snapshot_date) as latest_date
           FROM daily_usage_rollups
           WHERE project != 'all' AND snapshot_date >= ?
           GROUP BY project
           ORDER BY total_cost DESC
           LIMIT 20`
        )
        .bind(cutoff)
        .all();
      if (result.results) {
        projects.push(...(result.results as typeof projects));
      }
    } catch (err) {
      // Best effort: the table may not exist yet. Keep serving an empty list,
      // but leave a trace so real query failures are not invisible.
      console.warn('Failed to load per-project usage rollups:', err);
    }
  }

  return new Response(JSON.stringify({ projects }), {
    headers: { 'Content-Type': 'application/json', 'Cache-Control': 'max-age=300' },
  });
};
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import type { APIRoute } from 'astro';
|
|
2
|
+
|
|
3
|
+
/**
 * GET handler: reports the caller's identity as JSON `{ email, name }`.
 *
 * `email` is taken from the `cf-access-authenticated-user-email` request
 * header (set by Cloudflare Access), or `'unknown'` when the header is absent.
 * `name` is the part of `email` before the first `@`, or `'User'` when that
 * part is empty.
 */
export const GET: APIRoute = async ({ request }) => {
  const email = request.headers.get('cf-access-authenticated-user-email') ?? 'unknown';
  // split() always returns a string at index 0, so `??` could never select the
  // fallback; `||` also covers an empty local part such as "@example.com".
  const name = email.split('@')[0] || 'User';

  return new Response(JSON.stringify({ email, name }), {
    // `private`: the body differs per user, so only the user's own browser may
    // cache it — a shared cache must never replay it to someone else.
    headers: { 'Content-Type': 'application/json', 'Cache-Control': 'private, max-age=3600' },
  });
};
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
---
|
|
2
|
+
import DashboardLayout from '../../layouts/DashboardLayout.astro';
|
|
3
|
+
import { SettingsCard } from '../../components/settings/SettingsCard';
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
<DashboardLayout title="Notification Settings">
|
|
7
|
+
<div class="max-w-3xl mx-auto space-y-4">
|
|
8
|
+
<h2 class="text-xl font-bold text-gray-900 dark:text-white mb-4">Notification Settings</h2>
|
|
9
|
+
<SettingsCard
|
|
10
|
+
client:load
|
|
11
|
+
label="Slack Alerts"
|
|
12
|
+
description="Send alerts to a Slack channel via webhook URL."
|
|
13
|
+
value="Not configured"
|
|
14
|
+
/>
|
|
15
|
+
<SettingsCard
|
|
16
|
+
client:load
|
|
17
|
+
label="Email Alerts"
|
|
18
|
+
description="Send critical alerts via email."
|
|
19
|
+
value="Disabled"
|
|
20
|
+
/>
|
|
21
|
+
<SettingsCard
|
|
22
|
+
client:load
|
|
23
|
+
label="Budget Warnings"
|
|
24
|
+
description="Notify at 70% and 90% budget thresholds."
|
|
25
|
+
value="Enabled"
|
|
26
|
+
/>
|
|
27
|
+
<SettingsCard
|
|
28
|
+
client:load
|
|
29
|
+
label="Circuit Breaker Trips"
|
|
30
|
+
description="Immediate notification when a circuit breaker trips."
|
|
31
|
+
value="Enabled"
|
|
32
|
+
/>
|
|
33
|
+
</div>
|
|
34
|
+
</DashboardLayout>
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
---
|
|
2
|
+
import DashboardLayout from '../../layouts/DashboardLayout.astro';
|
|
3
|
+
import { SettingsCard } from '../../components/settings/SettingsCard';
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
<DashboardLayout title="Alert Thresholds">
|
|
7
|
+
<div class="max-w-3xl mx-auto space-y-4">
|
|
8
|
+
<h2 class="text-xl font-bold text-gray-900 dark:text-white mb-4">Alert Thresholds</h2>
|
|
9
|
+
<p class="text-sm text-gray-500 dark:text-gray-400 mb-4">
|
|
10
|
+
Configure when budget warnings fire. These values are defined in
|
|
11
|
+
<code class="text-xs bg-gray-100 dark:bg-gray-700 px-1 py-0.5 rounded">budgets.yaml</code>
|
|
12
|
+
and synced via <code class="text-xs bg-gray-100 dark:bg-gray-700 px-1 py-0.5 rounded">npm run sync:config</code>.
|
|
13
|
+
</p>
|
|
14
|
+
<SettingsCard
|
|
15
|
+
client:load
|
|
16
|
+
label="Warning Threshold"
|
|
17
|
+
description="Alert when feature budget usage reaches this percentage."
|
|
18
|
+
value="70%"
|
|
19
|
+
/>
|
|
20
|
+
<SettingsCard
|
|
21
|
+
client:load
|
|
22
|
+
label="Critical Threshold"
|
|
23
|
+
description="Escalate alert severity at this percentage."
|
|
24
|
+
value="90%"
|
|
25
|
+
/>
|
|
26
|
+
<SettingsCard
|
|
27
|
+
client:load
|
|
28
|
+
label="Circuit Breaker Trip"
|
|
29
|
+
description="Automatically pause feature at this percentage."
|
|
30
|
+
value="100%"
|
|
31
|
+
/>
|
|
32
|
+
<SettingsCard
|
|
33
|
+
client:load
|
|
34
|
+
label="Anomaly Sensitivity"
|
|
35
|
+
description="Deviation percentage to flag as anomaly."
|
|
36
|
+
value="200%"
|
|
37
|
+
/>
|
|
38
|
+
</div>
|
|
39
|
+
</DashboardLayout>
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
---
|
|
2
|
+
import DashboardLayout from '../../layouts/DashboardLayout.astro';
|
|
3
|
+
import { SettingsCard } from '../../components/settings/SettingsCard';
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
<DashboardLayout title="Usage Settings">
|
|
7
|
+
<div class="max-w-3xl mx-auto space-y-4">
|
|
8
|
+
<h2 class="text-xl font-bold text-gray-900 dark:text-white mb-4">Usage Display Settings</h2>
|
|
9
|
+
<SettingsCard
|
|
10
|
+
client:load
|
|
11
|
+
label="Default Period"
|
|
12
|
+
description="Default time range for usage charts."
|
|
13
|
+
value="24 hours"
|
|
14
|
+
/>
|
|
15
|
+
<SettingsCard
|
|
16
|
+
client:load
|
|
17
|
+
label="Cost Display"
|
|
18
|
+
description="Show costs as net (after allowances) or gross."
|
|
19
|
+
value="Net"
|
|
20
|
+
/>
|
|
21
|
+
<SettingsCard
|
|
22
|
+
client:load
|
|
23
|
+
label="Data Refresh"
|
|
24
|
+
description="How often dashboard polls for new data."
|
|
25
|
+
value="60 seconds"
|
|
26
|
+
/>
|
|
27
|
+
</div>
|
|
28
|
+
</DashboardLayout>
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
# Architecture Overview
|
|
2
|
+
|
|
3
|
+
## System Design
|
|
4
|
+
|
|
5
|
+
The platform follows a hub-and-spoke model:
|
|
6
|
+
|
|
7
|
+
```
|
|
8
|
+
Producer Projects (Scout, Brand Copilot, etc.)
|
|
9
|
+
│
|
|
10
|
+
▼ SDK telemetry via Queue
|
|
11
|
+
┌────────────────────────────┐
|
|
12
|
+
│ platform-usage worker │ ← Central data warehouse
|
|
13
|
+
│ (cron + queue consumer) │
|
|
14
|
+
└────────────┬───────────────┘
|
|
15
|
+
│
|
|
16
|
+
┌────────┼────────────┐
|
|
17
|
+
▼ ▼ ▼
|
|
18
|
+
D1 DB KV Cache Analytics Engine
|
|
19
|
+
│ │
|
|
20
|
+
▼ ▼
|
|
21
|
+
Dashboard (Astro SSR + CF Pages)
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
## Data Flow
|
|
25
|
+
|
|
26
|
+
### Collection Pipeline
|
|
27
|
+
|
|
28
|
+
1. **SDK Telemetry**: Producer projects send telemetry via `platform-telemetry` queue
|
|
29
|
+
2. **External APIs**: Cron jobs query Cloudflare GraphQL, GitHub, Stripe, AI providers
|
|
30
|
+
3. **Queue Consumer**: Processes messages, enforces budgets, writes to D1 + Analytics Engine
|
|
31
|
+
4. **Failed messages** route to `platform-telemetry-dlq` for manual inspection
|
|
32
|
+
|
|
33
|
+
### Storage Tiers
|
|
34
|
+
|
|
35
|
+
| Storage | Use Case | Cost Profile |
|
|
36
|
+
|---------|----------|--------------|
|
|
37
|
+
| D1 | Historical data, rollups, settings | Writes: $1/M rows |
|
|
38
|
+
| KV | Circuit breakers, budgets, cache | Reads: $0.50/M, Writes: $5/M |
|
|
39
|
+
| Analytics Engine | High-cardinality metrics | Free tier generous |
|
|
40
|
+
|
|
41
|
+
### Budget Enforcement
|
|
42
|
+
|
|
43
|
+
```
|
|
44
|
+
Telemetry arrives → Check feature budget → Check project budget → Check global budget
|
|
45
|
+
│ │ │ │
|
|
46
|
+
│ 70% → Slack warn 90% → Slack critical 100% → CB trip
|
|
47
|
+
│ │
|
|
48
|
+
└─────────────────────────────────────────────────────────────────────┘
|
|
49
|
+
Write to D1 + Analytics Engine
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
## Worker Topology
|
|
53
|
+
|
|
54
|
+
### Minimal Tier
|
|
55
|
+
- **platform-usage**: Data warehouse, cron scheduler, queue consumer
|
|
56
|
+
|
|
57
|
+
### Standard Tier (adds)
|
|
58
|
+
- **error-collector**: Tail worker → error fingerprinting → GitHub issues
|
|
59
|
+
- **platform-sentinel**: Gap detection, cost monitoring
|
|
60
|
+
- **platform-mapper**: Infrastructure discovery, attribution
|
|
61
|
+
|
|
62
|
+
### Full Tier (adds)
|
|
63
|
+
- **pattern-discovery**: AI-assisted transient error pattern detection
|
|
64
|
+
- **platform-alert-router**: Unified alert normalisation
|
|
65
|
+
- **platform-notifications**: In-app notification API
|
|
66
|
+
- **platform-search**: Full-text search (FTS5)
|
|
67
|
+
- **platform-settings**: Settings management API
|
|
68
|
+
- **platform-auditor**: SDK integration auditor + AI Judge
|
|
69
|
+
|
|
70
|
+
## Dashboard Architecture
|
|
71
|
+
|
|
72
|
+
Astro SSR deployed on Cloudflare Pages with:
|
|
73
|
+
- Service bindings to backend workers
|
|
74
|
+
- D1/KV direct bindings for read-heavy pages
|
|
75
|
+
- CF Access for authentication
|
|
76
|
+
- React islands for interactive components
|
|
77
|
+
|
|
78
|
+
## Configuration
|
|
79
|
+
|
|
80
|
+
All configuration lives in Git and syncs to runtime:
|
|
81
|
+
|
|
82
|
+
```
|
|
83
|
+
platform/config/
|
|
84
|
+
├── services.yaml ← Project registry, feature IDs
|
|
85
|
+
├── budgets.yaml ← Limits, thresholds, CB config
|
|
86
|
+
└── observability.yaml ← Monitoring standards
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
Sync via: `npm run sync:config` → D1 tables + KV keys
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
# Post-Deploy Runbook
|
|
2
|
+
|
|
3
|
+
## After Initial Scaffold
|
|
4
|
+
|
|
5
|
+
### 1. Configure Cloudflare Resources
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
# Create D1 database
|
|
9
|
+
direnv exec . wrangler d1 create platform-metrics
|
|
10
|
+
|
|
11
|
+
# Update database IDs in all wrangler.*.jsonc files
|
|
12
|
+
# (use the ID returned from the create command)
|
|
13
|
+
|
|
14
|
+
# Run migrations
|
|
15
|
+
direnv exec . wrangler d1 migrations apply platform-metrics --remote
|
|
16
|
+
|
|
17
|
+
# Create KV namespace
|
|
18
|
+
direnv exec . wrangler kv namespace create PLATFORM_CACHE
|
|
19
|
+
# Update KV namespace IDs in wrangler configs
|
|
20
|
+
|
|
21
|
+
# Create telemetry queue
|
|
22
|
+
direnv exec . wrangler queues create platform-telemetry
|
|
23
|
+
direnv exec . wrangler queues create platform-telemetry-dlq
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
### 2. Sync Configuration
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
# Push services.yaml + budgets.yaml to D1/KV
|
|
30
|
+
npm run sync:config
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
### 3. Deploy Workers
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
# Deploy in order: usage first (it's the foundation)
|
|
37
|
+
npm run deploy:usage
|
|
38
|
+
|
|
39
|
+
# Standard tier: error collector + sentinel
|
|
40
|
+
direnv exec . wrangler deploy -c wrangler.*-error-collector.jsonc
|
|
41
|
+
direnv exec . wrangler deploy -c wrangler.*-sentinel.jsonc
|
|
42
|
+
|
|
43
|
+
# Full tier: remaining workers
|
|
44
|
+
direnv exec . wrangler deploy -c wrangler.*-pattern-discovery.jsonc
|
|
45
|
+
# ... etc
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
### 4. Deploy Dashboard
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
cd dashboard
|
|
52
|
+
npm install && npm run build
|
|
53
|
+
direnv exec . npx wrangler pages deploy dist
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
### 5. Verify
|
|
57
|
+
|
|
58
|
+
```bash
|
|
59
|
+
# Check worker is responding
|
|
60
|
+
direnv exec . wrangler tail {worker-name} --format json
|
|
61
|
+
|
|
62
|
+
# Run validation
|
|
63
|
+
npm run validate:pipeline
|
|
64
|
+
npm run validate:controls
|
|
65
|
+
|
|
66
|
+
# Check dashboard loads
|
|
67
|
+
# https://your-dashboard-url.pages.dev
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
## After Code Changes
|
|
71
|
+
|
|
72
|
+
### Worker Changes
|
|
73
|
+
|
|
74
|
+
1. Run tests: `npm test`
|
|
75
|
+
2. Run typecheck: `npm run typecheck`
|
|
76
|
+
3. Check D1 migrations: any new migrations need `wrangler d1 migrations apply`
|
|
77
|
+
4. Deploy affected worker(s)
|
|
78
|
+
5. Watch logs for 30 seconds: `direnv exec . wrangler tail {worker} --format json`
|
|
79
|
+
|
|
80
|
+
### Budget/Config Changes
|
|
81
|
+
|
|
82
|
+
1. Run `npm run validate:controls -- --strict`
|
|
83
|
+
2. Run `npm run sync:config`
|
|
84
|
+
3. Verify KV keys updated correctly
|
|
85
|
+
|
|
86
|
+
### Dashboard Changes
|
|
87
|
+
|
|
88
|
+
1. Build locally: `cd dashboard && npm run build`
|
|
89
|
+
2. Deploy: `direnv exec . npx wrangler pages deploy dist`
|
|
90
|
+
3. Verify pages load, API routes return data
|
|
91
|
+
|
|
92
|
+
## Backfill After Data Gaps
|
|
93
|
+
|
|
94
|
+
If usage collection missed data (e.g., worker was down):
|
|
95
|
+
|
|
96
|
+
```bash
|
|
97
|
+
# 1. Backfill hourly data from Cloudflare GraphQL
|
|
98
|
+
npm run backfill -- --start 2026-01-01 --end 2026-01-31
|
|
99
|
+
|
|
100
|
+
# 2. Roll up to daily
|
|
101
|
+
npm run backfill:daily -- --start 2026-01-01 --end 2026-01-31
|
|
102
|
+
|
|
103
|
+
# 3. Roll up to monthly
|
|
104
|
+
npm run backfill:monthly -- --start 2026-01 --end 2026-01
|
|
105
|
+
|
|
106
|
+
# Always dry-run first:
|
|
107
|
+
npm run backfill:daily -- --dry-run --start 2026-01-01 --end 2026-01-31
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
## Emergency: Circuit Breaker Trip
|
|
111
|
+
|
|
112
|
+
If a circuit breaker trips unexpectedly:
|
|
113
|
+
|
|
114
|
+
```bash
|
|
115
|
+
# 1. Check what tripped
|
|
116
|
+
# Dashboard → Circuit Breakers page
|
|
117
|
+
|
|
118
|
+
# 2. Investigate root cause
|
|
119
|
+
direnv exec . wrangler tail {worker} --format json
|
|
120
|
+
|
|
121
|
+
# 3. If safe to reset
|
|
122
|
+
npm run reset-cb
|
|
123
|
+
|
|
124
|
+
# 4. Monitor after reset
|
|
125
|
+
direnv exec . wrangler tail {worker} --format json
|
|
126
|
+
```
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
# Troubleshooting Guide
|
|
2
|
+
|
|
3
|
+
## Common Issues
|
|
4
|
+
|
|
5
|
+
### Workers Not Deploying
|
|
6
|
+
|
|
7
|
+
**Symptom**: `wrangler deploy` fails or worker doesn't respond.
|
|
8
|
+
|
|
9
|
+
**Check**:
|
|
10
|
+
1. Verify wrangler config references correct `main` entry point
|
|
11
|
+
2. Check `compatibility_date` is recent
|
|
12
|
+
3. Ensure all D1 bindings use correct database IDs
|
|
13
|
+
4. Run `direnv exec . wrangler whoami` to verify credentials
|
|
14
|
+
|
|
15
|
+
### D1 Write Errors
|
|
16
|
+
|
|
17
|
+
**Symptom**: `D1_ERROR: too many requests` or budget circuit breaker trips.
|
|
18
|
+
|
|
19
|
+
**Fix**:
|
|
20
|
+
1. Check `budgets.yaml` — limits may be too low for current usage
|
|
21
|
+
2. Run `npm run validate:controls` to verify budget configuration
|
|
22
|
+
3. Check circuit breaker state: `cb:global`, `cb:project:{name}`, `cb:feature:{id}`
|
|
23
|
+
4. Reset with `npm run reset-cb` if needed
|
|
24
|
+
|
|
25
|
+
### Queue Messages Stuck in DLQ
|
|
26
|
+
|
|
27
|
+
**Symptom**: DLQ depth increasing, messages not processing.
|
|
28
|
+
|
|
29
|
+
**Check**:
|
|
30
|
+
1. View DLQ status in dashboard Health tab
|
|
31
|
+
2. Check queue consumer logs: `direnv exec . wrangler tail {worker-name}`
|
|
32
|
+
3. Common cause: malformed telemetry envelopes — validate with `npm run validate:schemas`
|
|
33
|
+
4. Retry DLQ messages via dashboard or API
|
|
34
|
+
|
|
35
|
+
### Dashboard Not Loading Data
|
|
36
|
+
|
|
37
|
+
**Symptom**: Dashboard shows empty states or loading spinners.
|
|
38
|
+
|
|
39
|
+
**Check**:
|
|
40
|
+
1. Verify D1 database has data: `direnv exec . wrangler d1 execute platform-metrics --remote --command "SELECT COUNT(*) FROM hourly_usage_snapshots"`
|
|
41
|
+
2. Check KV cache: service registry may be stale
|
|
42
|
+
3. Run `npm run sync:config` to refresh D1/KV from YAML
|
|
43
|
+
4. Check CF Access — dashboard requires authentication
|
|
44
|
+
|
|
45
|
+
### Budget Warnings Not Firing
|
|
46
|
+
|
|
47
|
+
**Symptom**: Usage exceeds thresholds but no Slack alerts.
|
|
48
|
+
|
|
49
|
+
**Check**:
|
|
50
|
+
1. Verify `SLACK_WEBHOOK_URL` is set in wrangler config
|
|
51
|
+
2. Check KV dedup keys: `BUDGET_WARN:{feature}` (1hr TTL)
|
|
52
|
+
3. Ensure `budgets.yaml` has correct feature keys matching `services.yaml`
|
|
53
|
+
4. Run `npm run validate:controls -- --strict` for cross-reference check
|
|
54
|
+
|
|
55
|
+
### Backfill Scripts Failing
|
|
56
|
+
|
|
57
|
+
**Symptom**: `backfill:daily` or `backfill:monthly` errors.
|
|
58
|
+
|
|
59
|
+
**Check**:
|
|
60
|
+
1. Required env vars: `CLOUDFLARE_API_TOKEN`, `CLOUDFLARE_ACCOUNT_ID`, `D1_DATABASE_ID`
|
|
61
|
+
2. API token needs D1:Write permissions
|
|
62
|
+
3. Run with `--dry-run` first to verify date ranges
|
|
63
|
+
4. Rate limit: scripts include 200ms delay between queries
|
|
64
|
+
|
|
65
|
+
## Diagnostic Commands
|
|
66
|
+
|
|
67
|
+
```bash
|
|
68
|
+
# Check worker health
|
|
69
|
+
direnv exec . wrangler tail {worker-name} --format json
|
|
70
|
+
|
|
71
|
+
# Verify D1 schema
|
|
72
|
+
direnv exec . wrangler d1 migrations list platform-metrics --remote
|
|
73
|
+
|
|
74
|
+
# Test config sync
|
|
75
|
+
npm run sync:config -- --dry-run
|
|
76
|
+
|
|
77
|
+
# Validate all controls
|
|
78
|
+
npm run validate:controls -- --strict
|
|
79
|
+
|
|
80
|
+
# Run schema validation
|
|
81
|
+
npm run validate:schemas
|
|
82
|
+
|
|
83
|
+
# Check pipeline health
|
|
84
|
+
npm run validate:pipeline
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
## Getting Help
|
|
88
|
+
|
|
89
|
+
1. Check the [KV Key Patterns](./kv-key-patterns.md) doc for key prefix reference
|
|
90
|
+
2. Review circuit breaker states in the dashboard Circuit Breakers page
|
|
91
|
+
3. Run `npm test` to verify worker logic locally
|
|
@@ -8,10 +8,14 @@
|
|
|
8
8
|
"sync:config": "npx tsx scripts/sync-config.ts",
|
|
9
9
|
"deploy:usage": "wrangler deploy -c wrangler.{{projectSlug}}-usage.jsonc",
|
|
10
10
|
"backfill": "npx tsx scripts/ops/backfill-cloudflare-hourly.ts",
|
|
11
|
+
"backfill:daily": "npx tsx scripts/ops/backfill-cloudflare-daily.ts",
|
|
12
|
+
"backfill:monthly": "npx tsx scripts/ops/backfill-monthly-rollups.ts",
|
|
13
|
+
"validate:controls": "node scripts/ops/validate-controls.js",
|
|
11
14
|
"reset-cb": "npx tsx scripts/ops/reset-budget-state.ts",
|
|
12
15
|
"verify": "npx tsx scripts/ops/verify-account-completeness.ts",
|
|
13
16
|
"validate:pipeline": "npx tsx scripts/ops/validate-pipeline.ts",
|
|
14
17
|
"validate:schemas": "node scripts/validate-schemas.js",
|
|
18
|
+
"test": "vitest run",
|
|
15
19
|
"deploy:auditor": "wrangler deploy -c wrangler.{{projectSlug}}-auditor.jsonc",
|
|
16
20
|
"deploy:mapper": "wrangler deploy -c wrangler.{{projectSlug}}-mapper.jsonc",
|
|
17
21
|
"deploy:test-client": "wrangler deploy -c wrangler.{{projectSlug}}-sdk-test-client.jsonc",
|
|
@@ -27,6 +31,7 @@
|
|
|
27
31
|
"ajv-formats": "^3.0.0",
|
|
28
32
|
"tsx": "^4.19.0",
|
|
29
33
|
"typescript": "^5.7.3",
|
|
34
|
+
"vitest": "^3.0.5",
|
|
30
35
|
"wrangler": "^3.100.0"
|
|
31
36
|
}
|
|
32
37
|
}
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
#!/usr/bin/env npx tsx
|
|
2
|
+
/**
|
|
3
|
+
* Cloudflare Daily Rollup Backfill Script
|
|
4
|
+
*
|
|
5
|
+
* Aggregates hourly_usage_snapshots into daily_usage_rollups via the D1 REST API.
|
|
6
|
+
* Queries existing hourly data and rolls it up to daily granularity.
|
|
7
|
+
*
|
|
8
|
+
* Prerequisites:
|
|
9
|
+
* CLOUDFLARE_API_TOKEN — API token with D1:Write permissions
|
|
10
|
+
* CLOUDFLARE_ACCOUNT_ID — Your Cloudflare account ID
|
|
11
|
+
* D1_DATABASE_ID — Your platform-metrics D1 database ID
|
|
12
|
+
*
|
|
13
|
+
* Usage:
|
|
14
|
+
* npx tsx scripts/ops/backfill-cloudflare-daily.ts
|
|
15
|
+
* npx tsx scripts/ops/backfill-cloudflare-daily.ts --dry-run
|
|
16
|
+
* npx tsx scripts/ops/backfill-cloudflare-daily.ts --start 2026-02-01 --end 2026-02-28
|
|
17
|
+
* npx tsx scripts/ops/backfill-cloudflare-daily.ts --limit 30
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
// Root of the Cloudflare v4 REST API; d1Query() builds its D1 query URL from it.
const REST_API_BASE = 'https://api.cloudflare.com/client/v4';
|
|
21
|
+
// Milliseconds main() sleeps after each rollup statement it sends.
const RATE_LIMIT_MS = 200;
|
|
22
|
+
|
|
23
|
+
/** Command-line options for the daily rollup backfill. */
interface Args {
  /** First date to scan, `YYYY-MM-DD`. Always populated by parseArgs(). */
  start?: string;
  /** Last date to scan, `YYYY-MM-DD`. Always populated by parseArgs(). */
  end?: string;
  /** When true, main() only logs what it would roll up. */
  dryRun: boolean;
  /** Maximum number of dates processed in one run. */
  limit: number;
}

/**
 * Parses `process.argv` into {@link Args}.
 *
 * Recognised flags: `--start <date>`, `--end <date>`, `--limit <n>` and
 * `--dry-run`; anything else is ignored. Defaults: `start` is 30 days ago,
 * `end` is today (both as UTC calendar dates), `limit` is 90, `dryRun` is false.
 *
 * @returns The parsed options with `start` and `end` filled in.
 * @throws Error if the value given to `--limit` is not an integer.
 */
function parseArgs(): Args {
  const args = process.argv.slice(2);
  const result: Args = { dryRun: false, limit: 90 };

  for (let i = 0; i < args.length; i++) {
    const flag = args[i];
    const value = args[i + 1];

    if (flag === '--start' && value) {
      result.start = value;
      i++;
    } else if (flag === '--end' && value) {
      result.end = value;
      i++;
    } else if (flag === '--limit' && value) {
      const limit = Number(value);
      // Fail fast: a NaN or fractional limit would otherwise only surface
      // later as an opaque SQL error from the LIMIT clause.
      if (!Number.isInteger(limit)) {
        throw new Error(`Invalid --limit value: ${value} (expected an integer)`);
      }
      result.limit = limit;
      i++;
    } else if (flag === '--dry-run') {
      result.dryRun = true;
    }
  }

  if (!result.start) {
    // Step back in UTC so the result matches toISOString(), which renders UTC.
    const d = new Date();
    d.setUTCDate(d.getUTCDate() - 30);
    result.start = d.toISOString().slice(0, 10);
  }
  if (!result.end) {
    result.end = new Date().toISOString().slice(0, 10);
  }

  return result;
}
|
|
52
|
+
|
|
53
|
+
/**
 * Looks up an environment variable the script cannot run without.
 *
 * @param key Name of the variable in `process.env`.
 * @returns The variable's value.
 * @throws Error when the variable is unset or empty.
 */
function getEnvOrThrow(key: string): string {
  const value = process.env[key];
  if (value) {
    return value;
  }
  throw new Error(`Missing required env var: ${key}`);
}
|
|
58
|
+
|
|
59
|
+
/**
 * Sends one SQL statement to a D1 database through the Cloudflare REST API.
 *
 * @param accountId Cloudflare account ID used in the request path.
 * @param dbId D1 database ID used in the request path.
 * @param token API token sent as a Bearer credential.
 * @param sql Statement text, posted as `sql`.
 * @param params Values posted as `params` alongside the statement.
 * @returns The parsed JSON response body.
 * @throws Error when the HTTP status is not OK; the message carries the
 *   status code and the response text.
 */
async function d1Query(accountId: string, dbId: string, token: string, sql: string, params: unknown[] = []) {
  const endpoint = `${REST_API_BASE}/accounts/${accountId}/d1/database/${dbId}/query`;
  const response = await fetch(endpoint, {
    method: 'POST',
    headers: { Authorization: `Bearer ${token}`, 'Content-Type': 'application/json' },
    body: JSON.stringify({ sql, params }),
  });

  if (response.ok) {
    return response.json() as Promise<{ result: Array<{ results: unknown[] }> }>;
  }

  const detail = await response.text();
  throw new Error(`D1 query failed (${response.status}): ${detail}`);
}
|
|
71
|
+
|
|
72
|
+
/**
 * Waits for the given number of milliseconds.
 *
 * @param ms Delay handed to `setTimeout`.
 * @returns A promise that resolves once the timer fires.
 */
async function sleep(ms: number) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}
|
|
75
|
+
|
|
76
|
+
/**
 * Script entry point: backfills `daily_usage_rollups` from
 * `hourly_usage_snapshots` for the aggregate `project = 'all'` rows.
 *
 * 1. Reads CLI options and the three required environment variables.
 * 2. Asks D1 for dates in the requested range that have hourly data but no
 *    daily rollup yet (at most `limit` dates, oldest first).
 * 3. For each date, upserts one rollup row summed from that day's hourly
 *    snapshots, pausing RATE_LIMIT_MS after each statement. With `--dry-run`
 *    the dates are only logged.
 *
 * Rejects if a required environment variable is missing or a D1 call fails.
 */
async function main() {
  const args = parseArgs();
  const token = getEnvOrThrow('CLOUDFLARE_API_TOKEN');
  const accountId = getEnvOrThrow('CLOUDFLARE_ACCOUNT_ID');
  const dbId = getEnvOrThrow('D1_DATABASE_ID');

  console.log(`Backfilling daily rollups: ${args.start} → ${args.end} (limit: ${args.limit}, dry-run: ${args.dryRun})`);

  // Find dates that have hourly data but no daily rollup
  const missingDays = await d1Query(accountId, dbId, token, `
    SELECT DISTINCT DATE(snapshot_hour) as snapshot_date
    FROM hourly_usage_snapshots
    WHERE snapshot_hour >= ? AND snapshot_hour < ? AND project = 'all'
      AND DATE(snapshot_hour) NOT IN (
        SELECT snapshot_date FROM daily_usage_rollups WHERE project = 'all'
      )
    ORDER BY snapshot_date ASC
    LIMIT ?
  `, [`${args.start} 00:00:00`, `${args.end} 23:59:59`, args.limit]);

  const dates = (missingDays.result?.[0]?.results ?? []) as Array<{ snapshot_date: string }>;
  console.log(`Found ${dates.length} dates needing daily rollups`);

  let inserted = 0;
  for (const { snapshot_date } of dates) {
    // Advance exactly one calendar day in UTC. The date string is parsed as
    // UTC midnight and rendered back with toISOString() (also UTC); stepping
    // with the local-time setDate() in between could return the same day on a
    // DST change, leaving the rollup window below empty.
    const nextDate = new Date(`${snapshot_date}T00:00:00Z`);
    nextDate.setUTCDate(nextDate.getUTCDate() + 1);
    const nextDateStr = nextDate.toISOString().slice(0, 10);

    if (args.dryRun) {
      console.log(`[DRY-RUN] Would roll up ${snapshot_date}`);
      continue;
    }

    // Upsert, so re-running the script over the same dates is harmless.
    await d1Query(accountId, dbId, token, `
      INSERT INTO daily_usage_rollups (
        project, snapshot_date,
        d1_reads, d1_writes, kv_reads, kv_writes,
        r2_reads, r2_writes, worker_requests, total_cost_usd
      )
      SELECT
        project, DATE(snapshot_hour) as snapshot_date,
        SUM(d1_reads), SUM(d1_writes), SUM(kv_reads), SUM(kv_writes),
        SUM(r2_reads), SUM(r2_writes), SUM(worker_requests), SUM(total_cost_usd)
      FROM hourly_usage_snapshots
      WHERE snapshot_hour >= ? AND snapshot_hour < ? AND project = 'all'
      GROUP BY project, DATE(snapshot_hour)
      ON CONFLICT (project, snapshot_date) DO UPDATE SET
        d1_reads = excluded.d1_reads,
        d1_writes = excluded.d1_writes,
        kv_reads = excluded.kv_reads,
        kv_writes = excluded.kv_writes,
        r2_reads = excluded.r2_reads,
        r2_writes = excluded.r2_writes,
        worker_requests = excluded.worker_requests,
        total_cost_usd = excluded.total_cost_usd
    `, [`${snapshot_date} 00:00:00`, `${nextDateStr} 00:00:00`]);

    inserted++;
    console.log(`  Rolled up ${snapshot_date} (${inserted}/${dates.length})`);
    await sleep(RATE_LIMIT_MS);
  }

  console.log(`Done. Inserted ${inserted} daily rollup rows.`);
}
|
|
141
|
+
|
|
142
|
+
// Run the script; any rejection from main() is reported and exits with code 1.
main().catch(function onFatal(error) {
  console.error('Fatal error:', error);
  process.exit(1);
});
|