kitfly 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +60 -0
- package/LICENSE +21 -0
- package/README.md +136 -0
- package/VERSION +1 -0
- package/package.json +63 -0
- package/schemas/README.md +32 -0
- package/schemas/site.schema.json +5 -0
- package/schemas/theme.schema.json +5 -0
- package/schemas/v0/site.schema.json +172 -0
- package/schemas/v0/theme.schema.json +210 -0
- package/scripts/build-all.ts +121 -0
- package/scripts/build.ts +601 -0
- package/scripts/bundle.ts +781 -0
- package/scripts/dev.ts +777 -0
- package/scripts/generate-checksums.sh +78 -0
- package/scripts/release/export-release-key.sh +28 -0
- package/scripts/release/release-guard-tag-version.sh +79 -0
- package/scripts/release/sign-release-assets.sh +123 -0
- package/scripts/release/upload-release-assets.sh +76 -0
- package/scripts/release/upload-release-provenance.sh +52 -0
- package/scripts/release/verify-public-key.sh +48 -0
- package/scripts/release/verify-signatures.sh +117 -0
- package/scripts/version-sync.ts +82 -0
- package/src/__tests__/build.test.ts +240 -0
- package/src/__tests__/bundle.test.ts +786 -0
- package/src/__tests__/cli.test.ts +706 -0
- package/src/__tests__/crucible.test.ts +1043 -0
- package/src/__tests__/engine.test.ts +157 -0
- package/src/__tests__/init.test.ts +450 -0
- package/src/__tests__/pipeline.test.ts +1087 -0
- package/src/__tests__/productbook.test.ts +1206 -0
- package/src/__tests__/runbook.test.ts +974 -0
- package/src/__tests__/server-registry.test.ts +1251 -0
- package/src/__tests__/servicebook.test.ts +1248 -0
- package/src/__tests__/shared.test.ts +2005 -0
- package/src/__tests__/styles.test.ts +14 -0
- package/src/__tests__/theme-schema.test.ts +47 -0
- package/src/__tests__/theme.test.ts +554 -0
- package/src/cli.ts +582 -0
- package/src/commands/init.ts +92 -0
- package/src/commands/update.ts +444 -0
- package/src/engine.ts +20 -0
- package/src/logger.ts +15 -0
- package/src/migrations/0000_schema_versioning.ts +67 -0
- package/src/migrations/0001_server_port.ts +52 -0
- package/src/migrations/0002_brand_logo.ts +49 -0
- package/src/migrations/index.ts +26 -0
- package/src/migrations/schema.ts +24 -0
- package/src/server-registry.ts +405 -0
- package/src/shared.ts +1239 -0
- package/src/site/styles.css +931 -0
- package/src/site/template.html +193 -0
- package/src/templates/crucible.ts +1163 -0
- package/src/templates/driver.ts +876 -0
- package/src/templates/handbook.ts +339 -0
- package/src/templates/minimal.ts +139 -0
- package/src/templates/pipeline.ts +966 -0
- package/src/templates/productbook.ts +1032 -0
- package/src/templates/runbook.ts +829 -0
- package/src/templates/schema.ts +119 -0
- package/src/templates/servicebook.ts +1242 -0
- package/src/theme.ts +245 -0
|
@@ -0,0 +1,829 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Runbook Template
|
|
3
|
+
*
|
|
4
|
+
* Extends minimal with structured sections for operational documentation.
|
|
5
|
+
* Sections: Procedures, Troubleshooting, Reference, Incidents
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import type { TemplateContext, TemplateDef } from "./schema.ts";
|
|
9
|
+
|
|
10
|
+
/** Version stamped into generated sites; shared by `runbook.version` and CUSTOMIZING.md. */
const RUNBOOK_TEMPLATE_VERSION = 1;

/**
 * Top-level content sections of a runbook site.
 *
 * Single source of truth for both the template's `sections` metadata and the
 * `sections:` list rendered into `site.yaml`, so the two cannot drift apart.
 */
const RUNBOOK_SECTIONS = [
  {
    name: "Procedures",
    path: "content/procedures",
    description: "Step-by-step operational tasks",
  },
  {
    name: "Troubleshooting",
    path: "content/troubleshooting",
    description: "Problem → solution guides",
  },
  {
    name: "Reference",
    path: "content/reference",
    description: "Interfaces, contacts, checklists",
  },
  {
    name: "Incidents",
    path: "content/incidents",
    description: "Emergency response procedures",
  },
];

/**
 * Render a value as a YAML double-quoted scalar.
 *
 * JSON string syntax is valid YAML, so `JSON.stringify` supplies the escaping
 * for quotes, backslashes and control characters. For values without such
 * characters the output is exactly `"value"`, as the template emitted before.
 *
 * @param value - Raw text (for example a site or brand name).
 * @returns The text wrapped in double quotes with YAML-safe escaping.
 */
function yamlQuoted(value: string): string {
  return JSON.stringify(String(value));
}

/**
 * Render a value for an unquoted front-matter field.
 *
 * The text is emitted unchanged when it is safe as a YAML plain scalar (starts
 * with a letter or digit and contains no `:`, `#`, line break or trailing
 * whitespace). Anything else falls back to {@link yamlQuoted}; the check is
 * deliberately conservative because quoting is always valid.
 *
 * @param value - Raw text for the field.
 * @returns Either the text as-is or its double-quoted, escaped form.
 */
function yamlScalar(value: string): string {
  const text = String(value);
  const isPlainSafe = /^[\p{L}\p{N}]/u.test(text) && !/[:#\r\n]|\s$/.test(text);
  return isPlainSafe ? text : yamlQuoted(text);
}

/** Current UTC date as `YYYY-MM-DD`, evaluated when a file is rendered. */
function isoDate(): string {
  return new Date().toISOString().slice(0, 10);
}

/** Render {@link RUNBOOK_SECTIONS} as the list items of the `sections:` key in `site.yaml`. */
function renderSectionsYaml(): string {
  return RUNBOOK_SECTIONS.map(
    (section) => `  - name: ${yamlQuoted(section.name)}\n    path: ${yamlQuoted(section.path)}`,
  ).join("\n");
}

/**
 * Runbook template definition.
 *
 * Extends the `minimal` template with four sections (Procedures,
 * Troubleshooting, Reference, Incidents) and seeds each with starter pages.
 * Every entry in `files` pairs an output path with a function that renders the
 * file's text from the template context (`ctx.name`, `ctx.year`, `ctx.branding`).
 *
 * Every internal `/content/...` link in the starter pages points at a page this
 * template generates; keep that true when adding or removing files.
 */
export const runbook: TemplateDef = {
  id: "runbook",
  name: "Runbook",
  description: "Operational runbook with procedures, troubleshooting, and incident response",
  version: RUNBOOK_TEMPLATE_VERSION,
  extends: "minimal",
  sections: RUNBOOK_SECTIONS,
  files: [
    {
      path: "site.yaml",
      // Branding values are escaped so names containing quotes or backslashes still produce valid YAML.
      content: (ctx: TemplateContext) => `# ${ctx.branding.siteName} - Site Configuration
# Documentation: https://github.com/3leaps/kitfly

title: ${yamlQuoted(ctx.branding.siteName)}

# ← CUSTOMIZE: Your brand settings
brand:
  name: ${yamlQuoted(ctx.branding.brandName)}
  url: ${yamlQuoted(ctx.branding.brandUrl)}
  # external: false  # Set true if brand URL is external

# Content sections
sections:
${renderSectionsYaml()}

# Home page
home: "index.md"
`,
    },
    {
      path: "index.md",
      content: (ctx: TemplateContext) => `---
title: Home
description: ${yamlScalar(`${ctx.branding.siteName} - Operational Runbook`)}
---

# ${ctx.branding.siteName}

Operational runbook for ${ctx.branding.brandName}. Use this documentation for procedures, troubleshooting, and incident response.

## Quick Links

### [Procedures](/content/procedures/deployment)
Step-by-step guides for operational tasks: deployments, maintenance, migrations.

### [Troubleshooting](/content/troubleshooting/common-issues)
Diagnose and resolve common issues. Symptoms → causes → solutions.

### [Reference](/content/reference/interfaces/api-template)
Supporting materials: API specs, contacts, checklists, metrics.

### [Incidents](/content/incidents/escalation)
Emergency response procedures and escalation paths.

---

## On-Call Quick Reference

| Severity | Response Time | Escalation |
|----------|---------------|------------|
| P1 - Critical | 15 minutes | Immediate page |
| P2 - High | 1 hour | Business hours |
| P3 - Medium | 4 hours | Next business day |
| P4 - Low | Best effort | Backlog |

See [Escalation](/content/incidents/escalation) for full details.

---

*Last updated: ${isoDate()}*
`,
    },
    // Procedures section
    {
      path: "content/procedures/deployment.md",
      content: (ctx: TemplateContext) => `---
title: Deployment Procedure
description: ${yamlScalar(`Standard deployment process for ${ctx.branding.brandName}`)}
---

# Deployment Procedure

<!-- ← CUSTOMIZE: Replace with your deployment steps -->

## Objective

Deploy new code to production environment safely and reliably.

## Prerequisites

- [ ] Code reviewed and approved
- [ ] Tests passing in CI
- [ ] Change ticket approved
- [ ] Rollback plan documented

## Steps

### 1. Pre-deployment Checks

\`\`\`bash
# Verify build status
./scripts/check-build-status.sh

# Expected output: "Build #123 - PASSED"
\`\`\`

### 2. Create Deployment

\`\`\`bash
# Trigger deployment
./scripts/deploy.sh --environment production --version v1.2.3
\`\`\`

**Verification**: Deployment dashboard shows "In Progress"

### 3. Monitor Rollout

- Watch error rates in monitoring dashboard
- Check application logs for startup errors
- Verify health check endpoints responding

### 4. Post-deployment Validation

- [ ] Health checks passing
- [ ] Key user flows working
- [ ] Error rates within normal range
- [ ] Performance metrics stable

## Rollback

If issues are detected:

\`\`\`bash
./scripts/rollback.sh --to-version v1.2.2
\`\`\`

## Related

- [Pre-deploy Checklist](/content/reference/checklists/pre-deploy)
- [Incident Escalation](/content/incidents/escalation)
`,
    },
    // Troubleshooting section
    {
      path: "content/troubleshooting/common-issues.md",
      content: (_ctx: TemplateContext) => `---
title: Common Issues
description: Troubleshooting guide for frequent problems
---

# Common Issues

<!-- ← CUSTOMIZE: Add your common issues and solutions -->

## Connection Timeout

**Symptoms**:
- API calls failing with timeout errors
- Dashboard showing "Connection refused"
- Users reporting slow or unresponsive application

**Possible Causes**:
1. Database connection pool exhausted
2. Network connectivity issues
3. Service overloaded

**Resolution**:

1. Check database connections:
   \`\`\`bash
   ./scripts/check-db-connections.sh
   \`\`\`

2. Verify network connectivity:
   \`\`\`bash
   curl -v https://api.example.com/health
   \`\`\`

3. If overloaded, scale up:
   \`\`\`bash
   ./scripts/scale-service.sh --replicas 5
   \`\`\`

**Escalation**: If unresolved after 15 minutes, escalate to [on-call](/content/incidents/escalation).

---

## High Memory Usage

**Symptoms**:
- Memory alerts firing
- OOM kills in container logs
- Degraded performance

**Resolution**:

1. Identify memory-heavy processes:
   \`\`\`bash
   ./scripts/memory-report.sh
   \`\`\`

2. Check for memory leaks in recent deployments
3. Consider rolling restart if immediate relief needed

---

## Authentication Failures

**Symptoms**:
- Users unable to log in
- 401 errors in API responses
- Token validation failures

**Resolution**:

1. Verify auth service status
2. Check certificate expiration
3. Validate configuration

<!-- ← CUSTOMIZE: Add a dedicated auth troubleshooting page and link it here -->
`,
    },
    // Reference section - Interfaces
    {
      path: "content/reference/interfaces/api-template.md",
      content: (_ctx: TemplateContext) => `---
title: API Integration Template
description: Template for documenting external API integrations
---

# API Integration: [Service Name]

<!-- ← CUSTOMIZE: Copy this template for each integration -->

## Overview

| Field | Value |
|-------|-------|
| **Service** | [Name of external service] |
| **Type** | REST API / GraphQL / SOAP |
| **Environment** | Production / Staging |
| **Documentation** | [Link to vendor docs] |

## Authentication

- **Method**: API Key / OAuth 2.0 / Basic Auth
- **Credentials Location**: Secrets vault path
- **Rotation Schedule**: Quarterly / Annual

## Endpoints

### Primary Endpoint

\`\`\`
POST https://api.vendor.com/v1/resource
Content-Type: application/json
Authorization: Bearer {token}
\`\`\`

**Request**:
\`\`\`json
{
  "field1": "value",
  "field2": 123
}
\`\`\`

**Response** (200 OK):
\`\`\`json
{
  "id": "abc123",
  "status": "success"
}
\`\`\`

## Error Handling

| Code | Meaning | Action |
|------|---------|--------|
| 400 | Bad request | Check request format |
| 401 | Unauthorized | Refresh credentials |
| 429 | Rate limited | Back off and retry |
| 500 | Server error | Contact vendor |

## Rate Limits

- **Limit**: 1000 requests/minute
- **Throttling**: Automatic retry with exponential backoff

## Vendor Support

- **Support Portal**: [URL]
- **SLA**: 99.9% uptime
- **Contact**: See [Contacts](/content/reference/contacts/directory)
`,
    },
    // Reference section - Contacts
    {
      path: "content/reference/contacts/directory.md",
      content: (ctx: TemplateContext) => `---
title: Contact Directory
description: ${yamlScalar(`Team and vendor contacts for ${ctx.branding.brandName}`)}
---

# Contact Directory

<!-- ← CUSTOMIZE: Add your team and vendor contacts -->

## Internal Team

### On-Call

| Role | Contact | Escalation |
|------|---------|------------|
| Primary On-Call | [PagerDuty/Slack] | Automatic rotation |
| Secondary On-Call | [PagerDuty/Slack] | If primary unavailable |
| Engineering Lead | [Name] | For P1 decisions |

### Team Leads

| Area | Contact | Responsibility |
|------|---------|----------------|
| Platform | [Name/Email] | Infrastructure, deployments |
| Backend | [Name/Email] | API, services |
| Frontend | [Name/Email] | UI, client apps |
| Data | [Name/Email] | Database, analytics |

## Vendor Contacts

### [Vendor Name]

| Type | Contact |
|------|---------|
| Support Portal | [URL] |
| Support Email | [email] |
| Account Manager | [Name/Email] |
| Emergency Line | [Phone] |

### Cloud Provider

| Type | Contact |
|------|---------|
| Support | [AWS/GCP/Azure portal] |
| Account Team | [Name/Email] |
| TAM | [Name/Email] |

## Escalation Matrix

See [Escalation Procedures](/content/incidents/escalation) for when and how to escalate.
`,
    },
    // Reference section - Checklists
    {
      path: "content/reference/checklists/pre-deploy.md",
      content: (_ctx: TemplateContext) => `---
title: Pre-Deployment Checklist
description: Verification checklist before deploying to production
---

# Pre-Deployment Checklist

<!-- ← CUSTOMIZE: Adapt to your deployment process -->

## Code Readiness

- [ ] All tests passing in CI
- [ ] Code review approved
- [ ] No critical security findings
- [ ] Documentation updated

## Change Management

- [ ] Change ticket created and approved
- [ ] Stakeholders notified
- [ ] Deployment window confirmed
- [ ] Rollback plan documented

## Environment Verification

- [ ] Target environment healthy
- [ ] Dependencies available
- [ ] Configuration validated
- [ ] Secrets/credentials current

## Monitoring Readiness

- [ ] Dashboards accessible
- [ ] Alerts configured
- [ ] Log aggregation working
- [ ] On-call engineer aware

## Go/No-Go

| Criteria | Status |
|----------|--------|
| Tests passing | ⬜ |
| Approvals complete | ⬜ |
| Environment ready | ⬜ |
| Monitoring ready | ⬜ |
| Rollback tested | ⬜ |

**Decision**: ⬜ GO / ⬜ NO-GO

---

## Post-Deployment

See [Deployment Procedure](/content/procedures/deployment) for execution steps.
`,
    },
    // Reference section - Analytics
    {
      path: "content/reference/analytics/dashboards.md",
      content: (ctx: TemplateContext) => `---
title: Dashboards & Metrics
description: ${yamlScalar(`Key metrics and dashboard links for ${ctx.branding.brandName}`)}
---

# Dashboards & Metrics

<!-- ← CUSTOMIZE: Add your monitoring URLs and KPIs -->

## Primary Dashboards

| Dashboard | URL | Purpose |
|-----------|-----|---------|
| System Health | [Grafana/Datadog URL] | Overall system status |
| Application Metrics | [URL] | Request rates, latencies |
| Error Tracking | [Sentry/Rollbar URL] | Exceptions and errors |
| Infrastructure | [URL] | CPU, memory, network |

## Key Performance Indicators

### Availability

| Metric | Target | Current |
|--------|--------|---------|
| Uptime | 99.9% | [Link to metric] |
| Error Rate | < 0.1% | [Link to metric] |
| P95 Latency | < 200ms | [Link to metric] |

### Business Metrics

| Metric | Target | Dashboard |
|--------|--------|-----------|
| Requests/sec | > 1000 | [Link] |
| Active Users | - | [Link] |
| Transaction Success | > 99.5% | [Link] |

## SLA Definitions

| Tier | Availability | Response Time |
|------|--------------|---------------|
| Critical | 99.99% | 15 min |
| Standard | 99.9% | 1 hour |
| Best Effort | 99% | 4 hours |

## Alert Thresholds

| Alert | Warning | Critical | Action |
|-------|---------|----------|--------|
| CPU | > 70% | > 90% | Scale or investigate |
| Memory | > 80% | > 95% | Check for leaks |
| Error Rate | > 1% | > 5% | Investigate immediately |
| Latency P95 | > 500ms | > 1s | Check dependencies |
`,
    },
    // Incidents section
    {
      path: "content/incidents/escalation.md",
      content: (_ctx: TemplateContext) => `---
title: Escalation Procedures
description: When and how to escalate incidents
---

# Escalation Procedures

<!-- ← CUSTOMIZE: Add your escalation paths and contacts -->

## Severity Levels

| Level | Definition | Response Time | Examples |
|-------|------------|---------------|----------|
| **P1** | Critical - Service down | 15 min | Complete outage, data loss |
| **P2** | High - Major degradation | 1 hour | Partial outage, slow response |
| **P3** | Medium - Minor impact | 4 hours | Feature broken, workaround exists |
| **P4** | Low - Minimal impact | Best effort | Cosmetic issues, minor bugs |

## Escalation Matrix

### P1 - Critical

1. **Immediately**: Page on-call engineer
2. **15 minutes**: If no response, page secondary
3. **30 minutes**: Escalate to engineering lead
4. **1 hour**: Notify stakeholders, consider exec briefing

### P2 - High

1. **Immediately**: Notify on-call via Slack/PagerDuty
2. **1 hour**: Escalate if no progress
3. **4 hours**: Engineering lead involvement

### P3/P4 - Medium/Low

1. Create ticket in tracking system
2. Assign to appropriate team
3. Follow normal sprint process

## Communication

### Internal Updates

| Audience | Channel | Frequency |
|----------|---------|-----------|
| Incident Team | War room/Slack | Continuous |
| Engineering | #incidents channel | Every 30 min |
| Leadership | Email/Slack | Hourly for P1 |

### External Communication

For customer-facing incidents:
1. Status page update within 15 minutes
2. Customer support briefed
3. Follow-up communication on resolution

## Post-Incident

1. **Immediate**: Confirm resolution and monitoring
2. **24 hours**: Draft incident report
3. **1 week**: Post-mortem meeting
4. **2 weeks**: Action items assigned and tracked

See [Post-Mortem Template](/content/incidents/post-mortem-template) for documentation format.
`,
    },
    {
      path: "content/incidents/post-mortem-template.md",
      content: (_ctx: TemplateContext) => `---
title: Post-Mortem Template
description: Template for incident post-mortem documentation
---

# Post-Mortem: [Incident Title]

<!-- ← CUSTOMIZE: Copy this template for each incident -->

## Summary

| Field | Value |
|-------|-------|
| **Date** | YYYY-MM-DD |
| **Duration** | X hours Y minutes |
| **Severity** | P1/P2/P3 |
| **Impact** | [Description of user/business impact] |
| **Status** | Draft / Final |

## Timeline

| Time (UTC) | Event |
|------------|-------|
| HH:MM | Issue first detected |
| HH:MM | On-call paged |
| HH:MM | Root cause identified |
| HH:MM | Fix deployed |
| HH:MM | Service restored |
| HH:MM | Incident closed |

## Root Cause

[Detailed technical explanation of what caused the incident]

## Contributing Factors

- Factor 1
- Factor 2
- Factor 3

## Resolution

[What was done to resolve the incident]

## Lessons Learned

### What Went Well

- Item 1
- Item 2

### What Could Be Improved

- Item 1
- Item 2

## Action Items

| Item | Owner | Due Date | Status |
|------|-------|----------|--------|
| [Action] | [Name] | [Date] | ⬜ Open |
| [Action] | [Name] | [Date] | ⬜ Open |

## References

- [Link to incident ticket]
- [Link to relevant dashboards]
- [Link to related documentation]
`,
    },
    // CUSTOMIZING.md
    {
      path: "CUSTOMIZING.md",
      content: (ctx: TemplateContext) => `---
template: runbook
template_version: ${RUNBOOK_TEMPLATE_VERSION}
created: ${isoDate()}
---

# Customizing ${ctx.branding.siteName}

This guide helps you (and AI assistants) understand how to customize this runbook.

## Site Structure

\`\`\`
${ctx.name}/
├── site.yaml              # Site configuration (sections, branding)
├── theme.yaml             # Theme customization (create if needed)
├── index.md               # Home page with quick links
├── CUSTOMIZING.md         # This file
├── content/
│   ├── procedures/        # Step-by-step operational tasks
│   ├── troubleshooting/   # Problem → solution guides
│   ├── reference/         # Supporting materials
│   │   ├── interfaces/    # API specs, protocol docs
│   │   ├── contacts/      # Team and vendor contacts
│   │   ├── checklists/    # Verification checklists
│   │   └── analytics/     # Dashboards, KPIs, SLAs
│   └── incidents/         # Emergency response
└── assets/
    └── brand/             # Logo, favicon
\`\`\`

## Configuration Files

### site.yaml - Site Configuration

\`\`\`yaml
title: ${yamlQuoted(ctx.branding.siteName)}

brand:
  name: ${yamlQuoted(ctx.branding.brandName)}  # Shown in header
  url: "/"  # Logo link destination

sections:
  - name: "Procedures"
    path: "content/procedures"
  # Add or modify sections here
\`\`\`

### theme.yaml - Visual Customization (optional)

Create \`theme.yaml\` to customize colors and typography:

\`\`\`yaml
colors:
  primary: "#2563eb"
  background: "#ffffff"
  text: "#1f2937"

footer:
  text: ${yamlQuoted(`© ${ctx.year} ${ctx.branding.brandName}`)}
\`\`\`

## Header and Footer

- **Header**: Brand name/logo from \`site.yaml\` + section navigation
- **Footer**: Auto-generated copyright, customizable via \`theme.yaml\`

## Brand Assets

| Asset | Location | Recommended Size |
|-------|----------|------------------|
| Logo | \`assets/brand/logo.png\` | 200x50px (or SVG) |
| Favicon | \`assets/brand/favicon.ico\` | 32x32px |

## Adding Content

### New Procedure

1. Create file in \`content/procedures/\`:
   \`\`\`
   content/procedures/my-procedure.md
   \`\`\`

2. Use the procedure format:
   \`\`\`markdown
   ---
   title: Procedure Name
   ---

   # Procedure Name

   ## Objective
   What this accomplishes.

   ## Prerequisites
   - [ ] Required item

   ## Steps
   1. First step
   2. Second step

   ## Verification
   How to confirm success.

   ## Rollback
   How to revert if needed.
   \`\`\`

### New Troubleshooting Guide

Follow the pattern: Symptoms → Causes → Resolution → Escalation

### New Interface/API Doc

Copy \`content/reference/interfaces/api-template.md\` and customize.

### New Section

1. Create folder: \`content/newsection/\`
2. Add to \`site.yaml\`:
   \`\`\`yaml
   sections:
     - name: "New Section"
       path: "content/newsection"
   \`\`\`
3. Add at least one markdown file

## Linking and References

### Internal Links

\`\`\`markdown
See [Deployment](/content/procedures/deployment) procedure.
\`\`\`

### External Links

\`\`\`markdown
See [Vendor Documentation](https://vendor.com/docs).
\`\`\`

### Images

\`\`\`markdown

\`\`\`

## Important Limitations

- **Content must be inside this folder** - kitfly cannot include external files
- **External resources**: Link via URL rather than copying
- **Binary files** (PDFs): Place in \`assets/\` and link to them
- **No dynamic includes**: This generates static HTML

## Document Conventions

### Procedures

- Start with Objective (what/why)
- List Prerequisites as checkboxes
- Number steps explicitly
- Include expected outputs
- Always have Rollback section

### Troubleshooting

- Lead with Symptoms (what user sees)
- List Possible Causes
- Provide step-by-step Resolution
- Include Escalation path

### Checklists

- Use checkbox format: \`- [ ] Item\`
- Group by phase or category
- Include Go/No-Go decision point

## Getting Help

- [Kitfly Documentation](https://github.com/3leaps/kitfly)
- [Markdown Guide](https://www.markdownguide.org/)
`,
    },
  ],
};
|