tribunal-kit 3.0.0 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agent/ARCHITECTURE.md +99 -99
- package/.agent/GEMINI.md +52 -52
- package/.agent/agents/accessibility-reviewer.md +187 -220
- package/.agent/agents/ai-code-reviewer.md +199 -233
- package/.agent/agents/backend-specialist.md +215 -238
- package/.agent/agents/code-archaeologist.md +161 -181
- package/.agent/agents/database-architect.md +184 -207
- package/.agent/agents/debugger.md +191 -218
- package/.agent/agents/dependency-reviewer.md +103 -136
- package/.agent/agents/devops-engineer.md +218 -238
- package/.agent/agents/documentation-writer.md +201 -221
- package/.agent/agents/explorer-agent.md +160 -180
- package/.agent/agents/frontend-reviewer.md +160 -194
- package/.agent/agents/frontend-specialist.md +248 -237
- package/.agent/agents/game-developer.md +48 -52
- package/.agent/agents/logic-reviewer.md +116 -149
- package/.agent/agents/mobile-developer.md +200 -223
- package/.agent/agents/mobile-reviewer.md +162 -195
- package/.agent/agents/orchestrator.md +181 -211
- package/.agent/agents/penetration-tester.md +157 -174
- package/.agent/agents/performance-optimizer.md +183 -203
- package/.agent/agents/performance-reviewer.md +178 -211
- package/.agent/agents/precedence-reviewer.md +213 -0
- package/.agent/agents/product-manager.md +142 -162
- package/.agent/agents/product-owner.md +6 -25
- package/.agent/agents/project-planner.md +142 -162
- package/.agent/agents/qa-automation-engineer.md +225 -242
- package/.agent/agents/security-auditor.md +174 -194
- package/.agent/agents/seo-specialist.md +193 -213
- package/.agent/agents/sql-reviewer.md +161 -194
- package/.agent/agents/supervisor-agent.md +184 -203
- package/.agent/agents/swarm-worker-contracts.md +17 -17
- package/.agent/agents/swarm-worker-registry.md +46 -46
- package/.agent/agents/test-coverage-reviewer.md +160 -193
- package/.agent/agents/test-engineer.md +0 -21
- package/.agent/agents/type-safety-reviewer.md +175 -208
- package/.agent/patterns/generator.md +9 -9
- package/.agent/patterns/inversion.md +12 -12
- package/.agent/patterns/pipeline.md +9 -9
- package/.agent/patterns/reviewer.md +13 -13
- package/.agent/patterns/tool-wrapper.md +9 -9
- package/.agent/rules/GEMINI.md +63 -63
- package/.agent/scripts/append_flow.js +72 -0
- package/.agent/scripts/case_law_manager.py +525 -0
- package/.agent/scripts/compress_skills.py +167 -0
- package/.agent/scripts/consolidate_skills.py +173 -0
- package/.agent/scripts/deep_compress.py +202 -0
- package/.agent/scripts/minify_context.py +80 -0
- package/.agent/scripts/security_scan.py +1 -1
- package/.agent/scripts/skill_evolution.py +563 -0
- package/.agent/scripts/strip_tribunal.py +41 -0
- package/.agent/skills/agent-organizer/SKILL.md +100 -126
- package/.agent/skills/agentic-patterns/SKILL.md +0 -70
- package/.agent/skills/ai-prompt-injection-defense/SKILL.md +134 -160
- package/.agent/skills/api-patterns/SKILL.md +123 -215
- package/.agent/skills/api-security-auditor/SKILL.md +143 -177
- package/.agent/skills/app-builder/SKILL.md +334 -50
- package/.agent/skills/app-builder/templates/SKILL.md +13 -15
- package/.agent/skills/app-builder/templates/astro-static/TEMPLATE.md +16 -16
- package/.agent/skills/app-builder/templates/chrome-extension/TEMPLATE.md +22 -22
- package/.agent/skills/app-builder/templates/cli-tool/TEMPLATE.md +18 -18
- package/.agent/skills/app-builder/templates/electron-desktop/TEMPLATE.md +20 -20
- package/.agent/skills/app-builder/templates/express-api/TEMPLATE.md +17 -17
- package/.agent/skills/app-builder/templates/flutter-app/TEMPLATE.md +18 -18
- package/.agent/skills/app-builder/templates/monorepo-turborepo/TEMPLATE.md +21 -21
- package/.agent/skills/app-builder/templates/nextjs-fullstack/TEMPLATE.md +19 -19
- package/.agent/skills/app-builder/templates/nextjs-saas/TEMPLATE.md +26 -26
- package/.agent/skills/app-builder/templates/nextjs-static/TEMPLATE.md +26 -26
- package/.agent/skills/app-builder/templates/nuxt-app/TEMPLATE.md +19 -19
- package/.agent/skills/app-builder/templates/python-fastapi/TEMPLATE.md +18 -18
- package/.agent/skills/app-builder/templates/react-native-app/TEMPLATE.md +20 -20
- package/.agent/skills/appflow-wireframe/SKILL.md +95 -121
- package/.agent/skills/architecture/SKILL.md +169 -331
- package/.agent/skills/authentication-best-practices/SKILL.md +139 -173
- package/.agent/skills/bash-linux/SKILL.md +129 -154
- package/.agent/skills/behavioral-modes/SKILL.md +8 -69
- package/.agent/skills/brainstorming/SKILL.md +436 -104
- package/.agent/skills/building-native-ui/SKILL.md +152 -174
- package/.agent/skills/clean-code/SKILL.md +331 -360
- package/.agent/skills/code-review-checklist/SKILL.md +0 -62
- package/.agent/skills/config-validator/SKILL.md +115 -141
- package/.agent/skills/csharp-developer/SKILL.md +468 -528
- package/.agent/skills/database-design/SKILL.md +104 -369
- package/.agent/skills/deployment-procedures/SKILL.md +119 -145
- package/.agent/skills/devops-engineer/SKILL.md +295 -332
- package/.agent/skills/devops-incident-responder/SKILL.md +87 -113
- package/.agent/skills/doc.md +5 -5
- package/.agent/skills/documentation-templates/SKILL.md +27 -63
- package/.agent/skills/edge-computing/SKILL.md +131 -157
- package/.agent/skills/extract-design-system/SKILL.md +108 -134
- package/.agent/skills/framer-motion-expert/SKILL.md +111 -855
- package/.agent/skills/frontend-design/SKILL.md +151 -499
- package/.agent/skills/game-design-expert/SKILL.md +79 -105
- package/.agent/skills/game-engineering-expert/SKILL.md +96 -122
- package/.agent/skills/geo-fundamentals/SKILL.md +97 -124
- package/.agent/skills/github-operations/SKILL.md +279 -314
- package/.agent/skills/gsap-expert/SKILL.md +119 -826
- package/.agent/skills/i18n-localization/SKILL.md +113 -138
- package/.agent/skills/intelligent-routing/SKILL.md +167 -127
- package/.agent/skills/lint-and-validate/SKILL.md +16 -52
- package/.agent/skills/llm-engineering/SKILL.md +344 -357
- package/.agent/skills/local-first/SKILL.md +128 -154
- package/.agent/skills/mcp-builder/SKILL.md +92 -118
- package/.agent/skills/mobile-design/SKILL.md +213 -219
- package/.agent/skills/motion-engineering/SKILL.md +184 -0
- package/.agent/skills/nextjs-react-expert/SKILL.md +99 -698
- package/.agent/skills/nodejs-best-practices/SKILL.md +498 -559
- package/.agent/skills/observability/SKILL.md +293 -330
- package/.agent/skills/parallel-agents/SKILL.md +96 -122
- package/.agent/skills/performance-profiling/SKILL.md +217 -254
- package/.agent/skills/plan-writing/SKILL.md +92 -118
- package/.agent/skills/platform-engineer/SKILL.md +97 -123
- package/.agent/skills/playwright-best-practices/SKILL.md +137 -162
- package/.agent/skills/powershell-windows/SKILL.md +112 -146
- package/.agent/skills/project-idioms/SKILL.md +87 -0
- package/.agent/skills/python-patterns/SKILL.md +15 -35
- package/.agent/skills/python-pro/SKILL.md +148 -754
- package/.agent/skills/react-specialist/SKILL.md +123 -827
- package/.agent/skills/readme-builder/SKILL.md +23 -85
- package/.agent/skills/realtime-patterns/SKILL.md +269 -304
- package/.agent/skills/red-team-tactics/SKILL.md +18 -51
- package/.agent/skills/rust-pro/SKILL.md +623 -701
- package/.agent/skills/seo-fundamentals/SKILL.md +129 -154
- package/.agent/skills/server-management/SKILL.md +164 -190
- package/.agent/skills/shadcn-ui-expert/SKILL.md +181 -206
- package/.agent/skills/skill-creator/SKILL.md +24 -56
- package/.agent/skills/sql-pro/SKILL.md +579 -633
- package/.agent/skills/supabase-postgres-best-practices/SKILL.md +35 -66
- package/.agent/skills/swiftui-expert/SKILL.md +151 -176
- package/.agent/skills/systematic-debugging/SKILL.md +92 -118
- package/.agent/skills/tailwind-patterns/SKILL.md +516 -576
- package/.agent/skills/tdd-workflow/SKILL.md +111 -137
- package/.agent/skills/test-result-analyzer/SKILL.md +33 -73
- package/.agent/skills/testing-patterns/SKILL.md +512 -573
- package/.agent/skills/trend-researcher/SKILL.md +30 -71
- package/.agent/skills/ui-ux-pro-max/SKILL.md +8 -41
- package/.agent/skills/ui-ux-researcher/SKILL.md +51 -91
- package/.agent/skills/vue-expert/SKILL.md +127 -866
- package/.agent/skills/vulnerability-scanner/SKILL.md +354 -269
- package/.agent/skills/web-accessibility-auditor/SKILL.md +168 -193
- package/.agent/skills/web-design-guidelines/SKILL.md +25 -61
- package/.agent/skills/webapp-testing/SKILL.md +119 -145
- package/.agent/skills/whimsy-injector/SKILL.md +58 -132
- package/.agent/skills/workflow-optimizer/SKILL.md +28 -68
- package/.agent/workflows/api-tester.md +151 -151
- package/.agent/workflows/audit.md +127 -138
- package/.agent/workflows/brainstorm.md +110 -110
- package/.agent/workflows/changelog.md +112 -112
- package/.agent/workflows/create.md +124 -124
- package/.agent/workflows/debug.md +165 -189
- package/.agent/workflows/deploy.md +180 -189
- package/.agent/workflows/enhance.md +128 -151
- package/.agent/workflows/fix.md +114 -135
- package/.agent/workflows/generate.md +13 -4
- package/.agent/workflows/migrate.md +160 -160
- package/.agent/workflows/orchestrate.md +168 -168
- package/.agent/workflows/performance-benchmarker.md +114 -123
- package/.agent/workflows/plan.md +173 -173
- package/.agent/workflows/preview.md +80 -80
- package/.agent/workflows/refactor.md +161 -183
- package/.agent/workflows/review-ai.md +101 -129
- package/.agent/workflows/review.md +116 -116
- package/.agent/workflows/session.md +94 -94
- package/.agent/workflows/status.md +79 -79
- package/.agent/workflows/strengthen-skills.md +138 -139
- package/.agent/workflows/swarm.md +179 -179
- package/.agent/workflows/test.md +189 -211
- package/.agent/workflows/tribunal-backend.md +94 -113
- package/.agent/workflows/tribunal-database.md +95 -115
- package/.agent/workflows/tribunal-frontend.md +96 -118
- package/.agent/workflows/tribunal-full.md +93 -133
- package/.agent/workflows/tribunal-mobile.md +95 -119
- package/.agent/workflows/tribunal-performance.md +110 -133
- package/.agent/workflows/ui-ux-pro-max.md +122 -143
- package/README.md +30 -1
- package/bin/tribunal-kit.js +175 -12
- package/package.json +25 -4
- package/.agent/skills/api-patterns/api-style.md +0 -42
- package/.agent/skills/api-patterns/auth.md +0 -24
- package/.agent/skills/api-patterns/documentation.md +0 -26
- package/.agent/skills/api-patterns/graphql.md +0 -41
- package/.agent/skills/api-patterns/rate-limiting.md +0 -31
- package/.agent/skills/api-patterns/response.md +0 -37
- package/.agent/skills/api-patterns/rest.md +0 -40
- package/.agent/skills/api-patterns/security-testing.md +0 -122
- package/.agent/skills/api-patterns/trpc.md +0 -41
- package/.agent/skills/api-patterns/versioning.md +0 -22
- package/.agent/skills/app-builder/agent-coordination.md +0 -71
- package/.agent/skills/app-builder/feature-building.md +0 -53
- package/.agent/skills/app-builder/project-detection.md +0 -34
- package/.agent/skills/app-builder/scaffolding.md +0 -118
- package/.agent/skills/app-builder/tech-stack.md +0 -40
- package/.agent/skills/architecture/context-discovery.md +0 -43
- package/.agent/skills/architecture/examples.md +0 -94
- package/.agent/skills/architecture/pattern-selection.md +0 -68
- package/.agent/skills/architecture/patterns-reference.md +0 -50
- package/.agent/skills/architecture/trade-off-analysis.md +0 -77
- package/.agent/skills/brainstorming/dynamic-questioning.md +0 -360
- package/.agent/skills/database-design/database-selection.md +0 -43
- package/.agent/skills/database-design/indexing.md +0 -39
- package/.agent/skills/database-design/migrations.md +0 -48
- package/.agent/skills/database-design/optimization.md +0 -36
- package/.agent/skills/database-design/orm-selection.md +0 -30
- package/.agent/skills/database-design/schema-design.md +0 -56
- package/.agent/skills/frontend-design/animation-guide.md +0 -331
- package/.agent/skills/frontend-design/color-system.md +0 -329
- package/.agent/skills/frontend-design/decision-trees.md +0 -418
- package/.agent/skills/frontend-design/motion-graphics.md +0 -306
- package/.agent/skills/frontend-design/typography-system.md +0 -363
- package/.agent/skills/frontend-design/ux-psychology.md +0 -1116
- package/.agent/skills/frontend-design/visual-effects.md +0 -383
- package/.agent/skills/intelligent-routing/router-manifest.md +0 -65
- package/.agent/skills/mobile-design/decision-trees.md +0 -516
- package/.agent/skills/mobile-design/mobile-backend.md +0 -491
- package/.agent/skills/mobile-design/mobile-color-system.md +0 -420
- package/.agent/skills/mobile-design/mobile-debugging.md +0 -122
- package/.agent/skills/mobile-design/mobile-design-thinking.md +0 -357
- package/.agent/skills/mobile-design/mobile-navigation.md +0 -458
- package/.agent/skills/mobile-design/mobile-performance.md +0 -767
- package/.agent/skills/mobile-design/mobile-testing.md +0 -356
- package/.agent/skills/mobile-design/mobile-typography.md +0 -433
- package/.agent/skills/mobile-design/platform-android.md +0 -666
- package/.agent/skills/mobile-design/platform-ios.md +0 -561
- package/.agent/skills/mobile-design/touch-psychology.md +0 -537
- package/.agent/skills/nextjs-react-expert/1-async-eliminating-waterfalls.md +0 -312
- package/.agent/skills/nextjs-react-expert/2-bundle-bundle-size-optimization.md +0 -240
- package/.agent/skills/nextjs-react-expert/3-server-server-side-performance.md +0 -490
- package/.agent/skills/nextjs-react-expert/4-client-client-side-data-fetching.md +0 -264
- package/.agent/skills/nextjs-react-expert/5-rerender-re-render-optimization.md +0 -581
- package/.agent/skills/nextjs-react-expert/6-rendering-rendering-performance.md +0 -432
- package/.agent/skills/nextjs-react-expert/7-js-javascript-performance.md +0 -684
- package/.agent/skills/nextjs-react-expert/8-advanced-advanced-patterns.md +0 -150
- package/.agent/skills/vulnerability-scanner/checklists.md +0 -121
|
@@ -1,330 +1,293 @@
|
|
|
1
|
-
---
|
|
2
|
-
name: observability
|
|
3
|
-
description: Production observability mastery. Structured logging (Pino/Winston), OpenTelemetry tracing, metrics (Prometheus/Grafana), SLIs/SLOs/error budgets, distributed tracing, alerting design, health checks, and AI observability. Use when setting up monitoring, debugging production issues, or designing observable distributed systems.
|
|
4
|
-
allowed-tools: Read, Write, Edit, Glob, Grep
|
|
5
|
-
version: 2.0.0
|
|
6
|
-
last-updated: 2026-04-01
|
|
7
|
-
applies-to-model: gemini-2.5-pro, claude-3-7-sonnet
|
|
8
|
-
---
|
|
9
|
-
|
|
10
|
-
# Observability — Production Monitoring Mastery
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
});
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
//
|
|
53
|
-
// -
|
|
54
|
-
//
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
-
|
|
72
|
-
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
"@opentelemetry/instrumentation-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
span.
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
span.setStatus({ code: SpanStatusCode.
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
span.
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
const
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
"99.
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
"99.
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
//
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
```
|
|
295
|
-
|
|
296
|
-
---
|
|
297
|
-
|
|
298
|
-
## 🤖 LLM-Specific Traps
|
|
299
|
-
|
|
300
|
-
1. **`console.log` in Production:** Use structured logging (Pino/Winston). `console.log` can't be searched or filtered.
|
|
301
|
-
2. **Logging PII:** Never log emails, names, passwords, or tokens. Use redaction.
|
|
302
|
-
3. **Liveness Checking Dependencies:** Liveness probes must NOT check DB/Redis. Only readiness probes check dependencies.
|
|
303
|
-
4. **Alerting on Causes:** "CPU is 80%" is not actionable. Alert on "P95 latency > 1s" instead.
|
|
304
|
-
5. **Missing Request IDs:** Without correlation IDs, debugging distributed systems is impossible.
|
|
305
|
-
6. **Percentiles vs Averages:** Average latency hides outliers. Track P50, P95, P99.
|
|
306
|
-
7. **No Error Budgets:** Without SLOs and error budgets, "availability" is subjective.
|
|
307
|
-
8. **Metrics Without Labels:** `requests_total` without `method`, `path`, `status` labels is useless.
|
|
308
|
-
9. **Tracing Without Sampling:** 100% trace collection is expensive. Use head-based or tail-based sampling.
|
|
309
|
-
10. **Log Levels in Code:** Hardcoded `logger.debug()` everywhere. Use configurable log levels via env.
|
|
310
|
-
|
|
311
|
-
---
|
|
312
|
-
|
|
313
|
-
## 🏛️ Tribunal Integration
|
|
314
|
-
|
|
315
|
-
**Slash command: `/tribunal-backend`**
|
|
316
|
-
|
|
317
|
-
### ✅ Pre-Flight Self-Audit
|
|
318
|
-
|
|
319
|
-
```
|
|
320
|
-
✅ Am I using structured logging (not console.log)?
|
|
321
|
-
✅ Do all logs include requestId for correlation?
|
|
322
|
-
✅ Am I NOT logging PII or secrets?
|
|
323
|
-
✅ Are liveness and readiness checks separate?
|
|
324
|
-
✅ Is OpenTelemetry tracing configured?
|
|
325
|
-
✅ Am I tracking RED metrics (Rate, Errors, Duration)?
|
|
326
|
-
✅ Are SLOs defined with error budgets?
|
|
327
|
-
✅ Do alerts have runbook links?
|
|
328
|
-
✅ Am I alerting on symptoms (not causes)?
|
|
329
|
-
✅ Are log levels configurable via environment variable?
|
|
330
|
-
```
|
|
1
|
+
---
|
|
2
|
+
name: observability
|
|
3
|
+
description: Production observability mastery. Structured logging (Pino/Winston), OpenTelemetry tracing, metrics (Prometheus/Grafana), SLIs/SLOs/error budgets, distributed tracing, alerting design, health checks, and AI observability. Use when setting up monitoring, debugging production issues, or designing observable distributed systems.
|
|
4
|
+
allowed-tools: Read, Write, Edit, Glob, Grep
|
|
5
|
+
version: 2.0.0
|
|
6
|
+
last-updated: 2026-04-01
|
|
7
|
+
applies-to-model: gemini-2.5-pro, claude-3-7-sonnet
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
# Observability — Production Monitoring Mastery
|
|
11
|
+
|
|
12
|
+
---
|
|
13
|
+
|
|
14
|
+
## The Three Pillars
|
|
15
|
+
|
|
16
|
+
```
|
|
17
|
+
Logs → WHAT happened (structured events)
|
|
18
|
+
Traces → WHERE it happened (request flow across services)
|
|
19
|
+
Metrics → HOW MUCH is happening (counters, histograms, gauges)
|
|
20
|
+
|
|
21
|
+
All three are needed. Logs alone are not observability.
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
---
|
|
25
|
+
|
|
26
|
+
## Structured Logging
|
|
27
|
+
|
|
28
|
+
```typescript
|
|
29
|
+
import pino from "pino";
|
|
30
|
+
|
|
31
|
+
// ✅ Structured JSON logging
|
|
32
|
+
const logger = pino({
|
|
33
|
+
level: process.env.LOG_LEVEL ?? "info",
|
|
34
|
+
timestamp: pino.stdTimeFunctions.isoTime,
|
|
35
|
+
...(process.env.NODE_ENV === "development" && {
|
|
36
|
+
transport: { target: "pino-pretty" },
|
|
37
|
+
}),
|
|
38
|
+
});
|
|
39
|
+
|
|
40
|
+
// ✅ GOOD: Structured with context
|
|
41
|
+
logger.info({ userId: user.id, action: "login", ip: req.ip }, "User logged in");
|
|
42
|
+
logger.error({ err, orderId: order.id, paymentGateway: "stripe" }, "Payment failed");
|
|
43
|
+
logger.warn({ queueDepth: 1500, threshold: 1000 }, "Queue depth exceeding threshold");
|
|
44
|
+
|
|
45
|
+
// ❌ BAD: Unstructured string logging
|
|
46
|
+
console.log("User " + user.id + " logged in from " + req.ip);
|
|
47
|
+
console.log("Error: " + error.message);
|
|
48
|
+
|
|
49
|
+
// ❌ HALLUCINATION TRAP: console.log is NOT production logging
|
|
50
|
+
// - No severity levels (info/warn/error)
|
|
51
|
+
// - No structured fields (can't search/filter)
|
|
52
|
+
// - No timestamps in ISO format
|
|
53
|
+
// - Can't be collected by log aggregators
|
|
54
|
+
// ✅ Use Pino (Node.js) or structlog (Python) for production
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
### Log Levels
|
|
58
|
+
|
|
59
|
+
```
|
|
60
|
+
fatal → App is crashing, immediate attention required
|
|
61
|
+
error → Operation failed, needs investigation
|
|
62
|
+
warn → Something unexpected, but app continues
|
|
63
|
+
info → Business events (user login, order placed, deploy)
|
|
64
|
+
debug → Technical details (query timing, cache hit/miss)
|
|
65
|
+
trace → Verbose debugging (only in development)
|
|
66
|
+
|
|
67
|
+
Rules:
|
|
68
|
+
- Production default: info
|
|
69
|
+
- Never log PII (names, emails, SSNs) at any level
|
|
70
|
+
- Never log secrets (tokens, passwords, API keys)
|
|
71
|
+
- Log request IDs for correlation
|
|
72
|
+
- Log durations for performance tracking
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
### Request Context / Correlation
|
|
76
|
+
|
|
77
|
+
```typescript
|
|
78
|
+
import { AsyncLocalStorage } from "node:async_hooks";
|
|
79
|
+
|
|
80
|
+
const requestContext = new AsyncLocalStorage<{ requestId: string; userId?: string }>();
|
|
81
|
+
|
|
82
|
+
// Middleware: set context per request
|
|
83
|
+
app.use((req, res, next) => {
|
|
84
|
+
const requestId = req.headers["x-request-id"]?.toString() ?? crypto.randomUUID();
|
|
85
|
+
res.setHeader("x-request-id", requestId);
|
|
86
|
+
requestContext.run({ requestId, userId: req.user?.id }, next);
|
|
87
|
+
});
|
|
88
|
+
|
|
89
|
+
// Child logger with context
|
|
90
|
+
function getLogger() {
|
|
91
|
+
const ctx = requestContext.getStore();
|
|
92
|
+
return logger.child({
|
|
93
|
+
requestId: ctx?.requestId,
|
|
94
|
+
userId: ctx?.userId,
|
|
95
|
+
});
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
// Every log from this request includes requestId and userId
|
|
99
|
+
const log = getLogger();
|
|
100
|
+
log.info("Processing order"); // { requestId: "abc-123", userId: "42", msg: "Processing order" }
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
---
|
|
104
|
+
|
|
105
|
+
## Distributed Tracing (OpenTelemetry)
|
|
106
|
+
|
|
107
|
+
```typescript
|
|
108
|
+
import { NodeSDK } from "@opentelemetry/sdk-node";
|
|
109
|
+
import { getNodeAutoInstrumentations } from "@opentelemetry/auto-instrumentations-node";
|
|
110
|
+
import { OTLPTraceExporter } from "@opentelemetry/exporter-trace-otlp-http";
|
|
111
|
+
|
|
112
|
+
// Initialize OpenTelemetry
|
|
113
|
+
const sdk = new NodeSDK({
|
|
114
|
+
traceExporter: new OTLPTraceExporter({
|
|
115
|
+
url: process.env.OTEL_EXPORTER_OTLP_TRACES_ENDPOINT ?? "http://localhost:4318/v1/traces",
|
|
116
|
+
}),
|
|
117
|
+
instrumentations: [
|
|
118
|
+
getNodeAutoInstrumentations({
|
|
119
|
+
"@opentelemetry/instrumentation-http": { enabled: true },
|
|
120
|
+
"@opentelemetry/instrumentation-express": { enabled: true },
|
|
121
|
+
"@opentelemetry/instrumentation-pg": { enabled: true },
|
|
122
|
+
"@opentelemetry/instrumentation-redis": { enabled: true },
|
|
123
|
+
}),
|
|
124
|
+
],
|
|
125
|
+
});
|
|
126
|
+
|
|
127
|
+
sdk.start();
|
|
128
|
+
|
|
129
|
+
// Manual span for custom business logic
|
|
130
|
+
import { trace, SpanStatusCode } from "@opentelemetry/api";
|
|
131
|
+
|
|
132
|
+
const tracer = trace.getTracer("order-service");
|
|
133
|
+
|
|
134
|
+
async function processOrder(order: Order) {
|
|
135
|
+
return tracer.startActiveSpan("processOrder", async (span) => {
|
|
136
|
+
try {
|
|
137
|
+
span.setAttribute("order.id", order.id);
|
|
138
|
+
span.setAttribute("order.total", order.total);
|
|
139
|
+
span.setAttribute("order.items.count", order.items.length);
|
|
140
|
+
|
|
141
|
+
const result = await executeOrder(order);
|
|
142
|
+
span.setStatus({ code: SpanStatusCode.OK });
|
|
143
|
+
return result;
|
|
144
|
+
} catch (error) {
|
|
145
|
+
span.setStatus({ code: SpanStatusCode.ERROR, message: error.message });
|
|
146
|
+
span.recordException(error);
|
|
147
|
+
throw error;
|
|
148
|
+
} finally {
|
|
149
|
+
span.end();
|
|
150
|
+
}
|
|
151
|
+
});
|
|
152
|
+
}
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
---
|
|
156
|
+
|
|
157
|
+
## Metrics
|
|
158
|
+
|
|
159
|
+
```typescript
|
|
160
|
+
import { metrics } from "@opentelemetry/api";
|
|
161
|
+
|
|
162
|
+
const meter = metrics.getMeter("api-server");
|
|
163
|
+
|
|
164
|
+
// Counter — things that only go up
|
|
165
|
+
const requestCounter = meter.createCounter("http.requests.total", {
|
|
166
|
+
description: "Total HTTP requests",
|
|
167
|
+
});
|
|
168
|
+
|
|
169
|
+
// Histogram — request durations
|
|
170
|
+
const requestDuration = meter.createHistogram("http.request.duration_ms", {
|
|
171
|
+
description: "HTTP request duration in milliseconds",
|
|
172
|
+
unit: "ms",
|
|
173
|
+
});
|
|
174
|
+
|
|
175
|
+
// UpDownCounter — a value that can rise and fall (gauge-like, e.g. connections currently open)
|
|
176
|
+
const activeConnections = meter.createUpDownCounter("db.connections.active", {
|
|
177
|
+
description: "Active database connections",
|
|
178
|
+
});
|
|
179
|
+
|
|
180
|
+
// Middleware to record metrics
|
|
181
|
+
app.use((req, res, next) => {
|
|
182
|
+
const start = performance.now();
|
|
183
|
+
res.on("finish", () => {
|
|
184
|
+
const duration = performance.now() - start;
|
|
185
|
+
requestCounter.add(1, {
|
|
186
|
+
method: req.method,
|
|
187
|
+
path: req.route?.path ?? req.path,
|
|
188
|
+
status: res.statusCode.toString(),
|
|
189
|
+
});
|
|
190
|
+
requestDuration.record(duration, {
|
|
191
|
+
method: req.method,
|
|
192
|
+
status: res.statusCode.toString(),
|
|
193
|
+
});
|
|
194
|
+
});
|
|
195
|
+
next();
|
|
196
|
+
});
|
|
197
|
+
```
|
|
198
|
+
|
|
199
|
+
### Key Metrics to Track
|
|
200
|
+
|
|
201
|
+
```
|
|
202
|
+
RED method (for services):
|
|
203
|
+
Rate → requests per second
|
|
204
|
+
Errors → error rate (4xx, 5xx)
|
|
205
|
+
Duration → latency percentiles (P50, P95, P99)
|
|
206
|
+
|
|
207
|
+
USE method (for resources):
|
|
208
|
+
Utilization → CPU %, memory %, disk %
|
|
209
|
+
Saturation → queue depth, thread pool saturation
|
|
210
|
+
Errors → disk failures, OOM kills
|
|
211
|
+
|
|
212
|
+
Business metrics:
|
|
213
|
+
- Sign-ups per hour
|
|
214
|
+
- Orders processed per minute
|
|
215
|
+
- Revenue per day
|
|
216
|
+
- API calls per customer
|
|
217
|
+
```
|
|
218
|
+
|
|
219
|
+
---
|
|
220
|
+
|
|
221
|
+
## SLIs, SLOs & Error Budgets
|
|
222
|
+
|
|
223
|
+
```
|
|
224
|
+
SLI (Service Level Indicator) → What you measure
|
|
225
|
+
"99.2% of requests complete in <500ms"
|
|
226
|
+
|
|
227
|
+
SLO (Service Level Objective) → Your target
|
|
228
|
+
"99.9% of requests should complete in <500ms"
|
|
229
|
+
|
|
230
|
+
SLA (Service Level Agreement) → Your contract (with penalties)
|
|
231
|
+
"99.95% uptime or we refund 10%"
|
|
232
|
+
|
|
233
|
+
Error Budget = 100% - SLO
|
|
234
|
+
SLO: 99.9% → Error budget: 0.1% → 43 min downtime/month
|
|
235
|
+
SLO: 99.5% → Error budget: 0.5% → 3.6 hours downtime/month
|
|
236
|
+
|
|
237
|
+
Rules:
|
|
238
|
+
- Burn error budget too fast → freeze deployments
|
|
239
|
+
- Error budget remaining → ship features faster
|
|
240
|
+
- Don't set SLOs you can't measure
|
|
241
|
+
- Set SLO targets slightly below the performance you actually measure, so a real error budget remains
|
|
242
|
+
```
|
|
243
|
+
|
|
244
|
+
---
|
|
245
|
+
|
|
246
|
+
## Health Checks
|
|
247
|
+
|
|
248
|
+
```typescript
|
|
249
|
+
// Liveness: Is the process running?
|
|
250
|
+
app.get("/health/live", (req, res) => {
|
|
251
|
+
res.status(200).json({ status: "ok" });
|
|
252
|
+
});
|
|
253
|
+
|
|
254
|
+
// Readiness: Can it accept traffic?
|
|
255
|
+
app.get("/health/ready", async (req, res) => {
|
|
256
|
+
try {
|
|
257
|
+
await db.raw("SELECT 1"); // database check
|
|
258
|
+
await redis.ping(); // cache check
|
|
259
|
+
res.status(200).json({
|
|
260
|
+
status: "ready",
|
|
261
|
+
checks: { database: "ok", cache: "ok" },
|
|
262
|
+
});
|
|
263
|
+
} catch (error) {
|
|
264
|
+
res.status(503).json({
|
|
265
|
+
status: "not ready",
|
|
266
|
+
checks: { database: error.message },
|
|
267
|
+
});
|
|
268
|
+
}
|
|
269
|
+
});
|
|
270
|
+
|
|
271
|
+
// ❌ HALLUCINATION TRAP: Liveness ≠ Readiness
|
|
272
|
+
// Liveness fails → container restarts (only for unrecoverable states)
|
|
273
|
+
// Readiness fails → stop sending traffic (temporary — DB down, etc.)
|
|
274
|
+
// Making liveness check the DB → DB outage restarts all containers → cascade failure
|
|
275
|
+
```
|
|
276
|
+
|
|
277
|
+
---
|
|
278
|
+
|
|
279
|
+
## Alerting
|
|
280
|
+
|
|
281
|
+
```
|
|
282
|
+
Alert design rules:
|
|
283
|
+
1. Alert on SYMPTOMS, not causes (high latency, not "CPU is 80%")
|
|
284
|
+
2. Every alert must have a runbook link
|
|
285
|
+
3. Every alert must be ACTIONABLE — if you can't do anything, it's a notification
|
|
286
|
+
4. Use severity levels:
|
|
287
|
+
- Critical → page on-call (customer-facing outage)
|
|
288
|
+
- Warning → Slack notification (degraded, not broken)
|
|
289
|
+
- Info → dashboard only (awareness)
|
|
290
|
+
5. Avoid alert fatigue — fewer, meaningful alerts beat many noisy ones
|
|
291
|
+
```
|
|
292
|
+
|
|
293
|
+
---
|