@luanpdd/kit-mcp 1.8.1 → 1.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. package/CHANGELOG.md +86 -0
  2. package/README.md +97 -1
  3. package/gates/golden-signals-coverage.md +133 -0
  4. package/gates/obs-agents-mcp-supabase.md +86 -0
  5. package/gates/obs-skills-frontmatter.md +76 -0
  6. package/gates/omm-no-regression.md +83 -0
  7. package/gates/postmortem-template-required.md +127 -0
  8. package/gates/prr-checklist-coverage.md +128 -0
  9. package/gates/skill-must-include.md +21 -19
  10. package/kit/agents/burn-rate-forecaster.md +160 -0
  11. package/kit/agents/golden-signals-instrumenter.md +241 -0
  12. package/kit/agents/incident-investigator.md +245 -0
  13. package/kit/agents/observability-instrumenter.md +200 -0
  14. package/kit/agents/omm-auditor.md +251 -0
  15. package/kit/agents/postmortem-writer.md +282 -0
  16. package/kit/agents/prr-conductor.md +288 -0
  17. package/kit/agents/slo-engineer.md +224 -0
  18. package/kit/agents/supabase-architect.md +62 -0
  19. package/kit/agents/supabase-auth-bootstrapper.md +17 -0
  20. package/kit/agents/supabase-edge-fn-writer.md +124 -0
  21. package/kit/agents/supabase-migration-writer.md +98 -0
  22. package/kit/agents/supabase-realtime-implementer.md +23 -0
  23. package/kit/agents/supabase-rls-writer.md +17 -0
  24. package/kit/agents/supabase-storage-implementer.md +174 -0
  25. package/kit/agents/toil-auditor.md +277 -0
  26. package/kit/commands/auditar-marco.md +102 -1
  27. package/kit/commands/auditar-observabilidade.md +103 -0
  28. package/kit/commands/auditar-toil.md +129 -0
  29. package/kit/commands/burn-rate-status.md +140 -0
  30. package/kit/commands/concluir-marco.md +73 -1
  31. package/kit/commands/definir-slo.md +108 -0
  32. package/kit/commands/discutir-fase.md +26 -0
  33. package/kit/commands/forense.md +83 -1
  34. package/kit/commands/golden-signals.md +142 -0
  35. package/kit/commands/instrumentar-fase.md +200 -0
  36. package/kit/commands/investigar-producao.md +162 -0
  37. package/kit/commands/observabilidade.md +116 -0
  38. package/kit/commands/planejar-fase.md +20 -0
  39. package/kit/commands/postmortem.md +179 -0
  40. package/kit/commands/prr.md +205 -0
  41. package/kit/commands/risk-budget.md +220 -0
  42. package/kit/commands/sre.md +227 -0
  43. package/kit/commands/verificar-trabalho.md +26 -0
  44. package/kit/skills/_shared-observability/glossary.md +396 -0
  45. package/kit/skills/_shared-sre/glossary.md +573 -0
  46. package/kit/skills/blameless-postmortems/SKILL.md +340 -0
  47. package/kit/skills/burn-rate-alerting/SKILL.md +258 -0
  48. package/kit/skills/core-analysis-loop/SKILL.md +352 -0
  49. package/kit/skills/distributed-tracing/SKILL.md +362 -0
  50. package/kit/skills/eliminating-toil/SKILL.md +243 -0
  51. package/kit/skills/event-based-slos/SKILL.md +296 -0
  52. package/kit/skills/four-golden-signals/SKILL.md +297 -0
  53. package/kit/skills/observability-driven-development/SKILL.md +315 -0
  54. package/kit/skills/observability-maturity-model/SKILL.md +222 -0
  55. package/kit/skills/opentelemetry-standard/SKILL.md +351 -0
  56. package/kit/skills/production-readiness-review/SKILL.md +305 -0
  57. package/kit/skills/sre-risk-management/SKILL.md +221 -0
  58. package/kit/skills/structured-events/SKILL.md +265 -0
  59. package/kit/skills/telemetry-pipelines/SKILL.md +259 -0
  60. package/kit/skills/telemetry-sampling/SKILL.md +256 -0
  61. package/package.json +1 -1
@@ -0,0 +1,127 @@
1
+ ---
2
+ id: postmortem-template-required
3
+ stage: pre-conclude
4
+ blocking: true
5
+ description: Bloqueia /concluir-marco se há investigação em .planning/investigations/ sem postmortem correspondente em .planning/postmortems/. "No postmortem left unreviewed" (cap 15).
6
+ ---
7
+
8
+ # Postmortem template required gate
9
+
10
+ **When to run:** pre-conclude (blocking — milestone NÃO arquiva até cada incident ter postmortem blameless).
11
+
12
+ ## Check
13
+
14
+ ```bash
15
+ #!/usr/bin/env bash
16
+ # PT-BR: validar que cada investigação em .planning/investigations/ tem postmortem em .planning/postmortems/.
17
+ # Match por basename (sem extensão .md). Investigations com Status: INCONCLUSIVE são exceção.
18
+ # Bash 3.2-portable (macOS default).
19
+ set -e
20
+
21
+ INV_DIR=".planning/investigations"
22
+ PM_DIR=".planning/postmortems"
23
+
24
+ # PT-BR: se não há investigations, gate passa com INFO
25
+ if [ ! -d "$INV_DIR" ]; then
26
+ echo "INFO: $INV_DIR não existe — projeto sem incidents registrados. Gate skipped."
27
+ exit 0
28
+ fi
29
+
30
+ # PT-BR: listar investigations (single-file *.md OR subdir com STATE.md)
31
+ INVESTIGATIONS=""
32
+
33
+ # PT-BR: pattern A — .planning/investigations/<id>.md (single file)
34
+ SINGLE_FILES=$(find "$INV_DIR" -maxdepth 1 -type f -name "*.md" 2>/dev/null || true)
35
+ if [ -n "$SINGLE_FILES" ]; then
36
+ INVESTIGATIONS="$INVESTIGATIONS
37
+ $SINGLE_FILES"
38
+ fi
39
+
40
+ # PT-BR: pattern B — .planning/investigations/<id>/STATE.md (subdir state)
41
+ SUBDIR_STATES=$(find "$INV_DIR" -mindepth 2 -maxdepth 2 -type f -name "STATE.md" 2>/dev/null || true)
42
+ if [ -n "$SUBDIR_STATES" ]; then
43
+ INVESTIGATIONS="$INVESTIGATIONS
44
+ $SUBDIR_STATES"
45
+ fi
46
+
47
+ # PT-BR: filtrar linhas vazias
48
+ INVESTIGATIONS=$(echo "$INVESTIGATIONS" | grep -v "^$" || true)
49
+
50
+ if [ -z "$INVESTIGATIONS" ]; then
51
+ echo "INFO: $INV_DIR vazio — sem incidents registrados. Gate skipped."
52
+ exit 0
53
+ fi
54
+
55
+ # PT-BR: para cada investigation, extrair <id> e checar postmortem correspondente
56
+ MISSING=0
57
+ MISSING_LIST=""
58
+ OLDIFS="$IFS"
59
+ IFS='
60
+ '
61
+ for inv_path in $INVESTIGATIONS; do
62
+ [ -z "$inv_path" ] && continue
63
+ [ ! -f "$inv_path" ] && continue
64
+
65
+ # Derive the investigation <id>: the parent directory name for <id>/STATE.md, otherwise the file name without its .md suffix.
66
+ base=$(basename "$inv_path")
67
+ if [ "$base" = "STATE.md" ]; then
68
+ # Pattern B: the id is the name of the directory that contains STATE.md.
69
+ id=$(basename "$(dirname "$inv_path")")
70
+ else
71
+ # Pattern A: the id is the basename with the trailing .md removed.
72
+ id="${base%.md}"
73
+ fi
74
+
75
+ # Skip investigations whose status field reads INCONCLUSIVE (no root cause, so no postmortem is required); "Status" must be the first word on the line and INCONCLUSIVE the very next word, so prose that merely mentions both words no longer exempts the file.
76
+ if grep -qiE '^[^[:alnum:]]*Status[^[:alnum:]]*INCONCLUSIVE([^[:alnum:]_]|$)' "$inv_path" 2>/dev/null; then
77
+ echo "INFO: investigation '$id' marcada INCONCLUSIVE — sem root cause, postmortem não exigido."
78
+ continue
79
+ fi
80
+
81
+ # PT-BR: postmortem esperado em .planning/postmortems/<id>.md
82
+ pm_path="$PM_DIR/$id.md"
83
+ if [ ! -f "$pm_path" ]; then
84
+ MISSING=$((MISSING + 1))
85
+ MISSING_LIST="$MISSING_LIST $id"
86
+ fi
87
+ done
88
+ IFS="$OLDIFS"
89
+
90
+ if [ "$MISSING" -eq 0 ]; then
91
+ echo "PASS: todas as investigações têm postmortem correspondente em $PM_DIR/"
92
+ exit 0
93
+ else
94
+ echo "FAIL: $MISSING investigação(ões) sem postmortem em $PM_DIR/:$MISSING_LIST"
95
+ echo "Sugestão: rodar /postmortem --from-investigation <id> para cada item ausente."
96
+ echo "Cross-ref: kit/skills/blameless-postmortems/SKILL.md + kit/agents/postmortem-writer.md"
97
+ echo "Princípio canônico: 'No postmortem left unreviewed' (cap 15 livro Google SRE)."
98
+ exit 1
99
+ fi
100
+ ```
101
+
102
+ ## Verdict
103
+
104
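+ - **passed** — todas as investigations têm postmortem correspondente em `.planning/postmortems/<id>.md` (investigations marcadas INCONCLUSIVE são dispensadas) OR diretório `.planning/investigations/` ausente ou vazio (nesses casos o gate é pulado com INFO)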
105
+ - **block** — pelo menos 1 investigation sem postmortem em `.planning/postmortems/`
106
+
107
+ ## Why
108
+
109
+ O livro Google SRE (cap 15 — *Postmortem Culture: Learning from Failure*) define como princípio canônico **"no postmortem left unreviewed"**: cada incident significativo (registrado como investigação via `/forense` + `incident-investigator` v1.9) deve gerar postmortem blameless documentando *o que aprendemos* e *o que mudaremos*.
110
+
111
+ Sem este gate, milestones arquivam com investigations órfãs — root cause foi diagnosticado mas aprendizado organizacional perdeu-se (anti-pattern hero culture: "fixei o bug, vamos seguir"). Gate força a chain canônica entre v1.9 (Core Analysis Loop diagnostica) e v1.10 (postmortem documenta).
112
+
113
+ Cross-ref agent canônico: [`postmortem-writer`](../kit/agents/postmortem-writer.md) (Phase 37 / AGCORE-SRE-03). Skill: [`blameless-postmortems`](../kit/skills/blameless-postmortems/SKILL.md) (Phase 36 / SKFD-SRE-04). Comando: `/postmortem --from-investigation <id>` (Phase 38 / CMD-SRE-03). Chain documentado em `kit/commands/forense.md` bloco `<sre_integration>` (Plan 40-01 / INT-FW-V2-01).
114
+
115
+ ## REQ
116
+
117
+ QA-SRE-02.
118
+
119
+ ## Configuração
120
+
121
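+ Gate é **blocking** por default (cultura SRE blameless é não-negociável uma vez instituída). O toggle abaixo está reservado para tornar o gate warn-only durante adoption inicial, mas **ainda não tem efeito** — o script do gate não consulta config (ver nota ao final desta seção):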
122
+
123
+ ```bash
124
+ node ./.claude/framework/bin/tools.cjs config-set workflow.postmortem_required_warn true
125
+ ```
126
+
127
+ (Nota: implementação do toggle warn-only é deferida — gate atual lê apenas presença/ausência de pares investigation↔postmortem, não consulta config.)
@@ -0,0 +1,128 @@
1
+ ---
2
+ id: prr-checklist-coverage
3
+ stage: pre-verify
4
+ blocking: true
5
+ description: Valida que cada PRR-REPORT.md em .planning/prr/ cobre os 6 axes canonicos (System Architecture/Instrumentation/Emergency/Capacity/Change/Performance — cap 32 livro Google SRE).
6
+ ---
7
+
8
+ # PRR checklist coverage gate
9
+
10
+ **When to run:** pre-verify (blocking — PRR sem 6 axes = aprovação inválida).
11
+
12
+ ## Check
13
+
14
+ ```bash
15
+ #!/usr/bin/env bash
16
+ # PT-BR: validar que cada PRR-REPORT.md em .planning/prr/**/*.md cobre os 6 axes do PRR.
17
+ # Match por palavra-chave em heading H2 (case-insensitive). Pular um axe = aprovação inválida.
18
+ # Bash 3.2-portable (macOS default).
19
+ set -e
20
+
21
+ PRR_DIR=".planning/prr"
22
+
23
+ # PT-BR: se não há PRR reports, gate passa com INFO
24
+ if [ ! -d "$PRR_DIR" ]; then
25
+ echo "INFO: $PRR_DIR não existe — projeto sem PRR reports. Gate skipped."
26
+ exit 0
27
+ fi
28
+
29
+ # PT-BR: listar todos os *.md em .planning/prr/ recursivamente
30
+ PRR_FILES=$(find "$PRR_DIR" -type f -name "*.md" 2>/dev/null || true)
31
+ PRR_FILES=$(echo "$PRR_FILES" | grep -v "^$" || true)
32
+
33
+ if [ -z "$PRR_FILES" ]; then
34
+ echo "INFO: $PRR_DIR vazio — sem PRR reports. Gate skipped."
35
+ exit 0
36
+ fi
37
+
38
+ # PT-BR: para cada PRR report, validar que cobre os 6 axes
39
+ VIOLATIONS=0
40
+ OLDIFS="$IFS"
41
+ IFS='
42
+ '
43
+ for prr_file in $PRR_FILES; do
44
+ [ -z "$prr_file" ] && continue
45
+ [ ! -f "$prr_file" ] && continue
46
+
47
+ # Collect the file's H2 headings (lines starting with "## "); the case-insensitive keyword matching is done by the per-axis greps that follow, not here.
48
+ H2=$(grep -E "^## " "$prr_file" 2>/dev/null || true)
49
+
50
+ # PT-BR: 6 axes — match em palavras-chave (qualquer variante aceitável)
51
+ AXE_MISSING=""
52
+
53
+ # Axe 1: System Architecture
54
+ if ! echo "$H2" | grep -qiE "system.*architecture|architecture"; then
55
+ AXE_MISSING="$AXE_MISSING Axe1(SystemArchitecture)"
56
+ fi
57
+
58
+ # Axe 2: Instrumentation / Metrics / Monitoring
59
+ if ! echo "$H2" | grep -qiE "instrumentation|metrics|monitoring"; then
60
+ AXE_MISSING="$AXE_MISSING Axe2(Instrumentation)"
61
+ fi
62
+
63
+ # Axe 3: Emergency Response
64
+ if ! echo "$H2" | grep -qiE "emergency.*response|emergency"; then
65
+ AXE_MISSING="$AXE_MISSING Axe3(EmergencyResponse)"
66
+ fi
67
+
68
+ # Axe 4: Capacity Planning
69
+ if ! echo "$H2" | grep -qiE "capacity.*planning|capacity"; then
70
+ AXE_MISSING="$AXE_MISSING Axe4(CapacityPlanning)"
71
+ fi
72
+
73
+ # Axe 5: Change Management
74
+ if ! echo "$H2" | grep -qiE "change.*management|change"; then
75
+ AXE_MISSING="$AXE_MISSING Axe5(ChangeManagement)"
76
+ fi
77
+
78
+ # Axe 6: Performance
79
+ if ! echo "$H2" | grep -qiE "performance"; then
80
+ AXE_MISSING="$AXE_MISSING Axe6(Performance)"
81
+ fi
82
+
83
+ if [ -n "$AXE_MISSING" ]; then
84
+ echo "FAIL: $prr_file — axes ausentes:$AXE_MISSING"
85
+ VIOLATIONS=$((VIOLATIONS + 1))
86
+ fi
87
+ done
88
+ IFS="$OLDIFS"
89
+
90
+ if [ "$VIOLATIONS" -eq 0 ]; then
91
+ total=$(echo "$PRR_FILES" | wc -l | tr -d ' ')
92
+ echo "PASS: $total PRR-REPORT(s) cobrem os 6 axes canônicos"
93
+ exit 0
94
+ else
95
+ echo "FAIL: $VIOLATIONS PRR-REPORT(s) com axes ausentes"
96
+ echo "Sugestão: rodar /sre prr <service> ou /prr para regenerar com template canônico (6 axes obrigatórios)."
97
+ echo "Cross-ref: kit/skills/production-readiness-review/SKILL.md + kit/agents/prr-conductor.md"
98
+ echo "Princípio canônico: 'Pular um axe = aprovação inválida' (cap 32 livro Google SRE)."
99
+ exit 1
100
+ fi
101
+ ```
102
+
103
+ ## Verdict
104
+
105
+ - **passed** — cada PRR-REPORT.md em `.planning/prr/**/*.md` tem H2 cobrindo os 6 axes (System Architecture / Instrumentation / Emergency Response / Capacity Planning / Change Management / Performance) OR diretório `.planning/prr/` ausente ou sem arquivos `.md` (nesses casos o gate é pulado com INFO)
106
+ - **block** — pelo menos 1 PRR-REPORT.md com axe(s) ausente(s)
107
+
108
+ ## Why
109
+
110
+ O livro Google SRE (cap 32 — *Evolving SRE Engagement Model*) define **6 axes canônicos** do Production Readiness Review. A skill `production-readiness-review` (Phase 36 / SKFD-SRE-05) declara como regra absoluta: *"Pular um axe = aprovação inválida (lacuna oculta vira incident em 6 meses)"*.
111
+
112
+ Sem este gate, PRRs apressados podem omitir axes "menos relevantes" (anti-pattern documentado na skill); gaps em Change Management ou Capacity Planning não detectados em PRR viram incidents em produção meses depois. Gate força padrão canônico — cada `PRR-REPORT.md` cobrindo os 6 axes integralmente, mesmo que items dentro de um axe sejam N/A para o serviço (justificativa explícita no item, não no axe).
113
+
114
+ Cross-ref agent canônico: [`prr-conductor`](../kit/agents/prr-conductor.md) (Phase 37 / AGCORE-SRE-04). Skill: [`production-readiness-review`](../kit/skills/production-readiness-review/SKILL.md) (Phase 36 / SKFD-SRE-05). Comando: `/prr --service <name>` ou `/prr --feature <description>` (Phase 38 / CMD-SRE-04).
115
+
116
+ ## REQ
117
+
118
+ QA-SRE-03.
119
+
120
+ ## Configuração
121
+
122
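+ Gate é **blocking** por default. O toggle abaixo está reservado para tornar o gate warn-only durante adoption inicial, mas **ainda não tem efeito** — o script do gate não consulta config (ver nota ao final desta seção):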
123
+
124
+ ```bash
125
+ node ./.claude/framework/bin/tools.cjs config-set workflow.prr_checklist_coverage_warn true
126
+ ```
127
+
128
+ (Nota: implementação do toggle warn-only é deferida — gate atual não consulta config.)
@@ -14,41 +14,43 @@ description: Valida que skills supabase-* contêm strings obrigatórias verbatim
14
14
  ```bash
15
15
  #!/usr/bin/env bash
16
16
  # PT-BR: cada skill deve incluir strings obrigatórias verbatim para prevenir anti-patterns
17
+ # Portable: bash 3.2+ (macOS default), sem associative arrays
17
18
  set -e
18
19
 
19
20
  VIOLATIONS=0
20
21
 
21
- # PT-BR: mapeamento skill → must-include strings (delimitadas por |)
22
- declare -A MUST_INCLUDE
23
- MUST_INCLUDE["supabase-rls-policies"]="(select auth.uid())|user_metadata|TO authenticated"
24
- MUST_INCLUDE["supabase-database-functions"]="set search_path = ''|SECURITY INVOKER"
25
- MUST_INCLUDE["supabase-auth-ssr"]="getAll|setAll|auth-helpers-nextjs|@supabase/ssr"
26
- MUST_INCLUDE["supabase-realtime"]="broadcast|private: true|realtime.broadcast_changes|removeChannel"
27
- MUST_INCLUDE["supabase-edge-functions"]="npm:|jsr:|Deno.serve|EdgeRuntime.waitUntil|/tmp"
28
- MUST_INCLUDE["supabase-declarative-schema"]="supabase/schemas/|supabase stop|supabase db diff -f"
29
- MUST_INCLUDE["supabase-migrations"]="YYYYMMDDHHmmss|RLS|granular"
30
- MUST_INCLUDE["supabase-postgres-style"]="snake_case|ISO 8601|lowercase"
31
- MUST_INCLUDE["supabase-storage"]="signed URL|storage.objects|multi-tenant"
32
- MUST_INCLUDE["supabase-pgvector-rag"]="HNSW|IVFFlat|<=>|RAG with permissions"
33
- MUST_INCLUDE["supabase-cron-queues"]="pg_cron|pgmq|pg_net"
22
+ check_skill() {
23
+ local skill="$1"
24
+ local required="$2" # strings separadas por |
25
+ local file="kit/skills/$skill/SKILL.md"
34
26
 
35
- for skill in "${!MUST_INCLUDE[@]}"; do
36
- file="kit/skills/$skill/SKILL.md"
37
27
  if [ ! -f "$file" ]; then
38
28
  echo "FAIL: $file — skill ausente"
39
29
  VIOLATIONS=$((VIOLATIONS + 1))
40
- continue
30
+ return
41
31
  fi
42
32
 
43
33
  # PT-BR: testa cada string (separada por |)
44
- IFS='|' read -ra REQUIRED <<< "${MUST_INCLUDE[$skill]}"
45
- for str in "${REQUIRED[@]}"; do
34
+ local IFS='|'
35
+ for str in $required; do
46
36
  if ! grep -qF "$str" "$file"; then
47
37
  echo "FAIL: $file — must-include ausente: '$str'"
48
38
  VIOLATIONS=$((VIOLATIONS + 1))
49
39
  fi
50
40
  done
51
- done
41
+ }
42
+
43
+ check_skill "supabase-rls-policies" "(select auth.uid())|user_metadata|TO authenticated"
44
+ check_skill "supabase-database-functions" "set search_path = ''|SECURITY INVOKER"
45
+ check_skill "supabase-auth-ssr" "getAll|setAll|auth-helpers-nextjs|@supabase/ssr"
46
+ check_skill "supabase-realtime" "broadcast|private: true|realtime.broadcast_changes|removeChannel"
47
+ check_skill "supabase-edge-functions" "npm:|jsr:|Deno.serve|EdgeRuntime.waitUntil|/tmp"
48
+ check_skill "supabase-declarative-schema" "supabase/schemas/|supabase stop|supabase db diff -f"
49
+ check_skill "supabase-migrations" "YYYYMMDDHHmmss|RLS|granular"
50
+ check_skill "supabase-postgres-style" "snake_case|ISO 8601|lowercase"
51
+ check_skill "supabase-storage" "signed URL|storage.objects|multi-tenant"
52
+ check_skill "supabase-pgvector-rag" "HNSW|IVFFlat|<=>|RAG with permissions"
53
+ check_skill "supabase-cron-queues" "pg_cron|pgmq|pg_net"
52
54
 
53
55
  if [ "$VIOLATIONS" -gt 0 ]; then
54
56
  echo "Total violations: $VIOLATIONS"
@@ -0,0 +1,160 @@
1
+ ---
2
+ name: burn-rate-forecaster
3
+ description: Calcula burn rate atual + ETA exhaustão + alert config (page vs ticket) — usa lookahead/baseline windows fator 4×, mcp__supabase__execute_sql para queries SLI.
4
+ tools: Read, Bash, Grep, mcp__supabase__execute_sql, mcp__supabase__list_tables
5
+ color: orange
6
+ ---
7
+
8
+ Você é o forecaster de burn rate. Recebe nome de SLO + janelas (lookahead/baseline) e calcula burn rate atual, % budget gasto, ETA exhaustão, e ação recomendada (informativo / ticket / page). Você consulta a skill [`burn-rate-alerting`](../skills/burn-rate-alerting/SKILL.md) — conhecimento autoritativo sobre fórmulas de extrapolação.
9
+
10
+ ## Compatibilidade
11
+
12
+ | IDE | Tier | Capability |
13
+ |---|---|---|
14
+ | Claude Code (com Supabase MCP) | **Full** | Queries live em SLI views via execute_sql |
15
+ | Cursor (com Supabase MCP) | **Full** | Idem |
16
+ | Codex | **Partial** | Apresenta SQL ao user, parsea resultado colado |
17
+ | Gemini CLI | **Partial** | Idem |
18
+ | Windsurf, Antigravity, Copilot, Trae | **Offline-only** | SQL como text, sem execução |
19
+
20
+ ## Por que existe
21
+
22
+ Burn rate calculado errado é pior que não calculado — false positives geram alert fatigue, false negatives perdem incidents. Este agent aplica fórmula canônica do livro Cap 13 (lookahead ≤ 4× baseline, target burn 14.4× para page, 1× para ticket) consistentemente.
23
+
24
+ ## Inputs esperados (do caller)
25
+
26
+ - `slo_name`: nome do SLO (ex: `checkout_success`) — view materializada deve existir em `obs.sli_<slo_name>`
27
+ - (Opcional) `lookahead`: `4h` (default short-term) | `3d` (long-term) | custom
28
+ - (Opcional) `baseline`: `1h` (default short-term) | `18h` (long-term) | custom
29
+ - (Opcional) `target`: target % do SLO (default: lê de `.planning/slos/<slo_name>.md`)
30
+
31
+ ## Passos
32
+
33
+ ### Step 0 — Preflight
34
+
35
+ 1. Verificar que `.planning/slos/<slo_name>.md` existe — extrair `target` e `window`.
36
+ 2. Verificar que `obs.sli_<slo_name>` existe via `mcp__supabase__list_tables --schemas=['obs']`.
37
+
38
+ Se qualquer um dos dois estiver faltando (arquivo do SLO ou view `obs.sli_<slo_name>`), abortar com mensagem clara: "SLO {name} não definido. Rode `/definir-slo {feature}` primeiro."
39
+
40
+ ### Step 1 — Validar lookahead ≤ 4× baseline
41
+
42
+ ```text
43
+ if lookahead_seconds > 4 × baseline_seconds:
44
+ warn "lookahead 4h é confiável apenas com baseline ≥ 1h. Sua config: lookahead=Xh, baseline=Yh — fora da regra 4×."
45
+ Sugerir ajustar baseline ou usar context-aware burn rate.
46
+ ```
47
+
48
+ ### Step 2 — Query burn rate atual (baseline window)
49
+
50
+ ```sql
51
+ -- Burn rate over the baseline window: error_rate divided by the error budget fraction (1 - target). Both divisions use nullif, so an empty window or a 100% target yields NULL instead of a division-by-zero error.
52
+ with baseline as (
53
+ select
54
+ sum(good) as good,
55
+ sum(bad) as bad,
56
+ sum(total) as total
57
+ from obs.sli_{slo_name}
58
+ where bucket > now() - interval '{baseline}'
59
+ )
60
+ select
61
+ total as events_in_baseline,
62
+ bad as bad_in_baseline,
63
+ bad::float / nullif(total, 0) as error_rate,
64
+ (bad::float / nullif(total, 0)) / nullif(1 - {target_decimal}, 0) as burn_rate
65
+ from baseline;
66
+ ```
67
+
68
+ Invoke via `mcp__supabase__execute_sql` (Full) ou apresentar ao user (Offline).
69
+
70
+ ### Step 3 — Query budget gasto e remanescente (window inteira)
71
+
72
+ ```sql
73
+ -- Error budget total, spent and remaining over the SLO window (30 days here); budget_burned_pct is NULL when the budget is zero because of the nullif guard.
74
+ with full_window as (
75
+ select
76
+ sum(bad) as burned,
77
+ sum(total) as total_events
78
+ from obs.sli_{slo_name}
79
+ where bucket > now() - interval '30 days' -- NOTE(review): hard-coded; Step 0 also reads `window` from the SLO file — confirm whether this interval should follow it
80
+ )
81
+ select
82
+ (1 - {target_decimal}) * total_events as budget_events,
83
+ burned,
84
+ (1 - {target_decimal}) * total_events - burned as remaining_events,
85
+ 100.0 * burned / nullif((1 - {target_decimal}) * total_events, 0) as budget_burned_pct
86
+ from full_window;
87
+ ```
88
+
89
+ ### Step 4 — Predictive forecast — ETA exhaustão
90
+
91
+ ```text
92
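+ # burn_per_baseline = bad_in_baseline (Step 2); remaining_events_now = remaining_events (Step 3); se burn_per_baseline = 0, ETA = ∞ (budget não esgota no ritmo atual)
93
+ projected_remaining_at_lookahead = remaining_events_now - (burn_per_baseline × lookahead/baseline)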
94
+ ETA seconds = remaining_events_now / (burn_per_baseline / baseline_seconds)
95
+ ETA hours = ETA seconds / 3600
96
+ ```
97
+
98
+ ### Step 5 — Determinar status
99
+
100
+ ```text
101
+ if burn_rate >= 14.4 (sustained 4h+):
102
+ status = "PAGE"
103
+ action = "Page on-call imediato — invocar `/investigar-producao`"
104
+ elif burn_rate >= 1.0:
105
+ status = "TICKET"
106
+ action = "Criar ticket de eng — investigar antes do budget esgotar (ETA={ETA}h)"
107
+ elif budget_burned_pct >= 80:
108
+ status = "WARN"
109
+ action = "Budget acima 80% — proteger contra deploys arriscados"
110
+ else:
111
+ status = "OK"
112
+ action = "Informativo apenas"
113
+ ```
114
+
115
+ ### Step 6 — Output
116
+
117
+ Tabela canônica:
118
+
119
+ ```
120
+ ═══════════════════════════════════════════════════════════
121
+ BURN-RATE-FORECASTER · {slo_name}
122
+ ═══════════════════════════════════════════════════════════
123
+
124
+ ## Snapshot — {timestamp}
125
+
126
+ | Metric | Value |
127
+ |---|---|
128
+ | SLO target | {target}% |
129
+ | Window | 30d sliding |
130
+ | Budget total | {budget_events} events |
131
+ | Budget gasto | {burned_events} events ({burned_pct}%) |
132
+ | Budget remaining | {remaining_events} events ({remaining_pct}%) |
133
+ | Baseline ({baseline}) error rate | {error_rate}% |
134
+ | Burn rate atual | {burn_rate}× |
135
+ | ETA exhaustão | {ETA} (se burn_rate sustained) |
136
+
137
+ ## Status: **{status}**
138
+
139
+ {action}
140
+
141
+ ## Comparação — burn rate threshold
142
+
143
+ | Threshold | Burn rate | Action |
144
+ |---|---|---|
145
+ | Page on-call | ≥ 14.4× | acordar engineer |
146
+ | Ticket | ≥ 1.0× | abrir Jira/Linear |
147
+ | Warn | budget > 80% gasto | rever cadência de deploy |
148
+
149
+ {Se status = PAGE ou TICKET:}
150
+ ## Próximos passos
151
+ 1. `/investigar-producao "{slo_name} burn rate = {burn_rate}× às {timestamp}"`
152
+ 2. (Após root cause identificada) Decidir: rollback / hotfix / mitigação parcial
153
+ 3. Atualizar runbook do SLO com lessons learned
154
+ ```
155
+
156
+ ## Quando NÃO invocar
157
+
158
+ - SLO sem materialized view — invoke `slo-engineer` primeiro
159
+ - Métrica informativa sem target — use dashboard
160
+ - Verificação ad hoc rápida — query direto via `mcp__supabase__execute_sql` se já sabe a fórmula