@luanpdd/kit-mcp 1.34.0 → 1.36.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118) hide show
  1. package/README.md +1 -1
  2. package/bin/cli.js +2 -2
  3. package/bin/mcp.js +6 -6
  4. package/bin/ui.js +74 -74
  5. package/gates/ai-prompt-stability.md +120 -120
  6. package/gates/budget-description.md +68 -68
  7. package/gates/confidence.md +29 -29
  8. package/gates/dependency-check.md +33 -33
  9. package/gates/dept-cycle-prevention.md +179 -179
  10. package/gates/golden-signals-coverage.md +133 -133
  11. package/gates/legacy-refactor-safety.md +178 -178
  12. package/gates/multi-tenant-rls-coverage.md +102 -102
  13. package/gates/no-personal-uuid.md +72 -72
  14. package/gates/obs-agents-mcp-supabase.md +86 -86
  15. package/gates/obs-skills-frontmatter.md +76 -76
  16. package/gates/observability-coverage.md +151 -151
  17. package/gates/omm-no-regression.md +83 -83
  18. package/gates/postmortem-template-required.md +127 -127
  19. package/gates/prr-checklist-coverage.md +128 -128
  20. package/gates/regression.md +32 -32
  21. package/gates/release-pipeline-policy.md +132 -132
  22. package/gates/secrets-scan.md +33 -33
  23. package/gates/service-role-not-in-user-facing.md +113 -113
  24. package/gates/skill-must-include.md +71 -71
  25. package/gates/sync-idempotent.md +62 -62
  26. package/gates/verify-phase-goal.md +34 -34
  27. package/kit/agents/designer-ui.md +216 -216
  28. package/kit/agents/workflow-generator.md +537 -0
  29. package/kit/commands/adicionar-backlog.md +1 -1
  30. package/kit/commands/adicionar-fase.md +1 -1
  31. package/kit/commands/adicionar-tarefa.md +1 -1
  32. package/kit/commands/auditar-observabilidade.md +103 -103
  33. package/kit/commands/auditar-toil.md +129 -129
  34. package/kit/commands/caracterizar-prompt.md +195 -195
  35. package/kit/commands/criar-workflow.md +158 -0
  36. package/kit/commands/definir-perfil.md +1 -1
  37. package/kit/commands/definir-slo.md +108 -108
  38. package/kit/commands/fio.md +1 -1
  39. package/kit/commands/golden-signals.md +142 -142
  40. package/kit/commands/instrumentar-fase.md +200 -200
  41. package/kit/commands/investigar-producao.md +162 -162
  42. package/kit/commands/observabilidade.md +118 -118
  43. package/kit/commands/postmortem.md +179 -179
  44. package/kit/commands/prr.md +205 -205
  45. package/kit/commands/publicar-rapido.md +207 -207
  46. package/kit/commands/risk-budget.md +220 -220
  47. package/kit/commands/sre.md +230 -230
  48. package/kit/file-manifest.json +5 -2
  49. package/kit/framework/references/output-style.md +22 -22
  50. package/kit/hooks/post-apply-migration.js +199 -199
  51. package/kit/hooks/sidecar-tool-publisher.js +210 -210
  52. package/kit/skills/_shared-dados-distribuidos/glossary.md +224 -224
  53. package/kit/skills/_shared-legacy/glossary.md +389 -389
  54. package/kit/skills/_shared-multi-tenant/glossary.md +186 -186
  55. package/kit/skills/_shared-observability/glossary.md +396 -396
  56. package/kit/skills/_shared-sre/glossary.md +712 -712
  57. package/kit/skills/_shared-supabase/glossary.md +234 -234
  58. package/kit/skills/blameless-postmortems/SKILL.md +340 -340
  59. package/kit/skills/burn-rate-alerting/SKILL.md +258 -258
  60. package/kit/skills/cascading-failures/SKILL.md +311 -311
  61. package/kit/skills/core-analysis-loop/SKILL.md +352 -352
  62. package/kit/skills/distributed-tracing/SKILL.md +362 -362
  63. package/kit/skills/dynamic-workflow-authoring/SKILL.md +327 -0
  64. package/kit/skills/eliminating-toil/SKILL.md +243 -243
  65. package/kit/skills/event-based-slos/SKILL.md +296 -296
  66. package/kit/skills/four-golden-signals/SKILL.md +314 -314
  67. package/kit/skills/hermetic-builds/SKILL.md +323 -323
  68. package/kit/skills/legacy-monster-methods/SKILL.md +444 -444
  69. package/kit/skills/llm-as-dependency/SKILL.md +436 -436
  70. package/kit/skills/load-shedding-graceful-degradation/SKILL.md +396 -396
  71. package/kit/skills/observability-driven-development/SKILL.md +315 -315
  72. package/kit/skills/observability-maturity-model/SKILL.md +222 -222
  73. package/kit/skills/opentelemetry-standard/SKILL.md +351 -351
  74. package/kit/skills/production-readiness-review/SKILL.md +305 -305
  75. package/kit/skills/release-engineering/SKILL.md +367 -367
  76. package/kit/skills/retry-strategies/SKILL.md +372 -372
  77. package/kit/skills/sre-risk-management/SKILL.md +221 -221
  78. package/kit/skills/structured-events/SKILL.md +265 -265
  79. package/kit/skills/supabase-cron-queues/SKILL.md +275 -275
  80. package/kit/skills/supabase-database-functions/SKILL.md +332 -332
  81. package/kit/skills/supabase-declarative-schema/SKILL.md +183 -183
  82. package/kit/skills/supabase-pgvector-rag/SKILL.md +253 -253
  83. package/kit/skills/supabase-postgres-style/SKILL.md +138 -138
  84. package/kit/skills/supabase-storage/SKILL.md +234 -234
  85. package/kit/skills/telemetry-pipelines/SKILL.md +259 -259
  86. package/kit/skills/telemetry-sampling/SKILL.md +256 -256
  87. package/kit/skills/ui-anti-padroes-ia/SKILL.md +261 -261
  88. package/kit/skills/ui-contexto-produto/SKILL.md +248 -248
  89. package/kit/skills/ui-cor-estrategia/SKILL.md +213 -213
  90. package/kit/skills/ui-critica-auditoria/SKILL.md +260 -260
  91. package/kit/skills/ui-motion-funcional/SKILL.md +264 -264
  92. package/kit/skills/ui-ritmo-espacial/SKILL.md +259 -259
  93. package/kit/skills/ui-tipografia/SKILL.md +211 -211
  94. package/package.json +1 -1
  95. package/src/cli/index.js +1114 -1114
  96. package/src/cli/render.js +194 -194
  97. package/src/cli/upgrade-check.js +135 -135
  98. package/src/core/error-redaction.js +76 -76
  99. package/src/core/failures.js +153 -153
  100. package/src/core/gate-runner.js +205 -205
  101. package/src/core/gates.js +82 -82
  102. package/src/core/logger.js +170 -170
  103. package/src/core/manifest-verify.js +174 -174
  104. package/src/core/metrics.js +268 -268
  105. package/src/core/notify.js +60 -60
  106. package/src/core/path-safety.js +141 -141
  107. package/src/core/replays.js +120 -120
  108. package/src/core/ui.js +185 -185
  109. package/src/mcp-server/install.js +149 -149
  110. package/src/mcp-server/roots.js +124 -124
  111. package/src/ui/auto-spawn.js +113 -113
  112. package/src/ui/browser.js +78 -78
  113. package/src/ui/client.js +130 -130
  114. package/src/ui/events.js +65 -65
  115. package/src/ui/lockfile.js +191 -191
  116. package/src/ui/port.js +67 -67
  117. package/src/ui/server.js +547 -547
  118. package/src/ui/wrapper.js +129 -129
@@ -1,362 +1,362 @@
1
- ---
2
- name: distributed-tracing
3
- description: Use ao instrumentar tracing — trace_id/span_id/parent_id, propagar W3C TraceContext via header traceparent, stitching além de RPCs (batch, lambda, queue).
4
- ---
5
-
6
- # Observabilidade — Distributed Tracing
7
-
8
- ## Quando usar
9
-
10
- LLM carrega esta skill ao instrumentar tracing distribuído ou stitching de spans. Trigger phrases:
11
-
12
- - "distributed tracing", "traces", "spans"
13
- - "propagar contexto entre serviços", "trace cross-service"
14
- - "W3C TraceContext", "traceparent header"
15
- - "trace_id span_id parent_span_id"
16
- - "ligar lambda batch job ao trace"
17
- - "stitching de eventos"
18
-
19
- ## Regras absolutas
20
-
21
- - **trace_id é compartilhado** entre todos os spans de um único request distribuído. **NÃO** mude por hop.
22
- - **span_id é único por span** — gere novo a cada `startSpan()`. 16 hex chars (8 bytes).
23
- - **parent_span_id aponta para span pai** — null no root span. Define a árvore.
24
- - **W3C TraceContext é o padrão** — header HTTP `traceparent: 00-{trace_id}-{span_id}-{flags}`. Adote sempre. B3 é fallback para legacy.
25
- - **Propague ANTES de fazer call cross-service** — extrair contexto do request inbound, propagar no request outbound. Sem isso, trace quebra.
26
- - **Stitching ≠ apenas RPC** — também batch jobs, queue messages, lambda invocations, S3 uploads. Carregue `traceparent` em metadata da queue, env var do lambda, header da Step Function.
27
- - **Sample decision propaga** — bit `01` em flags de `traceparent` significa "sample=true". Decisão tomada no head propaga downstream.
28
- - **Não invente trace_id** — sempre derive do contexto inbound ou gere via SDK (não `crypto.randomUUID()`).
29
- - **Spans devem ter `kind`** — `SERVER` (handler de inbound), `CLIENT` (call outbound), `PRODUCER`/`CONSUMER` (queue), `INTERNAL` (subspan dentro do mesmo process).
30
-
31
- ## Patterns canônicos
32
-
33
- ### Pattern: extrair contexto inbound + propagar outbound (Node)
34
-
35
- ```ts
36
- // PT-BR: handler HTTP — extrai traceparent do request inbound, propaga em call outbound
37
- import { trace, context, propagation } from '@opentelemetry/api'
38
-
39
- const tracer = trace.getTracer('orders-service')
40
-
41
- export async function placeOrder(req: Request) {
42
- // PT-BR: 1 — extrair contexto inbound do header traceparent
43
- const inboundContext = propagation.extract(context.active(), req.headers)
44
-
45
- return tracer.startActiveSpan(
46
- 'place_order',
47
- { kind: SpanKind.SERVER },
48
- inboundContext,
49
- async (span) => {
50
- span.setAttribute('user.id', req.user.id)
51
-
52
- // PT-BR: 2 — fazer call outbound — propagation injeta traceparent automaticamente
53
- // se você usar fetch/grpc instrumentados (ver skill opentelemetry-standard)
54
- const outboundHeaders: Record<string, string> = {}
55
- propagation.inject(context.active(), outboundHeaders)
56
-
57
- const inventoryRes = await fetch('http://inventory/check', {
58
- headers: outboundHeaders, // PT-BR: traceparent injetado aqui
59
- body: JSON.stringify({ items: req.items })
60
- })
61
-
62
- span.end()
63
- return inventoryRes.json()
64
- }
65
- )
66
- }
67
- ```
68
-
69
- ### Pattern: traceparent format
70
-
71
- ```text
72
- traceparent: 00-4bf92f3577b34da6a3ce929d0e0e4736-00f067aa0ba902b7-01
73
- ^ ^ ^ ^
74
- | | | |
75
- version | flags (sampled bit)
76
- trace_id (32 hex / 16 bytes) |
77
- span_id (16 hex / 8 bytes)
78
- ```
79
-
80
- ```text
81
- flags:
82
- 01 = sampled (decisão upstream: capture este trace)
83
- 00 = not sampled (decisão upstream: skip)
84
- ```
85
-
86
- ### Pattern: trace cross-service via Supabase Edge Function
87
-
88
- ```ts
89
- // PT-BR: Edge Function recebe request → propaga para outro service
90
- import { trace, context, propagation } from 'npm:@opentelemetry/api@1.9.0'
91
- import { W3CTraceContextPropagator } from 'npm:@opentelemetry/core@1.27.0'
92
-
93
- propagation.setGlobalPropagator(new W3CTraceContextPropagator())
94
-
95
- const tracer = trace.getTracer('edge-orders')
96
-
97
- Deno.serve(async (req) => {
98
- // PT-BR: extrair traceparent inbound
99
- const inboundCtx = propagation.extract(context.active(), {
100
- traceparent: req.headers.get('traceparent') ?? '',
101
- })
102
-
103
- return tracer.startActiveSpan(
104
- 'edge_handler',
105
- { kind: 1 /* SERVER */ },
106
- inboundCtx,
107
- async (span) => {
108
- span.setAttribute('endpoint', new URL(req.url).pathname)
109
-
110
- // PT-BR: call outbound para Postgres via PostgREST — injeta traceparent
111
- const outHeaders: Record<string, string> = {}
112
- propagation.inject(context.active(), outHeaders)
113
-
114
- const dbRes = await fetch(Deno.env.get('SUPABASE_URL') + '/rest/v1/orders', {
115
- method: 'POST',
116
- headers: {
117
- ...outHeaders,
118
- 'apikey': Deno.env.get('SUPABASE_ANON_KEY')!,
119
- 'content-type': 'application/json',
120
- },
121
- body: await req.text(),
122
- })
123
-
124
- span.setAttribute('db.status_code', dbRes.status)
125
- span.end()
126
- return dbRes
127
- }
128
- )
129
- })
130
- ```
131
-
132
- ### Pattern: stitching além de RPC — queue message (não-RPC)
133
-
134
- ```ts
135
- // PT-BR: producer — anexa traceparent ao payload da queue (pgmq, SQS, RabbitMQ)
136
- import { trace, context, propagation } from '@opentelemetry/api'
137
-
138
- const tracer = trace.getTracer('producer')
139
-
140
- export async function enqueueEmail(emailJob: EmailJob) {
141
- return tracer.startActiveSpan(
142
- 'enqueue_email',
143
- { kind: SpanKind.PRODUCER },
144
- async (span) => {
145
- span.setAttribute('queue.name', 'emails')
146
- span.setAttribute('email.recipient', emailJob.to)
147
-
148
- // PT-BR: serializar contexto no payload da mensagem
149
- const carrier: Record<string, string> = {}
150
- propagation.inject(context.active(), carrier)
151
-
152
- await pgmqEnqueue('emails', {
153
- ...emailJob,
154
- _trace_context: carrier, // PT-BR: viaja com o job
155
- })
156
-
157
- span.end()
158
- }
159
- )
160
- }
161
-
162
- // PT-BR: consumer — extrai traceparent do payload, continua o trace
163
- export async function processEmailJob(job: EmailJobWithContext) {
164
- const inboundCtx = propagation.extract(
165
- context.active(),
166
- job._trace_context ?? {} // PT-BR: se vazio, novo trace
167
- )
168
-
169
- return tracer.startActiveSpan(
170
- 'process_email',
171
- { kind: SpanKind.CONSUMER },
172
- inboundCtx,
173
- async (span) => {
174
- span.setAttribute('email.recipient', job.to)
175
- // PT-BR: agora o span do worker faz parte do mesmo trace do producer
176
- await sendEmail(job)
177
- span.end()
178
- }
179
- )
180
- }
181
- ```
182
-
183
- ### Pattern: stitching de batch job (não-RPC)
184
-
185
- ```ts
186
- // PT-BR: cron job processa N items — 1 span por item, todos com mesmo trace_id
187
- const tracer = trace.getTracer('billing-cron')
188
-
189
- export async function dailyBillingJob() {
190
- return tracer.startActiveSpan('daily_billing', async (rootSpan) => {
191
- rootSpan.setAttribute('job.type', 'cron')
192
- rootSpan.setAttribute('build_id', BUILD_ID)
193
-
194
- const customers = await db.getCustomersDueForBilling()
195
- rootSpan.setAttribute('customers.count', customers.length)
196
-
197
- // PT-BR: cada customer vira span filho com mesmo trace_id
198
- for (const customer of customers) {
199
- await tracer.startActiveSpan(
200
- 'bill_customer',
201
- { kind: SpanKind.INTERNAL },
202
- async (span) => {
203
- span.setAttribute('customer.id', customer.id)
204
- span.setAttribute('customer.tier', customer.tier)
205
- try {
206
- await chargeCustomer(customer)
207
- span.setAttribute('result.success', true)
208
- } catch (e) {
209
- span.setAttribute('result.success', false)
210
- span.setAttribute('error.type', classify(e))
211
- } finally {
212
- span.end()
213
- }
214
- }
215
- )
216
- }
217
-
218
- rootSpan.end()
219
- })
220
- }
221
- ```
222
-
223
- ### Pattern: span kinds
224
-
225
- | Kind | Quando usar | Exemplo |
226
- |---|---|---|
227
- | `SERVER` | Recebendo request inbound | Handler HTTP, gRPC server method |
228
- | `CLIENT` | Fazendo call outbound | `fetch()`, gRPC client call, DB query |
229
- | `PRODUCER` | Enviando msg para queue | `pgmq.enqueue()`, SQS publish |
230
- | `CONSUMER` | Processando msg de queue | Worker recebendo job |
231
- | `INTERNAL` | Subdivisão dentro do mesmo process | "json_parse", "validation_step" |
232
-
233
- ### Pattern: query traces — montar waterfall
234
-
235
- ```sql
236
- -- PT-BR: pegar todos os spans de um trace em ordem cronológica
237
- select
238
- span_id,
239
- parent_span_id,
240
- span_name,
241
- span_kind,
242
- service_name,
243
- duration_ms,
244
- start_time
245
- from observability.spans
246
- where trace_id = '4bf92f3577b34da6a3ce929d0e0e4736'
247
- order by start_time asc;
248
-
249
- -- PT-BR: encontrar root span — parent_span_id IS NULL ou span sem parent no mesmo trace
250
- select *
251
- from observability.spans
252
- where trace_id = '4bf92f3577b34da6a3ce929d0e0e4736'
253
- and parent_span_id is null;
254
-
255
- -- PT-BR: spans mais lentos cross-trace, último 1h
256
- select
257
- service_name,
258
- span_name,
259
- percentile_cont(0.99) within group (order by duration_ms) as p99,
260
- count(*) as samples
261
- from observability.spans
262
- where start_time > now() - interval '1 hour'
263
- group by service_name, span_name
264
- having count(*) > 100
265
- order by p99 desc
266
- limit 20;
267
- ```
268
-
269
- ## Anti-patterns
270
-
271
- ### ANTI: gerar trace_id por hop
272
-
273
- ```ts
274
- // PT-BR: BAD — quebra a cadeia, cada service vê trace diferente
275
- const traceId = crypto.randomUUID().replace(/-/g, '').slice(0, 32)
276
-
277
- // PT-BR: GOOD — extrair do header inbound; deixar SDK gerar root
278
- const inboundCtx = propagation.extract(context.active(), req.headers)
279
- tracer.startActiveSpan('handler', {}, inboundCtx, ...)
280
- ```
281
-
282
- ### ANTI: esquecer de propagar em call outbound
283
-
284
- ```ts
285
- // PT-BR: BAD — outbound call sem traceparent — trace quebra no service B
286
- await fetch('http://service-b/api', { body: ... })
287
-
288
- // PT-BR: GOOD — injetar traceparent
289
- const headers: Record<string, string> = {}
290
- propagation.inject(context.active(), headers)
291
- await fetch('http://service-b/api', { headers, body: ... })
292
- ```
293
-
294
- ### ANTI: trace só de RPCs, não de batch/queue
295
-
296
- ```ts
297
- // PT-BR: BAD — producer/consumer não compartilham trace, debug fica fragmentado
298
- await pgmqEnqueue('emails', payload) // sem trace context
299
- // ... depois worker processa sem saber que veio do request X
300
-
301
- // PT-BR: GOOD — propagar contexto via metadata da queue
302
- const carrier = {}
303
- propagation.inject(context.active(), carrier)
304
- await pgmqEnqueue('emails', { ...payload, _trace_context: carrier })
305
- ```
306
-
307
- ### ANTI: span sem `end()`
308
-
309
- ```ts
310
- // PT-BR: BAD — span fica aberto forever, duration_ms não calculado, memory leak
311
- const span = tracer.startSpan('handler')
312
- // ... handler logic
313
- return result // PT-BR: ESQUECEU span.end()
314
-
315
- // PT-BR: GOOD — sempre `try/finally`
316
- const span = tracer.startSpan('handler')
317
- try {
318
- // ... logic
319
- } finally {
320
- span.end()
321
- }
322
- ```
323
-
324
- ### ANTI: span hierarchy errada
325
-
326
- ```ts
327
- // PT-BR: BAD — usar startSpan sem startActiveSpan, parent não é settado automático
328
- const parent = tracer.startSpan('parent')
329
- const child = tracer.startSpan('child') // PT-BR: parent_span_id ficou null
330
- parent.end()
331
- child.end()
332
-
333
- // PT-BR: GOOD — startActiveSpan empurra contexto, child herda parent
334
- tracer.startActiveSpan('parent', (parent) => {
335
- tracer.startActiveSpan('child', (child) => {
336
- // PT-BR: child.parent_span_id === parent.span_id
337
- child.end()
338
- })
339
- parent.end()
340
- })
341
- ```
342
-
343
- ## Verificação
344
-
345
- 1. **1 trace_id por request** — enviar 1 request, queryar `SELECT DISTINCT trace_id FROM spans WHERE request_id = X` → 1 resultado.
346
- 2. **Cross-service stitching** — request HTTP service A → service B → DB. Queryar `SELECT count(distinct service_name) FROM spans WHERE trace_id = X` → ≥ 3.
347
- 3. **Root span identificável** — `SELECT * FROM spans WHERE trace_id = X AND parent_span_id IS NULL` → 1 row (o root).
348
- 4. **Span hierarchy correta** — graficar via tool (Jaeger UI, Honeycomb, etc.) ou recursivo SQL — deve formar árvore válida (sem ciclos).
349
- 5. **Duration não-zero** — `SELECT min(duration_ms), max(duration_ms) FROM spans` — min ≥ 0, max razoável.
350
- 6. **Sampled flag respeitado** — verificar que se traceparent inbound = `01`, downstream também sample=true.
351
- 7. **Queue stitching funciona** — enqueue + consume → mesmo `trace_id` em ambos os spans.
352
-
353
- ---
354
-
355
- ## Ver também
356
-
357
- - `kit/skills/_shared-observability/glossary.md` — W3C TraceContext, B3, span kinds
358
- - `kit/skills/structured-events/SKILL.md` — atributos canônicos por span
359
- - `kit/skills/opentelemetry-standard/SKILL.md` — SDK que faz extract/inject
360
- - `kit/skills/telemetry-sampling/SKILL.md` *(Phase 34)* — head vs tail sampling decisão
361
-
362
- *Material-fonte: Observability Engineering (O'Reilly, 2022) — Cap 6: "Stitching Events into Traces".*
1
+ ---
2
+ name: distributed-tracing
3
+ description: Use ao instrumentar tracing — trace_id/span_id/parent_id, propagar W3C TraceContext via header traceparent, stitching além de RPCs (batch, lambda, queue).
4
+ ---
5
+
6
+ # Observabilidade — Distributed Tracing
7
+
8
+ ## Quando usar
9
+
10
+ LLM carrega esta skill ao instrumentar tracing distribuído ou stitching de spans. Trigger phrases:
11
+
12
+ - "distributed tracing", "traces", "spans"
13
+ - "propagar contexto entre serviços", "trace cross-service"
14
+ - "W3C TraceContext", "traceparent header"
15
+ - "trace_id span_id parent_span_id"
16
+ - "ligar lambda batch job ao trace"
17
+ - "stitching de eventos"
18
+
19
+ ## Regras absolutas
20
+
21
+ - **trace_id é compartilhado** entre todos os spans de um único request distribuído. **NÃO** mude por hop.
22
+ - **span_id é único por span** — gere novo a cada `startSpan()`. 16 hex chars (8 bytes).
23
+ - **parent_span_id aponta para span pai** — null no root span. Define a árvore.
24
+ - **W3C TraceContext é o padrão** — header HTTP `traceparent: 00-{trace_id}-{span_id}-{flags}`. Adote sempre. B3 é fallback para legacy.
25
+ - **Propague ANTES de fazer call cross-service** — extrair contexto do request inbound, propagar no request outbound. Sem isso, trace quebra.
26
+ - **Stitching ≠ apenas RPC** — também batch jobs, queue messages, lambda invocations, S3 uploads. Carregue `traceparent` em metadata da queue, env var do lambda, header da Step Function.
27
+ - **Sample decision propaga** — flags `01` em `traceparent` (bit *sampled* ligado) significa "sample=true". A decisão tomada no head propaga downstream.
28
+ - **Não invente trace_id** — sempre derive do contexto inbound ou gere via SDK (não `crypto.randomUUID()`).
29
+ - **Spans devem ter `kind`** — `SERVER` (handler de inbound), `CLIENT` (call outbound), `PRODUCER`/`CONSUMER` (queue), `INTERNAL` (subspan dentro do mesmo process).
30
+
31
+ ## Patterns canônicos
32
+
33
+ ### Pattern: extrair contexto inbound + propagar outbound (Node)
34
+
35
+ ```ts
36
+ // PT-BR: handler HTTP — extrai traceparent do request inbound, propaga em call outbound
37
+ import { trace, context, propagation } from '@opentelemetry/api'
38
+
39
+ const tracer = trace.getTracer('orders-service')
40
+
41
+ export async function placeOrder(req: Request) {
42
+ // PT-BR: 1 — extrair contexto inbound do header traceparent
43
+ const inboundContext = propagation.extract(context.active(), req.headers)
44
+
45
+ return tracer.startActiveSpan(
46
+ 'place_order',
47
+ { kind: SpanKind.SERVER },
48
+ inboundContext,
49
+ async (span) => {
50
+ span.setAttribute('user.id', req.user.id)
51
+
52
+ // PT-BR: 2 — fazer call outbound — propagation injeta traceparent automaticamente
53
+ // se você usar fetch/grpc instrumentados (ver skill opentelemetry-standard)
54
+ const outboundHeaders: Record<string, string> = {}
55
+ propagation.inject(context.active(), outboundHeaders)
56
+
57
+ const inventoryRes = await fetch('http://inventory/check', {
58
+ headers: outboundHeaders, // PT-BR: traceparent injetado aqui
59
+ body: JSON.stringify({ items: req.items })
60
+ })
61
+
62
+ span.end()
63
+ return inventoryRes.json()
64
+ }
65
+ )
66
+ }
67
+ ```
68
+
69
+ ### Pattern: traceparent format
70
+
71
+ ```text
72
+ traceparent: 00-4bf92f3577b34da6a3ce929d0e0e4736-00f067aa0ba902b7-01
73
+ ^ ^ ^ ^
74
+ | | | |
75
+ version | flags (sampled bit)
76
+ trace_id (32 hex / 16 bytes) |
77
+ span_id (16 hex / 8 bytes)
78
+ ```
79
+
80
+ ```text
81
+ flags:
82
+ 01 = sampled (decisão upstream: capture este trace)
83
+ 00 = not sampled (decisão upstream: skip)
84
+ ```
85
+
86
+ ### Pattern: trace cross-service via Supabase Edge Function
87
+
88
+ ```ts
89
+ // PT-BR: Edge Function recebe request → propaga para outro service
90
+ import { trace, context, propagation } from 'npm:@opentelemetry/api@1.9.0'
91
+ import { W3CTraceContextPropagator } from 'npm:@opentelemetry/core@1.27.0'
92
+
93
+ propagation.setGlobalPropagator(new W3CTraceContextPropagator())
94
+
95
+ const tracer = trace.getTracer('edge-orders')
96
+
97
+ Deno.serve(async (req) => {
98
+ // PT-BR: extrair traceparent inbound
99
+ const inboundCtx = propagation.extract(context.active(), {
100
+ traceparent: req.headers.get('traceparent') ?? '',
101
+ })
102
+
103
+ return tracer.startActiveSpan(
104
+ 'edge_handler',
105
+ { kind: 1 /* SERVER */ },
106
+ inboundCtx,
107
+ async (span) => {
108
+ span.setAttribute('endpoint', new URL(req.url).pathname)
109
+
110
+ // PT-BR: call outbound para Postgres via PostgREST — injeta traceparent
111
+ const outHeaders: Record<string, string> = {}
112
+ propagation.inject(context.active(), outHeaders)
113
+
114
+ const dbRes = await fetch(Deno.env.get('SUPABASE_URL') + '/rest/v1/orders', {
115
+ method: 'POST',
116
+ headers: {
117
+ ...outHeaders,
118
+ 'apikey': Deno.env.get('SUPABASE_ANON_KEY')!,
119
+ 'content-type': 'application/json',
120
+ },
121
+ body: await req.text(),
122
+ })
123
+
124
+ span.setAttribute('db.status_code', dbRes.status)
125
+ span.end()
126
+ return dbRes
127
+ }
128
+ )
129
+ })
130
+ ```
131
+
132
+ ### Pattern: stitching além de RPC — queue message (não-RPC)
133
+
134
+ ```ts
135
+ // PT-BR: producer — anexa traceparent ao payload da queue (pgmq, SQS, RabbitMQ)
136
+ import { trace, context, propagation } from '@opentelemetry/api'
137
+
138
+ const tracer = trace.getTracer('producer')
139
+
140
+ export async function enqueueEmail(emailJob: EmailJob) {
141
+ return tracer.startActiveSpan(
142
+ 'enqueue_email',
143
+ { kind: SpanKind.PRODUCER },
144
+ async (span) => {
145
+ span.setAttribute('queue.name', 'emails')
146
+ span.setAttribute('email.recipient', emailJob.to)
147
+
148
+ // PT-BR: serializar contexto no payload da mensagem
149
+ const carrier: Record<string, string> = {}
150
+ propagation.inject(context.active(), carrier)
151
+
152
+ await pgmqEnqueue('emails', {
153
+ ...emailJob,
154
+ _trace_context: carrier, // PT-BR: viaja com o job
155
+ })
156
+
157
+ span.end()
158
+ }
159
+ )
160
+ }
161
+
162
+ // PT-BR: consumer — extrai traceparent do payload, continua o trace
163
+ export async function processEmailJob(job: EmailJobWithContext) {
164
+ const inboundCtx = propagation.extract(
165
+ context.active(),
166
+ job._trace_context ?? {} // PT-BR: se vazio, novo trace
167
+ )
168
+
169
+ return tracer.startActiveSpan(
170
+ 'process_email',
171
+ { kind: SpanKind.CONSUMER },
172
+ inboundCtx,
173
+ async (span) => {
174
+ span.setAttribute('email.recipient', job.to)
175
+ // PT-BR: agora o span do worker faz parte do mesmo trace do producer
176
+ await sendEmail(job)
177
+ span.end()
178
+ }
179
+ )
180
+ }
181
+ ```
182
+
183
+ ### Pattern: stitching de batch job (não-RPC)
184
+
185
+ ```ts
186
+ // PT-BR: cron job processa N items — 1 span por item, todos com mesmo trace_id
187
+ const tracer = trace.getTracer('billing-cron')
188
+
189
+ export async function dailyBillingJob() {
190
+ return tracer.startActiveSpan('daily_billing', async (rootSpan) => {
191
+ rootSpan.setAttribute('job.type', 'cron')
192
+ rootSpan.setAttribute('build_id', BUILD_ID)
193
+
194
+ const customers = await db.getCustomersDueForBilling()
195
+ rootSpan.setAttribute('customers.count', customers.length)
196
+
197
+ // PT-BR: cada customer vira span filho com mesmo trace_id
198
+ for (const customer of customers) {
199
+ await tracer.startActiveSpan(
200
+ 'bill_customer',
201
+ { kind: SpanKind.INTERNAL },
202
+ async (span) => {
203
+ span.setAttribute('customer.id', customer.id)
204
+ span.setAttribute('customer.tier', customer.tier)
205
+ try {
206
+ await chargeCustomer(customer)
207
+ span.setAttribute('result.success', true)
208
+ } catch (e) {
209
+ span.setAttribute('result.success', false)
210
+ span.setAttribute('error.type', classify(e))
211
+ } finally {
212
+ span.end()
213
+ }
214
+ }
215
+ )
216
+ }
217
+
218
+ rootSpan.end()
219
+ })
220
+ }
221
+ ```
222
+
223
+ ### Pattern: span kinds
224
+
225
+ | Kind | Quando usar | Exemplo |
226
+ |---|---|---|
227
+ | `SERVER` | Recebendo request inbound | Handler HTTP, gRPC server method |
228
+ | `CLIENT` | Fazendo call outbound | `fetch()`, gRPC client call, DB query |
229
+ | `PRODUCER` | Enviando msg para queue | `pgmq.enqueue()`, SQS publish |
230
+ | `CONSUMER` | Processando msg de queue | Worker recebendo job |
231
+ | `INTERNAL` | Subdivisão dentro do mesmo process | "json_parse", "validation_step" |
232
+
233
+ ### Pattern: query traces — montar waterfall
234
+
235
+ ```sql
236
+ -- PT-BR: pegar todos os spans de um trace em ordem cronológica
237
+ select
238
+ span_id,
239
+ parent_span_id,
240
+ span_name,
241
+ span_kind,
242
+ service_name,
243
+ duration_ms,
244
+ start_time
245
+ from observability.spans
246
+ where trace_id = '4bf92f3577b34da6a3ce929d0e0e4736'
247
+ order by start_time asc;
248
+
249
+ -- PT-BR: encontrar root span — parent_span_id IS NULL ou span sem parent no mesmo trace
250
+ select *
251
+ from observability.spans
252
+ where trace_id = '4bf92f3577b34da6a3ce929d0e0e4736'
253
+ and parent_span_id is null;
254
+
255
+ -- PT-BR: spans mais lentos cross-trace, último 1h
256
+ select
257
+ service_name,
258
+ span_name,
259
+ percentile_cont(0.99) within group (order by duration_ms) as p99,
260
+ count(*) as samples
261
+ from observability.spans
262
+ where start_time > now() - interval '1 hour'
263
+ group by service_name, span_name
264
+ having count(*) > 100
265
+ order by p99 desc
266
+ limit 20;
267
+ ```
268
+
269
+ ## Anti-patterns
270
+
271
+ ### ANTI: gerar trace_id por hop
272
+
273
+ ```ts
274
+ // PT-BR: BAD — quebra a cadeia, cada service vê trace diferente
275
+ const traceId = crypto.randomUUID().replace(/-/g, '').slice(0, 32)
276
+
277
+ // PT-BR: GOOD — extrair do header inbound; deixar SDK gerar root
278
+ const inboundCtx = propagation.extract(context.active(), req.headers)
279
+ tracer.startActiveSpan('handler', {}, inboundCtx, ...)
280
+ ```
281
+
282
+ ### ANTI: esquecer de propagar em call outbound
283
+
284
+ ```ts
285
+ // PT-BR: BAD — outbound call sem traceparent — trace quebra no service B
286
+ await fetch('http://service-b/api', { body: ... })
287
+
288
+ // PT-BR: GOOD — injetar traceparent
289
+ const headers: Record<string, string> = {}
290
+ propagation.inject(context.active(), headers)
291
+ await fetch('http://service-b/api', { headers, body: ... })
292
+ ```
293
+
294
+ ### ANTI: trace só de RPCs, não de batch/queue
295
+
296
+ ```ts
297
+ // PT-BR: BAD — producer/consumer não compartilham trace, debug fica fragmentado
298
+ await pgmqEnqueue('emails', payload) // sem trace context
299
+ // ... depois worker processa sem saber que veio do request X
300
+
301
+ // PT-BR: GOOD — propagar contexto via metadata da queue
302
+ const carrier = {}
303
+ propagation.inject(context.active(), carrier)
304
+ await pgmqEnqueue('emails', { ...payload, _trace_context: carrier })
305
+ ```
306
+
307
+ ### ANTI: span sem `end()`
308
+
309
+ ```ts
310
+ // PT-BR: BAD — span fica aberto forever, duration_ms não calculado, memory leak
311
+ const span = tracer.startSpan('handler')
312
+ // ... handler logic
313
+ return result // PT-BR: ESQUECEU span.end()
314
+
315
+ // PT-BR: GOOD — sempre `try/finally`
316
+ const span = tracer.startSpan('handler')
317
+ try {
318
+ // ... logic
319
+ } finally {
320
+ span.end()
321
+ }
322
+ ```
323
+
324
+ ### ANTI: span hierarchy errada
325
+
326
+ ```ts
327
+ // PT-BR: BAD — usar startSpan sem startActiveSpan, parent não é settado automático
328
+ const parent = tracer.startSpan('parent')
329
+ const child = tracer.startSpan('child') // PT-BR: parent_span_id ficou null
330
+ parent.end()
331
+ child.end()
332
+
333
+ // PT-BR: GOOD — startActiveSpan empurra contexto, child herda parent
334
+ tracer.startActiveSpan('parent', (parent) => {
335
+ tracer.startActiveSpan('child', (child) => {
336
+ // PT-BR: child.parent_span_id === parent.span_id
337
+ child.end()
338
+ })
339
+ parent.end()
340
+ })
341
+ ```
342
+
343
+ ## Verificação
344
+
345
+ 1. **1 trace_id por request** — enviar 1 request, queryar `SELECT DISTINCT trace_id FROM spans WHERE request_id = X` → 1 resultado.
346
+ 2. **Cross-service stitching** — request HTTP service A → service B → DB. Queryar `SELECT count(distinct service_name) FROM spans WHERE trace_id = X` → ≥ 3.
347
+ 3. **Root span identificável** — `SELECT * FROM spans WHERE trace_id = X AND parent_span_id IS NULL` → 1 row (o root).
348
+ 4. **Span hierarchy correta** — visualizar via tool (Jaeger UI, Honeycomb, etc.) ou SQL recursivo — deve formar árvore válida (sem ciclos).
349
+ 5. **Duration não-negativa** — `SELECT min(duration_ms), max(duration_ms) FROM spans` — min ≥ 0 (nunca negativa), max razoável.
350
+ 6. **Sampled flag respeitado** — verificar que, se as flags do `traceparent` inbound = `01`, os spans downstream também ficam com sample=true.
351
+ 7. **Queue stitching funciona** — enqueue + consume → mesmo `trace_id` em ambos os spans.
352
+
353
+ ---
354
+
355
+ ## Ver também
356
+
357
+ - `kit/skills/_shared-observability/glossary.md` — W3C TraceContext, B3, span kinds
358
+ - `kit/skills/structured-events/SKILL.md` — atributos canônicos por span
359
+ - `kit/skills/opentelemetry-standard/SKILL.md` — SDK que faz extract/inject
360
+ - `kit/skills/telemetry-sampling/SKILL.md` *(Phase 34)* — decisão de sampling: head vs tail
361
+
362
+ *Material-fonte: Observability Engineering (O'Reilly, 2022) — Cap 6: "Stitching Events into Traces".*