@luanpdd/kit-mcp 1.35.0 → 1.36.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117) hide show
  1. package/bin/cli.js +2 -2
  2. package/bin/mcp.js +6 -6
  3. package/bin/ui.js +74 -74
  4. package/gates/ai-prompt-stability.md +120 -120
  5. package/gates/budget-description.md +68 -68
  6. package/gates/confidence.md +29 -29
  7. package/gates/dependency-check.md +33 -33
  8. package/gates/dept-cycle-prevention.md +179 -179
  9. package/gates/golden-signals-coverage.md +133 -133
  10. package/gates/legacy-refactor-safety.md +178 -178
  11. package/gates/multi-tenant-rls-coverage.md +102 -102
  12. package/gates/no-personal-uuid.md +72 -72
  13. package/gates/obs-agents-mcp-supabase.md +86 -86
  14. package/gates/obs-skills-frontmatter.md +76 -76
  15. package/gates/observability-coverage.md +151 -151
  16. package/gates/omm-no-regression.md +83 -83
  17. package/gates/postmortem-template-required.md +127 -127
  18. package/gates/prr-checklist-coverage.md +128 -128
  19. package/gates/regression.md +32 -32
  20. package/gates/release-pipeline-policy.md +132 -132
  21. package/gates/secrets-scan.md +33 -33
  22. package/gates/service-role-not-in-user-facing.md +113 -113
  23. package/gates/skill-must-include.md +71 -71
  24. package/gates/sync-idempotent.md +62 -62
  25. package/gates/verify-phase-goal.md +34 -34
  26. package/kit/agents/designer-ui.md +216 -216
  27. package/kit/agents/workflow-generator.md +537 -167
  28. package/kit/commands/adicionar-backlog.md +1 -1
  29. package/kit/commands/adicionar-fase.md +1 -1
  30. package/kit/commands/adicionar-tarefa.md +1 -1
  31. package/kit/commands/auditar-observabilidade.md +103 -103
  32. package/kit/commands/auditar-toil.md +129 -129
  33. package/kit/commands/caracterizar-prompt.md +195 -195
  34. package/kit/commands/criar-workflow.md +158 -158
  35. package/kit/commands/definir-perfil.md +1 -1
  36. package/kit/commands/definir-slo.md +108 -108
  37. package/kit/commands/fio.md +1 -1
  38. package/kit/commands/golden-signals.md +142 -142
  39. package/kit/commands/instrumentar-fase.md +200 -200
  40. package/kit/commands/investigar-producao.md +162 -162
  41. package/kit/commands/observabilidade.md +118 -118
  42. package/kit/commands/postmortem.md +179 -179
  43. package/kit/commands/prr.md +205 -205
  44. package/kit/commands/publicar-rapido.md +207 -207
  45. package/kit/commands/risk-budget.md +220 -220
  46. package/kit/commands/sre.md +230 -230
  47. package/kit/file-manifest.json +424 -424
  48. package/kit/framework/references/output-style.md +22 -22
  49. package/kit/hooks/post-apply-migration.js +199 -199
  50. package/kit/hooks/sidecar-tool-publisher.js +210 -210
  51. package/kit/skills/_shared-dados-distribuidos/glossary.md +224 -224
  52. package/kit/skills/_shared-legacy/glossary.md +389 -389
  53. package/kit/skills/_shared-multi-tenant/glossary.md +186 -186
  54. package/kit/skills/_shared-observability/glossary.md +396 -396
  55. package/kit/skills/_shared-sre/glossary.md +712 -712
  56. package/kit/skills/_shared-supabase/glossary.md +234 -234
  57. package/kit/skills/blameless-postmortems/SKILL.md +340 -340
  58. package/kit/skills/burn-rate-alerting/SKILL.md +258 -258
  59. package/kit/skills/cascading-failures/SKILL.md +311 -311
  60. package/kit/skills/core-analysis-loop/SKILL.md +352 -352
  61. package/kit/skills/distributed-tracing/SKILL.md +362 -362
  62. package/kit/skills/dynamic-workflow-authoring/SKILL.md +327 -223
  63. package/kit/skills/eliminating-toil/SKILL.md +243 -243
  64. package/kit/skills/event-based-slos/SKILL.md +296 -296
  65. package/kit/skills/four-golden-signals/SKILL.md +314 -314
  66. package/kit/skills/hermetic-builds/SKILL.md +323 -323
  67. package/kit/skills/legacy-monster-methods/SKILL.md +444 -444
  68. package/kit/skills/llm-as-dependency/SKILL.md +436 -436
  69. package/kit/skills/load-shedding-graceful-degradation/SKILL.md +396 -396
  70. package/kit/skills/observability-driven-development/SKILL.md +315 -315
  71. package/kit/skills/observability-maturity-model/SKILL.md +222 -222
  72. package/kit/skills/opentelemetry-standard/SKILL.md +351 -351
  73. package/kit/skills/production-readiness-review/SKILL.md +305 -305
  74. package/kit/skills/release-engineering/SKILL.md +367 -367
  75. package/kit/skills/retry-strategies/SKILL.md +372 -372
  76. package/kit/skills/sre-risk-management/SKILL.md +221 -221
  77. package/kit/skills/structured-events/SKILL.md +265 -265
  78. package/kit/skills/supabase-cron-queues/SKILL.md +275 -275
  79. package/kit/skills/supabase-database-functions/SKILL.md +332 -332
  80. package/kit/skills/supabase-declarative-schema/SKILL.md +183 -183
  81. package/kit/skills/supabase-pgvector-rag/SKILL.md +253 -253
  82. package/kit/skills/supabase-postgres-style/SKILL.md +138 -138
  83. package/kit/skills/supabase-storage/SKILL.md +234 -234
  84. package/kit/skills/telemetry-pipelines/SKILL.md +259 -259
  85. package/kit/skills/telemetry-sampling/SKILL.md +256 -256
  86. package/kit/skills/ui-anti-padroes-ia/SKILL.md +261 -261
  87. package/kit/skills/ui-contexto-produto/SKILL.md +248 -248
  88. package/kit/skills/ui-cor-estrategia/SKILL.md +213 -213
  89. package/kit/skills/ui-critica-auditoria/SKILL.md +260 -260
  90. package/kit/skills/ui-motion-funcional/SKILL.md +264 -264
  91. package/kit/skills/ui-ritmo-espacial/SKILL.md +259 -259
  92. package/kit/skills/ui-tipografia/SKILL.md +211 -211
  93. package/package.json +1 -1
  94. package/src/cli/index.js +1114 -1114
  95. package/src/cli/render.js +194 -194
  96. package/src/cli/upgrade-check.js +135 -135
  97. package/src/core/error-redaction.js +76 -76
  98. package/src/core/failures.js +153 -153
  99. package/src/core/gate-runner.js +205 -205
  100. package/src/core/gates.js +82 -82
  101. package/src/core/logger.js +170 -170
  102. package/src/core/manifest-verify.js +174 -174
  103. package/src/core/metrics.js +268 -268
  104. package/src/core/notify.js +60 -60
  105. package/src/core/path-safety.js +141 -141
  106. package/src/core/replays.js +120 -120
  107. package/src/core/ui.js +185 -185
  108. package/src/mcp-server/install.js +149 -149
  109. package/src/mcp-server/roots.js +124 -124
  110. package/src/ui/auto-spawn.js +113 -113
  111. package/src/ui/browser.js +78 -78
  112. package/src/ui/client.js +130 -130
  113. package/src/ui/events.js +65 -65
  114. package/src/ui/lockfile.js +191 -191
  115. package/src/ui/port.js +67 -67
  116. package/src/ui/server.js +547 -547
  117. package/src/ui/wrapper.js +129 -129
@@ -1,362 +1,362 @@
1
- ---
2
- name: distributed-tracing
3
- description: Use ao instrumentar tracing — trace_id/span_id/parent_id, propagar W3C TraceContext via header traceparent, stitching além de RPCs (batch, lambda, queue).
4
- ---
5
-
6
- # Observabilidade — Distributed Tracing
7
-
8
- ## Quando usar
9
-
10
- LLM carrega esta skill ao instrumentar tracing distribuído ou stitching de spans. Trigger phrases:
11
-
12
- - "distributed tracing", "traces", "spans"
13
- - "propagar contexto entre serviços", "trace cross-service"
14
- - "W3C TraceContext", "traceparent header"
15
- - "trace_id span_id parent_span_id"
16
- - "ligar lambda batch job ao trace"
17
- - "stitching de eventos"
18
-
19
- ## Regras absolutas
20
-
21
- - **trace_id é compartilhado** entre todos os spans de um único request distribuído. **NÃO** mude por hop.
22
- - **span_id é único por span** — gere novo a cada `startSpan()`. 16 hex chars (8 bytes).
23
- - **parent_span_id aponta para span pai** — null no root span. Define a árvore.
24
- - **W3C TraceContext é o padrão** — header HTTP `traceparent: 00-{trace_id}-{span_id}-{flags}`. Adote sempre. B3 é fallback para legacy.
25
- - **Propague ANTES de fazer call cross-service** — extrair contexto do request inbound, propagar no request outbound. Sem isso, trace quebra.
26
- - **Stitching ≠ apenas RPC** — também batch jobs, queue messages, lambda invocations, S3 uploads. Carregue `traceparent` em metadata da queue, env var do lambda, header da Step Function.
27
- - **Sample decision propaga** — bit `01` em flags de `traceparent` significa "sample=true". Decisão tomada no head propaga downstream.
28
- - **Não invente trace_id** — sempre derive do contexto inbound ou gere via SDK (não `crypto.randomUUID()`).
29
- - **Spans devem ter `kind`** — `SERVER` (handler de inbound), `CLIENT` (call outbound), `PRODUCER`/`CONSUMER` (queue), `INTERNAL` (subspan dentro do mesmo process).
30
-
31
- ## Patterns canônicos
32
-
33
- ### Pattern: extrair contexto inbound + propagar outbound (Node)
34
-
35
- ```ts
36
- // PT-BR: handler HTTP — extrai traceparent do request inbound, propaga em call outbound
37
- import { trace, context, propagation } from '@opentelemetry/api'
38
-
39
- const tracer = trace.getTracer('orders-service')
40
-
41
- export async function placeOrder(req: Request) {
42
- // PT-BR: 1 — extrair contexto inbound do header traceparent
43
- const inboundContext = propagation.extract(context.active(), req.headers)
44
-
45
- return tracer.startActiveSpan(
46
- 'place_order',
47
- { kind: SpanKind.SERVER },
48
- inboundContext,
49
- async (span) => {
50
- span.setAttribute('user.id', req.user.id)
51
-
52
- // PT-BR: 2 — fazer call outbound — propagation injeta traceparent automaticamente
53
- // se você usar fetch/grpc instrumentados (ver skill opentelemetry-standard)
54
- const outboundHeaders: Record<string, string> = {}
55
- propagation.inject(context.active(), outboundHeaders)
56
-
57
- const inventoryRes = await fetch('http://inventory/check', {
58
- headers: outboundHeaders, // PT-BR: traceparent injetado aqui
59
- body: JSON.stringify({ items: req.items })
60
- })
61
-
62
- span.end()
63
- return inventoryRes.json()
64
- }
65
- )
66
- }
67
- ```
68
-
69
- ### Pattern: traceparent format
70
-
71
- ```text
72
- traceparent: 00-4bf92f3577b34da6a3ce929d0e0e4736-00f067aa0ba902b7-01
73
- ^ ^ ^ ^
74
- | | | |
75
- version | flags (sampled bit)
76
- trace_id (32 hex / 16 bytes) |
77
- span_id (16 hex / 8 bytes)
78
- ```
79
-
80
- ```text
81
- flags:
82
- 01 = sampled (decisão upstream: capture este trace)
83
- 00 = not sampled (decisão upstream: skip)
84
- ```
85
-
86
- ### Pattern: trace cross-service via Supabase Edge Function
87
-
88
- ```ts
89
- // PT-BR: Edge Function recebe request → propaga para outro service
90
- import { trace, context, propagation } from 'npm:@opentelemetry/api@1.9.0'
91
- import { W3CTraceContextPropagator } from 'npm:@opentelemetry/core@1.27.0'
92
-
93
- propagation.setGlobalPropagator(new W3CTraceContextPropagator())
94
-
95
- const tracer = trace.getTracer('edge-orders')
96
-
97
- Deno.serve(async (req) => {
98
- // PT-BR: extrair traceparent inbound
99
- const inboundCtx = propagation.extract(context.active(), {
100
- traceparent: req.headers.get('traceparent') ?? '',
101
- })
102
-
103
- return tracer.startActiveSpan(
104
- 'edge_handler',
105
- { kind: 1 /* SERVER */ },
106
- inboundCtx,
107
- async (span) => {
108
- span.setAttribute('endpoint', new URL(req.url).pathname)
109
-
110
- // PT-BR: call outbound para Postgres via PostgREST — injeta traceparent
111
- const outHeaders: Record<string, string> = {}
112
- propagation.inject(context.active(), outHeaders)
113
-
114
- const dbRes = await fetch(Deno.env.get('SUPABASE_URL') + '/rest/v1/orders', {
115
- method: 'POST',
116
- headers: {
117
- ...outHeaders,
118
- 'apikey': Deno.env.get('SUPABASE_ANON_KEY')!,
119
- 'content-type': 'application/json',
120
- },
121
- body: await req.text(),
122
- })
123
-
124
- span.setAttribute('db.status_code', dbRes.status)
125
- span.end()
126
- return dbRes
127
- }
128
- )
129
- })
130
- ```
131
-
132
- ### Pattern: stitching além de RPC — queue message (não-RPC)
133
-
134
- ```ts
135
- // PT-BR: producer — anexa traceparent ao payload da queue (pgmq, SQS, RabbitMQ)
136
- import { trace, context, propagation } from '@opentelemetry/api'
137
-
138
- const tracer = trace.getTracer('producer')
139
-
140
- export async function enqueueEmail(emailJob: EmailJob) {
141
- return tracer.startActiveSpan(
142
- 'enqueue_email',
143
- { kind: SpanKind.PRODUCER },
144
- async (span) => {
145
- span.setAttribute('queue.name', 'emails')
146
- span.setAttribute('email.recipient', emailJob.to)
147
-
148
- // PT-BR: serializar contexto no payload da mensagem
149
- const carrier: Record<string, string> = {}
150
- propagation.inject(context.active(), carrier)
151
-
152
- await pgmqEnqueue('emails', {
153
- ...emailJob,
154
- _trace_context: carrier, // PT-BR: viaja com o job
155
- })
156
-
157
- span.end()
158
- }
159
- )
160
- }
161
-
162
- // PT-BR: consumer — extrai traceparent do payload, continua o trace
163
- export async function processEmailJob(job: EmailJobWithContext) {
164
- const inboundCtx = propagation.extract(
165
- context.active(),
166
- job._trace_context ?? {} // PT-BR: se vazio, novo trace
167
- )
168
-
169
- return tracer.startActiveSpan(
170
- 'process_email',
171
- { kind: SpanKind.CONSUMER },
172
- inboundCtx,
173
- async (span) => {
174
- span.setAttribute('email.recipient', job.to)
175
- // PT-BR: agora o span do worker faz parte do mesmo trace do producer
176
- await sendEmail(job)
177
- span.end()
178
- }
179
- )
180
- }
181
- ```
182
-
183
- ### Pattern: stitching de batch job (não-RPC)
184
-
185
- ```ts
186
- // PT-BR: cron job processa N items — 1 span por item, todos com mesmo trace_id
187
- const tracer = trace.getTracer('billing-cron')
188
-
189
- export async function dailyBillingJob() {
190
- return tracer.startActiveSpan('daily_billing', async (rootSpan) => {
191
- rootSpan.setAttribute('job.type', 'cron')
192
- rootSpan.setAttribute('build_id', BUILD_ID)
193
-
194
- const customers = await db.getCustomersDueForBilling()
195
- rootSpan.setAttribute('customers.count', customers.length)
196
-
197
- // PT-BR: cada customer vira span filho com mesmo trace_id
198
- for (const customer of customers) {
199
- await tracer.startActiveSpan(
200
- 'bill_customer',
201
- { kind: SpanKind.INTERNAL },
202
- async (span) => {
203
- span.setAttribute('customer.id', customer.id)
204
- span.setAttribute('customer.tier', customer.tier)
205
- try {
206
- await chargeCustomer(customer)
207
- span.setAttribute('result.success', true)
208
- } catch (e) {
209
- span.setAttribute('result.success', false)
210
- span.setAttribute('error.type', classify(e))
211
- } finally {
212
- span.end()
213
- }
214
- }
215
- )
216
- }
217
-
218
- rootSpan.end()
219
- })
220
- }
221
- ```
222
-
223
- ### Pattern: span kinds
224
-
225
- | Kind | Quando usar | Exemplo |
226
- |---|---|---|
227
- | `SERVER` | Recebendo request inbound | Handler HTTP, gRPC server method |
228
- | `CLIENT` | Fazendo call outbound | `fetch()`, gRPC client call, DB query |
229
- | `PRODUCER` | Enviando msg para queue | `pgmq.enqueue()`, SQS publish |
230
- | `CONSUMER` | Processando msg de queue | Worker recebendo job |
231
- | `INTERNAL` | Subdivisão dentro do mesmo process | "json_parse", "validation_step" |
232
-
233
- ### Pattern: query traces — montar waterfall
234
-
235
- ```sql
236
- -- PT-BR: pegar todos os spans de um trace em ordem cronológica
237
- select
238
- span_id,
239
- parent_span_id,
240
- span_name,
241
- span_kind,
242
- service_name,
243
- duration_ms,
244
- start_time
245
- from observability.spans
246
- where trace_id = '4bf92f3577b34da6a3ce929d0e0e4736'
247
- order by start_time asc;
248
-
249
- -- PT-BR: encontrar root span — parent_span_id IS NULL ou span sem parent no mesmo trace
250
- select *
251
- from observability.spans
252
- where trace_id = '4bf92f3577b34da6a3ce929d0e0e4736'
253
- and parent_span_id is null;
254
-
255
- -- PT-BR: spans mais lentos cross-trace, último 1h
256
- select
257
- service_name,
258
- span_name,
259
- percentile_cont(0.99) within group (order by duration_ms) as p99,
260
- count(*) as samples
261
- from observability.spans
262
- where start_time > now() - interval '1 hour'
263
- group by service_name, span_name
264
- having count(*) > 100
265
- order by p99 desc
266
- limit 20;
267
- ```
268
-
269
- ## Anti-patterns
270
-
271
- ### ANTI: gerar trace_id por hop
272
-
273
- ```ts
274
- // PT-BR: BAD — quebra a cadeia, cada service vê trace diferente
275
- const traceId = crypto.randomUUID().replace(/-/g, '').slice(0, 32)
276
-
277
- // PT-BR: GOOD — extrair do header inbound; deixar SDK gerar root
278
- const inboundCtx = propagation.extract(context.active(), req.headers)
279
- tracer.startActiveSpan('handler', {}, inboundCtx, ...)
280
- ```
281
-
282
- ### ANTI: esquecer de propagar em call outbound
283
-
284
- ```ts
285
- // PT-BR: BAD — outbound call sem traceparent — trace quebra no service B
286
- await fetch('http://service-b/api', { body: ... })
287
-
288
- // PT-BR: GOOD — injetar traceparent
289
- const headers: Record<string, string> = {}
290
- propagation.inject(context.active(), headers)
291
- await fetch('http://service-b/api', { headers, body: ... })
292
- ```
293
-
294
- ### ANTI: trace só de RPCs, não de batch/queue
295
-
296
- ```ts
297
- // PT-BR: BAD — producer/consumer não compartilham trace, debug fica fragmentado
298
- await pgmqEnqueue('emails', payload) // sem trace context
299
- // ... depois worker processa sem saber que veio do request X
300
-
301
- // PT-BR: GOOD — propagar contexto via metadata da queue
302
- const carrier = {}
303
- propagation.inject(context.active(), carrier)
304
- await pgmqEnqueue('emails', { ...payload, _trace_context: carrier })
305
- ```
306
-
307
- ### ANTI: span sem `end()`
308
-
309
- ```ts
310
- // PT-BR: BAD — span fica aberto forever, duration_ms não calculado, memory leak
311
- const span = tracer.startSpan('handler')
312
- // ... handler logic
313
- return result // PT-BR: ESQUECEU span.end()
314
-
315
- // PT-BR: GOOD — sempre `try/finally`
316
- const span = tracer.startSpan('handler')
317
- try {
318
- // ... logic
319
- } finally {
320
- span.end()
321
- }
322
- ```
323
-
324
- ### ANTI: span hierarchy errada
325
-
326
- ```ts
327
- // PT-BR: BAD — usar startSpan sem startActiveSpan, parent não é settado automático
328
- const parent = tracer.startSpan('parent')
329
- const child = tracer.startSpan('child') // PT-BR: parent_span_id ficou null
330
- parent.end()
331
- child.end()
332
-
333
- // PT-BR: GOOD — startActiveSpan empurra contexto, child herda parent
334
- tracer.startActiveSpan('parent', (parent) => {
335
- tracer.startActiveSpan('child', (child) => {
336
- // PT-BR: child.parent_span_id === parent.span_id
337
- child.end()
338
- })
339
- parent.end()
340
- })
341
- ```
342
-
343
- ## Verificação
344
-
345
- 1. **1 trace_id por request** — enviar 1 request, queryar `SELECT DISTINCT trace_id FROM spans WHERE request_id = X` → 1 resultado.
346
- 2. **Cross-service stitching** — request HTTP service A → service B → DB. Queryar `SELECT count(distinct service_name) FROM spans WHERE trace_id = X` → ≥ 3.
347
- 3. **Root span identificável** — `SELECT * FROM spans WHERE trace_id = X AND parent_span_id IS NULL` → 1 row (o root).
348
- 4. **Span hierarchy correta** — graficar via tool (Jaeger UI, Honeycomb, etc.) ou recursivo SQL — deve formar árvore válida (sem ciclos).
349
- 5. **Duration não-zero** — `SELECT min(duration_ms), max(duration_ms) FROM spans` — min ≥ 0, max razoável.
350
- 6. **Sampled flag respeitado** — verificar que se traceparent inbound = `01`, downstream também sample=true.
351
- 7. **Queue stitching funciona** — enqueue + consume → mesmo `trace_id` em ambos os spans.
352
-
353
- ---
354
-
355
- ## Ver também
356
-
357
- - `kit/skills/_shared-observability/glossary.md` — W3C TraceContext, B3, span kinds
358
- - `kit/skills/structured-events/SKILL.md` — atributos canônicos por span
359
- - `kit/skills/opentelemetry-standard/SKILL.md` — SDK que faz extract/inject
360
- - `kit/skills/telemetry-sampling/SKILL.md` *(Phase 34)* — head vs tail sampling decisão
361
-
362
- *Material-fonte: Observability Engineering (O'Reilly, 2022) — Cap 6: "Stitching Events into Traces".*
1
+ ---
2
+ name: distributed-tracing
3
+ description: Use ao instrumentar tracing — trace_id/span_id/parent_id, propagar W3C TraceContext via header traceparent, stitching além de RPCs (batch, lambda, queue).
4
+ ---
5
+
6
+ # Observabilidade — Distributed Tracing
7
+
8
+ ## Quando usar
9
+
10
+ LLM carrega esta skill ao instrumentar tracing distribuído ou stitching de spans. Trigger phrases:
11
+
12
+ - "distributed tracing", "traces", "spans"
13
+ - "propagar contexto entre serviços", "trace cross-service"
14
+ - "W3C TraceContext", "traceparent header"
15
+ - "trace_id span_id parent_span_id"
16
+ - "ligar lambda batch job ao trace"
17
+ - "stitching de eventos"
18
+
19
+ ## Regras absolutas
20
+
21
+ - **trace_id é compartilhado** entre todos os spans de um único request distribuído. **NÃO** mude por hop.
22
+ - **span_id é único por span** — gere novo a cada `startSpan()`. 16 hex chars (8 bytes).
23
+ - **parent_span_id aponta para span pai** — null no root span. Define a árvore.
24
+ - **W3C TraceContext é o padrão** — header HTTP `traceparent: 00-{trace_id}-{span_id}-{flags}`. Adote sempre. B3 é fallback para legacy.
25
+ - **Propague ANTES de fazer call cross-service** — extrair contexto do request inbound, propagar no request outbound. Sem isso, trace quebra.
26
+ - **Stitching ≠ apenas RPC** — também batch jobs, queue messages, lambda invocations, S3 uploads. Carregue `traceparent` em metadata da queue, env var do lambda, header da Step Function.
27
+ - **Sample decision propaga** — bit `01` em flags de `traceparent` significa "sample=true". Decisão tomada no head propaga downstream.
28
+ - **Não invente trace_id** — sempre derive do contexto inbound ou gere via SDK (não `crypto.randomUUID()`).
29
+ - **Spans devem ter `kind`** — `SERVER` (handler de inbound), `CLIENT` (call outbound), `PRODUCER`/`CONSUMER` (queue), `INTERNAL` (subspan dentro do mesmo process).
30
+
31
+ ## Patterns canônicos
32
+
33
+ ### Pattern: extrair contexto inbound + propagar outbound (Node)
34
+
35
+ ```ts
36
+ // PT-BR: handler HTTP — extrai traceparent do request inbound, propaga em call outbound
37
+ import { trace, context, propagation } from '@opentelemetry/api'
38
+
39
+ const tracer = trace.getTracer('orders-service')
40
+
41
+ export async function placeOrder(req: Request) {
42
+ // PT-BR: 1 — extrair contexto inbound do header traceparent
43
+ const inboundContext = propagation.extract(context.active(), req.headers)
44
+
45
+ return tracer.startActiveSpan(
46
+ 'place_order',
47
+ { kind: SpanKind.SERVER },
48
+ inboundContext,
49
+ async (span) => {
50
+ span.setAttribute('user.id', req.user.id)
51
+
52
+ // PT-BR: 2 — fazer call outbound — aqui o traceparent é injetado manualmente via propagation.inject;
53
+ // com fetch/grpc instrumentados a injeção é automática (ver skill opentelemetry-standard)
54
+ const outboundHeaders: Record<string, string> = {}
55
+ propagation.inject(context.active(), outboundHeaders)
56
+
57
+ const inventoryRes = await fetch('http://inventory/check', {
58
+ headers: outboundHeaders, // PT-BR: traceparent injetado aqui
59
+ body: JSON.stringify({ items: req.items })
60
+ })
61
+
62
+ span.end()
63
+ return inventoryRes.json()
64
+ }
65
+ )
66
+ }
67
+ ```
68
+
69
+ ### Pattern: traceparent format
70
+
71
+ ```text
72
+ traceparent: 00-4bf92f3577b34da6a3ce929d0e0e4736-00f067aa0ba902b7-01
73
+ ^ ^ ^ ^
74
+ | | | |
75
+ version | flags (sampled bit)
76
+ trace_id (32 hex / 16 bytes) |
77
+ span_id (16 hex / 8 bytes)
78
+ ```
79
+
80
+ ```text
81
+ flags:
82
+ 01 = sampled (decisão upstream: capture este trace)
83
+ 00 = not sampled (decisão upstream: skip)
84
+ ```
85
+
86
+ ### Pattern: trace cross-service via Supabase Edge Function
87
+
88
+ ```ts
89
+ // PT-BR: Edge Function recebe request → propaga para outro service
90
+ import { trace, context, propagation } from 'npm:@opentelemetry/api@1.9.0'
91
+ import { W3CTraceContextPropagator } from 'npm:@opentelemetry/core@1.27.0'
92
+
93
+ propagation.setGlobalPropagator(new W3CTraceContextPropagator())
94
+
95
+ const tracer = trace.getTracer('edge-orders')
96
+
97
+ Deno.serve(async (req) => {
98
+ // PT-BR: extrair traceparent inbound
99
+ const inboundCtx = propagation.extract(context.active(), {
100
+ traceparent: req.headers.get('traceparent') ?? '',
101
+ })
102
+
103
+ return tracer.startActiveSpan(
104
+ 'edge_handler',
105
+ { kind: 1 /* SERVER */ },
106
+ inboundCtx,
107
+ async (span) => {
108
+ span.setAttribute('endpoint', new URL(req.url).pathname)
109
+
110
+ // PT-BR: call outbound para Postgres via PostgREST — injeta traceparent
111
+ const outHeaders: Record<string, string> = {}
112
+ propagation.inject(context.active(), outHeaders)
113
+
114
+ const dbRes = await fetch(Deno.env.get('SUPABASE_URL') + '/rest/v1/orders', {
115
+ method: 'POST',
116
+ headers: {
117
+ ...outHeaders,
118
+ 'apikey': Deno.env.get('SUPABASE_ANON_KEY')!,
119
+ 'content-type': 'application/json',
120
+ },
121
+ body: await req.text(),
122
+ })
123
+
124
+ span.setAttribute('db.status_code', dbRes.status)
125
+ span.end()
126
+ return dbRes
127
+ }
128
+ )
129
+ })
130
+ ```
131
+
132
+ ### Pattern: stitching além de RPC — queue message (não-RPC)
133
+
134
+ ```ts
135
+ // PT-BR: producer — anexa traceparent ao payload da queue (pgmq, SQS, RabbitMQ)
136
+ import { trace, context, propagation } from '@opentelemetry/api'
137
+
138
+ const tracer = trace.getTracer('producer')
139
+
140
+ export async function enqueueEmail(emailJob: EmailJob) {
141
+ return tracer.startActiveSpan(
142
+ 'enqueue_email',
143
+ { kind: SpanKind.PRODUCER },
144
+ async (span) => {
145
+ span.setAttribute('queue.name', 'emails')
146
+ span.setAttribute('email.recipient', emailJob.to)
147
+
148
+ // PT-BR: serializar contexto no payload da mensagem
149
+ const carrier: Record<string, string> = {}
150
+ propagation.inject(context.active(), carrier)
151
+
152
+ await pgmqEnqueue('emails', {
153
+ ...emailJob,
154
+ _trace_context: carrier, // PT-BR: viaja com o job
155
+ })
156
+
157
+ span.end()
158
+ }
159
+ )
160
+ }
161
+
162
+ // PT-BR: consumer — extrai traceparent do payload, continua o trace
163
+ export async function processEmailJob(job: EmailJobWithContext) {
164
+ const inboundCtx = propagation.extract(
165
+ context.active(),
166
+ job._trace_context ?? {} // PT-BR: se vazio, novo trace
167
+ )
168
+
169
+ return tracer.startActiveSpan(
170
+ 'process_email',
171
+ { kind: SpanKind.CONSUMER },
172
+ inboundCtx,
173
+ async (span) => {
174
+ span.setAttribute('email.recipient', job.to)
175
+ // PT-BR: agora o span do worker faz parte do mesmo trace do producer
176
+ await sendEmail(job)
177
+ span.end()
178
+ }
179
+ )
180
+ }
181
+ ```
182
+
183
+ ### Pattern: stitching de batch job (não-RPC)
184
+
185
+ ```ts
186
+ // PT-BR: cron job processa N items — 1 span por item, todos com mesmo trace_id
187
+ const tracer = trace.getTracer('billing-cron')
188
+
189
+ export async function dailyBillingJob() {
190
+ return tracer.startActiveSpan('daily_billing', async (rootSpan) => {
191
+ rootSpan.setAttribute('job.type', 'cron')
192
+ rootSpan.setAttribute('build_id', BUILD_ID)
193
+
194
+ const customers = await db.getCustomersDueForBilling()
195
+ rootSpan.setAttribute('customers.count', customers.length)
196
+
197
+ // PT-BR: cada customer vira span filho com mesmo trace_id
198
+ for (const customer of customers) {
199
+ await tracer.startActiveSpan(
200
+ 'bill_customer',
201
+ { kind: SpanKind.INTERNAL },
202
+ async (span) => {
203
+ span.setAttribute('customer.id', customer.id)
204
+ span.setAttribute('customer.tier', customer.tier)
205
+ try {
206
+ await chargeCustomer(customer)
207
+ span.setAttribute('result.success', true)
208
+ } catch (e) {
209
+ span.setAttribute('result.success', false)
210
+ span.setAttribute('error.type', classify(e))
211
+ } finally {
212
+ span.end()
213
+ }
214
+ }
215
+ )
216
+ }
217
+
218
+ rootSpan.end()
219
+ })
220
+ }
221
+ ```
222
+
223
+ ### Pattern: span kinds
224
+
225
+ | Kind | Quando usar | Exemplo |
226
+ |---|---|---|
227
+ | `SERVER` | Recebendo request inbound | Handler HTTP, gRPC server method |
228
+ | `CLIENT` | Fazendo call outbound | `fetch()`, gRPC client call, DB query |
229
+ | `PRODUCER` | Enviando msg para queue | `pgmq.enqueue()`, SQS publish |
230
+ | `CONSUMER` | Processando msg de queue | Worker recebendo job |
231
+ | `INTERNAL` | Subdivisão dentro do mesmo process | "json_parse", "validation_step" |
232
+
233
+ ### Pattern: query traces — montar waterfall
234
+
235
+ ```sql
236
+ -- PT-BR: pegar todos os spans de um trace em ordem cronológica
237
+ select
238
+ span_id,
239
+ parent_span_id,
240
+ span_name,
241
+ span_kind,
242
+ service_name,
243
+ duration_ms,
244
+ start_time
245
+ from observability.spans
246
+ where trace_id = '4bf92f3577b34da6a3ce929d0e0e4736'
247
+ order by start_time asc;
248
+
249
+ -- PT-BR: encontrar root span — parent_span_id IS NULL (esta query não cobre o caso de span cujo parent não existe no mesmo trace)
250
+ select *
251
+ from observability.spans
252
+ where trace_id = '4bf92f3577b34da6a3ce929d0e0e4736'
253
+ and parent_span_id is null;
254
+
255
+ -- PT-BR: spans mais lentos cross-trace, última 1h
256
+ select
257
+ service_name,
258
+ span_name,
259
+ percentile_cont(0.99) within group (order by duration_ms) as p99,
260
+ count(*) as samples
261
+ from observability.spans
262
+ where start_time > now() - interval '1 hour'
263
+ group by service_name, span_name
264
+ having count(*) > 100
265
+ order by p99 desc
266
+ limit 20;
267
+ ```
268
+
269
+ ## Anti-patterns
270
+
271
+ ### ANTI: gerar trace_id por hop
272
+
273
+ ```ts
274
+ // PT-BR: BAD — quebra a cadeia, cada service vê trace diferente
275
+ const traceId = crypto.randomUUID().replace(/-/g, '').slice(0, 32)
276
+
277
+ // PT-BR: GOOD — extrair do header inbound; deixar SDK gerar root
278
+ const inboundCtx = propagation.extract(context.active(), req.headers)
279
+ tracer.startActiveSpan('handler', {}, inboundCtx, ...)
280
+ ```
281
+
282
+ ### ANTI: esquecer de propagar em call outbound
283
+
284
+ ```ts
285
+ // PT-BR: BAD — outbound call sem traceparent — trace quebra no service B
286
+ await fetch('http://service-b/api', { body: ... })
287
+
288
+ // PT-BR: GOOD — injetar traceparent
289
+ const headers: Record<string, string> = {}
290
+ propagation.inject(context.active(), headers)
291
+ await fetch('http://service-b/api', { headers, body: ... })
292
+ ```
293
+
294
+ ### ANTI: trace só de RPCs, não de batch/queue
295
+
296
+ ```ts
297
+ // PT-BR: BAD — producer/consumer não compartilham trace, debug fica fragmentado
298
+ await pgmqEnqueue('emails', payload) // sem trace context
299
+ // ... depois worker processa sem saber que veio do request X
300
+
301
+ // PT-BR: GOOD — propagar contexto via metadata da queue
302
+ const carrier = {}
303
+ propagation.inject(context.active(), carrier)
304
+ await pgmqEnqueue('emails', { ...payload, _trace_context: carrier })
305
+ ```
306
+
307
+ ### ANTI: span sem `end()`
308
+
309
+ ```ts
310
+ // PT-BR: BAD — span fica aberto forever, duration_ms não calculado, memory leak
311
+ const span = tracer.startSpan('handler')
312
+ // ... handler logic
313
+ return result // PT-BR: ESQUECEU span.end()
314
+
315
+ // PT-BR: GOOD — sempre `try/finally`
316
+ const span = tracer.startSpan('handler')
317
+ try {
318
+ // ... logic
319
+ } finally {
320
+ span.end()
321
+ }
322
+ ```
323
+
324
+ ### ANTI: span hierarchy errada
325
+
326
+ ```ts
327
+ // PT-BR: BAD — usar startSpan sem startActiveSpan, parent não é setado automaticamente
328
+ const parent = tracer.startSpan('parent')
329
+ const child = tracer.startSpan('child') // PT-BR: parent_span_id ficou null
330
+ parent.end()
331
+ child.end()
332
+
333
+ // PT-BR: GOOD — startActiveSpan empurra contexto, child herda parent
334
+ tracer.startActiveSpan('parent', (parent) => {
335
+ tracer.startActiveSpan('child', (child) => {
336
+ // PT-BR: child.parent_span_id === parent.span_id
337
+ child.end()
338
+ })
339
+ parent.end()
340
+ })
341
+ ```
342
+
343
+ ## Verificação
344
+
345
+ 1. **1 trace_id por request** — enviar 1 request, queryar `SELECT DISTINCT trace_id FROM spans WHERE request_id = X` → 1 resultado.
346
+ 2. **Cross-service stitching** — request HTTP service A → service B → DB. Queryar `SELECT count(distinct service_name) FROM spans WHERE trace_id = X` → ≥ 3.
347
+ 3. **Root span identificável** — `SELECT * FROM spans WHERE trace_id = X AND parent_span_id IS NULL` → 1 row (o root).
348
+ 4. **Span hierarchy correta** — graficar via tool (Jaeger UI, Honeycomb, etc.) ou SQL recursivo — deve formar árvore válida (sem ciclos).
349
+ 5. **Duration não-negativa** — `SELECT min(duration_ms), max(duration_ms) FROM spans` — min ≥ 0, max razoável.
350
+ 6. **Sampled flag respeitado** — verificar que se traceparent inbound = `01`, downstream também sample=true.
351
+ 7. **Queue stitching funciona** — enqueue + consume → mesmo `trace_id` em ambos os spans.
352
+
353
+ ---
354
+
355
+ ## Ver também
356
+
357
+ - `kit/skills/_shared-observability/glossary.md` — W3C TraceContext, B3, span kinds
358
+ - `kit/skills/structured-events/SKILL.md` — atributos canônicos por span
359
+ - `kit/skills/opentelemetry-standard/SKILL.md` — SDK que faz extract/inject
360
+ - `kit/skills/telemetry-sampling/SKILL.md` *(Phase 34)* — decisão de sampling head vs tail
361
+
362
+ *Material-fonte: Observability Engineering (O'Reilly, 2022) — Cap 6: "Stitching Events into Traces".*