npm - hatch3r - Versions diffs - 1.7.5 → 1.8.0 - Mend

hatch3r 1.7.5 → 1.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (75) hide show

package/README.md +2 -2
package/agents/hatch3r-context-rules.md +22 -6
package/agents/hatch3r-creator.md +2 -1
package/agents/hatch3r-handoff-loader.md +1 -1
package/agents/hatch3r-implementer.md +8 -0
package/agents/hatch3r-learnings-loader.md +1 -1
package/agents/hatch3r-reviewer.md +2 -0
package/agents/shared/user-content-templates.md +31 -1
package/commands/hatch3r-agent-customize.md +4 -0
package/commands/hatch3r-api-spec.md +7 -0
package/commands/hatch3r-benchmark.md +7 -0
package/commands/hatch3r-board-fill.md +7 -0
package/commands/hatch3r-board-groom.md +4 -0
package/commands/hatch3r-board-init.md +51 -0
package/commands/hatch3r-board-pickup.md +8 -0
package/commands/hatch3r-board-refresh.md +4 -0
package/commands/hatch3r-board-shared.md +6 -6
package/commands/hatch3r-bug-plan.md +7 -0
package/commands/hatch3r-codebase-map.md +8 -0
package/commands/hatch3r-command-customize.md +4 -0
package/commands/hatch3r-context-health.md +5 -0
package/commands/hatch3r-create.md +57 -4
package/commands/hatch3r-debug.md +7 -0
package/commands/hatch3r-dep-audit.md +4 -0
package/commands/hatch3r-feature-plan.md +7 -0
package/commands/hatch3r-handoff.md +7 -0
package/commands/hatch3r-healthcheck.md +4 -0
package/commands/hatch3r-hooks.md +4 -0
package/commands/hatch3r-learn.md +16 -0
package/commands/hatch3r-migration-plan.md +7 -0
package/commands/hatch3r-onboard.md +7 -0
package/commands/hatch3r-pr-resolve.md +8 -1
package/commands/hatch3r-project-spec.md +8 -0
package/commands/hatch3r-quick-change.md +7 -0
package/commands/hatch3r-recipe.md +4 -0
package/commands/hatch3r-refactor-plan.md +7 -0
package/commands/hatch3r-release.md +5 -0
package/commands/hatch3r-revision.md +7 -0
package/commands/hatch3r-roadmap.md +8 -0
package/commands/hatch3r-rule-customize.md +4 -0
package/commands/hatch3r-security-audit.md +4 -0
package/commands/hatch3r-skill-customize.md +4 -0
package/commands/hatch3r-test-plan.md +7 -0
package/commands/hatch3r-workflow.md +9 -1
package/dist/cli/index.js +2600 -777
package/dist/cli/index.js.map +1 -1
package/package.json +8 -5
package/rules/hatch3r-agent-orchestration-detail.md +3 -0
package/rules/hatch3r-agent-orchestration-detail.mdc +3 -0
package/rules/hatch3r-agent-orchestration.md +25 -2
package/rules/hatch3r-agent-orchestration.mdc +25 -2
package/rules/hatch3r-iteration-summary.md +2 -0
package/rules/hatch3r-iteration-summary.mdc +2 -0
package/rules/hatch3r-observability-tracing-detail.md +7 -148
package/rules/hatch3r-observability-tracing-detail.mdc +6 -148
package/rules/hatch3r-observability-tracing.md +154 -6
package/rules/hatch3r-observability-tracing.mdc +154 -6
package/skills/hatch3r-agent-customize/SKILL.md +10 -0
package/skills/hatch3r-ai-feature/SKILL.md +2 -0
package/skills/hatch3r-api-spec/SKILL.md +68 -0
package/skills/hatch3r-cli-csvkit/SKILL.md +2 -2
package/skills/hatch3r-cli-duckdb/SKILL.md +3 -3
package/skills/hatch3r-cli-jq/SKILL.md +4 -0
package/skills/hatch3r-cli-miller/SKILL.md +2 -2
package/skills/hatch3r-cli-overview/SKILL.md +1 -1
package/skills/{hatch3r-cli-xsv → hatch3r-cli-qsv}/SKILL.md +20 -18
package/skills/hatch3r-cli-stagehand/SKILL.md +48 -16
package/skills/hatch3r-command-customize/SKILL.md +10 -0
package/skills/hatch3r-customize/SKILL.md +3 -0
package/skills/hatch3r-design-system-detect/SKILL.md +2 -0
package/skills/hatch3r-observability-verify/SKILL.md +4 -3
package/skills/hatch3r-reliability-verify/SKILL.md +2 -0
package/skills/hatch3r-rule-customize/SKILL.md +10 -0
package/skills/hatch3r-skill-customize/SKILL.md +10 -0
package/skills/hatch3r-ui-ux-verify/SKILL.md +2 -0

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "hatch3r",
-  "version": "1.7.5",
+  "version": "1.8.0",
   "description": "Battle-tested agentic coding setup framework. One command to hatch your agent stack -- agents, skills, rules, commands, and MCP for every major AI coding tool.",
   "type": "module",
   "exports": {
@@ -22,17 +22,19 @@
     "test:watch": "vitest",
     "inventory": "tsx scripts/inventory.ts",
     "inventory:check-docs": "tsx scripts/inventory.ts --check-docs",
-    "validate:rule-parity": "tsx scripts/validate-rule-parity.ts",
-    "validate:efficiency": "tsx scripts/validate-efficiency-invariants.ts",
+    "validate:rule-parity": "tsx scripts/validate-rule-parity.ts && tsx scripts/validate-rule-pillar-currency.ts",
+    "validate:efficiency": "tsx scripts/validate-efficiency-invariants.ts && tsx scripts/validate-bridge-budget.ts && tsx scripts/validate-fanout-emission.ts",
     "validate:cli-skills": "tsx scripts/validate-cli-skills.ts",
+    "validate:wiring": "tsx scripts/validate-wiring.ts",
     "generate:cli-skills": "tsx scripts/generate-cli-skills.ts",
-    "validate": "npm run validate:rule-parity && npm run validate:efficiency && npm run validate:cli-skills",
+    "validate": "npm run validate:rule-parity && npm run validate:efficiency && npm run validate:cli-skills && npm run validate:wiring",
     "audit:validate-registry": "tsx scripts/validate-finding-registry.ts",
     "audit:migrate": "tsx scripts/migrate-finding-registry.ts",
     "audit:archive": "tsx scripts/audit-archive.ts",
     "audit:find": "tsx scripts/audit-find.ts",
     "audit:reset": "tsx scripts/clean-audit-workspace.ts",
-    "lockfile:check": "lockfile-lint --path package-lock.json --type npm --allowed-hosts npm --validate-https"
+    "lockfile:check": "lockfile-lint --path package-lock.json --type npm --allowed-hosts npm --validate-https",
+    "mcp:cve-check": "tsx scripts/check-mcp-cves.ts"
   },
   "keywords": [
     "agents",
@@ -90,6 +92,7 @@
     "commander": "^14.0.3",
     "inquirer": "^13.3.2",
     "ora": "^9.3.0",
+    "p-limit": "^3.1.0",
     "proper-lockfile": "^4.1.2",
     "update-notifier": "^7.3.1",
     "yaml": "^2.8.3"

package/rules/hatch3r-agent-orchestration-detail.md CHANGED Viewed

@@ -5,8 +5,11 @@ description: Extended orchestration reference — PipelineContext schemas, resil
 scope: conditional
 globs: "**/.agents/**,**/pipeline/**,**/*orchestrat*,**/*agent*"
 tags: [core]
+precedence: normal
 quality_charter: agents/shared/quality-charter.md
 cache_friendly: true
+detail_rule: true
+consumed_by: hatch3r-agent-orchestration
 ---
 # Agent Orchestration — Extended Reference

package/rules/hatch3r-agent-orchestration-detail.mdc CHANGED Viewed

@@ -2,6 +2,9 @@
 description: Extended orchestration reference — PipelineContext schemas, resilience protocols, observability integration, and auto-mode guardrails
 globs: ["**/.agents/**", "**/pipeline/**", "**/*orchestrat*", "**/*agent*"]
 alwaysApply: false
+precedence: normal
+detail_rule: true
+consumed_by: hatch3r-agent-orchestration
 ---
 # Agent Orchestration — Extended Reference

package/rules/hatch3r-agent-orchestration.md CHANGED Viewed

@@ -4,6 +4,7 @@ type: rule
 description: Mandatory agent delegation, skill loading, and subagent usage directives for ALL tasks in ALL contexts
 scope: always
 tags: [core]
+precedence: high
 quality_charter: agents/shared/quality-charter.md
 cache_friendly: true
 ---
@@ -103,6 +104,28 @@ Examples:
 A missing header on a tracked Tier >= 2 task is a self-detectable drift signal — the user may halt the turn and request re-grounding. The header also functions as a per-reply cache prime: rendering it forces the orchestrator to re-resolve which phase it is in before choosing tools. Tier 1 tasks, read-only answers, and chat-only iterations do NOT require the header.
+### End-of-Turn Delegation Attestation
+When the turn is on a tracked task at Tier >= 2 AND caused at least one file mutation, the orchestrator MUST emit a closing block immediately before the Iteration Summary. The block enumerates every file mutated this turn, the spawning sub-agent invocation, and the `delegation_proof_id` returned by that sub-agent.
+Format:
+```
+[hatch3r-delegation-attestation]
+files_mutated_this_turn:
+  - <relative path>: via <agent-name> (proof: <delegation_proof_id>)
+mutating_subagent_invocations: <integer>
+inline_edits_by_orchestrator: none | <carve-out: hatch3r-quick-change Tier-1 + queued re-delegation>
+```
+Rules:
+- Each `files_mutated_this_turn` row MUST cite the spawning sub-agent invocation and quote the `delegation_proof_id` returned by that sub-agent verbatim. Unattributable rows are self-declared P8 B2 violations and the orchestrator MUST queue re-delegation in the next turn.
+- `inline_edits_by_orchestrator: none` is the only acceptable value outside the `hatch3r-quick-change` Tier-1 carve-out declared in the "Inline implementation" definition above.
+- Tier 1 tasks, read-only answers, and chat-only turns are exempt — same scope as the Per-Turn Pipeline-State Header.
+- Missing block on a Tier >= 2 mutating turn is a self-detectable drift signal — the user may halt the turn and re-ground per the same protocol as the missing-header signal.
+- The block is consumed by reviewers and the next orchestrator turn; it sits beside the Iteration Summary, not inside it, preserving the existing 5-field iteration-summary contract verbatim.
 ### Mandatory Delegation Directive (No Inline Implementation)
 Restating with maximum clarity for sub-agent prompt inclusion: the orchestrator MUST NOT call `Edit`, `Write`, `MultiEdit`, `NotebookEdit`, `replace_string_in_file`, `multi_replace_string_in_file`, `create_file`, `str_replace_based_edit_tool`, `apply_patch`, or any platform-equivalent code-writing tool from its own turn. The only path for code mutation is the Task tool spawning `hatch3r-implementer` (Phase 2) or `hatch3r-fixer` (Phase 3). Carve-out: `hatch3r-quick-change` Tier 1 trivial items per its declared scope. No other carve-out exists. Violations are bypass mode (see issue #73) — surface them by halting the turn and re-delegating.
@@ -132,14 +155,14 @@ For multi-sub-task implementations, the implementer performs a lightweight mini-
 1. Spawn `hatch3r-reviewer` with diff and acceptance criteria. Reviewer includes blast radius summary.
 2. Critical/Warning findings: spawn `hatch3r-fixer` with full reviewer output.
-3. Re-review after fixes. Repeat until 0 Critical + 0 Warning, or max 3 iterations.
+3. Re-review after fixes. Repeat until 0 Critical + 0 Warning, or max 4 iterations (matches `DEFAULT_MAX_REVIEW_ITERATIONS` in `src/pipeline/reviewLoop.ts`; raised from 3 to 4 in Cycle 7.5 W2B2 finding H26 so the oscillation detector becomes reachable in default config). The rule default and the code constant are kept in sync by `src/__tests__/pipeline/reviewLoop.test.ts` (CI-enforced).
 4. **Confirmation pass** after clean review: lightweight re-review for fix-driven regressions and acceptance criteria completeness. The confirmation pass checks only: (a) no new test failures compared to Phase 2 baseline, (b) no type errors introduced, (c) acceptance criteria from the issue are still met. It does not re-run the full review checklist.
 5. Max iterations reached: surface to user with a structured summary: iteration count, remaining Critical findings (with file:line), remaining Warning findings, and a recommendation (fix manually vs. accept risk). Never present raw reviewer output without summarization.
 6. **Review gate confidence signal:** When the review loop exits with a clean verdict, record the iteration count in `PipelineContext.reviewResult.iterations`. Clean-on-first-pass (iteration 1) signals higher confidence than clean-after-multiple-iterations (iterations 2-4). Phase 4 specialists and the orchestrator should factor this into their risk assessment.
 **Phase 4 — Final Quality** (after review loop is clean):
-Launch parallel subagents -- no artificial concurrency limit.
+Launch Phase 4 specialists in parallel, bounded by `max_phase4_parallel` (default `3`, override via `HATCH3R_MAX_PHASE4_PARALLEL` env var; valid range 1-16, values outside the range fall back to default with a logged warning). The bound exists to cap per-orchestrator concurrent context cost — it does not soften the P8 B2 directive that fan-out scales with task decomposition. When the number of applicable specialists exceeds `max_phase4_parallel`, batch them by severity-descending priority: `CRITICAL → HIGH → MEDIUM → LOW` (severity is the worst-case finding class the specialist is expected to surface: the always-on baseline `hatch3r-test-writer` / `hatch3r-security-auditor` → CRITICAL, conditional UI/security/perf → HIGH, docs/lint → MEDIUM, low-impact specialists → LOW). Within the same severity bucket, dispatch follows the order of the trigger table above. Each batch runs to completion (all specialists return SUCCESS/PARTIAL/FAILED) before the next batch starts; the validation pass below runs once after the final batch.
 - **Always** (except when Phase Skip Criteria applies — see below)**:** `hatch3r-test-writer`, `hatch3r-security-auditor`
 - **Evaluate:** `hatch3r-docs-writer` (when APIs/architecture/UX affected)

package/rules/hatch3r-agent-orchestration.mdc CHANGED Viewed

@@ -1,6 +1,7 @@
 ---
 description: Mandatory agent delegation, skill loading, and subagent usage directives for ALL tasks in ALL contexts
 alwaysApply: true
+precedence: high
 ---
 # Agent Orchestration
@@ -98,6 +99,28 @@ Examples:
 A missing header on a tracked Tier >= 2 task is a self-detectable drift signal — the user may halt the turn and request re-grounding. The header also functions as a per-reply cache prime: rendering it forces the orchestrator to re-resolve which phase it is in before choosing tools. Tier 1 tasks, read-only answers, and chat-only iterations do NOT require the header.
+### End-of-Turn Delegation Attestation
+When the turn is on a tracked task at Tier >= 2 AND caused at least one file mutation, the orchestrator MUST emit a closing block immediately before the Iteration Summary. The block enumerates every file mutated this turn, the spawning sub-agent invocation, and the `delegation_proof_id` returned by that sub-agent.
+Format:
+```
+[hatch3r-delegation-attestation]
+files_mutated_this_turn:
+  - <relative path>: via <agent-name> (proof: <delegation_proof_id>)
+mutating_subagent_invocations: <integer>
+inline_edits_by_orchestrator: none | <carve-out: hatch3r-quick-change Tier-1 + queued re-delegation>
+```
+Rules:
+- Each `files_mutated_this_turn` row MUST cite the spawning sub-agent invocation and quote the `delegation_proof_id` returned by that sub-agent verbatim. Unattributable rows are self-declared P8 B2 violations and the orchestrator MUST queue re-delegation in the next turn.
+- `inline_edits_by_orchestrator: none` is the only acceptable value outside the `hatch3r-quick-change` Tier-1 carve-out declared in the "Inline implementation" definition above.
+- Tier 1 tasks, read-only answers, and chat-only turns are exempt — same scope as the Per-Turn Pipeline-State Header.
+- Missing block on a Tier >= 2 mutating turn is a self-detectable drift signal — the user may halt the turn and re-ground per the same protocol as the missing-header signal.
+- The block is consumed by reviewers and the next orchestrator turn; it sits beside the Iteration Summary, not inside it, preserving the existing 5-field iteration-summary contract verbatim.
 ### Mandatory Delegation Directive (No Inline Implementation)
 Restating with maximum clarity for sub-agent prompt inclusion: the orchestrator MUST NOT call `Edit`, `Write`, `MultiEdit`, `NotebookEdit`, `replace_string_in_file`, `multi_replace_string_in_file`, `create_file`, `str_replace_based_edit_tool`, `apply_patch`, or any platform-equivalent code-writing tool from its own turn. The only path for code mutation is the Task tool spawning `hatch3r-implementer` (Phase 2) or `hatch3r-fixer` (Phase 3). Carve-out: `hatch3r-quick-change` Tier 1 trivial items per its declared scope. No other carve-out exists. Violations are bypass mode (see issue #73) — surface them by halting the turn and re-delegating.
@@ -127,14 +150,14 @@ For multi-sub-task implementations, the implementer performs a lightweight mini-
 1. Spawn `hatch3r-reviewer` with diff and acceptance criteria. Reviewer includes blast radius summary.
 2. Critical/Warning findings: spawn `hatch3r-fixer` with full reviewer output.
-3. Re-review after fixes. Repeat until 0 Critical + 0 Warning, or max 3 iterations.
+3. Re-review after fixes. Repeat until 0 Critical + 0 Warning, or max 4 iterations (matches `DEFAULT_MAX_REVIEW_ITERATIONS` in `src/pipeline/reviewLoop.ts`; raised from 3 to 4 in Cycle 7.5 W2B2 finding H26 so the oscillation detector becomes reachable in default config). The rule default and the code constant are kept in sync by `src/__tests__/pipeline/reviewLoop.test.ts` (CI-enforced).
 4. **Confirmation pass** after clean review: lightweight re-review for fix-driven regressions and acceptance criteria completeness. The confirmation pass checks only: (a) no new test failures compared to Phase 2 baseline, (b) no type errors introduced, (c) acceptance criteria from the issue are still met. It does not re-run the full review checklist.
 5. Max iterations reached: surface to user with a structured summary: iteration count, remaining Critical findings (with file:line), remaining Warning findings, and a recommendation (fix manually vs. accept risk). Never present raw reviewer output without summarization.
 6. **Review gate confidence signal:** When the review loop exits with a clean verdict, record the iteration count in `PipelineContext.reviewResult.iterations`. Clean-on-first-pass (iteration 1) signals higher confidence than clean-after-multiple-iterations (iterations 2-4). Phase 4 specialists and the orchestrator should factor this into their risk assessment.
 **Phase 4 — Final Quality** (after review loop is clean):
-Launch parallel subagents -- no artificial concurrency limit.
+Launch Phase 4 specialists in parallel, bounded by `max_phase4_parallel` (default `3`, override via `HATCH3R_MAX_PHASE4_PARALLEL` env var; valid range 1-16, values outside the range fall back to default with a logged warning). The bound exists to cap per-orchestrator concurrent context cost — it does not soften the P8 B2 directive that fan-out scales with task decomposition. When the number of applicable specialists exceeds `max_phase4_parallel`, batch them by severity-descending priority: `CRITICAL → HIGH → MEDIUM → LOW` (severity is the worst-case finding class the specialist is expected to surface: the always-on baseline `hatch3r-test-writer` / `hatch3r-security-auditor` → CRITICAL, conditional UI/security/perf → HIGH, docs/lint → MEDIUM, low-impact specialists → LOW). Within the same severity bucket, dispatch follows the order of the trigger table above. Each batch runs to completion (all specialists return SUCCESS/PARTIAL/FAILED) before the next batch starts; the validation pass below runs once after the final batch.
 - **Always** (except when Phase Skip Criteria applies — see below)**:** `hatch3r-test-writer`, `hatch3r-security-auditor`
 - **Evaluate:** `hatch3r-docs-writer` (when APIs/architecture/UX affected)

package/rules/hatch3r-iteration-summary.md CHANGED Viewed

@@ -67,6 +67,8 @@ Append only when they carry information. Do not include empty headers.
 **Suggested Next Action:** {one line}
 ```
+The **End-of-Turn Delegation Attestation** (defined in `hatch3r-agent-orchestration` -> End-of-Turn Delegation Attestation) is conditionally required and appears immediately BEFORE this Iteration Summary block. It applies when the turn is on a Tier >= 2 tracked task AND caused at least one file mutation. The Iteration Summary's 5-field contract is unchanged — the Attestation lives in a separate block to preserve backward compatibility for the 15 adapter outputs.
 ## Field Semantics
 - **Outcome** is one sentence. The user should grasp what happened from this line alone.

package/rules/hatch3r-iteration-summary.mdc CHANGED Viewed

@@ -62,6 +62,8 @@ Append only when they carry information. Do not include empty headers.
 **Suggested Next Action:** {one line}
 ```
+The **End-of-Turn Delegation Attestation** (defined in `hatch3r-agent-orchestration` -> End-of-Turn Delegation Attestation) is conditionally required and appears immediately BEFORE this Iteration Summary block. It applies when the turn is on a Tier >= 2 tracked task AND caused at least one file mutation. The Iteration Summary's 5-field contract is unchanged — the Attestation lives in a separate block to preserve backward compatibility for the 15 adapter outputs.
 ## Field Semantics
 - **Outcome** is one sentence. The user should grasp what happened from this line alone.

package/rules/hatch3r-observability-tracing-detail.md CHANGED Viewed

@@ -1,161 +1,20 @@
 ---
 id: hatch3r-observability-tracing-detail
 type: rule
-description: Extended tracing reference -- AI agent instrumentation, tool call audit trails, LLM request tracing, and correlation ID patterns
+description: "[Deprecated] AI agent tracing detail rule -- consolidated into hatch3r-observability-tracing's AI Agent Instrumentation section"
 scope: conditional
 globs: "**/*trac*,**/*span*,**/*telemetry*,**/*otel*,**/*agent*,**/observability/**,**/routes/**,**/handlers/**,**/services/**,**/api/**,**/middleware/**,**/controllers/**,**/lib/**"
 tags: [devops]
 quality_charter: agents/shared/quality-charter.md
+deprecated: true
 cache_friendly: true
 ---
-# Observability -- Tracing Extended Reference
+# Observability -- Tracing Extended Reference (Deprecated Redirect)
-On-demand companion to `hatch3r-observability-tracing`. Load when instrumenting AI agent systems, implementing tool call audit trails, or setting up correlation IDs for multi-agent workflows.
+This rule has been merged into `hatch3r-observability-tracing`. Load that rule for AI agent instrumentation, tool call spans, LLM request/response tracing, tool call audit trails, and correlation ID patterns.
-## GenAI Span Attributes
+- See `hatch3r-observability-tracing` § "AI Agent Instrumentation" for: GenAI span attributes, agent invocation spans, tool call spans, LLM request/response tracing, tool call audit trail, correlation IDs for agent workflows.
-Use these attributes on all spans representing interactions with generative AI models:
+<!-- DEPRECATED-CONTENT-REMOVED -->
-| Attribute | Type | Description | Example |
-|-----------|------|-------------|---------|
-| `gen_ai.system` | string | GenAI provider system name | `openai`, `anthropic`, `azure_openai` |
-| `gen_ai.request.model` | string | Model name as specified in the request | `gpt-4o`, `claude-sonnet-4-20250514` |
-| `gen_ai.response.model` | string | Model name as returned in the response | `gpt-4o-2024-08-06` |
-| `gen_ai.request.max_tokens` | int | Maximum tokens requested for generation | `4096` |
-| `gen_ai.request.temperature` | float | Temperature parameter | `0.7` |
-| `gen_ai.response.finish_reasons` | string[] | Reasons the model stopped generating | `["stop"]`, `["length"]` |
-| `gen_ai.usage.input_tokens` | int | Tokens in the input/prompt | `1250` |
-| `gen_ai.usage.output_tokens` | int | Tokens in the generated output | `530` |
-- Always set `gen_ai.system` and `gen_ai.request.model` on every GenAI span.
-- Record `gen_ai.usage.input_tokens` and `gen_ai.usage.output_tokens` from the API response for cost dashboards.
-- Use `gen_ai.response.finish_reasons` to detect truncated outputs (`length`) and trigger re-prompting.
-## Agent Invocation Spans
-Instrument the full lifecycle of an agent invocation with a dedicated span. This span is the parent for all LLM calls, tool executions, and sub-agent delegations.
-- **Span name pattern:** `agent.{agent_name}.invoke`
-- **Required attributes:** `agent.id`, `agent.name`, `agent.parent_id`, `agent.task`, `agent.framework`
-- **Span events for state transitions:** `agent.planning`, `agent.tool_selection`, `agent.awaiting_human`, `agent.delegating`, `agent.completed`, `agent.error`
-```typescript
-const agentSpan = tracer.startSpan('agent.code_reviewer.invoke', {
-  attributes: {
-    'agent.id': invocationId,
-    'agent.name': 'code_reviewer',
-    'agent.parent_id': parentAgentId ?? '',
-    'agent.task': `review PR #${prNumber}`,
-    'agent.framework': 'custom',
-  },
-});
-agentSpan.addEvent('agent.planning');
-// ... agent reasoning and tool calls happen as child spans ...
-agentSpan.addEvent('agent.completed');
-agentSpan.end();
-```
-## Tool Call Spans
-Every tool invocation by an agent creates a child span of the agent invocation span.
-- **Span name pattern:** `tool.{tool_name}.execute`
-- **Required attributes:** `tool.name`, `tool.input_hash` (SHA-256), `tool.output_status`, `tool.duration_ms`, `tool.parameters_count`
-- Tool spans must be children of the invoking agent span. Set span status to `ERROR` when `tool.output_status` is `error` or `timeout`.
-- For tools performing I/O, create nested child spans using appropriate semantic conventions (`http.*`, `db.*`).
-```typescript
-const toolSpan = tracer.startSpan(
-  'tool.git_diff.execute',
-  { attributes: { 'tool.name': 'git_diff' } },
-  trace.setSpan(context.active(), agentSpan),
-);
-try {
-  const result = await tools.gitDiff(params);
-  toolSpan.setAttributes({
-    'tool.output_status': 'success',
-    'tool.duration_ms': performance.now() - startTime,
-    'tool.input_hash': hashInput(params),
-  });
-} catch (err) {
-  toolSpan.setAttributes({ 'tool.output_status': 'error' });
-  toolSpan.setStatus({ code: SpanStatusCode.ERROR, message: err.message });
-  toolSpan.recordException(err);
-  throw err;
-} finally {
-  toolSpan.end();
-}
-```
-## LLM Request/Response Tracing
-- **Span name pattern:** `gen_ai.{operation}` (e.g., `gen_ai.chat`, `gen_ai.completion`)
-- **Token tracking:** Capture `gen_ai.usage.input_tokens` and `gen_ai.usage.output_tokens`. Aggregate in metrics: Counter `gen_ai.tokens_total` with labels `{direction, model, agent_name}`, Histogram `gen_ai.request_duration_ms`.
-- **Model version tracking:** Record both `gen_ai.request.model` and `gen_ai.response.model` for drift detection.
-- **Retry spans:** Each retry attempt is a separate child span. Set `gen_ai.request.retries` on the final span. Record `http.response.status_code` on failed spans (429 vs 500+).
-- Never log raw prompt content or full model responses as span attributes. Use token counts for cost tracking and correlated logs for prompt debugging in non-production environments.
-- Sample GenAI spans at 50-100% in production (higher than general spans) because each call is expensive and low volume.
-## Tool Call Audit Trail
-Maintain a structured audit log for every tool invocation in agentic workflows, separate from tracing spans.
-| Field | Type | Description |
-|-------|------|-------------|
-| `tool.name` | string | Name of the tool invoked |
-| `tool.input_hash` | string | SHA-256 hash of tool input (never log raw input) |
-| `tool.output_status` | string | `success`, `error`, `timeout`, or `denied` |
-| `tool.duration_ms` | float | Execution time in milliseconds |
-| `agent.id` | string | ID of the invoking agent |
-| `agent.name` | string | Human-readable agent name |
-| `correlation.id` | string | Trace correlation ID |
-| `timestamp` | string | ISO 8601 timestamp |
-| `session.id` | string | Session identifier |
-- Log tool invocations at `info` level, failures at `error` level with `error.type` and `error.message`.
-- Aggregate tool call counts per agent per session for anomaly detection.
-- Retain audit logs for a minimum of 90 days.
-## Correlation IDs for Agent Workflows
-- Use UUIDv4 with workflow-type prefix: `{workflow-type}-{uuid}` (e.g., `agent-run-550e8400-...`).
-- Generate at the workflow entry point. Propagate to all sub-agents and tool calls.
-- Every log entry, span, and metric must include `correlation.id`.
-- Cross-process: propagate via `X-Correlation-ID` header alongside W3C Trace Context.
-- Use OpenTelemetry `SpanLink` for cross-workflow references (e.g., agent run triggered by CI event).
-```typescript
-import { randomUUID } from 'node:crypto';
-import { context, trace, SpanStatusCode } from '@opentelemetry/api';
-function generateCorrelationId(workflowType: string): string {
-  return `${workflowType}-${randomUUID()}`;
-}
-async function runAgentWorkflow(task: string): Promise<void> {
-  const correlationId = generateCorrelationId('agent-run');
-  const tracer = trace.getTracer('agent-orchestrator');
-  const rootSpan = tracer.startSpan('agent.orchestrator.invoke', {
-    attributes: {
-      'correlation.id': correlationId,
-      'agent.name': 'orchestrator',
-      'agent.task': task,
-    },
-  });
-  try {
-    await context.with(trace.setSpan(context.active(), rootSpan), async () => {
-      await delegateToSubAgent('code_reviewer', {
-        correlationId,
-        parentSpanId: rootSpan.spanContext().spanId,
-        task: 'review changes',
-      });
-    });
-  } catch (err) {
-    rootSpan.setStatus({ code: SpanStatusCode.ERROR, message: (err as Error).message });
-    rootSpan.recordException(err as Error);
-    throw err;
-  } finally {
-    rootSpan.end();
-  }
-}
-```
+The full content has been migrated to `hatch3r-observability-tracing`.

package/rules/hatch3r-observability-tracing-detail.mdc CHANGED Viewed

@@ -1,156 +1,14 @@
 ---
-description: Extended tracing reference -- AI agent instrumentation, tool call audit trails, LLM request tracing, and correlation ID patterns
+description: "[Deprecated] AI agent tracing detail rule -- consolidated into hatch3r-observability-tracing's AI Agent Instrumentation section"
 globs: ["**/*trac*", "**/*span*", "**/*telemetry*", "**/*otel*", "**/*agent*", "**/observability/**", "**/routes/**", "**/handlers/**", "**/services/**", "**/api/**", "**/middleware/**", "**/controllers/**", "**/lib/**"]
 alwaysApply: false
 ---
-# Observability -- Tracing Extended Reference
+# Observability -- Tracing Extended Reference (Deprecated Redirect)
-On-demand companion to `hatch3r-observability-tracing`. Load when instrumenting AI agent systems, implementing tool call audit trails, or setting up correlation IDs for multi-agent workflows.
+This rule has been merged into `hatch3r-observability-tracing`. Load that rule for AI agent instrumentation, tool call spans, LLM request/response tracing, tool call audit trails, and correlation ID patterns.
-## GenAI Span Attributes
+- See `hatch3r-observability-tracing` § "AI Agent Instrumentation" for: GenAI span attributes, agent invocation spans, tool call spans, LLM request/response tracing, tool call audit trail, correlation IDs for agent workflows.
-Use these attributes on all spans representing interactions with generative AI models:
+<!-- DEPRECATED-CONTENT-REMOVED -->
-| Attribute | Type | Description | Example |
-|-----------|------|-------------|---------|
-| `gen_ai.system` | string | GenAI provider system name | `openai`, `anthropic`, `azure_openai` |
-| `gen_ai.request.model` | string | Model name as specified in the request | `gpt-4o`, `claude-sonnet-4-20250514` |
-| `gen_ai.response.model` | string | Model name as returned in the response | `gpt-4o-2024-08-06` |
-| `gen_ai.request.max_tokens` | int | Maximum tokens requested for generation | `4096` |
-| `gen_ai.request.temperature` | float | Temperature parameter | `0.7` |
-| `gen_ai.response.finish_reasons` | string[] | Reasons the model stopped generating | `["stop"]`, `["length"]` |
-| `gen_ai.usage.input_tokens` | int | Tokens in the input/prompt | `1250` |
-| `gen_ai.usage.output_tokens` | int | Tokens in the generated output | `530` |
-- Always set `gen_ai.system` and `gen_ai.request.model` on every GenAI span.
-- Record `gen_ai.usage.input_tokens` and `gen_ai.usage.output_tokens` from the API response for cost dashboards.
-- Use `gen_ai.response.finish_reasons` to detect truncated outputs (`length`) and trigger re-prompting.
-## Agent Invocation Spans
-Instrument the full lifecycle of an agent invocation with a dedicated span. This span is the parent for all LLM calls, tool executions, and sub-agent delegations.
-- **Span name pattern:** `agent.{agent_name}.invoke`
-- **Required attributes:** `agent.id`, `agent.name`, `agent.parent_id`, `agent.task`, `agent.framework`
-- **Span events for state transitions:** `agent.planning`, `agent.tool_selection`, `agent.awaiting_human`, `agent.delegating`, `agent.completed`, `agent.error`
-```typescript
-const agentSpan = tracer.startSpan('agent.code_reviewer.invoke', {
-  attributes: {
-    'agent.id': invocationId,
-    'agent.name': 'code_reviewer',
-    'agent.parent_id': parentAgentId ?? '',
-    'agent.task': `review PR #${prNumber}`,
-    'agent.framework': 'custom',
-  },
-});
-agentSpan.addEvent('agent.planning');
-// ... agent reasoning and tool calls happen as child spans ...
-agentSpan.addEvent('agent.completed');
-agentSpan.end();
-```
-## Tool Call Spans
-Every tool invocation by an agent creates a child span of the agent invocation span.
-- **Span name pattern:** `tool.{tool_name}.execute`
-- **Required attributes:** `tool.name`, `tool.input_hash` (SHA-256), `tool.output_status`, `tool.duration_ms`, `tool.parameters_count`
-- Tool spans must be children of the invoking agent span. Set span status to `ERROR` when `tool.output_status` is `error` or `timeout`.
-- For tools performing I/O, create nested child spans using appropriate semantic conventions (`http.*`, `db.*`).
-```typescript
-const toolSpan = tracer.startSpan(
-  'tool.git_diff.execute',
-  { attributes: { 'tool.name': 'git_diff' } },
-  trace.setSpan(context.active(), agentSpan),
-);
-try {
-  const result = await tools.gitDiff(params);
-  toolSpan.setAttributes({
-    'tool.output_status': 'success',
-    'tool.duration_ms': performance.now() - startTime,
-    'tool.input_hash': hashInput(params),
-  });
-} catch (err) {
-  toolSpan.setAttributes({ 'tool.output_status': 'error' });
-  toolSpan.setStatus({ code: SpanStatusCode.ERROR, message: err.message });
-  toolSpan.recordException(err);
-  throw err;
-} finally {
-  toolSpan.end();
-}
-```
-## LLM Request/Response Tracing
-- **Span name pattern:** `gen_ai.{operation}` (e.g., `gen_ai.chat`, `gen_ai.completion`)
-- **Token tracking:** Capture `gen_ai.usage.input_tokens` and `gen_ai.usage.output_tokens`. Aggregate in metrics: Counter `gen_ai.tokens_total` with labels `{direction, model, agent_name}`, Histogram `gen_ai.request_duration_ms`.
-- **Model version tracking:** Record both `gen_ai.request.model` and `gen_ai.response.model` for drift detection.
-- **Retry spans:** Each retry attempt is a separate child span. Set `gen_ai.request.retries` on the final span. Record `http.response.status_code` on failed spans to distinguish rate limiting (429) from server errors (500+).
-- Never log raw prompt content or full model responses as span attributes. Use token counts for cost tracking and correlated logs for prompt debugging in non-production environments.
-- Sample GenAI spans at 50–100% in production (higher than general spans) because GenAI calls are expensive and low in volume.
-## Tool Call Audit Trail
-Maintain a structured audit log for every tool invocation in agentic workflows, separate from tracing spans.
-| Field | Type | Description |
-|-------|------|-------------|
-| `tool.name` | string | Name of the tool invoked |
-| `tool.input_hash` | string | SHA-256 hash of tool input (never log raw input) |
-| `tool.output_status` | string | `success`, `error`, `timeout`, or `denied` |
-| `tool.duration_ms` | float | Execution time in milliseconds |
-| `agent.id` | string | ID of the invoking agent |
-| `agent.name` | string | Human-readable agent name |
-| `correlation.id` | string | Trace correlation ID |
-| `timestamp` | string | ISO 8601 timestamp |
-| `session.id` | string | Session identifier |
-- Log tool invocations at `info` level, failures at `error` level with `error.type` and `error.message`.
-- Aggregate tool call counts per agent per session for anomaly detection.
-- Retain audit logs for a minimum of 90 days.
-## Correlation IDs for Agent Workflows
-- Use UUIDv4 with workflow-type prefix: `{workflow-type}-{uuid}` (e.g., `agent-run-550e8400-...`).
-- Generate at the workflow entry point. Propagate to all sub-agents and tool calls.
-- Every log entry, span, and metric must include `correlation.id`.
-- Cross-process: propagate via `X-Correlation-ID` header alongside W3C Trace Context.
-- Use OpenTelemetry `SpanLink` for cross-workflow references (e.g., agent run triggered by CI event).
-```typescript
-import { randomUUID } from 'node:crypto';
-import { context, trace, SpanStatusCode } from '@opentelemetry/api';
-function generateCorrelationId(workflowType: string): string {
-  return `${workflowType}-${randomUUID()}`;
-}
-async function runAgentWorkflow(task: string): Promise<void> {
-  const correlationId = generateCorrelationId('agent-run');
-  const tracer = trace.getTracer('agent-orchestrator');
-  const rootSpan = tracer.startSpan('agent.orchestrator.invoke', {
-    attributes: {
-      'correlation.id': correlationId,
-      'agent.name': 'orchestrator',
-      'agent.task': task,
-    },
-  });
-  try {
-    await context.with(trace.setSpan(context.active(), rootSpan), async () => {
-      await delegateToSubAgent('code_reviewer', {
-        correlationId,
-        parentSpanId: rootSpan.spanContext().spanId,
-        task: 'review changes',
-      });
-    });
-  } catch (err) {
-    rootSpan.setStatus({ code: SpanStatusCode.ERROR, message: (err as Error).message });
-    rootSpan.recordException(err as Error);
-    throw err;
-  } finally {
-    rootSpan.end();
-  }
-}
-```
+The full content has been migrated to `hatch3r-observability-tracing`.