hatch3r 1.7.5 → 1.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/agents/hatch3r-context-rules.md +22 -6
- package/agents/hatch3r-creator.md +2 -1
- package/agents/hatch3r-handoff-loader.md +1 -1
- package/agents/hatch3r-implementer.md +8 -0
- package/agents/hatch3r-learnings-loader.md +1 -1
- package/agents/hatch3r-reviewer.md +2 -0
- package/agents/shared/user-content-templates.md +31 -1
- package/commands/hatch3r-agent-customize.md +4 -0
- package/commands/hatch3r-api-spec.md +7 -0
- package/commands/hatch3r-benchmark.md +7 -0
- package/commands/hatch3r-board-fill.md +7 -0
- package/commands/hatch3r-board-groom.md +4 -0
- package/commands/hatch3r-board-init.md +51 -0
- package/commands/hatch3r-board-pickup.md +8 -0
- package/commands/hatch3r-board-refresh.md +4 -0
- package/commands/hatch3r-board-shared.md +6 -6
- package/commands/hatch3r-bug-plan.md +7 -0
- package/commands/hatch3r-codebase-map.md +8 -0
- package/commands/hatch3r-command-customize.md +4 -0
- package/commands/hatch3r-context-health.md +5 -0
- package/commands/hatch3r-create.md +57 -4
- package/commands/hatch3r-debug.md +7 -0
- package/commands/hatch3r-dep-audit.md +4 -0
- package/commands/hatch3r-feature-plan.md +7 -0
- package/commands/hatch3r-handoff.md +7 -0
- package/commands/hatch3r-healthcheck.md +4 -0
- package/commands/hatch3r-hooks.md +4 -0
- package/commands/hatch3r-learn.md +16 -0
- package/commands/hatch3r-migration-plan.md +7 -0
- package/commands/hatch3r-onboard.md +7 -0
- package/commands/hatch3r-pr-resolve.md +8 -1
- package/commands/hatch3r-project-spec.md +8 -0
- package/commands/hatch3r-quick-change.md +7 -0
- package/commands/hatch3r-recipe.md +4 -0
- package/commands/hatch3r-refactor-plan.md +7 -0
- package/commands/hatch3r-release.md +5 -0
- package/commands/hatch3r-revision.md +7 -0
- package/commands/hatch3r-roadmap.md +8 -0
- package/commands/hatch3r-rule-customize.md +4 -0
- package/commands/hatch3r-security-audit.md +4 -0
- package/commands/hatch3r-skill-customize.md +4 -0
- package/commands/hatch3r-test-plan.md +7 -0
- package/commands/hatch3r-workflow.md +9 -1
- package/dist/cli/index.js +2600 -777
- package/dist/cli/index.js.map +1 -1
- package/package.json +8 -5
- package/rules/hatch3r-agent-orchestration-detail.md +3 -0
- package/rules/hatch3r-agent-orchestration-detail.mdc +3 -0
- package/rules/hatch3r-agent-orchestration.md +25 -2
- package/rules/hatch3r-agent-orchestration.mdc +25 -2
- package/rules/hatch3r-iteration-summary.md +2 -0
- package/rules/hatch3r-iteration-summary.mdc +2 -0
- package/rules/hatch3r-observability-tracing-detail.md +7 -148
- package/rules/hatch3r-observability-tracing-detail.mdc +6 -148
- package/rules/hatch3r-observability-tracing.md +154 -6
- package/rules/hatch3r-observability-tracing.mdc +154 -6
- package/skills/hatch3r-agent-customize/SKILL.md +10 -0
- package/skills/hatch3r-ai-feature/SKILL.md +2 -0
- package/skills/hatch3r-api-spec/SKILL.md +68 -0
- package/skills/hatch3r-cli-csvkit/SKILL.md +2 -2
- package/skills/hatch3r-cli-duckdb/SKILL.md +3 -3
- package/skills/hatch3r-cli-jq/SKILL.md +4 -0
- package/skills/hatch3r-cli-miller/SKILL.md +2 -2
- package/skills/hatch3r-cli-overview/SKILL.md +1 -1
- package/skills/{hatch3r-cli-xsv → hatch3r-cli-qsv}/SKILL.md +20 -18
- package/skills/hatch3r-cli-stagehand/SKILL.md +48 -16
- package/skills/hatch3r-command-customize/SKILL.md +10 -0
- package/skills/hatch3r-customize/SKILL.md +3 -0
- package/skills/hatch3r-design-system-detect/SKILL.md +2 -0
- package/skills/hatch3r-observability-verify/SKILL.md +4 -3
- package/skills/hatch3r-reliability-verify/SKILL.md +2 -0
- package/skills/hatch3r-rule-customize/SKILL.md +10 -0
- package/skills/hatch3r-skill-customize/SKILL.md +10 -0
- package/skills/hatch3r-ui-ux-verify/SKILL.md +2 -0
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "hatch3r",
|
|
3
|
-
"version": "1.7.5",
|
|
3
|
+
"version": "1.8.0",
|
|
4
4
|
"description": "Battle-tested agentic coding setup framework. One command to hatch your agent stack -- agents, skills, rules, commands, and MCP for every major AI coding tool.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"exports": {
|
|
@@ -22,17 +22,19 @@
|
|
|
22
22
|
"test:watch": "vitest",
|
|
23
23
|
"inventory": "tsx scripts/inventory.ts",
|
|
24
24
|
"inventory:check-docs": "tsx scripts/inventory.ts --check-docs",
|
|
25
|
-
"validate:rule-parity": "tsx scripts/validate-rule-parity.ts",
|
|
26
|
-
"validate:efficiency": "tsx scripts/validate-efficiency-invariants.ts",
|
|
25
|
+
"validate:rule-parity": "tsx scripts/validate-rule-parity.ts && tsx scripts/validate-rule-pillar-currency.ts",
|
|
26
|
+
"validate:efficiency": "tsx scripts/validate-efficiency-invariants.ts && tsx scripts/validate-bridge-budget.ts && tsx scripts/validate-fanout-emission.ts",
|
|
27
27
|
"validate:cli-skills": "tsx scripts/validate-cli-skills.ts",
|
|
28
|
+
"validate:wiring": "tsx scripts/validate-wiring.ts",
|
|
28
29
|
"generate:cli-skills": "tsx scripts/generate-cli-skills.ts",
|
|
29
|
-
"validate": "npm run validate:rule-parity && npm run validate:efficiency && npm run validate:cli-skills",
|
|
30
|
+
"validate": "npm run validate:rule-parity && npm run validate:efficiency && npm run validate:cli-skills && npm run validate:wiring",
|
|
30
31
|
"audit:validate-registry": "tsx scripts/validate-finding-registry.ts",
|
|
31
32
|
"audit:migrate": "tsx scripts/migrate-finding-registry.ts",
|
|
32
33
|
"audit:archive": "tsx scripts/audit-archive.ts",
|
|
33
34
|
"audit:find": "tsx scripts/audit-find.ts",
|
|
34
35
|
"audit:reset": "tsx scripts/clean-audit-workspace.ts",
|
|
35
|
-
"lockfile:check": "lockfile-lint --path package-lock.json --type npm --allowed-hosts npm --validate-https"
|
|
36
|
+
"lockfile:check": "lockfile-lint --path package-lock.json --type npm --allowed-hosts npm --validate-https",
|
|
37
|
+
"mcp:cve-check": "tsx scripts/check-mcp-cves.ts"
|
|
36
38
|
},
|
|
37
39
|
"keywords": [
|
|
38
40
|
"agents",
|
|
@@ -90,6 +92,7 @@
|
|
|
90
92
|
"commander": "^14.0.3",
|
|
91
93
|
"inquirer": "^13.3.2",
|
|
92
94
|
"ora": "^9.3.0",
|
|
95
|
+
"p-limit": "^3.1.0",
|
|
93
96
|
"proper-lockfile": "^4.1.2",
|
|
94
97
|
"update-notifier": "^7.3.1",
|
|
95
98
|
"yaml": "^2.8.3"
|
|
@@ -5,8 +5,11 @@ description: Extended orchestration reference — PipelineContext schemas, resil
|
|
|
5
5
|
scope: conditional
|
|
6
6
|
globs: "**/.agents/**,**/pipeline/**,**/*orchestrat*,**/*agent*"
|
|
7
7
|
tags: [core]
|
|
8
|
+
precedence: normal
|
|
8
9
|
quality_charter: agents/shared/quality-charter.md
|
|
9
10
|
cache_friendly: true
|
|
11
|
+
detail_rule: true
|
|
12
|
+
consumed_by: hatch3r-agent-orchestration
|
|
10
13
|
---
|
|
11
14
|
# Agent Orchestration — Extended Reference
|
|
12
15
|
|
|
@@ -2,6 +2,9 @@
|
|
|
2
2
|
description: Extended orchestration reference — PipelineContext schemas, resilience protocols, observability integration, and auto-mode guardrails
|
|
3
3
|
globs: ["**/.agents/**", "**/pipeline/**", "**/*orchestrat*", "**/*agent*"]
|
|
4
4
|
alwaysApply: false
|
|
5
|
+
precedence: normal
|
|
6
|
+
detail_rule: true
|
|
7
|
+
consumed_by: hatch3r-agent-orchestration
|
|
5
8
|
---
|
|
6
9
|
# Agent Orchestration — Extended Reference
|
|
7
10
|
|
|
@@ -4,6 +4,7 @@ type: rule
|
|
|
4
4
|
description: Mandatory agent delegation, skill loading, and subagent usage directives for ALL tasks in ALL contexts
|
|
5
5
|
scope: always
|
|
6
6
|
tags: [core]
|
|
7
|
+
precedence: high
|
|
7
8
|
quality_charter: agents/shared/quality-charter.md
|
|
8
9
|
cache_friendly: true
|
|
9
10
|
---
|
|
@@ -103,6 +104,28 @@ Examples:
|
|
|
103
104
|
|
|
104
105
|
A missing header on a tracked Tier >= 2 task is a self-detectable drift signal — the user may halt the turn and request re-grounding. The header also functions as a per-reply cache prime: rendering it forces the orchestrator to re-resolve which phase it is in before choosing tools. Tier 1 tasks, read-only answers, and chat-only iterations do NOT require the header.
|
|
105
106
|
|
|
107
|
+
### End-of-Turn Delegation Attestation
|
|
108
|
+
|
|
109
|
+
When the turn is on a tracked task at Tier >= 2 AND caused at least one file mutation, the orchestrator MUST emit a closing block immediately before the Iteration Summary. The block enumerates every file mutated this turn, the spawning sub-agent invocation, and the `delegation_proof_id` returned by that sub-agent.
|
|
110
|
+
|
|
111
|
+
Format:
|
|
112
|
+
|
|
113
|
+
```
|
|
114
|
+
[hatch3r-delegation-attestation]
|
|
115
|
+
files_mutated_this_turn:
|
|
116
|
+
- <relative path>: via <agent-name> (proof: <delegation_proof_id>)
|
|
117
|
+
mutating_subagent_invocations: <integer>
|
|
118
|
+
inline_edits_by_orchestrator: none | <carve-out: hatch3r-quick-change Tier-1 + queued re-delegation>
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
Rules:
|
|
122
|
+
|
|
123
|
+
- Each `files_mutated_this_turn` row MUST cite the spawning sub-agent invocation and quote the `delegation_proof_id` returned by that sub-agent verbatim. Unattributable rows are self-declared P8 B2 violations and the orchestrator MUST queue re-delegation in the next turn.
|
|
124
|
+
- `inline_edits_by_orchestrator: none` is the only acceptable value outside the `hatch3r-quick-change` Tier-1 carve-out declared in the "Inline implementation" definition above.
|
|
125
|
+
- Tier 1 tasks, read-only answers, and chat-only turns are exempt — same scope as the Per-Turn Pipeline-State Header.
|
|
126
|
+
- Missing block on a Tier >= 2 mutating turn is a self-detectable drift signal — the user may halt the turn and re-ground per the same protocol as the missing-header signal.
|
|
127
|
+
- The block is consumed by reviewers and the next orchestrator turn; it sits beside the Iteration Summary, not inside it, preserving the existing 5-field iteration-summary contract verbatim.
|
|
128
|
+
|
|
106
129
|
### Mandatory Delegation Directive (No Inline Implementation)
|
|
107
130
|
|
|
108
131
|
Restating with maximum clarity for sub-agent prompt inclusion: the orchestrator MUST NOT call `Edit`, `Write`, `MultiEdit`, `NotebookEdit`, `replace_string_in_file`, `multi_replace_string_in_file`, `create_file`, `str_replace_based_edit_tool`, `apply_patch`, or any platform-equivalent code-writing tool from its own turn. The only path for code mutation is the Task tool spawning `hatch3r-implementer` (Phase 2) or `hatch3r-fixer` (Phase 3). Carve-out: `hatch3r-quick-change` Tier 1 trivial items per its declared scope. No other carve-out exists. Violations are bypass mode (see issue #73) — surface them by halting the turn and re-delegating.
|
|
@@ -132,14 +155,14 @@ For multi-sub-task implementations, the implementer performs a lightweight mini-
|
|
|
132
155
|
|
|
133
156
|
1. Spawn `hatch3r-reviewer` with diff and acceptance criteria. Reviewer includes blast radius summary.
|
|
134
157
|
2. Critical/Warning findings: spawn `hatch3r-fixer` with full reviewer output.
|
|
135
|
-
3. Re-review after fixes. Repeat until 0 Critical + 0 Warning, or max 3 iterations.
|
|
158
|
+
3. Re-review after fixes. Repeat until 0 Critical + 0 Warning, or max 4 iterations (matches `DEFAULT_MAX_REVIEW_ITERATIONS` in `src/pipeline/reviewLoop.ts`; raised from 3 to 4 in Cycle 7.5 W2B2 finding H26 so the oscillation detector becomes reachable in default config). The rule default and the code constant are kept in sync by `src/__tests__/pipeline/reviewLoop.test.ts` (CI-enforced).
|
|
136
159
|
4. **Confirmation pass** after clean review: lightweight re-review for fix-driven regressions and acceptance criteria completeness. The confirmation pass checks only: (a) no new test failures compared to Phase 2 baseline, (b) no type errors introduced, (c) acceptance criteria from the issue are still met. It does not re-run the full review checklist.
|
|
137
160
|
5. Max iterations reached: surface to user with a structured summary: iteration count, remaining Critical findings (with file:line), remaining Warning findings, and a recommendation (fix manually vs. accept risk). Never present raw reviewer output without summarization.
|
|
138
161
|
6. **Review gate confidence signal:** When the review loop exits with a clean verdict, record the iteration count in `PipelineContext.reviewResult.iterations`. Clean-on-first-pass (iteration 1) signals higher confidence than clean-after-multiple-iterations (iteration 2-4). Phase 4 specialists and the orchestrator should factor this into their risk assessment.
|
|
139
162
|
|
|
140
163
|
**Phase 4 — Final Quality** (after review loop is clean):
|
|
141
164
|
|
|
142
|
-
Launch parallel
|
|
165
|
+
Launch Phase 4 specialists in parallel, bounded by `max_phase4_parallel` (default `3`, override via `HATCH3R_MAX_PHASE4_PARALLEL` env var; valid range 1-16, values outside the range fall back to default with a logged warning). The bound exists to cap per-orchestrator concurrent context cost — it does not soften the P8 B2 directive that fan-out scales with task decomposition. When the number of applicable specialists exceeds `max_phase4_parallel`, batch them by severity-descending priority: `CRITICAL → HIGH → MEDIUM → LOW`. Severity is the worst-case finding class the specialist is expected to surface: the always-on baseline (`hatch3r-test-writer` / `hatch3r-security-auditor`) → CRITICAL; conditional UI/security/perf specialists → HIGH; docs/lint → MEDIUM; low-impact specialists → LOW. Within the same severity bucket, dispatch order is the trigger-table order in the table above. Each batch runs to completion (all specialists return SUCCESS/PARTIAL/FAILED) before the next batch starts; the validation pass below runs once after the final batch.
|
|
143
166
|
|
|
144
167
|
- **Always** (except when Phase Skip Criteria applies — see below)**:** `hatch3r-test-writer`, `hatch3r-security-auditor`
|
|
145
168
|
- **Evaluate:** `hatch3r-docs-writer` (when APIs/architecture/UX affected)
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
description: Mandatory agent delegation, skill loading, and subagent usage directives for ALL tasks in ALL contexts
|
|
3
3
|
alwaysApply: true
|
|
4
|
+
precedence: high
|
|
4
5
|
---
|
|
5
6
|
# Agent Orchestration
|
|
6
7
|
|
|
@@ -98,6 +99,28 @@ Examples:
|
|
|
98
99
|
|
|
99
100
|
A missing header on a tracked Tier >= 2 task is a self-detectable drift signal — the user may halt the turn and request re-grounding. The header also functions as a per-reply cache prime: rendering it forces the orchestrator to re-resolve which phase it is in before choosing tools. Tier 1 tasks, read-only answers, and chat-only iterations do NOT require the header.
|
|
100
101
|
|
|
102
|
+
### End-of-Turn Delegation Attestation
|
|
103
|
+
|
|
104
|
+
When the turn is on a tracked task at Tier >= 2 AND caused at least one file mutation, the orchestrator MUST emit a closing block immediately before the Iteration Summary. The block enumerates every file mutated this turn, the spawning sub-agent invocation, and the `delegation_proof_id` returned by that sub-agent.
|
|
105
|
+
|
|
106
|
+
Format:
|
|
107
|
+
|
|
108
|
+
```
|
|
109
|
+
[hatch3r-delegation-attestation]
|
|
110
|
+
files_mutated_this_turn:
|
|
111
|
+
- <relative path>: via <agent-name> (proof: <delegation_proof_id>)
|
|
112
|
+
mutating_subagent_invocations: <integer>
|
|
113
|
+
inline_edits_by_orchestrator: none | <carve-out: hatch3r-quick-change Tier-1 + queued re-delegation>
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
Rules:
|
|
117
|
+
|
|
118
|
+
- Each `files_mutated_this_turn` row MUST cite the spawning sub-agent invocation and quote the `delegation_proof_id` returned by that sub-agent verbatim. Unattributable rows are self-declared P8 B2 violations and the orchestrator MUST queue re-delegation in the next turn.
|
|
119
|
+
- `inline_edits_by_orchestrator: none` is the only acceptable value outside the `hatch3r-quick-change` Tier-1 carve-out declared in the "Inline implementation" definition above.
|
|
120
|
+
- Tier 1 tasks, read-only answers, and chat-only turns are exempt — same scope as the Per-Turn Pipeline-State Header.
|
|
121
|
+
- Missing block on a Tier >= 2 mutating turn is a self-detectable drift signal — the user may halt the turn and re-ground per the same protocol as the missing-header signal.
|
|
122
|
+
- The block is consumed by reviewers and the next orchestrator turn; it sits beside the Iteration Summary, not inside it, preserving the existing 5-field iteration-summary contract verbatim.
|
|
123
|
+
|
|
101
124
|
### Mandatory Delegation Directive (No Inline Implementation)
|
|
102
125
|
|
|
103
126
|
Restating with maximum clarity for sub-agent prompt inclusion: the orchestrator MUST NOT call `Edit`, `Write`, `MultiEdit`, `NotebookEdit`, `replace_string_in_file`, `multi_replace_string_in_file`, `create_file`, `str_replace_based_edit_tool`, `apply_patch`, or any platform-equivalent code-writing tool from its own turn. The only path for code mutation is the Task tool spawning `hatch3r-implementer` (Phase 2) or `hatch3r-fixer` (Phase 3). Carve-out: `hatch3r-quick-change` Tier 1 trivial items per its declared scope. No other carve-out exists. Violations are bypass mode (see issue #73) — surface them by halting the turn and re-delegating.
|
|
@@ -127,14 +150,14 @@ For multi-sub-task implementations, the implementer performs a lightweight mini-
|
|
|
127
150
|
|
|
128
151
|
1. Spawn `hatch3r-reviewer` with diff and acceptance criteria. Reviewer includes blast radius summary.
|
|
129
152
|
2. Critical/Warning findings: spawn `hatch3r-fixer` with full reviewer output.
|
|
130
|
-
3. Re-review after fixes. Repeat until 0 Critical + 0 Warning, or max 3 iterations.
|
|
153
|
+
3. Re-review after fixes. Repeat until 0 Critical + 0 Warning, or max 4 iterations (matches `DEFAULT_MAX_REVIEW_ITERATIONS` in `src/pipeline/reviewLoop.ts`; raised from 3 to 4 in Cycle 7.5 W2B2 finding H26 so the oscillation detector becomes reachable in default config). The rule default and the code constant are kept in sync by `src/__tests__/pipeline/reviewLoop.test.ts` (CI-enforced).
|
|
131
154
|
4. **Confirmation pass** after clean review: lightweight re-review for fix-driven regressions and acceptance criteria completeness. The confirmation pass checks only: (a) no new test failures compared to Phase 2 baseline, (b) no type errors introduced, (c) acceptance criteria from the issue are still met. It does not re-run the full review checklist.
|
|
132
155
|
5. Max iterations reached: surface to user with a structured summary: iteration count, remaining Critical findings (with file:line), remaining Warning findings, and a recommendation (fix manually vs. accept risk). Never present raw reviewer output without summarization.
|
|
133
156
|
6. **Review gate confidence signal:** When the review loop exits with a clean verdict, record the iteration count in `PipelineContext.reviewResult.iterations`. Clean-on-first-pass (iteration 1) signals higher confidence than clean-after-multiple-iterations (iteration 2-4). Phase 4 specialists and the orchestrator should factor this into their risk assessment.
|
|
134
157
|
|
|
135
158
|
**Phase 4 — Final Quality** (after review loop is clean):
|
|
136
159
|
|
|
137
|
-
Launch parallel
|
|
160
|
+
Launch Phase 4 specialists in parallel, bounded by `max_phase4_parallel` (default `3`, override via `HATCH3R_MAX_PHASE4_PARALLEL` env var; valid range 1-16, values outside the range fall back to default with a logged warning). The bound exists to cap per-orchestrator concurrent context cost — it does not soften the P8 B2 directive that fan-out scales with task decomposition. When the number of applicable specialists exceeds `max_phase4_parallel`, batch them by severity-descending priority: `CRITICAL → HIGH → MEDIUM → LOW`. Severity is the worst-case finding class the specialist is expected to surface: the always-on baseline (`hatch3r-test-writer` / `hatch3r-security-auditor`) → CRITICAL; conditional UI/security/perf specialists → HIGH; docs/lint → MEDIUM; low-impact specialists → LOW. Within the same severity bucket, dispatch order is the trigger-table order in the table above. Each batch runs to completion (all specialists return SUCCESS/PARTIAL/FAILED) before the next batch starts; the validation pass below runs once after the final batch.
|
|
138
161
|
|
|
139
162
|
- **Always** (except when Phase Skip Criteria applies — see below)**:** `hatch3r-test-writer`, `hatch3r-security-auditor`
|
|
140
163
|
- **Evaluate:** `hatch3r-docs-writer` (when APIs/architecture/UX affected)
|
|
@@ -67,6 +67,8 @@ Append only when they carry information. Do not include empty headers.
|
|
|
67
67
|
**Suggested Next Action:** {one line}
|
|
68
68
|
```
|
|
69
69
|
|
|
70
|
+
The **End-of-Turn Delegation Attestation** (defined in `hatch3r-agent-orchestration` -> End-of-Turn Delegation Attestation) is conditionally required and appears immediately BEFORE this Iteration Summary block. It applies when the turn is on a Tier >= 2 tracked task AND caused at least one file mutation. The Iteration Summary's 5-field contract is unchanged — the Attestation lives in a separate block to preserve backward compatibility for the 15 adapter outputs.
|
|
71
|
+
|
|
70
72
|
## Field Semantics
|
|
71
73
|
|
|
72
74
|
- **Outcome** is one sentence. The user should grasp what happened from this line alone.
|
|
@@ -62,6 +62,8 @@ Append only when they carry information. Do not include empty headers.
|
|
|
62
62
|
**Suggested Next Action:** {one line}
|
|
63
63
|
```
|
|
64
64
|
|
|
65
|
+
The **End-of-Turn Delegation Attestation** (defined in `hatch3r-agent-orchestration` -> End-of-Turn Delegation Attestation) is conditionally required and appears immediately BEFORE this Iteration Summary block. It applies when the turn is on a Tier >= 2 tracked task AND caused at least one file mutation. The Iteration Summary's 5-field contract is unchanged — the Attestation lives in a separate block to preserve backward compatibility for the 15 adapter outputs.
|
|
66
|
+
|
|
65
67
|
## Field Semantics
|
|
66
68
|
|
|
67
69
|
- **Outcome** is one sentence. The user should grasp what happened from this line alone.
|
|
@@ -1,161 +1,20 @@
|
|
|
1
1
|
---
|
|
2
2
|
id: hatch3r-observability-tracing-detail
|
|
3
3
|
type: rule
|
|
4
|
-
description:
|
|
4
|
+
description: "[Deprecated] AI agent tracing detail rule -- consolidated into hatch3r-observability-tracing's AI Agent Instrumentation section"
|
|
5
5
|
scope: conditional
|
|
6
6
|
globs: "**/*trac*,**/*span*,**/*telemetry*,**/*otel*,**/*agent*,**/observability/**,**/routes/**,**/handlers/**,**/services/**,**/api/**,**/middleware/**,**/controllers/**,**/lib/**"
|
|
7
7
|
tags: [devops]
|
|
8
8
|
quality_charter: agents/shared/quality-charter.md
|
|
9
|
+
deprecated: true
|
|
9
10
|
cache_friendly: true
|
|
10
11
|
---
|
|
11
|
-
# Observability -- Tracing Extended Reference
|
|
12
|
+
# Observability -- Tracing Extended Reference (Deprecated Redirect)
|
|
12
13
|
|
|
13
|
-
|
|
14
|
+
This rule has been merged into `hatch3r-observability-tracing`. Load that rule for AI agent instrumentation, tool call spans, LLM request/response tracing, tool call audit trails, and correlation ID patterns.
|
|
14
15
|
|
|
15
|
-
|
|
16
|
+
- See `hatch3r-observability-tracing` § "AI Agent Instrumentation" for: GenAI span attributes, agent invocation spans, tool call spans, LLM request/response tracing, tool call audit trail, correlation IDs for agent workflows.
|
|
16
17
|
|
|
17
|
-
|
|
18
|
+
<!-- DEPRECATED-CONTENT-REMOVED -->
|
|
18
19
|
|
|
19
|
-
|
|
20
|
-
|-----------|------|-------------|---------|
|
|
21
|
-
| `gen_ai.system` | string | GenAI provider system name | `openai`, `anthropic`, `azure_openai` |
|
|
22
|
-
| `gen_ai.request.model` | string | Model name as specified in the request | `gpt-4o`, `claude-sonnet-4-20250514` |
|
|
23
|
-
| `gen_ai.response.model` | string | Model name as returned in the response | `gpt-4o-2024-08-06` |
|
|
24
|
-
| `gen_ai.request.max_tokens` | int | Maximum tokens requested for generation | `4096` |
|
|
25
|
-
| `gen_ai.request.temperature` | float | Temperature parameter | `0.7` |
|
|
26
|
-
| `gen_ai.response.finish_reasons` | string[] | Reasons the model stopped generating | `["stop"]`, `["length"]` |
|
|
27
|
-
| `gen_ai.usage.input_tokens` | int | Tokens in the input/prompt | `1250` |
|
|
28
|
-
| `gen_ai.usage.output_tokens` | int | Tokens in the generated output | `530` |
|
|
29
|
-
|
|
30
|
-
- Always set `gen_ai.system` and `gen_ai.request.model` on every GenAI span.
|
|
31
|
-
- Record `gen_ai.usage.input_tokens` and `gen_ai.usage.output_tokens` from the API response for cost dashboards.
|
|
32
|
-
- Use `gen_ai.response.finish_reasons` to detect truncated outputs (`length`) and trigger re-prompting.
|
|
33
|
-
|
|
34
|
-
## Agent Invocation Spans
|
|
35
|
-
|
|
36
|
-
Instrument the full lifecycle of an agent invocation with a dedicated span. This span is the parent for all LLM calls, tool executions, and sub-agent delegations.
|
|
37
|
-
|
|
38
|
-
- **Span name pattern:** `agent.{agent_name}.invoke`
|
|
39
|
-
- **Required attributes:** `agent.id`, `agent.name`, `agent.parent_id`, `agent.task`, `agent.framework`
|
|
40
|
-
- **Span events for state transitions:** `agent.planning`, `agent.tool_selection`, `agent.awaiting_human`, `agent.delegating`, `agent.completed`, `agent.error`
|
|
41
|
-
|
|
42
|
-
```typescript
|
|
43
|
-
const agentSpan = tracer.startSpan('agent.code_reviewer.invoke', {
|
|
44
|
-
attributes: {
|
|
45
|
-
'agent.id': invocationId,
|
|
46
|
-
'agent.name': 'code_reviewer',
|
|
47
|
-
'agent.parent_id': parentAgentId ?? '',
|
|
48
|
-
'agent.task': `review PR #${prNumber}`,
|
|
49
|
-
'agent.framework': 'custom',
|
|
50
|
-
},
|
|
51
|
-
});
|
|
52
|
-
agentSpan.addEvent('agent.planning');
|
|
53
|
-
// ... agent reasoning and tool calls happen as child spans ...
|
|
54
|
-
agentSpan.addEvent('agent.completed');
|
|
55
|
-
agentSpan.end();
|
|
56
|
-
```
|
|
57
|
-
|
|
58
|
-
## Tool Call Spans
|
|
59
|
-
|
|
60
|
-
Every tool invocation by an agent creates a child span of the agent invocation span.
|
|
61
|
-
|
|
62
|
-
- **Span name pattern:** `tool.{tool_name}.execute`
|
|
63
|
-
- **Required attributes:** `tool.name`, `tool.input_hash` (SHA-256), `tool.output_status`, `tool.duration_ms`, `tool.parameters_count`
|
|
64
|
-
- Tool spans must be children of the invoking agent span. Set span status to `ERROR` when `tool.output_status` is `error` or `timeout`.
|
|
65
|
-
- For tools performing I/O, create nested child spans using appropriate semantic conventions (`http.*`, `db.*`).
|
|
66
|
-
|
|
67
|
-
```typescript
|
|
68
|
-
const toolSpan = tracer.startSpan(
|
|
69
|
-
'tool.git_diff.execute',
|
|
70
|
-
{ attributes: { 'tool.name': 'git_diff' } },
|
|
71
|
-
trace.setSpan(context.active(), agentSpan),
|
|
72
|
-
);
|
|
73
|
-
try {
|
|
74
|
-
const result = await tools.gitDiff(params);
|
|
75
|
-
toolSpan.setAttributes({
|
|
76
|
-
'tool.output_status': 'success',
|
|
77
|
-
'tool.duration_ms': performance.now() - startTime,
|
|
78
|
-
'tool.input_hash': hashInput(params),
|
|
79
|
-
});
|
|
80
|
-
} catch (err) {
|
|
81
|
-
toolSpan.setAttributes({ 'tool.output_status': 'error' });
|
|
82
|
-
toolSpan.setStatus({ code: SpanStatusCode.ERROR, message: err.message });
|
|
83
|
-
toolSpan.recordException(err);
|
|
84
|
-
throw err;
|
|
85
|
-
} finally {
|
|
86
|
-
toolSpan.end();
|
|
87
|
-
}
|
|
88
|
-
```
|
|
89
|
-
|
|
90
|
-
## LLM Request/Response Tracing
|
|
91
|
-
|
|
92
|
-
- **Span name pattern:** `gen_ai.{operation}` (e.g., `gen_ai.chat`, `gen_ai.completion`)
|
|
93
|
-
- **Token tracking:** Capture `gen_ai.usage.input_tokens` and `gen_ai.usage.output_tokens`. Aggregate in metrics: Counter `gen_ai.tokens_total` with labels `{direction, model, agent_name}`, Histogram `gen_ai.request_duration_ms`.
|
|
94
|
-
- **Model version tracking:** Record both `gen_ai.request.model` and `gen_ai.response.model` for drift detection.
|
|
95
|
-
- **Retry spans:** Each retry attempt is a separate child span. Set `gen_ai.request.retries` on the final span. Record `http.response.status_code` on failed spans (429 vs 500+).
|
|
96
|
-
- Never log raw prompt content or full model responses as span attributes. Use token counts for cost tracking and correlated logs for prompt debugging in non-production environments.
|
|
97
|
-
- Sample GenAI spans at 50-100% in production (higher than general spans) because each call is expensive and low volume.
|
|
98
|
-
|
|
99
|
-
## Tool Call Audit Trail
|
|
100
|
-
|
|
101
|
-
Maintain a structured audit log for every tool invocation in agentic workflows, separate from tracing spans.
|
|
102
|
-
|
|
103
|
-
| Field | Type | Description |
|
|
104
|
-
|-------|------|-------------|
|
|
105
|
-
| `tool.name` | string | Name of the tool invoked |
|
|
106
|
-
| `tool.input_hash` | string | SHA-256 hash of tool input (never log raw input) |
|
|
107
|
-
| `tool.output_status` | string | `success`, `error`, `timeout`, or `denied` |
|
|
108
|
-
| `tool.duration_ms` | float | Execution time in milliseconds |
|
|
109
|
-
| `agent.id` | string | ID of the invoking agent |
|
|
110
|
-
| `agent.name` | string | Human-readable agent name |
|
|
111
|
-
| `correlation.id` | string | Trace correlation ID |
|
|
112
|
-
| `timestamp` | string | ISO 8601 timestamp |
|
|
113
|
-
| `session.id` | string | Session identifier |
|
|
114
|
-
|
|
115
|
-
- Log tool invocations at `info` level, failures at `error` level with `error.type` and `error.message`.
|
|
116
|
-
- Aggregate tool call counts per agent per session for anomaly detection.
|
|
117
|
-
- Retain audit logs for a minimum of 90 days.
|
|
118
|
-
|
|
119
|
-
## Correlation IDs for Agent Workflows
|
|
120
|
-
|
|
121
|
-
- Use UUIDv4 with workflow-type prefix: `{workflow-type}-{uuid}` (e.g., `agent-run-550e8400-...`).
|
|
122
|
-
- Generate at the workflow entry point. Propagate to all sub-agents and tool calls.
|
|
123
|
-
- Every log entry, span, and metric must include `correlation.id`.
|
|
124
|
-
- Cross-process: propagate via `X-Correlation-ID` header alongside W3C Trace Context.
|
|
125
|
-
- Use OpenTelemetry `SpanLink` for cross-workflow references (e.g., agent run triggered by CI event).
|
|
126
|
-
|
|
127
|
-
```typescript
|
|
128
|
-
import { randomUUID } from 'node:crypto';
|
|
129
|
-
import { context, trace, SpanStatusCode } from '@opentelemetry/api';
|
|
130
|
-
|
|
131
|
-
function generateCorrelationId(workflowType: string): string {
|
|
132
|
-
return `${workflowType}-${randomUUID()}`;
|
|
133
|
-
}
|
|
134
|
-
|
|
135
|
-
async function runAgentWorkflow(task: string): Promise<void> {
|
|
136
|
-
const correlationId = generateCorrelationId('agent-run');
|
|
137
|
-
const tracer = trace.getTracer('agent-orchestrator');
|
|
138
|
-
const rootSpan = tracer.startSpan('agent.orchestrator.invoke', {
|
|
139
|
-
attributes: {
|
|
140
|
-
'correlation.id': correlationId,
|
|
141
|
-
'agent.name': 'orchestrator',
|
|
142
|
-
'agent.task': task,
|
|
143
|
-
},
|
|
144
|
-
});
|
|
145
|
-
try {
|
|
146
|
-
await context.with(trace.setSpan(context.active(), rootSpan), async () => {
|
|
147
|
-
await delegateToSubAgent('code_reviewer', {
|
|
148
|
-
correlationId,
|
|
149
|
-
parentSpanId: rootSpan.spanContext().spanId,
|
|
150
|
-
task: 'review changes',
|
|
151
|
-
});
|
|
152
|
-
});
|
|
153
|
-
} catch (err) {
|
|
154
|
-
rootSpan.setStatus({ code: SpanStatusCode.ERROR, message: (err as Error).message });
|
|
155
|
-
rootSpan.recordException(err as Error);
|
|
156
|
-
throw err;
|
|
157
|
-
} finally {
|
|
158
|
-
rootSpan.end();
|
|
159
|
-
}
|
|
160
|
-
}
|
|
161
|
-
```
|
|
20
|
+
The full content has been migrated to `hatch3r-observability-tracing`.
|
|
@@ -1,156 +1,14 @@
|
|
|
1
1
|
---
|
|
2
|
-
description:
|
|
2
|
+
description: "[Deprecated] AI agent tracing detail rule -- consolidated into hatch3r-observability-tracing's AI Agent Instrumentation section"
|
|
3
3
|
globs: ["**/*trac*", "**/*span*", "**/*telemetry*", "**/*otel*", "**/*agent*", "**/observability/**", "**/routes/**", "**/handlers/**", "**/services/**", "**/api/**", "**/middleware/**", "**/controllers/**", "**/lib/**"]
|
|
4
4
|
alwaysApply: false
|
|
5
5
|
---
|
|
6
|
-
# Observability -- Tracing Extended Reference
|
|
6
|
+
# Observability -- Tracing Extended Reference (Deprecated Redirect)
|
|
7
7
|
|
|
8
|
-
|
|
8
|
+
This rule has been merged into `hatch3r-observability-tracing`. Load that rule for AI agent instrumentation, tool call spans, LLM request/response tracing, tool call audit trails, and correlation ID patterns.
|
|
9
9
|
|
|
10
|
-
|
|
10
|
+
- See `hatch3r-observability-tracing` § "AI Agent Instrumentation" for: GenAI span attributes, agent invocation spans, tool call spans, LLM request/response tracing, tool call audit trail, correlation IDs for agent workflows.
|
|
11
11
|
|
|
12
|
-
|
|
12
|
+
<!-- DEPRECATED-CONTENT-REMOVED -->
|
|
13
13
|
|
|
14
|
-
|
|
15
|
-
|-----------|------|-------------|---------|
|
|
16
|
-
| `gen_ai.system` | string | GenAI provider system name | `openai`, `anthropic`, `azure_openai` |
|
|
17
|
-
| `gen_ai.request.model` | string | Model name as specified in the request | `gpt-4o`, `claude-sonnet-4-20250514` |
|
|
18
|
-
| `gen_ai.response.model` | string | Model name as returned in the response | `gpt-4o-2024-08-06` |
|
|
19
|
-
| `gen_ai.request.max_tokens` | int | Maximum tokens requested for generation | `4096` |
|
|
20
|
-
| `gen_ai.request.temperature` | float | Temperature parameter | `0.7` |
|
|
21
|
-
| `gen_ai.response.finish_reasons` | string[] | Reasons the model stopped generating | `["stop"]`, `["length"]` |
|
|
22
|
-
| `gen_ai.usage.input_tokens` | int | Tokens in the input/prompt | `1250` |
|
|
23
|
-
| `gen_ai.usage.output_tokens` | int | Tokens in the generated output | `530` |
|
|
24
|
-
|
|
25
|
-
- Always set `gen_ai.system` and `gen_ai.request.model` on every GenAI span.
|
|
26
|
-
- Record `gen_ai.usage.input_tokens` and `gen_ai.usage.output_tokens` from the API response for cost dashboards.
|
|
27
|
-
- Use `gen_ai.response.finish_reasons` to detect truncated outputs (`length`) and trigger re-prompting.
|
|
28
|
-
|
|
29
|
-
## Agent Invocation Spans
|
|
30
|
-
|
|
31
|
-
Instrument the full lifecycle of an agent invocation with a dedicated span. This span is the parent for all LLM calls, tool executions, and sub-agent delegations.
|
|
32
|
-
|
|
33
|
-
- **Span name pattern:** `agent.{agent_name}.invoke`
|
|
34
|
-
- **Required attributes:** `agent.id`, `agent.name`, `agent.parent_id`, `agent.task`, `agent.framework`
|
|
35
|
-
- **Span events for state transitions:** `agent.planning`, `agent.tool_selection`, `agent.awaiting_human`, `agent.delegating`, `agent.completed`, `agent.error`
|
|
36
|
-
|
|
37
|
-
```typescript
|
|
38
|
-
const agentSpan = tracer.startSpan('agent.code_reviewer.invoke', {
|
|
39
|
-
attributes: {
|
|
40
|
-
'agent.id': invocationId,
|
|
41
|
-
'agent.name': 'code_reviewer',
|
|
42
|
-
'agent.parent_id': parentAgentId ?? '',
|
|
43
|
-
'agent.task': `review PR #${prNumber}`,
|
|
44
|
-
'agent.framework': 'custom',
|
|
45
|
-
},
|
|
46
|
-
});
|
|
47
|
-
agentSpan.addEvent('agent.planning');
|
|
48
|
-
// ... agent reasoning and tool calls happen as child spans ...
|
|
49
|
-
agentSpan.addEvent('agent.completed');
|
|
50
|
-
agentSpan.end();
|
|
51
|
-
```
|
|
52
|
-
|
|
53
|
-
## Tool Call Spans
|
|
54
|
-
|
|
55
|
-
Every tool invocation by an agent creates a child span of the agent invocation span.
|
|
56
|
-
|
|
57
|
-
- **Span name pattern:** `tool.{tool_name}.execute`
|
|
58
|
-
- **Required attributes:** `tool.name`, `tool.input_hash` (SHA-256), `tool.output_status`, `tool.duration_ms`, `tool.parameters_count`
|
|
59
|
-
- Tool spans must be children of the invoking agent span. Set span status to `ERROR` when `tool.output_status` is `error` or `timeout`.
|
|
60
|
-
- For tools performing I/O, create nested child spans using appropriate semantic conventions (`http.*`, `db.*`).
|
|
61
|
-
|
|
62
|
-
```typescript
|
|
63
|
-
const toolSpan = tracer.startSpan(
|
|
64
|
-
'tool.git_diff.execute',
|
|
65
|
-
{ attributes: { 'tool.name': 'git_diff' } },
|
|
66
|
-
trace.setSpan(context.active(), agentSpan),
|
|
67
|
-
);
|
|
68
|
-
try {
|
|
69
|
-
const result = await tools.gitDiff(params);
|
|
70
|
-
toolSpan.setAttributes({
|
|
71
|
-
'tool.output_status': 'success',
|
|
72
|
-
'tool.duration_ms': performance.now() - startTime,
|
|
73
|
-
'tool.input_hash': hashInput(params),
|
|
74
|
-
});
|
|
75
|
-
} catch (err) {
|
|
76
|
-
toolSpan.setAttributes({ 'tool.output_status': 'error' });
|
|
77
|
-
toolSpan.setStatus({ code: SpanStatusCode.ERROR, message: (err as Error).message });
|
|
78
|
-
toolSpan.recordException(err as Error);
|
|
79
|
-
throw err;
|
|
80
|
-
} finally {
|
|
81
|
-
toolSpan.end();
|
|
82
|
-
}
|
|
83
|
-
```
|
|
84
|
-
|
|
85
|
-
## LLM Request/Response Tracing
|
|
86
|
-
|
|
87
|
-
- **Span name pattern:** `gen_ai.{operation}` (e.g., `gen_ai.chat`, `gen_ai.completion`)
|
|
88
|
-
- **Token tracking:** Capture `gen_ai.usage.input_tokens` and `gen_ai.usage.output_tokens`. Aggregate in metrics: Counter `gen_ai.tokens_total` with labels `{direction, model, agent_name}`, Histogram `gen_ai.request_duration_ms`.
|
|
89
|
-
- **Model version tracking:** Record both `gen_ai.request.model` and `gen_ai.response.model` for drift detection.
|
|
90
|
-
- **Retry spans:** Each retry attempt is a separate child span. Set `gen_ai.request.retries` on the final span. Record `http.response.status_code` on failed spans (429 vs 500+).
|
|
91
|
-
- Never record raw prompt content or full model responses as span attributes. Use token counts for cost tracking and correlated logs for prompt debugging in non-production environments.
|
|
92
|
-
- Sample GenAI spans at 50-100% in production (higher than general spans) because GenAI calls are expensive and low-volume.
|
|
93
|
-
|
|
94
|
-
## Tool Call Audit Trail
|
|
95
|
-
|
|
96
|
-
Maintain a structured audit log for every tool invocation in agentic workflows, separate from tracing spans.
|
|
97
|
-
|
|
98
|
-
| Field | Type | Description |
|
|
99
|
-
|-------|------|-------------|
|
|
100
|
-
| `tool.name` | string | Name of the tool invoked |
|
|
101
|
-
| `tool.input_hash` | string | SHA-256 hash of tool input (never log raw input) |
|
|
102
|
-
| `tool.output_status` | string | `success`, `error`, `timeout`, or `denied` |
|
|
103
|
-
| `tool.duration_ms` | float | Execution time in milliseconds |
|
|
104
|
-
| `agent.id` | string | ID of the invoking agent |
|
|
105
|
-
| `agent.name` | string | Human-readable agent name |
|
|
106
|
-
| `correlation.id` | string | Trace correlation ID |
|
|
107
|
-
| `timestamp` | string | ISO 8601 timestamp |
|
|
108
|
-
| `session.id` | string | Session identifier |
|
|
109
|
-
|
|
110
|
-
- Log tool invocations at `info` level, failures at `error` level with `error.type` and `error.message`.
|
|
111
|
-
- Aggregate tool call counts per agent per session for anomaly detection.
|
|
112
|
-
- Retain audit logs for a minimum of 90 days.
|
|
113
|
-
|
|
114
|
-
## Correlation IDs for Agent Workflows
|
|
115
|
-
|
|
116
|
-
- Use UUIDv4 with workflow-type prefix: `{workflow-type}-{uuid}` (e.g., `agent-run-550e8400-...`).
|
|
117
|
-
- Generate at the workflow entry point. Propagate to all sub-agents and tool calls.
|
|
118
|
-
- Every log entry, span, and metric must include `correlation.id`.
|
|
119
|
-
- Cross-process: propagate via `X-Correlation-ID` header alongside W3C Trace Context.
|
|
120
|
-
- Use OpenTelemetry `SpanLink` for cross-workflow references (e.g., agent run triggered by CI event).
|
|
121
|
-
|
|
122
|
-
```typescript
|
|
123
|
-
import { randomUUID } from 'node:crypto';
|
|
124
|
-
import { context, trace, SpanStatusCode } from '@opentelemetry/api';
|
|
125
|
-
|
|
126
|
-
function generateCorrelationId(workflowType: string): string {
|
|
127
|
-
return `${workflowType}-${randomUUID()}`;
|
|
128
|
-
}
|
|
129
|
-
|
|
130
|
-
async function runAgentWorkflow(task: string): Promise<void> {
|
|
131
|
-
const correlationId = generateCorrelationId('agent-run');
|
|
132
|
-
const tracer = trace.getTracer('agent-orchestrator');
|
|
133
|
-
const rootSpan = tracer.startSpan('agent.orchestrator.invoke', {
|
|
134
|
-
attributes: {
|
|
135
|
-
'correlation.id': correlationId,
|
|
136
|
-
'agent.name': 'orchestrator',
|
|
137
|
-
'agent.task': task,
|
|
138
|
-
},
|
|
139
|
-
});
|
|
140
|
-
try {
|
|
141
|
-
await context.with(trace.setSpan(context.active(), rootSpan), async () => {
|
|
142
|
-
await delegateToSubAgent('code_reviewer', {
|
|
143
|
-
correlationId,
|
|
144
|
-
parentSpanId: rootSpan.spanContext().spanId,
|
|
145
|
-
task: 'review changes',
|
|
146
|
-
});
|
|
147
|
-
});
|
|
148
|
-
} catch (err) {
|
|
149
|
-
rootSpan.setStatus({ code: SpanStatusCode.ERROR, message: (err as Error).message });
|
|
150
|
-
rootSpan.recordException(err as Error);
|
|
151
|
-
throw err;
|
|
152
|
-
} finally {
|
|
153
|
-
rootSpan.end();
|
|
154
|
-
}
|
|
155
|
-
}
|
|
156
|
-
```
|
|
14
|
+
The full content has been migrated to `hatch3r-observability-tracing`.
|