xtrm-tools 0.5.10 → 0.5.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +4 -1
- package/README.md +28 -30
- package/cli/dist/index.cjs +1509 -2722
- package/cli/dist/index.cjs.map +1 -1
- package/cli/package.json +1 -1
- package/config/instructions/agents-top.md +87 -23
- package/config/instructions/claude-top.md +101 -23
- package/config/pi/extensions/beads/index.ts +3 -1
- package/config/pi/extensions/session-flow/index.ts +26 -90
- package/config/pi/extensions/xtrm-loader/index.ts +39 -2
- package/hooks/README.md +0 -14
- package/hooks/beads-gate-messages.mjs +8 -22
- package/hooks/gitnexus/gitnexus-hook.cjs +1 -1
- package/hooks/hooks.json +25 -27
- package/hooks/quality-check-env.mjs +79 -0
- package/hooks/quality-check.cjs +6 -6
- package/hooks/statusline.mjs +115 -0
- package/hooks/using-xtrm-reminder.mjs +35 -0
- package/package.json +1 -1
- package/skills/sync-docs-workspace/iteration-1/benchmark.json +293 -0
- package/skills/sync-docs-workspace/iteration-1/benchmark.md +13 -0
- package/skills/sync-docs-workspace/iteration-1/eval-doc-audit/eval_metadata.json +27 -0
- package/skills/sync-docs-workspace/iteration-1/eval-doc-audit/with_skill/outputs/result.md +210 -0
- package/skills/sync-docs-workspace/iteration-1/eval-doc-audit/with_skill/run-1/grading.json +28 -0
- package/skills/sync-docs-workspace/iteration-1/eval-doc-audit/with_skill/run-1/timing.json +1 -0
- package/skills/sync-docs-workspace/iteration-1/eval-doc-audit/without_skill/outputs/result.md +101 -0
- package/skills/sync-docs-workspace/iteration-1/eval-doc-audit/without_skill/run-1/grading.json +28 -0
- package/skills/sync-docs-workspace/iteration-1/eval-doc-audit/without_skill/run-1/timing.json +5 -0
- package/skills/sync-docs-workspace/iteration-1/eval-doc-audit/without_skill/timing.json +5 -0
- package/skills/sync-docs-workspace/iteration-1/eval-fix-mode/eval_metadata.json +27 -0
- package/skills/sync-docs-workspace/iteration-1/eval-fix-mode/with_skill/outputs/result.md +198 -0
- package/skills/sync-docs-workspace/iteration-1/eval-fix-mode/with_skill/run-1/grading.json +28 -0
- package/skills/sync-docs-workspace/iteration-1/eval-fix-mode/with_skill/run-1/timing.json +1 -0
- package/skills/sync-docs-workspace/iteration-1/eval-fix-mode/without_skill/outputs/result.md +94 -0
- package/skills/sync-docs-workspace/iteration-1/eval-fix-mode/without_skill/run-1/grading.json +28 -0
- package/skills/sync-docs-workspace/iteration-1/eval-fix-mode/without_skill/run-1/timing.json +1 -0
- package/skills/sync-docs-workspace/iteration-1/eval-sprint-closeout/eval_metadata.json +27 -0
- package/skills/sync-docs-workspace/iteration-1/eval-sprint-closeout/with_skill/outputs/result.md +237 -0
- package/skills/sync-docs-workspace/iteration-1/eval-sprint-closeout/with_skill/run-1/grading.json +28 -0
- package/skills/sync-docs-workspace/iteration-1/eval-sprint-closeout/with_skill/run-1/timing.json +1 -0
- package/skills/sync-docs-workspace/iteration-1/eval-sprint-closeout/without_skill/outputs/result.md +134 -0
- package/skills/sync-docs-workspace/iteration-1/eval-sprint-closeout/without_skill/run-1/grading.json +28 -0
- package/skills/sync-docs-workspace/iteration-1/eval-sprint-closeout/without_skill/run-1/timing.json +1 -0
- package/skills/sync-docs-workspace/iteration-2/benchmark.json +297 -0
- package/skills/sync-docs-workspace/iteration-2/benchmark.md +13 -0
- package/skills/sync-docs-workspace/iteration-2/eval-doc-audit/eval_metadata.json +27 -0
- package/skills/sync-docs-workspace/iteration-2/eval-doc-audit/with_skill/outputs/result.md +137 -0
- package/skills/sync-docs-workspace/iteration-2/eval-doc-audit/with_skill/run-1/grading.json +92 -0
- package/skills/sync-docs-workspace/iteration-2/eval-doc-audit/with_skill/run-1/timing.json +1 -0
- package/skills/sync-docs-workspace/iteration-2/eval-doc-audit/without_skill/outputs/result.md +134 -0
- package/skills/sync-docs-workspace/iteration-2/eval-doc-audit/without_skill/run-1/grading.json +86 -0
- package/skills/sync-docs-workspace/iteration-2/eval-doc-audit/without_skill/run-1/timing.json +1 -0
- package/skills/sync-docs-workspace/iteration-2/eval-fix-mode/eval_metadata.json +27 -0
- package/skills/sync-docs-workspace/iteration-2/eval-fix-mode/with_skill/outputs/result.md +193 -0
- package/skills/sync-docs-workspace/iteration-2/eval-fix-mode/with_skill/run-1/grading.json +72 -0
- package/skills/sync-docs-workspace/iteration-2/eval-fix-mode/with_skill/run-1/timing.json +1 -0
- package/skills/sync-docs-workspace/iteration-2/eval-fix-mode/without_skill/outputs/result.md +211 -0
- package/skills/sync-docs-workspace/iteration-2/eval-fix-mode/without_skill/run-1/grading.json +91 -0
- package/skills/sync-docs-workspace/iteration-2/eval-fix-mode/without_skill/run-1/timing.json +5 -0
- package/skills/sync-docs-workspace/iteration-2/eval-sprint-closeout/eval_metadata.json +27 -0
- package/skills/sync-docs-workspace/iteration-2/eval-sprint-closeout/with_skill/outputs/result.md +182 -0
- package/skills/sync-docs-workspace/iteration-2/eval-sprint-closeout/with_skill/run-1/grading.json +95 -0
- package/skills/sync-docs-workspace/iteration-2/eval-sprint-closeout/with_skill/run-1/timing.json +1 -0
- package/skills/sync-docs-workspace/iteration-2/eval-sprint-closeout/without_skill/outputs/result.md +222 -0
- package/skills/sync-docs-workspace/iteration-2/eval-sprint-closeout/without_skill/run-1/grading.json +88 -0
- package/skills/sync-docs-workspace/iteration-2/eval-sprint-closeout/without_skill/run-1/timing.json +5 -0
- package/skills/sync-docs-workspace/iteration-3/benchmark.json +298 -0
- package/skills/sync-docs-workspace/iteration-3/benchmark.md +13 -0
- package/skills/sync-docs-workspace/iteration-3/eval-doc-audit/eval_metadata.json +27 -0
- package/skills/sync-docs-workspace/iteration-3/eval-doc-audit/with_skill/outputs/result.md +125 -0
- package/skills/sync-docs-workspace/iteration-3/eval-doc-audit/with_skill/run-1/grading.json +97 -0
- package/skills/sync-docs-workspace/iteration-3/eval-doc-audit/with_skill/run-1/timing.json +5 -0
- package/skills/sync-docs-workspace/iteration-3/eval-doc-audit/without_skill/outputs/result.md +144 -0
- package/skills/sync-docs-workspace/iteration-3/eval-doc-audit/without_skill/run-1/grading.json +78 -0
- package/skills/sync-docs-workspace/iteration-3/eval-doc-audit/without_skill/run-1/timing.json +5 -0
- package/skills/sync-docs-workspace/iteration-3/eval-fix-mode/eval_metadata.json +27 -0
- package/skills/sync-docs-workspace/iteration-3/eval-fix-mode/with_skill/outputs/result.md +104 -0
- package/skills/sync-docs-workspace/iteration-3/eval-fix-mode/with_skill/run-1/grading.json +91 -0
- package/skills/sync-docs-workspace/iteration-3/eval-fix-mode/with_skill/run-1/timing.json +5 -0
- package/skills/sync-docs-workspace/iteration-3/eval-fix-mode/without_skill/outputs/result.md +79 -0
- package/skills/sync-docs-workspace/iteration-3/eval-fix-mode/without_skill/run-1/grading.json +82 -0
- package/skills/sync-docs-workspace/iteration-3/eval-fix-mode/without_skill/run-1/timing.json +5 -0
- package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/eval_metadata.json +27 -0
- package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/outputs/phase1_context.json +302 -0
- package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/outputs/phase2_drift.txt +33 -0
- package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/outputs/phase3_analysis.json +114 -0
- package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/outputs/phase4_fix.txt +118 -0
- package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/outputs/phase5_validate.txt +38 -0
- package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/outputs/result.md +158 -0
- package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/run-1/grading.json +95 -0
- package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/run-1/timing.json +5 -0
- package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/without_skill/outputs/result.md +71 -0
- package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/without_skill/run-1/grading.json +90 -0
- package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/without_skill/run-1/timing.json +5 -0
- package/skills/using-xtrm/SKILL.md +84 -205
- package/config/pi/extensions/bg-process/index.ts +0 -230
- package/config/pi/extensions/bg-process/package.json +0 -16
- package/config/pi/extensions/minimal-mode/index.ts +0 -201
- package/config/pi/extensions/minimal-mode/package.json +0 -16
- package/config/pi/extensions/todo/index.ts +0 -299
- package/config/pi/extensions/todo/package.json +0 -16
- package/hooks/agent_context.py +0 -105
- package/hooks/branch-state.mjs +0 -39
- package/hooks/guard-rules.mjs +0 -118
- package/hooks/main-guard-post-push.mjs +0 -71
- package/hooks/main-guard.mjs +0 -119
package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/without_skill/run-1/grading.json
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
{
|
|
2
|
+
"expectations": [
|
|
3
|
+
{
|
|
4
|
+
"text": "Ran context_gatherer.py and reported bd closed issues or merged PRs with specific data",
|
|
5
|
+
"passed": false,
|
|
6
|
+
"evidence": "The agent never ran context_gatherer.py. It gathered context using raw git commands (git log --oneline --merges, git diff --stat 10d6433..HEAD). It did report specific merged PRs (#111, #110, #109) with descriptions, but the script was not used. The expectation requires the specific script to be invoked, not just the outcome data to be present."
|
|
7
|
+
},
|
|
8
|
+
{
|
|
9
|
+
"text": "Ran doc_structure_analyzer.py and cited its structured output (STALE, EXTRACTABLE, MISSING, etc.)",
|
|
10
|
+
"passed": false,
|
|
11
|
+
"evidence": "No mention of doc_structure_analyzer.py anywhere in the output. The structured output categories (STALE, EXTRACTABLE, MISSING) never appear. The agent assessed doc staleness manually by reading files and comparing with git history."
|
|
12
|
+
},
|
|
13
|
+
{
|
|
14
|
+
"text": "Detected the CHANGELOG version gap (package.json v2.4.0 vs CHANGELOG v2.0.0)",
|
|
15
|
+
"passed": false,
|
|
16
|
+
"evidence": "The output notes 'CHANGELOG.md (contains full history through v2.0.0)' and references the codebase being at v2.4.0, but the agent concluded CHANGELOG was 'accurate' and listed it under 'No Changes Needed'. It did not explicitly frame this as a version gap between package.json (v2.4.0) and CHANGELOG (v2.0.0), and it did not flag it as an issue requiring action. The gap was effectively missed because the agent treated the [Unreleased] section as sufficient coverage."
|
|
17
|
+
},
|
|
18
|
+
{
|
|
19
|
+
"text": "Named at least one concrete next step with a specific file or action",
|
|
20
|
+
"passed": true,
|
|
21
|
+
"evidence": "The Observations section states: 'The CHANGELOG [Unreleased] section is still empty \u2014 it should capture the post-v2.4.0 sprint work (global-first arch, guard-rules centralization, Pi drift checks, xtrm init project detection) before the next release.' This identifies a specific file (CHANGELOG.md), a specific section ([Unreleased]), and concrete content items to add."
|
|
22
|
+
}
|
|
23
|
+
],
|
|
24
|
+
"summary": {
|
|
25
|
+
"passed": 1,
|
|
26
|
+
"failed": 3,
|
|
27
|
+
"total": 4,
|
|
28
|
+
"pass_rate": 0.25
|
|
29
|
+
},
|
|
30
|
+
"execution_metrics": {
|
|
31
|
+
"tool_calls": {},
|
|
32
|
+
"total_tool_calls": 0,
|
|
33
|
+
"total_steps": 0,
|
|
34
|
+
"errors_encountered": 0,
|
|
35
|
+
"output_chars": 3172,
|
|
36
|
+
"transcript_chars": 0
|
|
37
|
+
},
|
|
38
|
+
"timing": {
|
|
39
|
+
"executor_duration_seconds": 217.1,
|
|
40
|
+
"grader_duration_seconds": 0.0,
|
|
41
|
+
"total_duration_seconds": 217.1
|
|
42
|
+
},
|
|
43
|
+
"claims": [
|
|
44
|
+
{
|
|
45
|
+
"claim": "3 PRs merged in the most recent sprint: #111, #110, #109",
|
|
46
|
+
"type": "factual",
|
|
47
|
+
"verified": true,
|
|
48
|
+
"evidence": "Consistent with git log output cited in the result and with the repo's commit history (PR #111 referenced in CLAUDE.md recent commits section)"
|
|
49
|
+
},
|
|
50
|
+
{
|
|
51
|
+
"claim": "CHANGELOG.md is accurate and no changes are needed to it",
|
|
52
|
+
"type": "quality",
|
|
53
|
+
"verified": false,
|
|
54
|
+
"evidence": "The agent says CHANGELOG 'contains full history through v2.0.0' and the codebase is at v2.4.0. This means v2.1.0 through v2.4.0 entries are missing from CHANGELOG \u2014 a significant gap that contradicts the 'accurate' verdict. The [Unreleased] section does not substitute for missing versioned entries."
|
|
55
|
+
},
|
|
56
|
+
{
|
|
57
|
+
"claim": "XTRM-GUIDE.md required no changes as it was updated by sprint commits",
|
|
58
|
+
"type": "quality",
|
|
59
|
+
"verified": false,
|
|
60
|
+
"evidence": "The claim is plausible given commit f8e37f9, but the agent did not run doc_structure_analyzer.py or any systematic staleness check against XTRM-GUIDE.md \u2014 it relied on reading the file and comparing manually. Cannot fully verify without the script output."
|
|
61
|
+
},
|
|
62
|
+
{
|
|
63
|
+
"claim": "README was 'about 1.5 versions behind HEAD'",
|
|
64
|
+
"type": "factual",
|
|
65
|
+
"verified": true,
|
|
66
|
+
"evidence": "README said v2.3.0 while codebase was at v2.4.0 with unreleased post-v2.4.0 work on top \u2014 the characterization is reasonable given the 8 changes fixed."
|
|
67
|
+
}
|
|
68
|
+
],
|
|
69
|
+
"user_notes_summary": {
|
|
70
|
+
"uncertainties": [],
|
|
71
|
+
"needs_review": [],
|
|
72
|
+
"workarounds": []
|
|
73
|
+
},
|
|
74
|
+
"eval_feedback": {
|
|
75
|
+
"suggestions": [
|
|
76
|
+
{
|
|
77
|
+
"assertion": "Ran context_gatherer.py and reported bd closed issues or merged PRs with specific data",
|
|
78
|
+
"reason": "This assertion conflates two things: running the specific script AND reporting specific PR data. An agent that skips the script but manually finds the same PR data would fail on process but produce similar outputs. The eval would be stronger if split: one assertion for script invocation (verifiable from transcript tool calls) and one for PR data quality (verifiable from output content)."
|
|
79
|
+
},
|
|
80
|
+
{
|
|
81
|
+
"assertion": "Detected the CHANGELOG version gap (package.json v2.4.0 vs CHANGELOG v2.0.0)",
|
|
82
|
+
"reason": "The expectation is well-targeted, but the bar should be higher: not just 'detected' but 'flagged as a problem requiring action'. The agent did notice CHANGELOG goes to v2.0.0 while the code is at v2.4.0, yet concluded it was accurate. An assertion that checks whether the gap was identified as a documentation deficiency (not just noted in passing) would be more discriminating."
|
|
83
|
+
},
|
|
84
|
+
{
|
|
85
|
+
"reason": "No assertion covers output quality for the README edits that were actually made \u2014 the primary work product of this run. The agent claims to have fixed 8 categories of README issues, but no expectation checks whether those changes are correct, complete, or even present in the file. This is the largest unguarded outcome."
|
|
86
|
+
}
|
|
87
|
+
],
|
|
88
|
+
"overall": "The evals focus on process steps (run script X, detect gap Y) but miss the primary output (README changes). The CHANGELOG gap assertion is good but needs tighter framing. The script-invocation assertions are fragile without transcript access to verify tool calls."
|
|
89
|
+
}
|
|
90
|
+
}
|
|
@@ -1,245 +1,124 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: using-xtrm
|
|
3
3
|
description: >
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
when the user asks how to work with the xtrm stack, what tools are available, or how any
|
|
10
|
-
xtrm workflow operates.
|
|
4
|
+
Behavioral operating manual for an xtrm-equipped Claude Code session.
|
|
5
|
+
Covers when to use which tool, how to handle questions and triggers,
|
|
6
|
+
workflow examples, and skill routing. Reference material (hook list,
|
|
7
|
+
gate rules, full bd commands, git workflow) lives in CLAUDE.md.
|
|
8
|
+
Injected automatically at session start via additionalSystemPrompt.
|
|
11
9
|
priority: high
|
|
12
10
|
---
|
|
13
11
|
|
|
14
|
-
#
|
|
12
|
+
# XTRM — When to Use What
|
|
15
13
|
|
|
16
|
-
|
|
17
|
-
|
|
14
|
+
> Gates, commands, and git workflow are in CLAUDE.md.
|
|
15
|
+
> This is the behavioral layer: triggers, patterns, examples.
|
|
18
16
|
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
## Stack at a Glance
|
|
17
|
+
## Session Start
|
|
22
18
|
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
| **CLI** | `xtrm install / status / finish / reset / help` — sync and closure tooling |
|
|
30
|
-
| **beads (bd)** | Git-backed issue tracker with session gate enforcement |
|
|
19
|
+
```bash
|
|
20
|
+
bd prime # load workflow context + active claims
|
|
21
|
+
bd memories <today's topic> # retrieve relevant past context
|
|
22
|
+
bd ready # find available work
|
|
23
|
+
bd update <id> --claim # claim before any edit
|
|
24
|
+
```
|
|
31
25
|
|
|
32
26
|
---
|
|
33
27
|
|
|
34
|
-
##
|
|
35
|
-
|
|
36
|
-
Before executing any non-trivial task, improve the prompt mentally using XML structure.
|
|
37
|
-
Apply this silently — the user sees your improved execution, not the meta-work.
|
|
38
|
-
|
|
39
|
-
### Prompt Classification
|
|
40
|
-
|
|
41
|
-
Scan the user's message for task type:
|
|
42
|
-
|
|
43
|
-
| Type | Keywords | Enhancement |
|
|
44
|
-
|---|---|---|
|
|
45
|
-
| **ANALYSIS** | analyze, investigate, research, explain, why | Add `<thinking>` block, structure `<outputs>` |
|
|
46
|
-
| **DEV** | implement, create, build, add, fix, feature | Add 1-2 `<example>` blocks, define `<constraints>` |
|
|
47
|
-
| **REFACTOR** | refactor, improve, optimize, clean, simplify | Add `<constraints>` (preserve behavior, tests pass) + `<current_state>` |
|
|
48
|
-
|
|
49
|
-
### XML Prompt Structure
|
|
50
|
-
|
|
51
|
-
```xml
|
|
52
|
-
<task_name>
|
|
53
|
-
<description>What needs to be done and why</description>
|
|
54
|
-
<parameters>Relevant context: files, symbols, constraints</parameters>
|
|
55
|
-
<instructions>
|
|
56
|
-
Step-by-step approach
|
|
57
|
-
</instructions>
|
|
58
|
-
<!-- ANALYSIS tasks: -->
|
|
59
|
-
<thinking>Work through hypotheses before concluding</thinking>
|
|
60
|
-
<outputs>Expected result format</outputs>
|
|
61
|
-
<!-- DEV tasks: -->
|
|
62
|
-
<example>Concrete pattern to follow</example>
|
|
63
|
-
<!-- REFACTOR tasks: -->
|
|
64
|
-
<constraints>Must not break X, tests must pass, preserve API surface</constraints>
|
|
65
|
-
</task_name>
|
|
66
|
-
```
|
|
28
|
+
## Trigger Patterns
|
|
67
29
|
|
|
68
|
-
|
|
69
|
-
|
|
30
|
+
| Situation | Action |
|
|
31
|
+
|-----------|--------|
|
|
32
|
+
| User prompt contains `?` | `bd memories <keywords>` before answering — check stored context first |
|
|
33
|
+
| "What was I working on?" | `bd list --status=in_progress` |
|
|
34
|
+
| Unfamiliar area of code | `gitnexus_query({query: "concept"})` before opening any file |
|
|
35
|
+
| About to edit a symbol | `gitnexus_impact({target: "name", direction: "upstream"})` |
|
|
36
|
+
| Before `git commit` | `gitnexus_detect_changes({scope: "staged"})` to verify scope |
|
|
37
|
+
| Reading code | `get_symbols_overview` → `find_symbol` — never read whole files |
|
|
38
|
+
| Task is tests | use `/test-planning` |
|
|
39
|
+
| Task is docs updates | use `/sync-docs` |
|
|
40
|
+
| Session end (issue closed) | Memory gate fires — evaluate `bd remember` for each closed issue |
|
|
70
41
|
|
|
71
42
|
---
|
|
72
43
|
|
|
73
|
-
##
|
|
44
|
+
## Handling `?` Prompts
|
|
74
45
|
|
|
75
|
-
|
|
76
|
-
You cannot edit files without a claim, and you cannot safely end a closure-in-progress session.
|
|
46
|
+
When the user's message contains a question, check stored context before answering:
|
|
77
47
|
|
|
78
48
|
```bash
|
|
79
|
-
#
|
|
80
|
-
bd
|
|
81
|
-
bd update <id> --claim
|
|
82
|
-
# hook auto-sets session claim + auto-creates worktree + writes .xtrm-session-state.json
|
|
83
|
-
|
|
84
|
-
# 2. Work in the claimed branch/worktree
|
|
85
|
-
|
|
86
|
-
# 3. Close issue when implementation is done
|
|
87
|
-
bd close <id>
|
|
88
|
-
|
|
89
|
-
# 4. Session close protocol (single command)
|
|
90
|
-
xtrm finish
|
|
91
|
-
# blocking: commit/push/pr-create/auto-merge poll/worktree cleanup
|
|
49
|
+
bd memories <keywords from question> # search project memory
|
|
50
|
+
bd recall <key> # retrieve specific memory if key is known
|
|
92
51
|
```
|
|
93
52
|
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
-
|
|
98
|
-
|
|
99
|
-
- If blocked on stop: resolve state then re-run `xtrm finish`
|
|
100
|
-
|
|
101
|
-
---
|
|
102
|
-
|
|
103
|
-
## Code Editing — Serena LSP Workflow
|
|
104
|
-
|
|
105
|
-
Always use semantic tools. Never read entire large files or use generic Edit unless forced.
|
|
106
|
-
|
|
107
|
-
```
|
|
108
|
-
get_symbols_overview(file) → map the file structure first
|
|
109
|
-
find_symbol(name, include_body=true) → read only what you need
|
|
110
|
-
find_referencing_symbols(name) → check callers before changing signatures
|
|
111
|
-
replace_symbol_body(name, body) → atomic symbol-level edit
|
|
112
|
-
insert_after_symbol / insert_before_symbol → add new code precisely
|
|
53
|
+
Example — user asks *"why does the quality gate run twice?"*:
|
|
54
|
+
```bash
|
|
55
|
+
bd memories "quality gate"
|
|
56
|
+
# → "quality-check.cjs and quality-check.py are separate hooks —
|
|
57
|
+
# JS/TS and Python each get their own PostToolUse pass"
|
|
113
58
|
```
|
|
114
59
|
|
|
115
|
-
|
|
116
|
-
```
|
|
117
|
-
|
|
60
|
+
If it's a code question, also run:
|
|
61
|
+
```bash
|
|
62
|
+
gitnexus_query({query: "<topic>"}) # find relevant execution flows
|
|
118
63
|
```
|
|
119
64
|
|
|
120
|
-
**Fallback**: Use `Edit` only for non-code files or when a symbol can't be located.
|
|
121
|
-
|
|
122
65
|
---
|
|
123
66
|
|
|
124
|
-
##
|
|
125
|
-
|
|
126
|
-
Before editing any function, class, or method — always run impact analysis.
|
|
67
|
+
## Workflow Examples
|
|
127
68
|
|
|
69
|
+
**Fixing a bug:**
|
|
128
70
|
```bash
|
|
129
|
-
#
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
# 2
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
#
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
# 4. Before committing — verify scope
|
|
139
|
-
gitnexus_detect_changes({scope: "staged"})
|
|
71
|
+
bd ready # find the issue
|
|
72
|
+
bd update bd-xyz --claim # claim it
|
|
73
|
+
gitnexus_impact({target: "parseComposeServices", direction: "upstream"})
|
|
74
|
+
# → 2 callers, LOW risk — safe to edit
|
|
75
|
+
get_symbols_overview("hooks/init.ts") # map file
|
|
76
|
+
find_symbol("parseComposeServices", include_body=True) # read just this
|
|
77
|
+
replace_symbol_body("parseComposeServices", newBody) # Serena edit
|
|
78
|
+
bd close bd-xyz --reason="Fix YAML parse edge case" # close + auto-commit
|
|
79
|
+
xt end # push, PR, merge, cleanup
|
|
140
80
|
```
|
|
141
81
|
|
|
142
|
-
**
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
## Quality Gates — Automatic on Every Edit
|
|
152
|
-
|
|
153
|
-
After each file edit, quality-gates hooks run automatically:
|
|
154
|
-
- **TypeScript**: ESLint + tsc type check
|
|
155
|
-
- **Python**: Ruff lint + mypy type check
|
|
156
|
-
|
|
157
|
-
You do not invoke these manually — they fire via PostToolUse hooks. If a gate fails, fix the
|
|
158
|
-
lint/type error before continuing. Do not suppress errors with `// eslint-disable` or `# type: ignore`
|
|
159
|
-
unless there is a genuine reason.
|
|
160
|
-
|
|
161
|
-
> **Global-first behavior**: quality-gates hooks are global; no per-project install is needed.
|
|
162
|
-
> Run `xtrm init` once per repository to bootstrap project data, then ensure the repo has
|
|
163
|
-
> `eslint.config.*` (TS) or `pyproject.toml` / `ruff.toml` (Python) configured so checks can run.
|
|
164
|
-
|
|
165
|
-
---
|
|
166
|
-
|
|
167
|
-
## Skill Routing — When to Use What
|
|
168
|
-
|
|
169
|
-
| Situation | Use |
|
|
170
|
-
|---|---|
|
|
171
|
-
| Short/vague user prompt | Apply XML structure silently (this skill) or `/prompt-improving` |
|
|
172
|
-
| Simple task (tests, docs, typo fix) | `/delegating` → cost-optimized agent |
|
|
173
|
-
| Complex task needing second opinion | `/orchestrate adversarial "task"` |
|
|
174
|
-
| Reading/editing code | `using-serena-lsp` (Serena MCP) |
|
|
175
|
-
| Understanding code architecture | `gitnexus-exploring` |
|
|
176
|
-
| Tracing a bug | `gitnexus-debugging` |
|
|
177
|
-
| Changing a function | `gitnexus-impact-analysis` first, then Serena edit |
|
|
178
|
-
| Safe rename/refactor | `gitnexus-refactoring` |
|
|
179
|
-
| Docker service project | `using-service-skills` → activate expert persona |
|
|
180
|
-
| Writing new feature | Write tests alongside, quality gates auto-run after |
|
|
181
|
-
| Maintaining docs | `/documenting` (Serena SSOT drift detection) |
|
|
182
|
-
| Building/improving a skill | `skill-creator` |
|
|
183
|
-
|
|
184
|
-
---
|
|
185
|
-
|
|
186
|
-
## Available Skills (Full Catalog)
|
|
187
|
-
|
|
188
|
-
**Workflow:**
|
|
189
|
-
`prompt-improving`, `delegating`, `orchestrating-agents`, `using-serena-lsp`, `documenting`,
|
|
190
|
-
`using-xtrm` (this skill), `skill-creator`, `find-skills`
|
|
191
|
-
|
|
192
|
-
**Code Intelligence:**
|
|
193
|
-
`gitnexus-exploring`, `gitnexus-debugging`, `gitnexus-impact-analysis`, `gitnexus-refactoring`
|
|
194
|
-
|
|
195
|
-
**Domain Experts:**
|
|
196
|
-
`senior-backend`, `senior-devops`, `senior-security`, `senior-data-scientist`,
|
|
197
|
-
`docker-expert`, `python-testing`, `clean-code`
|
|
198
|
-
|
|
199
|
-
**Integrations:**
|
|
200
|
-
`obsidian-cli`, `hook-development`, `claude-api`
|
|
82
|
+
**Exploring unfamiliar code:**
|
|
83
|
+
```bash
|
|
84
|
+
gitnexus_query({query: "session claim enforcement"})
|
|
85
|
+
# → beads-gate-core.mjs, resolveClaimAndWorkState, decideCommitGate
|
|
86
|
+
gitnexus_context({name: "resolveClaimAndWorkState"}) # callers + callees
|
|
87
|
+
get_symbols_overview("hooks/beads-gate-core.mjs") # map the file
|
|
88
|
+
find_symbol("resolveClaimAndWorkState", include_body=True) # read only this
|
|
89
|
+
```
|
|
201
90
|
|
|
202
|
-
**
|
|
203
|
-
|
|
204
|
-
|
|
91
|
+
**Persisting an insight:**
|
|
92
|
+
```bash
|
|
93
|
+
bd remember "quality-check runs twice: separate .cjs (JS) and .py (Python) hooks"
|
|
94
|
+
# retrievable next session:
|
|
95
|
+
bd memories "quality check"
|
|
96
|
+
bd recall "quality-check-runs-twice-..."
|
|
97
|
+
```
|
|
205
98
|
|
|
206
99
|
---
|
|
207
100
|
|
|
208
|
-
##
|
|
209
|
-
|
|
210
|
-
These hooks run automatically — you cannot disable them mid-session:
|
|
211
|
-
|
|
212
|
-
| Hook | Trigger | Effect |
|
|
213
|
-
|---|---|---|
|
|
214
|
-
| `main-guard.mjs` | PreToolUse (Edit/Write/Serena/Bash) | Blocks edits and unsafe Bash on protected branches |
|
|
215
|
-
| `beads-edit-gate.mjs` | PreToolUse (Edit/Write/Serena) | Blocks edits without active claim |
|
|
216
|
-
| `beads-commit-gate.mjs` | PreToolUse (Bash: git commit) | Blocks commit with unclosed claim |
|
|
217
|
-
| `beads-claim-sync.mjs` | PostToolUse (Bash claim command) | Syncs claim + creates worktree + writes session state |
|
|
218
|
-
| `beads-stop-gate.mjs` | Stop | Blocks stop for unresolved session-flow phases |
|
|
219
|
-
| `beads-memory-gate.mjs` | Stop | Prompts for persistent insights after closure |
|
|
220
|
-
| `beads-compact-save/restore.mjs` | PreCompact / SessionStart | Preserves claim + session state across compact |
|
|
221
|
-
| `serena-workflow-reminder.py` | SessionStart | Reminds semantic editing workflow |
|
|
222
|
-
| `quality-check.(cjs|py)` | PostToolUse (Edit/Write) | Runs lint + type checks automatically |
|
|
101
|
+
## Prompt Shaping (silent, before every non-trivial task)
|
|
223
102
|
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
103
|
+
| Task type | Apply |
|
|
104
|
+
|-----------|-------|
|
|
105
|
+
| `analyze / investigate / why` | `<thinking>` block + structured `<outputs>` |
|
|
106
|
+
| `implement / build / fix` | 1-2 `<example>` blocks + `<constraints>` |
|
|
107
|
+
| `refactor / simplify` | `<constraints>` (preserve behavior, tests pass) + `<current_state>` |
|
|
227
108
|
|
|
228
|
-
|
|
229
|
-
|---|---|---|
|
|
230
|
-
| `serena` | Semantic code reading/editing | Auto-detected; activate project per session |
|
|
231
|
-
| `gitnexus` | Knowledge graph, impact analysis | `npm install -g gitnexus` + `npx gitnexus analyze` per project |
|
|
232
|
-
| `context7` | Library documentation lookup | No setup needed (free stdio transport) |
|
|
233
|
-
| `deepwiki` | Technical docs for GitHub repos | No setup needed |
|
|
234
|
-
| `github-grep` | Code search across GitHub | No setup needed |
|
|
109
|
+
Vague prompt (under 8 words, no specifics)? Ask one clarifying question before proceeding.
|
|
235
110
|
|
|
236
111
|
---
|
|
237
112
|
|
|
238
|
-
##
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
113
|
+
## Skill Routing
|
|
114
|
+
|
|
115
|
+
| Need | Use |
|
|
116
|
+
|------|-----|
|
|
117
|
+
| Code read / edit | Serena — `get_symbols_overview` → `find_symbol` → `replace_symbol_body` |
|
|
118
|
+
| Blast radius before edit | `gitnexus-impact-analysis` |
|
|
119
|
+
| Navigate unfamiliar code | `gitnexus-exploring` |
|
|
120
|
+
| Trace a bug | `gitnexus-debugging` |
|
|
121
|
+
| Safe rename / refactor | `gitnexus-refactoring` |
|
|
122
|
+
| Docs maintenance | `sync-docs` |
|
|
123
|
+
| Docker service project | `using-service-skills` |
|
|
124
|
+
| Build / improve a skill | `skill-creator` |
|
|
@@ -1,230 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* oh-pi Background Process Extension
|
|
3
|
-
*
|
|
4
|
-
* 任何 bash 命令超时未完成时,自动送到后台执行。
|
|
5
|
-
* 进程完成后自动通过 sendMessage 通知 LLM,无需轮询。
|
|
6
|
-
* 提供 bg_status 工具让 LLM 查看/停止后台进程。
|
|
7
|
-
*/
|
|
8
|
-
import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
|
|
9
|
-
import { Type } from "@sinclair/typebox";
|
|
10
|
-
import { StringEnum } from "@mariozechner/pi-ai";
|
|
11
|
-
import { spawn, execSync } from "node:child_process";
|
|
12
|
-
import { writeFileSync, readFileSync, appendFileSync, existsSync } from "node:fs";
|
|
13
|
-
|
|
14
|
-
/** 超时阈值(毫秒),超过此时间自动后台化 */
|
|
15
|
-
const BG_TIMEOUT_MS = 10_000;
|
|
16
|
-
|
|
17
|
-
interface BgProcess {
|
|
18
|
-
pid: number;
|
|
19
|
-
command: string;
|
|
20
|
-
logFile: string;
|
|
21
|
-
startedAt: number;
|
|
22
|
-
finished: boolean;
|
|
23
|
-
exitCode: number | null;
|
|
24
|
-
}
|
|
25
|
-
|
|
26
|
-
/**
 * Registers the background-process extension on the given pi instance.
 *
 * - Overrides the `bash` tool: a command that is still running when its
 *   timeout (default `BG_TIMEOUT_MS`) expires is moved to the background
 *   instead of blocking the turn, and the caller receives the PID and log path.
 * - When a backgrounded command exits, a follow-up message is sent through
 *   `pi.sendMessage`, so the model does not have to poll.
 * - Registers the `bg_status` tool to list, read the log of, or stop
 *   backgrounded commands.
 * - On `session_shutdown`, sends SIGTERM to every backgrounded command that
 *   has not finished yet.
 *
 * @param pi - Extension API used to register tools, send messages and subscribe to events.
 */
export default function (pi: ExtensionAPI) {
  /** Backgrounded commands, keyed by the PID of their `bash` process. */
  const bgProcesses = new Map<number, BgProcess>();

  // Output limits advertised in the `bash` tool description.
  const MAX_OUTPUT_LINES = 2000;
  const MAX_OUTPUT_BYTES = 50 * 1024;
  // Trailing characters of output included in the completion notification.
  const NOTIFY_TAIL_CHARS = 3000;
  // Largest delay setTimeout accepts; larger values overflow and fire almost immediately.
  const MAX_TIMER_MS = 2_147_483_647;

  /** Keeps only the tail of `text` so it fits within the advertised line and byte limits. */
  const truncateOutput = (text: string): string => {
    let kept = text;
    let dropped = false;

    const lines = kept.split("\n");
    if (lines.length > MAX_OUTPUT_LINES) {
      kept = lines.slice(-MAX_OUTPUT_LINES).join("\n");
      dropped = true;
    }

    const bytes = Buffer.from(kept, "utf8");
    if (bytes.length > MAX_OUTPUT_BYTES) {
      let start = bytes.length - MAX_OUTPUT_BYTES;
      // Skip UTF-8 continuation bytes so the cut never lands inside a character.
      while (start < bytes.length && (bytes.readUInt8(start) & 0xc0) === 0x80) start++;
      kept = bytes.subarray(start).toString("utf8");
      dropped = true;
    }

    return dropped ? `[...truncated]\n${kept}` : kept;
  };

  // Override the built-in bash tool.
  pi.registerTool({
    name: "bash",
    label: "Bash",
    description: `Execute a bash command. Output is truncated to 2000 lines or 50KB. If a command runs longer than ${BG_TIMEOUT_MS / 1000}s, it is automatically backgrounded and you get the PID + log file path. Use the bg_status tool to check on backgrounded processes.`,
    parameters: Type.Object({
      command: Type.String({ description: "Bash command to execute" }),
      timeout: Type.Optional(Type.Number({ description: "Timeout in seconds (optional)" })),
    }),
    async execute(_toolCallId, params, signal) {
      const { command } = params;
      // A missing, zero, negative or non-finite timeout falls back to the default.
      const userTimeout =
        typeof params.timeout === "number" && Number.isFinite(params.timeout) && params.timeout > 0
          ? Math.min(params.timeout * 1000, MAX_TIMER_MS)
          : undefined;
      const effectiveTimeout = userTimeout ?? BG_TIMEOUT_MS;

      return new Promise((resolve) => {
        let stdout = "";
        let stderr = "";
        let settled = false;
        let timer: ReturnType<typeof setTimeout> | undefined;

        // Populated once the command has been moved to the background.
        let bgLogFile: string | undefined;
        let bgTail = "";
        let bgTotalChars = 0;

        const child = spawn("bash", ["-c", command], {
          cwd: process.cwd(),
          env: { ...process.env },
          stdio: ["ignore", "pipe", "pipe"],
        });

        /** Resolves the tool call exactly once and releases the timer and abort listener. */
        const settle = (result: Parameters<typeof resolve>[0]): void => {
          settled = true;
          clearTimeout(timer);
          signal?.removeEventListener("abort", onAbort);
          resolve(result);
        };

        /** Cancels a command that is still in the foreground. */
        const onAbort = (): void => {
          if (settled) return;
          try {
            child.kill();
          } catch {
            // The child may already be gone; the call is cancelled either way.
          }
          settle({
            content: [{ type: "text", text: "Command cancelled." }],
            details: {},
          });
        };

        /** Routes one decoded output chunk to the foreground buffers or to the background log. */
        const capture = (stream: "stdout" | "stderr", chunk: string): void => {
          if (bgLogFile === undefined) {
            if (stream === "stdout") stdout += chunk;
            else stderr += chunk;
            return;
          }
          // Once backgrounded, keep only a bounded tail in memory; the log file holds the rest.
          bgTotalChars += chunk.length;
          bgTail = (bgTail + chunk).slice(-NOTIFY_TAIL_CHARS);
          try {
            appendFileSync(bgLogFile, chunk);
          } catch {
            // Logging is best effort; the command keeps running.
          }
        };

        // Let the streams decode UTF-8 so multi-byte characters are never split across chunks.
        child.stdout?.setEncoding("utf8");
        child.stderr?.setEncoding("utf8");
        child.stdout?.on("data", (chunk: string) => capture("stdout", chunk));
        child.stderr?.on("data", (chunk: string) => capture("stderr", chunk));

        // Timeout: keep the pipes open and hand the command over to background tracking.
        timer = setTimeout(() => {
          if (settled) return;

          const pid = child.pid;
          if (pid === undefined) {
            // No PID means the process never started; there is nothing to background.
            settle({
              content: [{ type: "text", text: "Error: failed to start command" }],
              details: {},
              isError: true,
            });
            return;
          }

          child.unref();

          // The PID keeps the name unique when several commands are backgrounded in the same millisecond.
          const logFile = `/tmp/oh-pi-bg-${Date.now()}-${pid}.log`;
          const soFar = stdout + stderr;

          // Seed the log with the output collected so far.
          try {
            writeFileSync(logFile, soFar);
          } catch {
            // A failed write must not escape the timer callback; later appends may still succeed.
          }

          bgLogFile = logFile;
          bgTail = soFar.slice(-NOTIFY_TAIL_CHARS);
          bgTotalChars = soFar.length;
          stdout = "";
          stderr = "";

          const proc: BgProcess = { pid, command, logFile, startedAt: Date.now(), finished: false, exitCode: null };
          bgProcesses.set(pid, proc);

          // Notify the model automatically once the command exits.
          child.on("close", (code) => {
            proc.finished = true;
            proc.exitCode = code;
            const truncated = bgTotalChars > NOTIFY_TAIL_CHARS ? "[...truncated]\n" + bgTail : bgTail;

            pi.sendMessage({
              content: `[BG_PROCESS_DONE] PID ${pid} finished (exit ${code ?? "?"})\nCommand: ${command}\n\nOutput (last ${NOTIFY_TAIL_CHARS} chars):\n${truncated}`,
              display: true,
              triggerTurn: true,
              deliverAs: "followUp",
            });
          });

          const preview = soFar.slice(0, 500);
          const text = `Command still running after ${effectiveTimeout / 1000}s, moved to background.\nPID: ${pid}\nLog: ${logFile}\nStop: kill ${pid}\n\nOutput so far:\n${preview}\n\n⏳ You will be notified automatically when it finishes. No need to poll.`;

          settle({
            content: [{ type: "text", text }],
            details: {},
          });
        }, effectiveTimeout);

        // Normal completion before the timeout.
        child.on("close", (code) => {
          if (settled) return;

          const output = truncateOutput((stdout + stderr).trim());
          const exitInfo = code !== 0 ? `\n[Exit code: ${code}]` : "";

          settle({
            content: [{ type: "text", text: output + exitInfo }],
            details: {},
          });
        });

        child.on("error", (err) => {
          if (settled) return;

          settle({
            content: [{ type: "text", text: `Error: ${err.message}` }],
            details: {},
            isError: true,
          });
        });

        // Abort handling; a signal that is already aborted never fires its event.
        if (signal) {
          if (signal.aborted) onAbort();
          else signal.addEventListener("abort", onAbort, { once: true });
        }
      });
    },
  });

  // bg_status tool: inspect and manage backgrounded commands.
  pi.registerTool({
    name: "bg_status",
    label: "Background Process Status",
    description: "Check status, view output, or stop background processes that were auto-backgrounded.",
    parameters: Type.Object({
      action: StringEnum(["list", "log", "stop"] as const, { description: "list=show all, log=view output, stop=kill process" }),
      pid: Type.Optional(Type.Number({ description: "PID of the process (required for log/stop)" })),
    }),
    async execute(_toolCallId, params) {
      const { action, pid } = params;

      if (action === "list") {
        if (bgProcesses.size === 0) {
          return { content: [{ type: "text", text: "No background processes." }], details: {} };
        }
        const lines = [...bgProcesses.values()].map((p) => {
          const status = p.finished ? `⚪ stopped (exit ${p.exitCode ?? "?"})` : (isAlive(p.pid) ? "🟢 running" : "⚪ stopped");
          return `PID: ${p.pid} | ${status} | Log: ${p.logFile}\n  Cmd: ${p.command}`;
        });
        return { content: [{ type: "text", text: lines.join("\n\n") }], details: {} };
      }

      if (!pid) {
        return { content: [{ type: "text", text: "Error: pid is required for log/stop" }], details: {}, isError: true };
      }

      const proc = bgProcesses.get(pid);

      if (action === "log") {
        const logFile = proc?.logFile;
        if (logFile && existsSync(logFile)) {
          try {
            const content = readFileSync(logFile, "utf-8");
            const tail = content.slice(-5000);
            const truncated = content.length > 5000 ? `[...truncated, showing last 5000 chars]\n${tail}` : tail;
            return { content: [{ type: "text", text: truncated || "(empty)" }], details: {} };
          } catch (e: unknown) {
            const reason = e instanceof Error ? e.message : String(e);
            return { content: [{ type: "text", text: `Error reading log: ${reason}` }], details: {}, isError: true };
          }
        }
        return { content: [{ type: "text", text: "No log available for this PID." }], details: {} };
      }

      if (action === "stop") {
        bgProcesses.delete(pid);

        // Only signal PIDs this extension backgrounded and that are still running:
        // an untracked PID is not ours to kill, and a finished one may have been reused.
        if (!proc || proc.finished) {
          return { content: [{ type: "text", text: `Process ${pid} not found (already stopped?).` }], details: {} };
        }

        try {
          process.kill(pid, "SIGTERM");
          return { content: [{ type: "text", text: `Process ${pid} terminated.` }], details: {} };
        } catch {
          return { content: [{ type: "text", text: `Process ${pid} not found (already stopped?).` }], details: {} };
        }
      }

      return { content: [{ type: "text", text: `Unknown action: ${action}` }], details: {}, isError: true };
    },
  });

  // Cleanup: terminate every backgrounded command that is still running when the session ends.
  pi.on("session_shutdown", async () => {
    for (const [pid, proc] of bgProcesses) {
      if (!proc.finished) {
        try {
          process.kill(pid, "SIGTERM");
        } catch {
          // The process exited on its own; nothing left to stop.
        }
      }
    }
    bgProcesses.clear();
  });
}
|
|
227
|
-
|
|
228
|
-
/**
 * Reports whether a process with the given PID currently exists.
 *
 * Probes with signal 0, which runs the existence and permission checks
 * without delivering a signal.
 *
 * @param pid - Process ID to probe.
 * @returns `true` if the process exists, including when it exists but this
 *   process is not permitted to signal it; `false` if it does not exist or
 *   `pid` is not a positive integer.
 */
function isAlive(pid: number): boolean {
  // Zero and negative values address process groups, not a single process.
  if (!Number.isInteger(pid) || pid <= 0) return false;
  try {
    process.kill(pid, 0);
    return true;
  } catch (err: unknown) {
    // EPERM means the target exists but belongs to someone else; any other failure means it is gone.
    return typeof err === "object" && err !== null && (err as { code?: unknown }).code === "EPERM";
  }
}
|