xtrm-tools 0.5.10 → 0.5.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106) hide show
  1. package/CHANGELOG.md +4 -1
  2. package/README.md +28 -30
  3. package/cli/dist/index.cjs +1509 -2722
  4. package/cli/dist/index.cjs.map +1 -1
  5. package/cli/package.json +1 -1
  6. package/config/instructions/agents-top.md +87 -23
  7. package/config/instructions/claude-top.md +101 -23
  8. package/config/pi/extensions/beads/index.ts +3 -1
  9. package/config/pi/extensions/session-flow/index.ts +26 -90
  10. package/config/pi/extensions/xtrm-loader/index.ts +39 -2
  11. package/hooks/README.md +0 -14
  12. package/hooks/beads-gate-messages.mjs +8 -22
  13. package/hooks/gitnexus/gitnexus-hook.cjs +1 -1
  14. package/hooks/hooks.json +25 -27
  15. package/hooks/quality-check-env.mjs +79 -0
  16. package/hooks/quality-check.cjs +6 -6
  17. package/hooks/statusline.mjs +115 -0
  18. package/hooks/using-xtrm-reminder.mjs +35 -0
  19. package/package.json +1 -1
  20. package/skills/sync-docs-workspace/iteration-1/benchmark.json +293 -0
  21. package/skills/sync-docs-workspace/iteration-1/benchmark.md +13 -0
  22. package/skills/sync-docs-workspace/iteration-1/eval-doc-audit/eval_metadata.json +27 -0
  23. package/skills/sync-docs-workspace/iteration-1/eval-doc-audit/with_skill/outputs/result.md +210 -0
  24. package/skills/sync-docs-workspace/iteration-1/eval-doc-audit/with_skill/run-1/grading.json +28 -0
  25. package/skills/sync-docs-workspace/iteration-1/eval-doc-audit/with_skill/run-1/timing.json +1 -0
  26. package/skills/sync-docs-workspace/iteration-1/eval-doc-audit/without_skill/outputs/result.md +101 -0
  27. package/skills/sync-docs-workspace/iteration-1/eval-doc-audit/without_skill/run-1/grading.json +28 -0
  28. package/skills/sync-docs-workspace/iteration-1/eval-doc-audit/without_skill/run-1/timing.json +5 -0
  29. package/skills/sync-docs-workspace/iteration-1/eval-doc-audit/without_skill/timing.json +5 -0
  30. package/skills/sync-docs-workspace/iteration-1/eval-fix-mode/eval_metadata.json +27 -0
  31. package/skills/sync-docs-workspace/iteration-1/eval-fix-mode/with_skill/outputs/result.md +198 -0
  32. package/skills/sync-docs-workspace/iteration-1/eval-fix-mode/with_skill/run-1/grading.json +28 -0
  33. package/skills/sync-docs-workspace/iteration-1/eval-fix-mode/with_skill/run-1/timing.json +1 -0
  34. package/skills/sync-docs-workspace/iteration-1/eval-fix-mode/without_skill/outputs/result.md +94 -0
  35. package/skills/sync-docs-workspace/iteration-1/eval-fix-mode/without_skill/run-1/grading.json +28 -0
  36. package/skills/sync-docs-workspace/iteration-1/eval-fix-mode/without_skill/run-1/timing.json +1 -0
  37. package/skills/sync-docs-workspace/iteration-1/eval-sprint-closeout/eval_metadata.json +27 -0
  38. package/skills/sync-docs-workspace/iteration-1/eval-sprint-closeout/with_skill/outputs/result.md +237 -0
  39. package/skills/sync-docs-workspace/iteration-1/eval-sprint-closeout/with_skill/run-1/grading.json +28 -0
  40. package/skills/sync-docs-workspace/iteration-1/eval-sprint-closeout/with_skill/run-1/timing.json +1 -0
  41. package/skills/sync-docs-workspace/iteration-1/eval-sprint-closeout/without_skill/outputs/result.md +134 -0
  42. package/skills/sync-docs-workspace/iteration-1/eval-sprint-closeout/without_skill/run-1/grading.json +28 -0
  43. package/skills/sync-docs-workspace/iteration-1/eval-sprint-closeout/without_skill/run-1/timing.json +1 -0
  44. package/skills/sync-docs-workspace/iteration-2/benchmark.json +297 -0
  45. package/skills/sync-docs-workspace/iteration-2/benchmark.md +13 -0
  46. package/skills/sync-docs-workspace/iteration-2/eval-doc-audit/eval_metadata.json +27 -0
  47. package/skills/sync-docs-workspace/iteration-2/eval-doc-audit/with_skill/outputs/result.md +137 -0
  48. package/skills/sync-docs-workspace/iteration-2/eval-doc-audit/with_skill/run-1/grading.json +92 -0
  49. package/skills/sync-docs-workspace/iteration-2/eval-doc-audit/with_skill/run-1/timing.json +1 -0
  50. package/skills/sync-docs-workspace/iteration-2/eval-doc-audit/without_skill/outputs/result.md +134 -0
  51. package/skills/sync-docs-workspace/iteration-2/eval-doc-audit/without_skill/run-1/grading.json +86 -0
  52. package/skills/sync-docs-workspace/iteration-2/eval-doc-audit/without_skill/run-1/timing.json +1 -0
  53. package/skills/sync-docs-workspace/iteration-2/eval-fix-mode/eval_metadata.json +27 -0
  54. package/skills/sync-docs-workspace/iteration-2/eval-fix-mode/with_skill/outputs/result.md +193 -0
  55. package/skills/sync-docs-workspace/iteration-2/eval-fix-mode/with_skill/run-1/grading.json +72 -0
  56. package/skills/sync-docs-workspace/iteration-2/eval-fix-mode/with_skill/run-1/timing.json +1 -0
  57. package/skills/sync-docs-workspace/iteration-2/eval-fix-mode/without_skill/outputs/result.md +211 -0
  58. package/skills/sync-docs-workspace/iteration-2/eval-fix-mode/without_skill/run-1/grading.json +91 -0
  59. package/skills/sync-docs-workspace/iteration-2/eval-fix-mode/without_skill/run-1/timing.json +5 -0
  60. package/skills/sync-docs-workspace/iteration-2/eval-sprint-closeout/eval_metadata.json +27 -0
  61. package/skills/sync-docs-workspace/iteration-2/eval-sprint-closeout/with_skill/outputs/result.md +182 -0
  62. package/skills/sync-docs-workspace/iteration-2/eval-sprint-closeout/with_skill/run-1/grading.json +95 -0
  63. package/skills/sync-docs-workspace/iteration-2/eval-sprint-closeout/with_skill/run-1/timing.json +1 -0
  64. package/skills/sync-docs-workspace/iteration-2/eval-sprint-closeout/without_skill/outputs/result.md +222 -0
  65. package/skills/sync-docs-workspace/iteration-2/eval-sprint-closeout/without_skill/run-1/grading.json +88 -0
  66. package/skills/sync-docs-workspace/iteration-2/eval-sprint-closeout/without_skill/run-1/timing.json +5 -0
  67. package/skills/sync-docs-workspace/iteration-3/benchmark.json +298 -0
  68. package/skills/sync-docs-workspace/iteration-3/benchmark.md +13 -0
  69. package/skills/sync-docs-workspace/iteration-3/eval-doc-audit/eval_metadata.json +27 -0
  70. package/skills/sync-docs-workspace/iteration-3/eval-doc-audit/with_skill/outputs/result.md +125 -0
  71. package/skills/sync-docs-workspace/iteration-3/eval-doc-audit/with_skill/run-1/grading.json +97 -0
  72. package/skills/sync-docs-workspace/iteration-3/eval-doc-audit/with_skill/run-1/timing.json +5 -0
  73. package/skills/sync-docs-workspace/iteration-3/eval-doc-audit/without_skill/outputs/result.md +144 -0
  74. package/skills/sync-docs-workspace/iteration-3/eval-doc-audit/without_skill/run-1/grading.json +78 -0
  75. package/skills/sync-docs-workspace/iteration-3/eval-doc-audit/without_skill/run-1/timing.json +5 -0
  76. package/skills/sync-docs-workspace/iteration-3/eval-fix-mode/eval_metadata.json +27 -0
  77. package/skills/sync-docs-workspace/iteration-3/eval-fix-mode/with_skill/outputs/result.md +104 -0
  78. package/skills/sync-docs-workspace/iteration-3/eval-fix-mode/with_skill/run-1/grading.json +91 -0
  79. package/skills/sync-docs-workspace/iteration-3/eval-fix-mode/with_skill/run-1/timing.json +5 -0
  80. package/skills/sync-docs-workspace/iteration-3/eval-fix-mode/without_skill/outputs/result.md +79 -0
  81. package/skills/sync-docs-workspace/iteration-3/eval-fix-mode/without_skill/run-1/grading.json +82 -0
  82. package/skills/sync-docs-workspace/iteration-3/eval-fix-mode/without_skill/run-1/timing.json +5 -0
  83. package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/eval_metadata.json +27 -0
  84. package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/outputs/phase1_context.json +302 -0
  85. package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/outputs/phase2_drift.txt +33 -0
  86. package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/outputs/phase3_analysis.json +114 -0
  87. package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/outputs/phase4_fix.txt +118 -0
  88. package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/outputs/phase5_validate.txt +38 -0
  89. package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/outputs/result.md +158 -0
  90. package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/run-1/grading.json +95 -0
  91. package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/run-1/timing.json +5 -0
  92. package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/without_skill/outputs/result.md +71 -0
  93. package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/without_skill/run-1/grading.json +90 -0
  94. package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/without_skill/run-1/timing.json +5 -0
  95. package/skills/using-xtrm/SKILL.md +84 -205
  96. package/config/pi/extensions/bg-process/index.ts +0 -230
  97. package/config/pi/extensions/bg-process/package.json +0 -16
  98. package/config/pi/extensions/minimal-mode/index.ts +0 -201
  99. package/config/pi/extensions/minimal-mode/package.json +0 -16
  100. package/config/pi/extensions/todo/index.ts +0 -299
  101. package/config/pi/extensions/todo/package.json +0 -16
  102. package/hooks/agent_context.py +0 -105
  103. package/hooks/branch-state.mjs +0 -39
  104. package/hooks/guard-rules.mjs +0 -118
  105. package/hooks/main-guard-post-push.mjs +0 -71
  106. package/hooks/main-guard.mjs +0 -119
@@ -0,0 +1,90 @@
1
+ {
2
+ "expectations": [
3
+ {
4
+ "text": "Ran context_gatherer.py and reported bd closed issues or merged PRs with specific data",
5
+ "passed": false,
6
+ "evidence": "The agent never ran context_gatherer.py. It gathered context using raw git commands (git log --oneline --merges, git diff --stat 10d6433..HEAD). It did report specific merged PRs (#111, #110, #109) with descriptions, but the script was not used. The expectation requires the specific script to be invoked, not just the outcome data to be present."
7
+ },
8
+ {
9
+ "text": "Ran doc_structure_analyzer.py and cited its structured output (STALE, EXTRACTABLE, MISSING, etc.)",
10
+ "passed": false,
11
+ "evidence": "No mention of doc_structure_analyzer.py anywhere in the output. The structured output categories (STALE, EXTRACTABLE, MISSING) never appear. The agent assessed doc staleness manually by reading files and comparing with git history."
12
+ },
13
+ {
14
+ "text": "Detected the CHANGELOG version gap (package.json v2.4.0 vs CHANGELOG v2.0.0)",
15
+ "passed": false,
16
+ "evidence": "The output notes 'CHANGELOG.md (contains full history through v2.0.0)' and references the codebase being at v2.4.0, but the agent concluded CHANGELOG was 'accurate' and listed it under 'No Changes Needed'. It did not explicitly frame this as a version gap between package.json (v2.4.0) and CHANGELOG (v2.0.0), and it did not flag it as an issue requiring action. The gap was effectively missed because the agent treated the [Unreleased] section as sufficient coverage."
17
+ },
18
+ {
19
+ "text": "Named at least one concrete next step with a specific file or action",
20
+ "passed": true,
21
+ "evidence": "The Observations section states: 'The CHANGELOG [Unreleased] section is still empty \u2014 it should capture the post-v2.4.0 sprint work (global-first arch, guard-rules centralization, Pi drift checks, xtrm init project detection) before the next release.' This identifies a specific file (CHANGELOG.md), a specific section ([Unreleased]), and concrete content items to add."
22
+ }
23
+ ],
24
+ "summary": {
25
+ "passed": 1,
26
+ "failed": 3,
27
+ "total": 4,
28
+ "pass_rate": 0.25
29
+ },
30
+ "execution_metrics": {
31
+ "tool_calls": {},
32
+ "total_tool_calls": 0,
33
+ "total_steps": 0,
34
+ "errors_encountered": 0,
35
+ "output_chars": 3172,
36
+ "transcript_chars": 0
37
+ },
38
+ "timing": {
39
+ "executor_duration_seconds": 217.1,
40
+ "grader_duration_seconds": 0.0,
41
+ "total_duration_seconds": 217.1
42
+ },
43
+ "claims": [
44
+ {
45
+ "claim": "3 PRs merged in the most recent sprint: #111, #110, #109",
46
+ "type": "factual",
47
+ "verified": true,
48
+ "evidence": "Consistent with git log output cited in the result and with the repo's commit history (PR #111 referenced in CLAUDE.md recent commits section)"
49
+ },
50
+ {
51
+ "claim": "CHANGELOG.md is accurate and no changes are needed to it",
52
+ "type": "quality",
53
+ "verified": false,
54
+ "evidence": "The agent says CHANGELOG 'contains full history through v2.0.0' and the codebase is at v2.4.0. This means v2.1.0 through v2.4.0 entries are missing from CHANGELOG \u2014 a significant gap that contradicts the 'accurate' verdict. The [Unreleased] section does not substitute for missing versioned entries."
55
+ },
56
+ {
57
+ "claim": "XTRM-GUIDE.md required no changes as it was updated by sprint commits",
58
+ "type": "quality",
59
+ "verified": false,
60
+ "evidence": "The claim is plausible given commit f8e37f9, but the agent did not run doc_structure_analyzer.py or any systematic staleness check against XTRM-GUIDE.md \u2014 it relied on reading the file and comparing manually. Cannot fully verify without the script output."
61
+ },
62
+ {
63
+ "claim": "README was 'about 1.5 versions behind HEAD'",
64
+ "type": "factual",
65
+ "verified": true,
66
+ "evidence": "README said v2.3.0 while codebase was at v2.4.0 with unreleased post-v2.4.0 work on top \u2014 the characterization is reasonable given the 8 changes fixed."
67
+ }
68
+ ],
69
+ "user_notes_summary": {
70
+ "uncertainties": [],
71
+ "needs_review": [],
72
+ "workarounds": []
73
+ },
74
+ "eval_feedback": {
75
+ "suggestions": [
76
+ {
77
+ "assertion": "Ran context_gatherer.py and reported bd closed issues or merged PRs with specific data",
78
+ "reason": "This assertion conflates two things: running the specific script AND reporting specific PR data. An agent that skips the script but manually finds the same PR data would fail on process but produce similar outputs. The eval would be stronger if split: one assertion for script invocation (verifiable from transcript tool calls) and one for PR data quality (verifiable from output content)."
79
+ },
80
+ {
81
+ "assertion": "Detected the CHANGELOG version gap (package.json v2.4.0 vs CHANGELOG v2.0.0)",
82
+ "reason": "The expectation is well-targeted, but the bar should be higher: not just 'detected' but 'flagged as a problem requiring action'. The agent did notice CHANGELOG goes to v2.0.0 while the code is at v2.4.0, yet concluded it was accurate. An assertion that checks whether the gap was identified as a documentation deficiency (not just noted in passing) would be more discriminating."
83
+ },
84
+ {
85
+ "reason": "No assertion covers output quality for the README edits that were actually made \u2014 the primary work product of this run. The agent claims to have fixed 8 categories of README issues, but no expectation checks whether those changes are correct, complete, or even present in the file. This is the largest unguarded outcome."
86
+ }
87
+ ],
88
+ "overall": "The evals focus on process steps (run script X, detect gap Y) but miss the primary output (README changes). The CHANGELOG gap assertion is good but needs tighter framing. The script-invocation assertions are fragile without transcript access to verify tool calls."
89
+ }
90
+ }
@@ -0,0 +1,5 @@
1
+ {
2
+ "total_tokens": 61815,
3
+ "duration_ms": 217061,
4
+ "total_duration_seconds": 217.1
5
+ }
@@ -1,245 +1,124 @@
1
1
  ---
2
2
  name: using-xtrm
3
3
  description: >
4
- Operating manual for an xtrm-equipped Claude Code session. Activates automatically at
5
- session start to orient the agent on how to work within the xtrm stack: when to apply
6
- prompt improvement, how the beads issue-tracking gate works, which hooks enforce workflows,
7
- and how to compose the full toolset (gitnexus, Serena, quality gates, delegation).
8
- Use this skill whenever a new session begins in an xtrm-tools-installed environment, or
9
- when the user asks how to work with the xtrm stack, what tools are available, or how any
10
- xtrm workflow operates.
4
+ Behavioral operating manual for an xtrm-equipped Claude Code session.
5
+ Covers when to use which tool, how to handle questions and triggers,
6
+ workflow examples, and skill routing. Reference material (hook list,
7
+ gate rules, full bd commands, git workflow) lives in CLAUDE.md.
8
+ Injected automatically at session start via additionalSystemPrompt.
11
9
  priority: high
12
10
  ---
13
11
 
14
- # Using xtrm Session Operating Manual
12
+ # XTRM — When to Use What
15
13
 
16
- You are in an **xtrm-equipped Claude Code environment**. This skill orients you on *how to work*
17
- within this stack. Read it at session start and refer back when uncertain about a workflow.
14
+ > Gates, commands, and git workflow are in CLAUDE.md.
15
+ > This is the behavioral layer: triggers, patterns, examples.
18
16
 
19
- ---
20
-
21
- ## Stack at a Glance
17
+ ## Session Start
22
18
 
23
- | Layer | What it provides |
24
- |---|---|
25
- | **Skills** | Domain expertise loaded on demand |
26
- | **Hooks** | Automated lifecycle enforcement (gates, suggestions, reminders) |
27
- | **Project Data (`xtrm init`)** | Per-repo bootstrap data (`.beads/`, `service-registry.json`, GitNexus index) |
28
- | **MCP Servers** | Semantic tools: Serena (code), gitnexus (graph), context7 (docs), deepwiki |
29
- | **CLI** | `xtrm install / status / finish / reset / help` — sync and closure tooling |
30
- | **beads (bd)** | Git-backed issue tracker with session gate enforcement |
19
+ ```bash
20
+ bd prime # load workflow context + active claims
21
+ bd memories <today's topic> # retrieve relevant past context
22
+ bd ready # find available work
23
+ bd update <id> --claim # claim before any edit
24
+ ```
31
25
 
32
26
  ---
33
27
 
34
- ## Core Principle: Prompt First, Then Work
35
-
36
- Before executing any non-trivial task, improve the prompt mentally using XML structure.
37
- Apply this silently — the user sees your improved execution, not the meta-work.
38
-
39
- ### Prompt Classification
40
-
41
- Scan the user's message for task type:
42
-
43
- | Type | Keywords | Enhancement |
44
- |---|---|---|
45
- | **ANALYSIS** | analyze, investigate, research, explain, why | Add `<thinking>` block, structure `<outputs>` |
46
- | **DEV** | implement, create, build, add, fix, feature | Add 1-2 `<example>` blocks, define `<constraints>` |
47
- | **REFACTOR** | refactor, improve, optimize, clean, simplify | Add `<constraints>` (preserve behavior, tests pass) + `<current_state>` |
48
-
49
- ### XML Prompt Structure
50
-
51
- ```xml
52
- <task_name>
53
- <description>What needs to be done and why</description>
54
- <parameters>Relevant context: files, symbols, constraints</parameters>
55
- <instructions>
56
- Step-by-step approach
57
- </instructions>
58
- <!-- ANALYSIS tasks: -->
59
- <thinking>Work through hypotheses before concluding</thinking>
60
- <outputs>Expected result format</outputs>
61
- <!-- DEV tasks: -->
62
- <example>Concrete pattern to follow</example>
63
- <!-- REFACTOR tasks: -->
64
- <constraints>Must not break X, tests must pass, preserve API surface</constraints>
65
- </task_name>
66
- ```
28
+ ## Trigger Patterns
67
29
 
68
- When a prompt is vague (under 8 words, no specifics), ask one clarifying question before
69
- proceeding. Don't ask about things you can reasonably infer.
30
+ | Situation | Action |
31
+ |-----------|--------|
32
+ | User prompt contains `?` | `bd memories <keywords>` before answering — check stored context first |
33
+ | "What was I working on?" | `bd list --status=in_progress` |
34
+ | Unfamiliar area of code | `gitnexus_query({query: "concept"})` before opening any file |
35
+ | About to edit a symbol | `gitnexus_impact({target: "name", direction: "upstream"})` |
36
+ | Before `git commit` | `gitnexus_detect_changes({scope: "staged"})` to verify scope |
37
+ | Reading code | `get_symbols_overview` → `find_symbol` — never read whole files |
38
+ | Task is tests | use /test-planning |
39
+ | Task is docs updates | use /sync-docs |
40
+ | Session end (issue closed) | Memory gate fires — evaluate `bd remember` for each closed issue |
70
41
 
71
42
  ---
72
43
 
73
- ## Beads + Session Flow — Session Protocol
44
+ ## Handling `?` Prompts
74
45
 
75
- This environment enforces a **beads session gate** plus **session-flow lifecycle gate**.
76
- You cannot edit files without a claim, and you cannot safely end a closure-in-progress session.
46
+ When the user's message contains a question, check stored context before answering:
77
47
 
78
48
  ```bash
79
- # 1. Claim before editing
80
- bd list --status=open
81
- bd update <id> --claim
82
- # hook auto-sets session claim + auto-creates worktree + writes .xtrm-session-state.json
83
-
84
- # 2. Work in the claimed branch/worktree
85
-
86
- # 3. Close issue when implementation is done
87
- bd close <id>
88
-
89
- # 4. Session close protocol (single command)
90
- xtrm finish
91
- # blocking: commit/push/pr-create/auto-merge poll/worktree cleanup
49
+ bd memories <keywords from question> # search project memory
50
+ bd recall <key> # retrieve specific memory if key is known
92
51
  ```
93
52
 
94
- **Key rules:**
95
- - One active claim per session
96
- - Always work on a **feature branch**, never directly on `main`/`master`
97
- - `main-guard.mjs` blocks edits on protected branches
98
- - `beads-stop-gate.mjs` blocks stop for closure phases: `waiting-merge`, `conflicting`, `pending-cleanup`
99
- - If blocked on stop: resolve state then re-run `xtrm finish`
100
-
101
- ---
102
-
103
- ## Code Editing — Serena LSP Workflow
104
-
105
- Always use semantic tools. Never read entire large files or use generic Edit unless forced.
106
-
107
- ```
108
- get_symbols_overview(file) → map the file structure first
109
- find_symbol(name, include_body=true) → read only what you need
110
- find_referencing_symbols(name) → check callers before changing signatures
111
- replace_symbol_body(name, body) → atomic symbol-level edit
112
- insert_after_symbol / insert_before_symbol → add new code precisely
53
+ Example — user asks *"why does the quality gate run twice?"*:
54
+ ```bash
55
+ bd memories "quality gate"
56
+ # → "quality-check.cjs and quality-check.py are separate hooks
57
+ # JS/TS and Python each get their own PostToolUse pass"
113
58
  ```
114
59
 
115
- **Activate project first** (required once per session):
116
- ```
117
- mcp__serena__activate_project("<project-name>")
60
+ If it's a code question, also run:
61
+ ```bash
62
+ gitnexus_query({query: "<topic>"}) # find relevant execution flows
118
63
  ```
119
64
 
120
- **Fallback**: Use `Edit` only for non-code files or when a symbol can't be located.
121
-
122
65
  ---
123
66
 
124
- ## Code Intelligence — gitnexus Workflow
125
-
126
- Before editing any function, class, or method — always run impact analysis.
67
+ ## Workflow Examples
127
68
 
69
+ **Fixing a bug:**
128
70
  ```bash
129
- # 1. Before editing
130
- npx gitnexus impact <symbolName> --direction upstream
131
-
132
- # 2. Understand a symbol fully
133
- gitnexus_context({name: "symbolName"})
134
-
135
- # 3. Find code by concept (instead of grepping)
136
- gitnexus_query({query: "concept"})
137
-
138
- # 4. Before committing — verify scope
139
- gitnexus_detect_changes({scope: "staged"})
71
+ bd ready # find the issue
72
+ bd update bd-xyz --claim # claim it
73
+ gitnexus_impact({target: "parseComposeServices", direction: "upstream"})
74
+ # 2 callers, LOW risk — safe to edit
75
+ get_symbols_overview("hooks/init.ts") # map file
76
+ find_symbol("parseComposeServices", include_body=True) # read just this
77
+ replace_symbol_body("parseComposeServices", newBody) # Serena edit
78
+ bd close bd-xyz --reason="Fix YAML parse edge case" # close + auto-commit
79
+ xt end # push, PR, merge, cleanup
140
80
  ```
141
81
 
142
- **Risk levels**: d=1 = WILL BREAK (must fix), d=2 = likely affected (should test), d=3 = transitive (test if critical).
143
-
144
- If index is stale: `npx gitnexus analyze` before using MCP tools.
145
-
146
- > **Note**: gitnexus MCP server and CLI share an exclusive DB lock — they cannot run concurrently.
147
- > Use CLI (`npx gitnexus ...`) when MCP is active, or stop MCP first.
148
-
149
- ---
150
-
151
- ## Quality Gates — Automatic on Every Edit
152
-
153
- After each file edit, quality-gates hooks run automatically:
154
- - **TypeScript**: ESLint + tsc type check
155
- - **Python**: Ruff lint + mypy type check
156
-
157
- You do not invoke these manually — they fire via PostToolUse hooks. If a gate fails, fix the
158
- lint/type error before continuing. Do not suppress errors with `// eslint-disable` or `# type: ignore`
159
- unless there is a genuine reason.
160
-
161
- > **Global-first behavior**: quality-gates hooks are global; no per-project install is needed.
162
- > Run `xtrm init` once per repository to bootstrap project data, then ensure the repo has
163
- > `eslint.config.*` (TS) or `pyproject.toml` / `ruff.toml` (Python) configured so checks can run.
164
-
165
- ---
166
-
167
- ## Skill Routing — When to Use What
168
-
169
- | Situation | Use |
170
- |---|---|
171
- | Short/vague user prompt | Apply XML structure silently (this skill) or `/prompt-improving` |
172
- | Simple task (tests, docs, typo fix) | `/delegating` → cost-optimized agent |
173
- | Complex task needing second opinion | `/orchestrate adversarial "task"` |
174
- | Reading/editing code | `using-serena-lsp` (Serena MCP) |
175
- | Understanding code architecture | `gitnexus-exploring` |
176
- | Tracing a bug | `gitnexus-debugging` |
177
- | Changing a function | `gitnexus-impact-analysis` first, then Serena edit |
178
- | Safe rename/refactor | `gitnexus-refactoring` |
179
- | Docker service project | `using-service-skills` → activate expert persona |
180
- | Writing new feature | Write tests alongside, quality gates auto-run after |
181
- | Maintaining docs | `/documenting` (Serena SSOT drift detection) |
182
- | Building/improving a skill | `skill-creator` |
183
-
184
- ---
185
-
186
- ## Available Skills (Full Catalog)
187
-
188
- **Workflow:**
189
- `prompt-improving`, `delegating`, `orchestrating-agents`, `using-serena-lsp`, `documenting`,
190
- `using-xtrm` (this skill), `skill-creator`, `find-skills`
191
-
192
- **Code Intelligence:**
193
- `gitnexus-exploring`, `gitnexus-debugging`, `gitnexus-impact-analysis`, `gitnexus-refactoring`
194
-
195
- **Domain Experts:**
196
- `senior-backend`, `senior-devops`, `senior-security`, `senior-data-scientist`,
197
- `docker-expert`, `python-testing`, `clean-code`
198
-
199
- **Integrations:**
200
- `obsidian-cli`, `hook-development`, `claude-api`
82
+ **Exploring unfamiliar code:**
83
+ ```bash
84
+ gitnexus_query({query: "session claim enforcement"})
85
+ # → beads-gate-core.mjs, resolveClaimAndWorkState, decideCommitGate
86
+ gitnexus_context({name: "resolveClaimAndWorkState"}) # callers + callees
87
+ get_symbols_overview("hooks/beads-gate-core.mjs") # map the file
88
+ find_symbol("resolveClaimAndWorkState", include_body=True) # read only this
89
+ ```
201
90
 
202
- **Global-first note:**
203
- `quality-gates` and `service-skills` workflows are globally available after `xtrm install all`.
204
- Use `xtrm init` to provision per-project data (beads + service-registry + GitNexus index).
91
+ **Persisting an insight:**
92
+ ```bash
93
+ bd remember "quality-check runs twice: separate .cjs (JS) and .py (Python) hooks"
94
+ # retrievable next session:
95
+ bd memories "quality check"
96
+ bd recall "quality-check-runs-twice-..."
97
+ ```
205
98
 
206
99
  ---
207
100
 
208
- ## Hook Enforcement Summary
209
-
210
- These hooks run automatically — you cannot disable them mid-session:
211
-
212
- | Hook | Trigger | Effect |
213
- |---|---|---|
214
- | `main-guard.mjs` | PreToolUse (Edit/Write/Serena/Bash) | Blocks edits and unsafe Bash on protected branches |
215
- | `beads-edit-gate.mjs` | PreToolUse (Edit/Write/Serena) | Blocks edits without active claim |
216
- | `beads-commit-gate.mjs` | PreToolUse (Bash: git commit) | Blocks commit with unclosed claim |
217
- | `beads-claim-sync.mjs` | PostToolUse (Bash claim command) | Syncs claim + creates worktree + writes session state |
218
- | `beads-stop-gate.mjs` | Stop | Blocks stop for unresolved session-flow phases |
219
- | `beads-memory-gate.mjs` | Stop | Prompts for persistent insights after closure |
220
- | `beads-compact-save/restore.mjs` | PreCompact / SessionStart | Preserves claim + session state across compact |
221
- | `serena-workflow-reminder.py` | SessionStart | Reminds semantic editing workflow |
222
- | `quality-check.(cjs|py)` | PostToolUse (Edit/Write) | Runs lint + type checks automatically |
101
+ ## Prompt Shaping (silent, before every non-trivial task)
223
102
 
224
- ---
225
-
226
- ## MCP Servers
103
+ | Task type | Apply |
104
+ |-----------|-------|
105
+ | `analyze / investigate / why` | `<thinking>` block + structured `<outputs>` |
106
+ | `implement / build / fix` | 1-2 `<example>` blocks + `<constraints>` |
107
+ | `refactor / simplify` | `<constraints>` (preserve behavior, tests pass) + `<current_state>` |
227
108
 
228
- | Server | Use for | Setup |
229
- |---|---|---|
230
- | `serena` | Semantic code reading/editing | Auto-detected; activate project per session |
231
- | `gitnexus` | Knowledge graph, impact analysis | `npm install -g gitnexus` + `npx gitnexus analyze` per project |
232
- | `context7` | Library documentation lookup | No setup needed (free stdio transport) |
233
- | `deepwiki` | Technical docs for GitHub repos | No setup needed |
234
- | `github-grep` | Code search across GitHub | No setup needed |
109
+ Vague prompt (under 8 words, no specifics)? Ask one clarifying question before proceeding.
235
110
 
236
111
  ---
237
112
 
238
- ## Checklist Before Finishing Any Task
239
-
240
- 1. `gitnexus_detect_changes(...)` confirms only expected files/flows changed
241
- 2. All d=1 dependents updated (if any signal from impact analysis)
242
- 3. Tests pass (targeted + relevant integration)
243
- 4. Beads issue closed: `bd close <id>`
244
- 5. Run `xtrm finish` for blocking closure lifecycle
245
- 6. Verify session state reached `cleanup-done` (or intentional re-entry state)
113
+ ## Skill Routing
114
+
115
+ | Need | Use |
116
+ |------|-----|
117
+ | Code read / edit | Serena — `get_symbols_overview` → `find_symbol` → `replace_symbol_body` |
118
+ | Blast radius before edit | `gitnexus-impact-analysis` |
119
+ | Navigate unfamiliar code | `gitnexus-exploring` |
120
+ | Trace a bug | `gitnexus-debugging` |
121
+ | Safe rename / refactor | `gitnexus-refactoring` |
122
+ | Docs maintenance | `sync-docs` |
123
+ | Docker service project | `using-service-skills` |
124
+ | Build / improve a skill | `skill-creator` |
@@ -1,230 +0,0 @@
1
- /**
2
- * oh-pi Background Process Extension
3
- *
4
- * 任何 bash 命令超时未完成时,自动送到后台执行。
5
- * 进程完成后自动通过 sendMessage 通知 LLM,无需轮询。
6
- * 提供 bg_status 工具让 LLM 查看/停止后台进程。
7
- */
8
- import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
9
- import { Type } from "@sinclair/typebox";
10
- import { StringEnum } from "@mariozechner/pi-ai";
11
- import { spawn, execSync } from "node:child_process";
12
- import { writeFileSync, readFileSync, appendFileSync, existsSync } from "node:fs";
13
-
14
- /** 超时阈值(毫秒),超过此时间自动后台化 */
15
- const BG_TIMEOUT_MS = 10_000;
16
-
17
- interface BgProcess {
18
- pid: number;
19
- command: string;
20
- logFile: string;
21
- startedAt: number;
22
- finished: boolean;
23
- exitCode: number | null;
24
- }
25
-
26
- export default function (pi: ExtensionAPI) {
27
- const bgProcesses = new Map<number, BgProcess>();
28
-
29
- // 覆盖内置 bash 工具
30
- pi.registerTool({
31
- name: "bash",
32
- label: "Bash",
33
- description: `Execute a bash command. Output is truncated to 2000 lines or 50KB. If a command runs longer than ${BG_TIMEOUT_MS / 1000}s, it is automatically backgrounded and you get the PID + log file path. Use the bg_status tool to check on backgrounded processes.`,
34
- parameters: Type.Object({
35
- command: Type.String({ description: "Bash command to execute" }),
36
- timeout: Type.Optional(Type.Number({ description: "Timeout in seconds (optional)" })),
37
- }),
38
- async execute(toolCallId, params, signal) {
39
- const { command } = params;
40
- const userTimeout = params.timeout ? params.timeout * 1000 : undefined;
41
- const effectiveTimeout = userTimeout ?? BG_TIMEOUT_MS;
42
-
43
- return new Promise((resolve) => {
44
- let stdout = "";
45
- let stderr = "";
46
- let settled = false;
47
- let backgrounded = false;
48
-
49
- const child = spawn("bash", ["-c", command], {
50
- cwd: process.cwd(),
51
- env: { ...process.env },
52
- stdio: ["ignore", "pipe", "pipe"],
53
- });
54
-
55
- child.stdout?.on("data", (d: Buffer) => {
56
- const chunk = d.toString();
57
- stdout += chunk;
58
- // 后台化后追加写入日志
59
- if (backgrounded) {
60
- try { appendFileSync(bgProcesses.get(child.pid!)?.logFile ?? "", chunk); } catch {}
61
- }
62
- });
63
- child.stderr?.on("data", (d: Buffer) => {
64
- const chunk = d.toString();
65
- stderr += chunk;
66
- if (backgrounded) {
67
- try { appendFileSync(bgProcesses.get(child.pid!)?.logFile ?? "", chunk); } catch {}
68
- }
69
- });
70
-
71
- // 超时处理:保持管道,标记为后台
72
- const timer = setTimeout(() => {
73
- if (settled) return;
74
- settled = true;
75
- backgrounded = true;
76
-
77
- child.unref();
78
-
79
- const logFile = `/tmp/oh-pi-bg-${Date.now()}.log`;
80
- const pid = child.pid!;
81
-
82
- // 把已有输出写入日志
83
- writeFileSync(logFile, stdout + stderr);
84
-
85
- const proc: BgProcess = { pid, command, logFile, startedAt: Date.now(), finished: false, exitCode: null };
86
- bgProcesses.set(pid, proc);
87
-
88
- // 监听完成事件,自动通知 LLM
89
- child.on("close", (code) => {
90
- proc.finished = true;
91
- proc.exitCode = code;
92
- const tail = (stdout + stderr).slice(-3000);
93
- const truncated = (stdout + stderr).length > 3000 ? "[...truncated]\n" + tail : tail;
94
- // 最终输出写入日志
95
- try { writeFileSync(logFile, stdout + stderr); } catch {}
96
-
97
- pi.sendMessage({
98
- content: `[BG_PROCESS_DONE] PID ${pid} finished (exit ${code ?? "?"})\nCommand: ${command}\n\nOutput (last 3000 chars):\n${truncated}`,
99
- display: true,
100
- triggerTurn: true,
101
- deliverAs: "followUp",
102
- });
103
- });
104
-
105
- const preview = (stdout + stderr).slice(0, 500);
106
- const text = `Command still running after ${effectiveTimeout / 1000}s, moved to background.\nPID: ${pid}\nLog: ${logFile}\nStop: kill ${pid}\n\nOutput so far:\n${preview}\n\n⏳ You will be notified automatically when it finishes. No need to poll.`;
107
-
108
- resolve({
109
- content: [{ type: "text", text }],
110
- details: {},
111
- });
112
- }, effectiveTimeout);
113
-
114
- // 正常结束(超时前)
115
- child.on("close", (code) => {
116
- if (settled) return;
117
- settled = true;
118
- clearTimeout(timer);
119
-
120
- const output = (stdout + stderr).trim();
121
- const exitInfo = code !== 0 ? `\n[Exit code: ${code}]` : "";
122
-
123
- resolve({
124
- content: [{ type: "text", text: output + exitInfo }],
125
- details: {},
126
- });
127
- });
128
-
129
- child.on("error", (err) => {
130
- if (settled) return;
131
- settled = true;
132
- clearTimeout(timer);
133
-
134
- resolve({
135
- content: [{ type: "text", text: `Error: ${err.message}` }],
136
- details: {},
137
- isError: true,
138
- });
139
- });
140
-
141
- // 处理 abort signal
142
- if (signal) {
143
- signal.addEventListener("abort", () => {
144
- if (settled) return;
145
- settled = true;
146
- clearTimeout(timer);
147
- try { child.kill(); } catch {}
148
- resolve({
149
- content: [{ type: "text", text: "Command cancelled." }],
150
- details: {},
151
- });
152
- }, { once: true });
153
- }
154
- });
155
- },
156
- });
157
-
158
- // bg_status 工具:查看/管理后台进程
159
- pi.registerTool({
160
- name: "bg_status",
161
- label: "Background Process Status",
162
- description: "Check status, view output, or stop background processes that were auto-backgrounded.",
163
- parameters: Type.Object({
164
- action: StringEnum(["list", "log", "stop"] as const, { description: "list=show all, log=view output, stop=kill process" }),
165
- pid: Type.Optional(Type.Number({ description: "PID of the process (required for log/stop)" })),
166
- }),
167
- async execute(toolCallId, params) {
168
- const { action, pid } = params;
169
-
170
- if (action === "list") {
171
- if (bgProcesses.size === 0) {
172
- return { content: [{ type: "text", text: "No background processes." }], details: {} };
173
- }
174
- const lines = [...bgProcesses.values()].map((p) => {
175
- const status = p.finished ? `⚪ stopped (exit ${p.exitCode ?? "?"})` : (isAlive(p.pid) ? "🟢 running" : "⚪ stopped");
176
- return `PID: ${p.pid} | ${status} | Log: ${p.logFile}\n Cmd: ${p.command}`;
177
- });
178
- return { content: [{ type: "text", text: lines.join("\n\n") }], details: {} };
179
- }
180
-
181
- if (!pid) {
182
- return { content: [{ type: "text", text: "Error: pid is required for log/stop" }], details: {}, isError: true };
183
- }
184
-
185
- const proc = bgProcesses.get(pid);
186
-
187
- if (action === "log") {
188
- const logFile = proc?.logFile;
189
- if (logFile && existsSync(logFile)) {
190
- try {
191
- const content = readFileSync(logFile, "utf-8");
192
- const tail = content.slice(-5000);
193
- const truncated = content.length > 5000 ? `[...truncated, showing last 5000 chars]\n${tail}` : tail;
194
- return { content: [{ type: "text", text: truncated || "(empty)" }], details: {} };
195
- } catch (e: any) {
196
- return { content: [{ type: "text", text: `Error reading log: ${e.message}` }], details: {}, isError: true };
197
- }
198
- }
199
- return { content: [{ type: "text", text: "No log available for this PID." }], details: {} };
200
- }
201
-
202
- if (action === "stop") {
203
- try {
204
- process.kill(pid, "SIGTERM");
205
- bgProcesses.delete(pid);
206
- return { content: [{ type: "text", text: `Process ${pid} terminated.` }], details: {} };
207
- } catch {
208
- bgProcesses.delete(pid);
209
- return { content: [{ type: "text", text: `Process ${pid} not found (already stopped?).` }], details: {} };
210
- }
211
- }
212
-
213
- return { content: [{ type: "text", text: `Unknown action: ${action}` }], details: {}, isError: true };
214
- },
215
- });
216
-
217
- // 清理:退出时杀掉所有后台进程
218
- pi.on("session_shutdown", async () => {
219
- for (const [pid, proc] of bgProcesses) {
220
- if (!proc.finished) {
221
- try { process.kill(pid, "SIGTERM"); } catch {}
222
- }
223
- }
224
- bgProcesses.clear();
225
- });
226
- }
227
-
228
- function isAlive(pid: number): boolean {
229
- try { process.kill(pid, 0); return true; } catch { return false; }
230
- }