lithermes-ai 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +245 -0
- package/README_Ko-KR.md +245 -0
- package/assets/lithermes-plugin/NOTICE.md +37 -0
- package/assets/lithermes-plugin/README.md +40 -0
- package/assets/lithermes-plugin/__init__.py +179 -0
- package/assets/lithermes-plugin/core.py +853 -0
- package/assets/lithermes-plugin/litgoal/__init__.py +10 -0
- package/assets/lithermes-plugin/litgoal/cli.py +133 -0
- package/assets/lithermes-plugin/litgoal/hook.py +48 -0
- package/assets/lithermes-plugin/litgoal/model.py +171 -0
- package/assets/lithermes-plugin/litgoal/runtime.py +273 -0
- package/assets/lithermes-plugin/litgoal/store.py +93 -0
- package/assets/lithermes-plugin/litgoal/tools.py +228 -0
- package/assets/lithermes-plugin/payload-version.json +471 -0
- package/assets/lithermes-plugin/plugin.yaml +9 -0
- package/assets/lithermes-plugin/skills/ai-slop-remover/SKILL.md +142 -0
- package/assets/lithermes-plugin/skills/comment-checker/SKILL.md +50 -0
- package/assets/lithermes-plugin/skills/debugging/SKILL.md +116 -0
- package/assets/lithermes-plugin/skills/debugging/references/methodology/00-setup.md +108 -0
- package/assets/lithermes-plugin/skills/debugging/references/methodology/02-investigate.md +121 -0
- package/assets/lithermes-plugin/skills/debugging/references/methodology/04-oracle-triple.md +136 -0
- package/assets/lithermes-plugin/skills/debugging/references/methodology/05-escalate.md +69 -0
- package/assets/lithermes-plugin/skills/debugging/references/methodology/06-fix.md +116 -0
- package/assets/lithermes-plugin/skills/debugging/references/methodology/08-qa.md +94 -0
- package/assets/lithermes-plugin/skills/debugging/references/methodology/09-cleanup.md +164 -0
- package/assets/lithermes-plugin/skills/debugging/references/methodology/partial-runtime-evidence.md +229 -0
- package/assets/lithermes-plugin/skills/debugging/references/runtimes/bundled-js-binary.md +415 -0
- package/assets/lithermes-plugin/skills/debugging/references/runtimes/go.md +252 -0
- package/assets/lithermes-plugin/skills/debugging/references/runtimes/native-binary.md +484 -0
- package/assets/lithermes-plugin/skills/debugging/references/runtimes/node.md +260 -0
- package/assets/lithermes-plugin/skills/debugging/references/runtimes/python.md +248 -0
- package/assets/lithermes-plugin/skills/debugging/references/runtimes/rust.md +234 -0
- package/assets/lithermes-plugin/skills/debugging/references/tools/ghidra.md +212 -0
- package/assets/lithermes-plugin/skills/debugging/references/tools/playwright-cli.md +194 -0
- package/assets/lithermes-plugin/skills/debugging/references/tools/pwndbg.md +263 -0
- package/assets/lithermes-plugin/skills/debugging/references/tools/pwntools.md +265 -0
- package/assets/lithermes-plugin/skills/frontend-ui-ux/SKILL.md +77 -0
- package/assets/lithermes-plugin/skills/lit-plan/SKILL.md +374 -0
- package/assets/lithermes-plugin/skills/litgoal/.gitkeep +0 -0
- package/assets/lithermes-plugin/skills/litgoal/SKILL.md +207 -0
- package/assets/lithermes-plugin/skills/litwork/SKILL.md +262 -0
- package/assets/lithermes-plugin/skills/lsp/SKILL.md +53 -0
- package/assets/lithermes-plugin/skills/programming/SKILL.md +463 -0
- package/assets/lithermes-plugin/skills/programming/references/go/README.md +90 -0
- package/assets/lithermes-plugin/skills/programming/references/go/backend-stack.md +641 -0
- package/assets/lithermes-plugin/skills/programming/references/go/bootstrap.md +328 -0
- package/assets/lithermes-plugin/skills/programming/references/go/bubbletea-v2.md +360 -0
- package/assets/lithermes-plugin/skills/programming/references/go/cobra-stack.md +468 -0
- package/assets/lithermes-plugin/skills/programming/references/go/concurrency.md +362 -0
- package/assets/lithermes-plugin/skills/programming/references/go/data-modeling.md +329 -0
- package/assets/lithermes-plugin/skills/programming/references/go/error-handling.md +359 -0
- package/assets/lithermes-plugin/skills/programming/references/go/golangci-strict.md +236 -0
- package/assets/lithermes-plugin/skills/programming/references/go/grpc-connect.md +375 -0
- package/assets/lithermes-plugin/skills/programming/references/go/libraries.md +337 -0
- package/assets/lithermes-plugin/skills/programming/references/go/one-liners.md +202 -0
- package/assets/lithermes-plugin/skills/programming/references/go/sqlc-pgx.md +471 -0
- package/assets/lithermes-plugin/skills/programming/references/go/testing.md +467 -0
- package/assets/lithermes-plugin/skills/programming/references/go/type-patterns.md +298 -0
- package/assets/lithermes-plugin/skills/programming/references/python/README.md +314 -0
- package/assets/lithermes-plugin/skills/programming/references/python/async-anyio.md +442 -0
- package/assets/lithermes-plugin/skills/programming/references/python/data-modeling.md +233 -0
- package/assets/lithermes-plugin/skills/programming/references/python/data-processing.md +133 -0
- package/assets/lithermes-plugin/skills/programming/references/python/error-handling.md +218 -0
- package/assets/lithermes-plugin/skills/programming/references/python/fastapi-stack.md +316 -0
- package/assets/lithermes-plugin/skills/programming/references/python/httpx2-optimization.md +360 -0
- package/assets/lithermes-plugin/skills/programming/references/python/libraries.md +307 -0
- package/assets/lithermes-plugin/skills/programming/references/python/one-liners.md +268 -0
- package/assets/lithermes-plugin/skills/programming/references/python/orjson-stack.md +378 -0
- package/assets/lithermes-plugin/skills/programming/references/python/pydantic-ai.md +285 -0
- package/assets/lithermes-plugin/skills/programming/references/python/pyproject-strict.md +232 -0
- package/assets/lithermes-plugin/skills/programming/references/python/textual-tui.md +201 -0
- package/assets/lithermes-plugin/skills/programming/references/python/type-patterns.md +176 -0
- package/assets/lithermes-plugin/skills/programming/references/rust/README.md +317 -0
- package/assets/lithermes-plugin/skills/programming/references/rust/async-tokio.md +299 -0
- package/assets/lithermes-plugin/skills/programming/references/rust/axum-stack.md +467 -0
- package/assets/lithermes-plugin/skills/programming/references/rust/cargo-strict.md +317 -0
- package/assets/lithermes-plugin/skills/programming/references/rust/clap-stack.md +409 -0
- package/assets/lithermes-plugin/skills/programming/references/rust/concurrency.md +375 -0
- package/assets/lithermes-plugin/skills/programming/references/rust/libraries.md +439 -0
- package/assets/lithermes-plugin/skills/programming/references/rust/one-liners.md +291 -0
- package/assets/lithermes-plugin/skills/programming/references/rust/proptest-insta.md +429 -0
- package/assets/lithermes-plugin/skills/programming/references/rust/type-state.md +354 -0
- package/assets/lithermes-plugin/skills/programming/references/rust/unsafe-discipline.md +250 -0
- package/assets/lithermes-plugin/skills/programming/references/rust/zero-cost-safety.md +527 -0
- package/assets/lithermes-plugin/skills/programming/references/rust-ub/README.md +289 -0
- package/assets/lithermes-plugin/skills/programming/references/rust-ub/miri-sanitizers-loom.md +411 -0
- package/assets/lithermes-plugin/skills/programming/references/rust-ub/ub-taxonomy.md +269 -0
- package/assets/lithermes-plugin/skills/programming/references/typescript/README.md +195 -0
- package/assets/lithermes-plugin/skills/programming/references/typescript/backend-hono.md +672 -0
- package/assets/lithermes-plugin/skills/programming/references/typescript/bootstrap.md +199 -0
- package/assets/lithermes-plugin/skills/programming/references/typescript/data-modeling.md +202 -0
- package/assets/lithermes-plugin/skills/programming/references/typescript/error-handling.md +169 -0
- package/assets/lithermes-plugin/skills/programming/references/typescript/tsconfig-strict.md +152 -0
- package/assets/lithermes-plugin/skills/programming/references/typescript/type-patterns.md +196 -0
- package/assets/lithermes-plugin/skills/programming/scripts/go/check-no-excuse-rules.sh +173 -0
- package/assets/lithermes-plugin/skills/programming/scripts/go/new-project.py +138 -0
- package/assets/lithermes-plugin/skills/programming/scripts/go/templates/.editorconfig +13 -0
- package/assets/lithermes-plugin/skills/programming/scripts/go/templates/.golangci.yml +95 -0
- package/assets/lithermes-plugin/skills/programming/scripts/go/templates/AGENTS.md.tmpl +24 -0
- package/assets/lithermes-plugin/skills/programming/scripts/go/templates/README.md.tmpl +12 -0
- package/assets/lithermes-plugin/skills/programming/scripts/go/templates/Taskfile.yml +40 -0
- package/assets/lithermes-plugin/skills/programming/scripts/go/templates/ci.yml +37 -0
- package/assets/lithermes-plugin/skills/programming/scripts/go/templates/config.go +24 -0
- package/assets/lithermes-plugin/skills/programming/scripts/go/templates/gitignore +15 -0
- package/assets/lithermes-plugin/skills/programming/scripts/go/templates/main.go.tmpl +22 -0
- package/assets/lithermes-plugin/skills/programming/scripts/go/templates/run.go +15 -0
- package/assets/lithermes-plugin/skills/programming/scripts/python/check-no-excuse-rules.py +687 -0
- package/assets/lithermes-plugin/skills/programming/scripts/python/new-project.py +172 -0
- package/assets/lithermes-plugin/skills/programming/scripts/python/new-script.py +116 -0
- package/assets/lithermes-plugin/skills/programming/scripts/rust/check-no-excuse-rules.py +296 -0
- package/assets/lithermes-plugin/skills/programming/scripts/rust/check-no-excuse-rules.sh +158 -0
- package/assets/lithermes-plugin/skills/programming/scripts/rust/new-project.py +175 -0
- package/assets/lithermes-plugin/skills/programming/scripts/typescript/check-no-excuse-rules.ts +282 -0
- package/assets/lithermes-plugin/skills/programming/scripts/typescript/new-project.ts +177 -0
- package/assets/lithermes-plugin/skills/refactor/SKILL.md +770 -0
- package/assets/lithermes-plugin/skills/remove-ai-slops/SKILL.md +335 -0
- package/assets/lithermes-plugin/skills/review-work/SKILL.md +562 -0
- package/assets/lithermes-plugin/skills/rules/SKILL.md +41 -0
- package/assets/lithermes-plugin/skills/start-work/SKILL.md +332 -0
- package/bin/lithermes.js +8 -0
- package/cover.png +0 -0
- package/package.json +39 -0
- package/src/cli.js +129 -0
- package/src/lib/check.js +94 -0
- package/src/lib/config.js +170 -0
- package/src/lib/files.js +65 -0
- package/src/lib/hermesDiscovery.js +50 -0
- package/src/lib/hud.js +121 -0
- package/src/lib/install.js +159 -0
- package/src/lib/patch.js +153 -0
- package/src/lib/skins.js +113 -0
- package/src/lib/spinner.js +104 -0
|
@@ -0,0 +1,332 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: start-work
|
|
3
|
+
description: Hermes-native plan executor for /start-work — resume from durable run state, then drive every plan checkbox through test + manual-QA + cleanup gates with independent verification.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# LitHermes Start-Work
|
|
7
|
+
|
|
8
|
+
> **Hermes-native overrides (authoritative — read first).** Hermes has **no
|
|
9
|
+
> model-facing goal tools**: do **not** call `create_goal`, `get_goal`, or
|
|
10
|
+
> `update_goal` (they do not exist). LitHermes binds the native standing `/goal`
|
|
11
|
+
> via the session goal manager; track success criteria and evidence with the
|
|
12
|
+
> durable LitHermes goal tools (`goal_set`, `goal_add_criterion`, `goal_evidence`,
|
|
13
|
+
> `goal_criterion_status`, `goal_steer`, `goal_checkpoint`, `goal_complete`) and
|
|
14
|
+
> inspect with `hermes lithermes goal status`. Wherever legacy docs say "Call
|
|
15
|
+
> `create_goal`" or "open a `# Goal` block", treat the goal as already bound and
|
|
16
|
+
> use `goal_set` for the durable criteria layer. To run a reviewer or worker lane,
|
|
17
|
+
> use the native **`delegate_task`** tool — `tasks:[{goal, context, toolsets?, role?}]`
|
|
18
|
+
> for a parallel batch; the parent blocks until all children stop. No `spawn_agent`,
|
|
19
|
+
> no named-agent registry, no per-child model selection.
|
|
20
|
+
|
|
21
|
+
This skill governs all `/start-work` invocations in Hermes. It resolves a plan
|
|
22
|
+
file (`plans/<slug>.md`), opens or resumes a durable run, and drives every
|
|
23
|
+
top-level checkbox to completion through strict gates. The skill never re-plans
|
|
24
|
+
from scratch mid-run; all recovery is from durable artifacts.
|
|
25
|
+
|
|
26
|
+
---
|
|
27
|
+
|
|
28
|
+
## When this skill fires
|
|
29
|
+
|
|
30
|
+
The trigger is any of:
|
|
31
|
+
|
|
32
|
+
- `/start-work <slug>` — resolve `plans/<slug>.md`, open a new run.
|
|
33
|
+
- `/start-work <slug> --resume` — locate the most recent run for `<slug>` under
|
|
34
|
+
`.hermes/lithermes/runs/` and resume from where `state.json` says.
|
|
35
|
+
- `/start-work` with no slug but a one-liner brief supplied — bootstrap a plan
|
|
36
|
+
first (see §0 below), then open the run.
|
|
37
|
+
|
|
38
|
+
---
|
|
39
|
+
|
|
40
|
+
## §0 — No-plan bootstrap (only when no plan file exists)
|
|
41
|
+
|
|
42
|
+
If `/start-work` was given a brief but `plans/<slug>.md` does not yet exist:
|
|
43
|
+
|
|
44
|
+
1. Derive a slug from the brief (kebab-case, ≤ 40 chars).
|
|
45
|
+
2. Write `plans/<slug>.md` with:
|
|
46
|
+
- A one-line **Goal** heading.
|
|
47
|
+
- A **Tasks** section: one `- [ ] T-NNN | <imperative verb phrase>` line per
|
|
48
|
+
deliverable, ordered by dependency.
|
|
49
|
+
- A **Success Criteria** section: one machine-parseable row per criterion:
|
|
50
|
+
`- [ ] C-NNN | channel: <http|tmux|browser|computer> | test: <file::id> | scenario: <one-line>`
|
|
51
|
+
3. Then proceed to §1 as if the plan existed from the start.
|
|
52
|
+
|
|
53
|
+
The brief is the contract. Do not expand scope beyond it.
|
|
54
|
+
|
|
55
|
+
---
|
|
56
|
+
|
|
57
|
+
## §1 — Resume first (MANDATORY — never skip)
|
|
58
|
+
|
|
59
|
+
Before touching any file or running any command, re-read the durable run artifacts
|
|
60
|
+
in this order:
|
|
61
|
+
|
|
62
|
+
1. `state.json` — identifies the active run ID, the last completed checkbox index,
|
|
63
|
+
and any in-progress task that was interrupted.
|
|
64
|
+
2. `notepad.md` — your working memory from prior turns; surface the `## Now` and
|
|
65
|
+
`## Todo` sections.
|
|
66
|
+
3. `ledger.jsonl` — append-only event log; scan for `task_completed` entries to
|
|
67
|
+
confirm which checkboxes are truly done.
|
|
68
|
+
4. The plan file (`plans/<slug>.md`) — count remaining unchecked `- [ ]` lines to
|
|
69
|
+
set the loop boundary.
|
|
70
|
+
|
|
71
|
+
All four reads happen before any other action. If an interrupted task is recorded
|
|
72
|
+
in `state.json`, resume it from the last safe checkpoint — do not repeat gates
|
|
73
|
+
that `ledger.jsonl` already records as passed. Do not re-read the brief and
|
|
74
|
+
re-plan. Do not ask the user what to do next unless the run directory is missing
|
|
75
|
+
entirely (no prior run exists).
|
|
76
|
+
|
|
77
|
+
Run state and evidence live under:
|
|
78
|
+
|
|
79
|
+
```
|
|
80
|
+
.hermes/lithermes/runs/<run-id>/
|
|
81
|
+
state.json ← active checkbox index, interrupt record
|
|
82
|
+
notepad.md ← working memory (append-only)
|
|
83
|
+
ledger.jsonl ← event log (append-only)
|
|
84
|
+
evidence/ ← artifact files captured during QA gates
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
---
|
|
88
|
+
|
|
89
|
+
## §2 — Per-checkbox execution loop
|
|
90
|
+
|
|
91
|
+
For each top-level `- [ ] T-NNN | …` in the plan, run all five gates **in order**
|
|
92
|
+
before flipping the checkbox. Gates are not optional and not reorderable.
|
|
93
|
+
|
|
94
|
+
### Gate A — Plan reread
|
|
95
|
+
|
|
96
|
+
Re-read the task row and any referenced sections in the plan. State in the notepad:
|
|
97
|
+
|
|
98
|
+
- The task's exact intent (one sentence).
|
|
99
|
+
- Which Success Criteria rows (`C-NNN`) it advances.
|
|
100
|
+
- Any files, APIs, or boundaries named in the plan.
|
|
101
|
+
|
|
102
|
+
### Gate B — Failing test first (RED)
|
|
103
|
+
|
|
104
|
+
Write the automated test **before** any production code. The test file and test ID
|
|
105
|
+
must match the `test:` field of the relevant `C-NNN` row(s). Run the test. Capture
|
|
106
|
+
the exact assertion message that proves it fails for the right reason — not a syntax
|
|
107
|
+
error, not a missing import, not a crash before the assertion. Paste the RED output
|
|
108
|
+
into `notepad.md`.
|
|
109
|
+
|
|
110
|
+
No production code may be written until RED is confirmed and recorded.
|
|
111
|
+
|
|
112
|
+
### Gate C — Smallest green change (GREEN)
|
|
113
|
+
|
|
114
|
+
Write the minimum production change that flips RED → GREEN. Re-run the test.
|
|
115
|
+
Capture the GREEN output. If making GREEN required more than roughly 20 lines of
|
|
116
|
+
production change, the test was too coarse — split the test and re-run from Gate B.
|
|
117
|
+
|
|
118
|
+
Run LSP diagnostics on every modified file. Zero errors allowed before proceeding.
|
|
119
|
+
|
|
120
|
+
### Gate D — Manual-QA channel scenario (YOU EXECUTE — NO STUBS)
|
|
121
|
+
|
|
122
|
+
Identify the `channel:` value for the relevant `C-NNN` row and run the corresponding
|
|
123
|
+
scenario yourself. The full test suite being green is **never** a substitute for
|
|
124
|
+
this gate. "Should work" and "looks correct" are not evidence.
|
|
125
|
+
|
|
126
|
+
**Channel table:**
|
|
127
|
+
|
|
128
|
+
| Channel | What to do | Artifact |
|
|
129
|
+
|---------|-----------|---------|
|
|
130
|
+
| `http` | Hit the live endpoint with `curl -i`; capture status line + headers + body. | curl transcript |
|
|
131
|
+
| `tmux` | `tmux new-session -d -s sw-qa-<criterion>`, drive with `send-keys`, dump via `tmux capture-pane -p -S - -E - -t sw-qa-<criterion>`. | session transcript |
|
|
132
|
+
| `browser` | Drive the real page via Playwright / puppeteer / Chromium; capture action log + screenshot path. | log + screenshot |
|
|
133
|
+
| `computer` | OS-level GUI automation (AppleScript, xdotool, computer-use agent) against the running app; capture action log + screenshot. | log + screenshot |
|
|
134
|
+
|
|
135
|
+
Paste the artifact path into `notepad.md` immediately after capture.
|
|
136
|
+
|
|
137
|
+
**Adversarial classes to exercise where applicable:**
|
|
138
|
+
|
|
139
|
+
- Malformed input (truncated payload, wrong type, empty body, oversized field).
|
|
140
|
+
- Prompt injection or boundary-crossing inputs.
|
|
141
|
+
- Cancel / resume: interrupt the task mid-way, restart, verify state is consistent.
|
|
142
|
+
- Stale state: run the scenario against an artifact left from a prior incomplete run.
|
|
143
|
+
- Dirty worktree: ensure the feature behaves correctly with uncommitted sibling changes present.
|
|
144
|
+
- Hung commands: send a well-formed request to a temporarily unavailable dependency; verify timeout and error surface.
|
|
145
|
+
- Flaky test detection: run the test suite three times in a row; flag any non-deterministic result.
|
|
146
|
+
- Misleading success output: verify the happy-path output does not mask a silent failure (exit code 0 with error text in stdout).
|
|
147
|
+
- Repeated interruptions: interrupt the QA scenario twice at different points; confirm recovery each time.
|
|
148
|
+
|
|
149
|
+
Not every class applies to every task. Record which were exercised and which were
|
|
150
|
+
skipped with a one-line justification in `notepad.md`.
|
|
151
|
+
|
|
152
|
+
### Gate E — Paired cleanup (never skip — no receipt = checkbox stays open)
|
|
153
|
+
|
|
154
|
+
Every runtime artifact spawned in Gate D **must** be torn down before this gate
|
|
155
|
+
is considered complete:
|
|
156
|
+
|
|
157
|
+
- Server PIDs: `kill <pid>`; verify with `kill -0 <pid>` (must fail).
|
|
158
|
+
- tmux sessions: `tmux kill-session -t sw-qa-<criterion>`; verify with `tmux ls`.
|
|
159
|
+
- Browser / Playwright contexts: `.close()`.
|
|
160
|
+
- Containers: `docker rm -f <name>`.
|
|
161
|
+
- Bound ports: `lsof -i :<port>` must return empty.
|
|
162
|
+
- Temp sockets, files, dirs: `rm -rf <mktemp path>`.
|
|
163
|
+
- QA-only environment variables: `unset <VAR>`.
|
|
164
|
+
|
|
165
|
+
Append a one-line cleanup receipt to `notepad.md` immediately after teardown:
|
|
166
|
+
|
|
167
|
+
```
|
|
168
|
+
cleanup [C-NNN]: killed PID 12345; tmux kill-session sw-qa-c-003; rm -rf /tmp/sw.aB12cD
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
No receipt → the checkbox stays open. This is not negotiable.
|
|
172
|
+
|
|
173
|
+
---
|
|
174
|
+
|
|
175
|
+
## §3 — Independent verification gate
|
|
176
|
+
|
|
177
|
+
Trigger this gate when **any** of the following are true:
|
|
178
|
+
|
|
179
|
+
- The task touches 3 or more files.
|
|
180
|
+
- The task is marked security-sensitive, has network-facing behavior, or modifies
|
|
181
|
+
shared state.
|
|
182
|
+
- The plan row contains `verify: strict` or the user said "rigorously" / "엄밀"
|
|
183
|
+
/ "deeply" / "깊게".
|
|
184
|
+
- 20 or more turns have elapsed since the run opened.
|
|
185
|
+
|
|
186
|
+
**Procedure:**
|
|
187
|
+
|
|
188
|
+
1. Dispatch a `delegate_task` child as an independent verifier. Its goal must
|
|
189
|
+
contain: the task row, the Success Criteria rows it maps to, the full diff
|
|
190
|
+
since the last commit, the `notepad.md` path, and the artifact paths from
|
|
191
|
+
Gates B–D. The child's role is explicitly to **refute** the done-claim — not
|
|
192
|
+
to rubber-stamp it.
|
|
193
|
+
2. The verifier re-reads the diff, re-runs the test suite, re-runs the Gate D
|
|
194
|
+
channel scenario independently, and returns a verdict.
|
|
195
|
+
3. Treat the verdict as binding. There is no "false positive". "Looks good but…"
|
|
196
|
+
is a rejection. Do not argue, minimise, or explain away concerns.
|
|
197
|
+
4. Fix every issue raised. Re-run Gates C, D, and E. Capture fresh evidence.
|
|
198
|
+
5. Re-dispatch the same verifier. Loop until the verdict is **unconditional
|
|
199
|
+
approval** with no qualifications.
|
|
200
|
+
6. Record the verifier's final approval message in `ledger.jsonl` as a
|
|
201
|
+
`verification_approved` event before proceeding.
|
|
202
|
+
|
|
203
|
+
---
|
|
204
|
+
|
|
205
|
+
## §4 — Mark-progress loop (CONTINUE WITHOUT ASKING)
|
|
206
|
+
|
|
207
|
+
After all five gates (and §3 if triggered) pass for a task:
|
|
208
|
+
|
|
209
|
+
1. Flip the checkbox in the plan file: `- [ ] T-NNN` → `- [x] T-NNN`.
|
|
210
|
+
2. Re-read the plan file. Count the remaining unchecked `- [ ]` lines. Assert
|
|
211
|
+
the count decreased by exactly one from the previous count. If it did not,
|
|
212
|
+
stop and surface the discrepancy before continuing.
|
|
213
|
+
3. Append a `task_completed` entry to `ledger.jsonl`:
|
|
214
|
+
```json
|
|
215
|
+
{"event":"task_completed","task":"T-NNN","ts":"<ISO>","evidence":["<path>","<path>"],"cleanup_receipt":"<one-line>"}
|
|
216
|
+
```
|
|
217
|
+
4. Update `state.json` to reflect the new last-completed index.
|
|
218
|
+
5. Move to the next unchecked task **without asking the user**. The loop
|
|
219
|
+
continues autonomously until every top-level checkbox is done.
|
|
220
|
+
|
|
221
|
+
Do not pause between tasks. Do not summarise progress mid-loop. Do not ask for
|
|
222
|
+
confirmation. The only permitted pause is after 2 consecutive identical failures
|
|
223
|
+
on the same gate — surface what was tried and ask before a third attempt.
|
|
224
|
+
|
|
225
|
+
---
|
|
226
|
+
|
|
227
|
+
## §5 — Final verification wave (F1–F4)
|
|
228
|
+
|
|
229
|
+
After every top-level checkbox is flipped to `[x]`, run the four-phase final wave
|
|
230
|
+
before declaring the run complete.
|
|
231
|
+
|
|
232
|
+
### F1 — Full scenario replay
|
|
233
|
+
|
|
234
|
+
Re-run every `C-NNN` channel scenario from §2 Gate D, in order, against the final
|
|
235
|
+
state. Capture fresh artifacts. Record each as a `final_qa` event in `ledger.jsonl`.
|
|
236
|
+
|
|
237
|
+
### F2 — Full test suite
|
|
238
|
+
|
|
239
|
+
Run the complete test suite (all files, no skip flags, no `.only`, no `xfail`
|
|
240
|
+
added this run). Every test must be green. Record the suite output path.
|
|
241
|
+
|
|
242
|
+
### F3 — LSP diagnostics sweep
|
|
243
|
+
|
|
244
|
+
Run LSP diagnostics across every file modified during the run. Zero errors
|
|
245
|
+
permitted. Warnings that existed before the run are acceptable; new warnings
|
|
246
|
+
introduced during the run must be resolved.
|
|
247
|
+
|
|
248
|
+
### F4 — Ledger integrity check
|
|
249
|
+
|
|
250
|
+
Read `ledger.jsonl` end-to-end. Verify:
|
|
251
|
+
|
|
252
|
+
- Every `T-NNN` task has a corresponding `task_completed` entry.
|
|
253
|
+
- Every `task_completed` entry has a non-empty `cleanup_receipt`.
|
|
254
|
+
- Every `C-NNN` criterion has at least one `evidence` path that exists on disk.
|
|
255
|
+
- A `final_qa` entry exists for every `C-NNN` criterion.
|
|
256
|
+
|
|
257
|
+
If any check fails, fix the gap before proceeding.
|
|
258
|
+
|
|
259
|
+
---
|
|
260
|
+
|
|
261
|
+
## §6 — Commits
|
|
262
|
+
|
|
263
|
+
Atomic commits per logical change, following Conventional Commits:
|
|
264
|
+
`<type>(<scope>): <imperative>` — types: `feat`, `fix`, `refactor`, `test`,
|
|
265
|
+
`docs`, `chore`, `build`, `ci`, `perf`. Each commit must build and pass the full
|
|
266
|
+
test suite on its own. No WIP commits on the final branch.
|
|
267
|
+
|
|
268
|
+
Final commit footer must include:
|
|
269
|
+
|
|
270
|
+
```
|
|
271
|
+
Plan: plans/<slug>.md
|
|
272
|
+
Run: .hermes/lithermes/runs/<run-id>/
|
|
273
|
+
```
|
|
274
|
+
|
|
275
|
+
Do **not** auto-commit unless the user requested or pre-authorised this session.
|
|
276
|
+
Default: stage + draft message + present for approval.
|
|
277
|
+
|
|
278
|
+
---
|
|
279
|
+
|
|
280
|
+
## §7 — Stop conditions
|
|
281
|
+
|
|
282
|
+
The run is done **only** when all of the following are true:
|
|
283
|
+
|
|
284
|
+
- Every top-level `- [ ]` checkbox in the plan file is `[x]`.
|
|
285
|
+
- F1–F4 final wave all passed with captured artifacts.
|
|
286
|
+
- Every cleanup receipt is recorded in `ledger.jsonl`.
|
|
287
|
+
- The notepad `## Todo` section is empty.
|
|
288
|
+
- If §3 was triggered: `ledger.jsonl` contains an unconditional
|
|
289
|
+
`verification_approved` event for every task that triggered the gate.
|
|
290
|
+
|
|
291
|
+
Leftover state disqualifies completion: a QA-spawned process still alive, a tmux
|
|
292
|
+
session still listed by `tmux ls`, a browser context still open, a bound port, a
|
|
293
|
+
temp file still on disk. Tear it down, record the receipt, then re-check.
|
|
294
|
+
|
|
295
|
+
After 2 consecutive identical failures on the same gate, surface what was tried
|
|
296
|
+
and ask the user before a third attempt. After 2 parallel exploration waves yield
|
|
297
|
+
no new useful facts, stop exploring and act.
|
|
298
|
+
|
|
299
|
+
---
|
|
300
|
+
|
|
301
|
+
## §8 — Output discipline
|
|
302
|
+
|
|
303
|
+
- First line this turn: `START-WORK ACTIVE — run <run-id>`
|
|
304
|
+
- After artifact reads (§1): one paragraph summarising what the ledger says is
|
|
305
|
+
done, what is in-progress, and how many checkboxes remain.
|
|
306
|
+
- During the loop: surface only state changes — RED captured, GREEN captured,
|
|
307
|
+
QA scenario PASS/FAIL with artifact path, verifier verdict, checkbox flipped.
|
|
308
|
+
- No commentary between gates. No "now I will…" narration. State changes only.
|
|
309
|
+
- Final message: run ID + plan path + per-task summary table (task | status |
|
|
310
|
+
evidence paths | cleanup receipt) + F1–F4 results + commit list
|
|
311
|
+
(`<sha> <subject>`) if commits were made.
|
|
312
|
+
|
|
313
|
+
---
|
|
314
|
+
|
|
315
|
+
## §9 — Constraints (non-negotiable)
|
|
316
|
+
|
|
317
|
+
- TDD is mandatory on every production change — features, fixes, refactors,
|
|
318
|
+
glue, config-with-logic. No "too small", "too obvious", or "just a one-liner"
|
|
319
|
+
exemptions. If production code was written without a preceding failing test in
|
|
320
|
+
the same notepad, stop, revert, write the test, watch it fail, then redo.
|
|
321
|
+
- The only changes exempt from a new test: pure formatting, comment-only edits,
|
|
322
|
+
dependency version bumps with no behavior delta, rename-only moves. Each
|
|
323
|
+
exemption must be justified in `## Findings`; unjustified exemption is a gate
|
|
324
|
+
failure.
|
|
325
|
+
- Never suppress lints, errors, or test failures. Never delete, skip, `.only`,
|
|
326
|
+
`.skip`, or comment out tests to green the suite.
|
|
327
|
+
- Never claim a checkbox done from inference alone — only from RED → GREEN +
|
|
328
|
+
channel artifact + cleanup receipt.
|
|
329
|
+
- Parallel tool calls for any independent work within a step; never parallelise
|
|
330
|
+
RED and GREEN of the same criterion.
|
|
331
|
+
- Plan files are read-only except for flipping `[ ]` → `[x]`. Do not add,
|
|
332
|
+
remove, or reorder tasks.
|
package/bin/lithermes.js
ADDED
package/cover.png
ADDED
|
Binary file
|
package/package.json
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "lithermes-ai",
|
|
3
|
+
"version": "0.5.0",
|
|
4
|
+
"description": "npx/bunx installer for the LitHermes Hermes plugin",
|
|
5
|
+
"license": "MIT",
|
|
6
|
+
"repository": {
|
|
7
|
+
"type": "git",
|
|
8
|
+
"url": "git+https://github.com/wjgoarxiv/lithermes.git",
|
|
9
|
+
"directory": "packages/lithermes-installer"
|
|
10
|
+
},
|
|
11
|
+
"homepage": "https://www.npmjs.com/package/lithermes-ai",
|
|
12
|
+
"bugs": {
|
|
13
|
+
"url": "https://github.com/wjgoarxiv/lithermes/issues"
|
|
14
|
+
},
|
|
15
|
+
"bin": {
|
|
16
|
+
"lithermes": "bin/lithermes.js"
|
|
17
|
+
},
|
|
18
|
+
"files": [
|
|
19
|
+
"bin",
|
|
20
|
+
"src",
|
|
21
|
+
"assets",
|
|
22
|
+
"!assets/**/__pycache__/**",
|
|
23
|
+
"!assets/**/*.pyc",
|
|
24
|
+
"!assets/**/upstream/**",
|
|
25
|
+
"README.md",
|
|
26
|
+
"README_Ko-KR.md",
|
|
27
|
+
"cover.png"
|
|
28
|
+
],
|
|
29
|
+
"scripts": {
|
|
30
|
+
"test": "node --test test/*.test.js",
|
|
31
|
+
"sync-plugin": "node scripts/sync-plugin.js",
|
|
32
|
+
"pack:dry": "npm pack --dry-run --json",
|
|
33
|
+
"clean:payload": "find assets -name __pycache__ -type d -prune -exec rm -rf {} + && find assets -name '*.py[co]' -delete",
|
|
34
|
+
"prepack": "npm run clean:payload"
|
|
35
|
+
},
|
|
36
|
+
"engines": {
|
|
37
|
+
"node": ">=18"
|
|
38
|
+
}
|
|
39
|
+
}
|
package/src/cli.js
ADDED
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
const path = require("node:path");
|
|
2
|
+
const { installLitHermes, uninstallLitHermes } = require("./lib/install");
|
|
3
|
+
const { checkLitHermes, doctorLitHermes } = require("./lib/check");
|
|
4
|
+
const { createSpinner, shouldUseSpinner } = require("./lib/spinner");
|
|
5
|
+
const { listHud, applyHud, clearHud, resolveHome, promptAccent } = require("./lib/hud");
|
|
6
|
+
const { supportsColor, findAccent } = require("./lib/skins");
|
|
7
|
+
|
|
8
|
+
// Split raw CLI arguments into a command, a flag map, and the positional list.
//
// - Anything that does not start with `--` is a positional (this includes
//   single-dash tokens such as `-v`, which `main` matches as a command).
// - Known boolean flags never consume the following argument.
// - Every other flag takes a value, given either as the next argument
//   (`--hud amber`) or inline (`--hud=amber`). A value-taking flag with no
//   value (end of argv, or followed by another `--flag`) is recorded as "".
//
// Returns `{ command, flags, positionals }`, where `command` is the first
// positional or "help" when there is none.
function parseArgs(argv) {
  // Built once per call rather than once per loop iteration.
  const booleanFlags = new Set([
    "yes",
    "offline",
    "dry-run",
    "spinner",
    "no-spinner",
    "gateway-offline",
    "force",
    "patch-installed-hermes",
    "no-patch-installed-hermes",
    "rollback-patches",
    "list",
    "no-hud",
  ]);
  // Inline values that explicitly switch a boolean flag off (`--force=false`).
  const falseValues = new Set(["false", "0", "no", "off"]);

  const flags = {};
  const positionals = [];
  for (let i = 0; i < argv.length; i += 1) {
    const item = argv[i];
    if (!item.startsWith("--")) {
      positionals.push(item);
      continue;
    }

    // Support `--key=value`: split on the FIRST "=" only, so values that
    // themselves contain "=" (paths, URLs) survive intact.
    const body = item.slice(2);
    const eq = body.indexOf("=");
    const key = eq === -1 ? body : body.slice(0, eq);
    const inlineValue = eq === -1 ? undefined : body.slice(eq + 1);

    if (booleanFlags.has(key)) {
      flags[key] = inlineValue === undefined || !falseValues.has(inlineValue.toLowerCase());
      continue;
    }
    if (inlineValue !== undefined) {
      flags[key] = inlineValue;
      continue;
    }

    const value = argv[i + 1];
    if (!value || value.startsWith("--")) {
      // No usable value follows; record the flag as present but empty.
      flags[key] = "";
      continue;
    }
    flags[key] = value;
    i += 1; // the value has been consumed
  }
  return { command: positionals[0] || "help", flags, positionals };
}
|
|
32
|
+
|
|
33
|
+
/**
 * Build the string printed by the `version` command.
 *
 * @returns {string} `<brand> <version>`, both read from the package.json one
 *   directory above this file.
 */
function packageVersion() {
  const manifest = require(path.join(__dirname, "..", "package.json"));
  // Prefer the first bin name over the npm package name: the command is
  // `lithermes` even though the npm package is published as `lithermes-ai`.
  const [firstBin] = Object.keys(manifest.bin || {});
  const label = firstBin || manifest.name;
  return `${label} ${manifest.version}`;
}
|
|
40
|
+
|
|
41
|
+
/**
 * Optionally apply a HUD accent once an install has finished.
 *
 * Resolution order:
 *   1. `--dry-run`: do nothing.
 *   2. `--hud <accent>` with a non-blank value: look it up; an unknown name is
 *      reported on stderr and skipped. This branch wins over `--no-hud`.
 *   3. `--no-hud`: do nothing.
 *   4. stdin and stdout are TTYs and `CI` is unset: prompt for an accent.
 *   5. Otherwise: do nothing.
 *
 * @param {Object} flags - Parsed CLI flags; also forwarded to `applyHud`.
 * @returns {Promise<void>}
 */
async function maybePickHudAccent(flags) {
  if (flags["dry-run"]) return;

  const hasExplicitAccent = typeof flags.hud === "string" && flags.hud.trim() !== "";
  if (hasExplicitAccent) {
    const match = findAccent(flags.hud);
    if (!match) {
      console.error(`HUD: unknown accent '${flags.hud}'; skipping (run \`lithermes hud --list\`).`);
      return;
    }
    console.log(applyHud(match.accent, flags));
    return;
  }

  if (flags["no-hud"]) return;

  const interactive = process.stdin.isTTY && process.stdout.isTTY && !process.env.CI;
  if (!interactive) return;

  const picked = await promptAccent({ color: supportsColor() });
  if (picked) {
    console.log(applyHud(picked.accent, flags));
  }
}
|
|
61
|
+
|
|
62
|
+
/**
 * CLI entry point: parse `argv` and dispatch to the matching command.
 *
 * Any command that is not recognised prints the usage summary. A failure while
 * applying a HUD accent via `hud <accent>` is reported on stderr and recorded
 * in `process.exitCode`; an install failure is rethrown after the spinner (if
 * any) is marked as failed.
 *
 * @param {string[]} argv - Arguments after the executable and script path.
 * @returns {Promise<void>}
 */
async function main(argv) {
  const { command, flags, positionals } = parseArgs(argv);

  switch (command) {
    case "version":
    case "--version":
    case "-v":
      console.log(packageVersion());
      return;

    case "hud": {
      const accent = positionals[1];
      // No accent (or --list) means "show what is available".
      if (flags.list || !accent) {
        console.log(listHud({ color: supportsColor(), hermesHome: resolveHome(flags) }));
        return;
      }
      if (["off", "clear", "none"].includes(accent)) {
        console.log(clearHud(flags));
        return;
      }
      try {
        console.log(applyHud(accent, flags));
      } catch (error) {
        console.error(error.message);
        process.exitCode = error.exitCode || 1;
      }
      return;
    }

    case "install": {
      const spinner = shouldUseSpinner({ flags }) ? createSpinner() : null;
      try {
        if (spinner) {
          spinner.start();
        }
        const outcome = installLitHermes({
          ...flags,
          onProgress: spinner ? (message) => spinner.update(message) : undefined,
        });
        if (spinner) {
          spinner.succeed("Installing LitHermes complete");
        }
        console.log(outcome.message);
      } catch (error) {
        if (spinner) {
          spinner.fail();
        }
        throw error;
      }
      await maybePickHudAccent(flags);
      return;
    }

    case "uninstall":
      console.log(uninstallLitHermes(flags).message);
      return;

    case "check":
      console.log(checkLitHermes(flags).message);
      return;

    case "doctor":
      console.log(doctorLitHermes(flags).message);
      return;

    default: {
      const usage = [
        "lithermes commands:",
        "  install [--yes] [--dry-run] [--spinner] [--no-spinner] [--hud <accent>] [--no-hud] [--hermes-home PATH]",
        "  check [--offline] [--gateway-offline] [--hermes-home PATH]",
        "  doctor [--offline] [--hermes-home PATH] [--hermes-repo PATH]",
        "  uninstall [--yes] [--hermes-home PATH]",
        "  hud [<accent>|off|--list] [--hermes-home PATH]   (Hermes HUD skin accents)",
        "  version",
      ];
      console.log(usage.join("\n"));
    }
  }
}
|
|
128
|
+
|
|
129
|
+
// Public surface of this module: the CLI entry point and the argument parser.
module.exports = { main, parseArgs };
|
package/src/lib/check.js
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
const fs = require("node:fs");
|
|
2
|
+
const path = require("node:path");
|
|
3
|
+
const { configHasLitHermes, readConfig } = require("./config");
|
|
4
|
+
const { ensureHermesHome, LitHermesError } = require("./hermesDiscovery");
|
|
5
|
+
const { assetRoot, pluginDest } = require("./install");
|
|
6
|
+
|
|
7
|
+
// Command names that must be found (as quoted strings) in the installed
// plugin's __init__.py for the plugin to count as correctly installed.
const requiredCommands = ["lit", "lit-loop", "lit-plan"];
|
|
8
|
+
|
|
9
|
+
/**
 * Report which required commands are mentioned in a plugin's `__init__.py`.
 *
 * This is a plain text scan: a command counts as present when its name appears
 * wrapped in double or single quotes anywhere in the file.
 *
 * @param {string} pluginPath - Directory expected to contain `__init__.py`.
 * @returns {string[]} The required command names that were found, in the order
 *   of `requiredCommands`; empty when `__init__.py` does not exist.
 */
function scanPluginCommands(pluginPath) {
  const initFile = path.join(pluginPath, "__init__.py");
  if (!fs.existsSync(initFile)) {
    return [];
  }
  const text = fs.readFileSync(initFile, "utf8");
  const found = [];
  for (const name of requiredCommands) {
    const quotedForms = [`"${name}"`, `'${name}'`];
    if (quotedForms.some((form) => text.includes(form))) {
      found.push(name);
    }
  }
  return found;
}
|
|
15
|
+
|
|
16
|
+
/**
 * Inspect `<repo>/gateway/run.py` for the markers this check expects.
 *
 * @param {string} [repo] - Repository root; when falsy the check is skipped.
 * @returns {{ok: boolean, skipped?: boolean, reason?: string}}
 *   `{ ok: true, skipped: true }` when no repo is given; `ok: false` with a
 *   reason when the file is missing. Otherwise `ok` is true only if both marker
 *   strings occur in the file (`reason` is populated in either case).
 */
function checkGatewaySource(repo) {
  if (!repo) {
    return { ok: true, skipped: true };
  }
  const runPy = path.join(repo, "gateway", "run.py");
  if (!fs.existsSync(runPy)) {
    return { ok: false, reason: "gateway/run.py missing" };
  }
  const text = fs.readFileSync(runPy, "utf8");
  const markers = ['command.replace("_", "-")', "_plugin_agent_dispatch_payload"];
  return {
    ok: markers.every((marker) => text.includes(marker)),
    reason: "gateway underscore dispatch or structured payload helper missing",
  };
}
|
|
26
|
+
|
|
27
|
+
/**
 * Inspect `<repo>/cli.py` for the markers this check expects.
 *
 * @param {string} [repo] - Repository root; when falsy the check is skipped.
 * @returns {{ok: boolean, skipped?: boolean, reason?: string}}
 *   `{ ok: true, skipped: true }` when no repo is given; `ok: false` with a
 *   reason when the file is missing. Otherwise `ok` is true only if all three
 *   marker strings occur in the file (`reason` is populated in either case).
 */
function checkCliSource(repo) {
  if (!repo) {
    return { ok: true, skipped: true };
  }
  const cliPy = path.join(repo, "cli.py");
  if (!fs.existsSync(cliPy)) {
    return { ok: false, reason: "cli.py missing" };
  }
  const text = fs.readFileSync(cliPy, "utf8");
  const markers = ["agent_message", "_pending_input.put", "ast.literal_eval"];
  return {
    ok: markers.every((marker) => text.includes(marker)),
    reason: "CLI plugin structured payload dispatch missing",
  };
}
|
|
37
|
+
|
|
38
|
+
/**
 * Inspect `<repo>/tui_gateway/server.py` for the markers this check expects.
 *
 * @param {string} [repo] - Repository root; when falsy the check is skipped.
 * @returns {{ok: boolean, skipped?: boolean, reason?: string}}
 *   `{ ok: true, skipped: true }` when no repo is given; `ok: false` with a
 *   reason when the file is missing. Otherwise `ok` is true only if all three
 *   marker strings occur in the file (`reason` is populated in either case).
 */
function checkTuiSource(repo) {
  if (!repo) {
    return { ok: true, skipped: true };
  }
  const serverPy = path.join(repo, "tui_gateway", "server.py");
  if (!fs.existsSync(serverPy)) {
    return { ok: false, reason: "tui_gateway/server.py missing" };
  }
  const text = fs.readFileSync(serverPy, "utf8");
  const markers = ["_plugin_agent_dispatch_payload", "command.dispatch", '"type": "send"'];
  return {
    ok: markers.every((marker) => text.includes(marker)),
    reason: "TUI plugin structured payload dispatch missing",
  };
}
|
|
48
|
+
|
|
49
|
+
/**
 * Verify that the plugin is installed and enabled.
 *
 * Passes only when every entry of `requiredCommands` is found by
 * `scanPluginCommands` and `configHasLitHermes` accepts the config read from
 * the Hermes home. With `--gateway-offline`, the gateway source is inspected
 * as well; a skipped gateway check (no repo) does not fail.
 *
 * @param {Object} [flags] - Parsed CLI flags, forwarded to `ensureHermesHome`.
 * @returns {{message: string}} Multi-line PASS report.
 * @throws {LitHermesError} With second argument 6 when commands are missing or
 *   the config is not enabled, and 7 when the gateway check fails (presumably
 *   exit codes — confirm against LitHermesError).
 */
function checkLitHermes(flags = {}) {
  const home = ensureHermesHome(flags);
  const found = scanPluginCommands(pluginDest(home.hermesHome));
  const configOk = configHasLitHermes(readConfig(home.hermesHome));
  const absent = requiredCommands.filter((cmd) => !found.includes(cmd));

  if (absent.length > 0 || !configOk) {
    const failure = [
      "LitHermes check FAIL",
      `missing: ${absent.join(", ") || "none"}`,
      `config enabled: ${configOk}`,
    ].join("\n");
    throw new LitHermesError(failure, 6);
  }

  const report = ["LitHermes check PASS", `commands: ${found.join(", ")}`];
  if (flags["gateway-offline"]) {
    const gateway = checkGatewaySource(home.hermesRepo);
    const gatewayFailed = !gateway.ok && !gateway.skipped;
    if (gatewayFailed) {
      throw new LitHermesError(`gateway check FAIL: ${gateway.reason}`, 7);
    }
    report.push("gateway /lit_loop PASS", "gateway /lit_plan PASS");
  }
  return { message: report.join("\n") };
}
|
|
67
|
+
|
|
68
|
+
/**
 * Produce a diagnostic report without throwing on failed checks.
 *
 * Reports four lines: plugin discovery (required commands present and config
 * enabled), and the cli, tui and gateway source checks. Each source check is
 * reported as `PASS`, `SKIPPED` (no repo was available to inspect) or
 * `PATCH_AVAILABLE`.
 *
 * The gateway line previously printed `PASS` for a skipped check while the cli
 * and tui lines printed `SKIPPED`; all three now use the same status mapping.
 *
 * @param {Object} [flags] - Parsed CLI flags, forwarded to `ensureHermesHome`.
 * @returns {{message: string}} Newline-joined report, one line per check.
 */
function doctorLitHermes(flags = {}) {
  const { hermesHome, hermesRepo } = ensureHermesHome(flags);
  const pluginPath = pluginDest(hermesHome);
  const commands = scanPluginCommands(pluginPath);
  const configOk = configHasLitHermes(readConfig(hermesHome));
  const cli = checkCliSource(hermesRepo);
  const tui = checkTuiSource(hermesRepo);
  const gateway = checkGatewaySource(hermesRepo);
  const installedPass = requiredCommands.every((cmd) => commands.includes(cmd)) && configOk;

  // Shared status mapping so a skipped check is never reported as PASS.
  const statusOf = (probe) => {
    if (!probe.ok) return "PATCH_AVAILABLE";
    return probe.skipped ? "SKIPPED" : "PASS";
  };

  const lines = [
    `plugin discovery: ${installedPass ? "PASS" : "PATCH_AVAILABLE"}`,
    `cli payload dispatch: ${statusOf(cli)}`,
    `tui payload dispatch: ${statusOf(tui)}`,
    `gateway underscore dispatch: ${statusOf(gateway)}`,
  ];
  return { message: lines.join("\n") };
}
|
|
85
|
+
|
|
86
|
+
module.exports = {
|
|
87
|
+
checkCliSource,
|
|
88
|
+
checkGatewaySource,
|
|
89
|
+
checkLitHermes,
|
|
90
|
+
checkTuiSource,
|
|
91
|
+
doctorLitHermes,
|
|
92
|
+
requiredCommands,
|
|
93
|
+
scanPluginCommands,
|
|
94
|
+
};
|