@lumoai/cli 1.34.0 → 1.36.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/assets/skill/SKILL.md +3 -3
- package/assets/skill/references/sessions.md +31 -38
- package/assets/skill/references/task-context.md +1 -1
- package/assets/skill/references/verify.md +41 -4
- package/dist/cli/src/commands/session-wrap.js +6 -9
- package/dist/cli/src/commands/task-lineage.js +8 -1
- package/dist/cli/src/commands/task-status.js +39 -9
- package/dist/cli/src/commands/verify.js +8 -0
- package/dist/cli/src/commands/wrap/crossings-reminder.js +19 -11
- package/dist/cli/src/index.js +3 -3
- package/dist/cli/src/lib/open-crossings.js +16 -8
- package/package.json +1 -1
- package/dist/cli/src/commands/wrap/progress-comment-section.js +0 -81
- package/dist/cli/src/lib/progress-comment-api.js +0 -47
package/assets/skill/SKILL.md
CHANGED
|
@@ -51,7 +51,7 @@ The command catalog below is a **map**: it lists every command grouped by domain
|
|
|
51
51
|
- `lumo task figma context <id> <linkId>` — Figma link metadata (v1)
|
|
52
52
|
- `lumo task comments list <id>` — comment thread, capped to the output budget (`--full` prints every comment; read-only; ≠ `task comment`)
|
|
53
53
|
- `lumo task pr show <id> <number>` — synced PR metadata (v1)
|
|
54
|
-
- `lumo task lineage <id>` — show the causal trail: fragments that fed the task + each one's outcome + the run's token/loop cost (read-only audit view); `lumo task lineage <id> --signal` also appends workspace-level usage signal-health (used distribution, per-session variance, used-vs-base merge rate)
|
|
54
|
+
- `lumo task lineage <id>` — show the causal trail: fragments that fed the task + each one's outcome + the run's token/loop cost (read-only audit view); `lumo task lineage <id> --signal` also appends workspace-level usage signal-health (used distribution, per-session variance, used-vs-base merge rate via iteration-taint fold — tasks with a send-back/reopen/PR-close count as the negative class even if later merged; shows negative-class size per side; prints "metric cannot discriminate" when no failure outcomes exist yet)
|
|
55
55
|
|
|
56
56
|
**Tasks** — see [tasks.md](references/tasks.md)
|
|
57
57
|
|
|
@@ -79,7 +79,7 @@ The command catalog below is a **map**: it lists every command grouped by domain
|
|
|
79
79
|
**Verification (machine acceptance loop)** — see [verify.md](references/verify.md)
|
|
80
80
|
|
|
81
81
|
- `lumo verify [task] [--timeout <seconds>]` — run every MACHINE criterion's checkpointer locally, report one structured PASS/FAIL verdict per criterion to the server, print next actions. Defaults to the session-bound task. Round cap 3: an all-pass round moves the task to IN_REVIEW (agent stops there); a round-3 fail escalates to a human (stop retrying). **Run this before claiming a task is done.**
|
|
82
|
-
- `lumo task status [task] [--json]` — read-only acceptance self-check (no LLM, milliseconds): the contract with each criterion's latest verdict (REVIEW_ADDED provenance visible), verification history, current round, last round's failure reasons, `nextActions` = the unmet criteria (the declarative "what's next" — no separate plan), and any OPEN (undispositioned) boundary crossings (count + per crossing category/severity/detail + a read-only attribution line `↳ by model=…·agent=…·session=…` naming who/what crossed, `unknown` when unresolved — LUM-469; `--json` adds an `openCrossings
|
|
82
|
+
- `lumo task status [task] [--json]` — read-only acceptance self-check (no LLM, milliseconds): the contract with each criterion's latest verdict (REVIEW_ADDED provenance visible), verification history, current round, last round's failure reasons, `nextActions` = the unmet criteria (the declarative "what's next" — no separate plan), and any OPEN (undispositioned) boundary crossings (count + per crossing category/severity/detail + a read-only attribution line `↳ by model=…·agent=…·session=…` naming who/what crossed, `unknown` when unresolved — LUM-469; `--json` adds an `openCrossings` field, each entry carrying an `attribution` object) — read-only awareness, disposition stays web + human-only (LUM-448). The crossings check fails closed (LUM-480): if the read errors, the block prints `⚠ Boundary-crossing check failed` instead of staying silent, and `--json` sets `openCrossings: null` (distinct from `[]` = a successful read with zero open — treat `null` as "could not confirm", not "safe"). Defaults to the session-bound task; `--json` emits a versioned payload (`version` field). **Run it first when resuming a task in a new session or after a verification round was rejected.**
|
|
83
83
|
- `lumo verdict [task] --pass | --pass-with-followup | --fail` — acceptance verdicts (LUM-422). `--pass` / `--pass-with-followup` open the browser to the human verdict bar focused on the passing action (a deep link — **records nothing**; a passing data row is only ever a human's own click). `--fail --reason <enum> [--note <text>] [--criterion <id>…]` records an **AGENT send-back** (verifierType=AGENT, verdict hard-coded FAIL) and bounces the task to IN_PROGRESS. Defaults to the session-bound task. **An unresolved send-back (machine/AGENT/human FAIL) blocks the agent/CLI DONE transition with 409** — clear it (re-verify) before `task update --status done`.
|
|
84
84
|
|
|
85
85
|
**Artifacts & Figma** — see [artifacts-figma.md](references/artifacts-figma.md)
|
|
@@ -126,7 +126,7 @@ The command catalog below is a **map**: it lists every command grouped by domain
|
|
|
126
126
|
|
|
127
127
|
- `lumo session attach <id>` — bind this session to a task (then run `task context`). **Lifetime lock**: re-attaching to the same task is a no-op; attaching to a _different_ task is refused with 409 — start a new Claude Code session instead. No `--force`, no `session detach`.
|
|
128
128
|
- `lumo session status` — show current binding
|
|
129
|
-
- `lumo session wrap [--yes] [--dry-run] [--used <indices>]` — end-of-session panel:
|
|
129
|
+
- `lumo session wrap [--yes] [--dry-run] [--used <indices>]` — end-of-session panel: memory review + fragment-usage vote (`--used`, LUM-300) + blocked-tag prompt, then a read-only reminder when the bound task has ≥1 OPEN boundary crossing still undispositioned (silent only on a genuine empty read — no wrap-up noise; a crossings-check failure prints a "could not confirm" warning instead of staying silent, LUM-480; pointer is web + human-only, LUM-448). Usage is now also audited automatically when a task reaches DONE (evidence-gated, true-only — confident fragments marked used, the rest left NULL); `session wrap --used` remains the manual override and takes precedence for a session.
|
|
130
130
|
- Git-suggest at session start (suggests `session attach`, never auto-binds) + Layer-2 project-memory review — see the reference
|
|
131
131
|
|
|
132
132
|
**Worktrees (local dev tooling)** — see [worktree.md](references/worktree.md)
|
|
@@ -125,18 +125,11 @@ lumo session status
|
|
|
125
125
|
|
|
126
126
|
When to suggest: the user asks "which task am I on", "what's this session bound to", or you need to decide whether to suggest `session attach` for a mentioned task ID.
|
|
127
127
|
|
|
128
|
-
### `lumo session wrap [--yes] [--dry-run] [--used <indices>]` — wrap-up panel:
|
|
128
|
+
### `lumo session wrap [--yes] [--dry-run] [--used <indices>]` — wrap-up panel: memory review + fragment-usage vote + blocked-tag prompt
|
|
129
129
|
|
|
130
|
-
Session-end wrap-up panel with **
|
|
130
|
+
Session-end wrap-up panel with **three sections, run in order**:
|
|
131
131
|
|
|
132
|
-
**1.
|
|
133
|
-
`turnSummary` rows (the one-line summaries written each STOP), aggregates
|
|
134
|
-
every turn **since the last progress comment** into one bulleted body, and — after
|
|
135
|
-
a `[y] post / [e] edit / [s] skip` confirmation — posts it as a comment on the
|
|
136
|
-
session's bound task. A server-side watermark (`Session.lastProgressCommentAt`)
|
|
137
|
-
means re-running never re-posts the same turns.
|
|
138
|
-
|
|
139
|
-
**2. Memory review** — lists the Layer1 memories this session sedimented since the
|
|
132
|
+
**1. Memory review** — lists the Layer1 memories this session sedimented since the
|
|
140
133
|
last review (deduped by a per-session watermark `Session.lastMemoryReviewAt`).
|
|
141
134
|
Each new memory is shown as `[SCOPE] CATEGORY headline`, numbered from 1. You
|
|
142
135
|
curate with a single line: `d 1,3` deletes rows 1 and 3, `p 2` promotes row 2 to
|
|
@@ -147,7 +140,7 @@ Out-of-range indices are ignored. Deletes/promotes run server-side, scoped to
|
|
|
147
140
|
memories this session created (you can't touch other sessions' memories through
|
|
148
141
|
this panel). With no new memories the section prints "(no content)" and does nothing.
|
|
149
142
|
|
|
150
|
-
**
|
|
143
|
+
**2. Fragment-usage vote (LUM-300)** — lists the context
|
|
151
144
|
fragments this session **consumed** (its lineage edges: memory / slack / web /
|
|
152
145
|
figma / PR / review-todo / session), numbered from 1 with a content snippet
|
|
153
146
|
label. The agent records which it **actually used** via
|
|
@@ -161,7 +154,7 @@ upgrades the flywheel signal from "co-loaded" (constant, no information) to
|
|
|
161
154
|
fragment's usage-based merge rate, falling back to the weaker presence rate when
|
|
162
155
|
usage samples are thin. With no consumed fragments the section prints "(no content)".
|
|
163
156
|
|
|
164
|
-
**
|
|
157
|
+
**3. Blocked check (blocked-tag prompt, LUM-153)** — if the **same kind of failure
|
|
165
158
|
recurred ≥ 3 times** in this session (server-aggregated from
|
|
166
159
|
`POST_TOOL_USE_FAILURE` events grouped by tool name, plus `STOP_FAILURE`
|
|
167
160
|
turn-level failures), the section surfaces the dominant failure (`This session looks repeatedly stuck on <tool> (N failures).` + last error summary) and prompts `[y] tag / [s] skip` whether to
|
|
@@ -177,44 +170,44 @@ shared board requires an interactive `y`, so `--yes` (and non-TTY) prints the
|
|
|
177
170
|
suggestion and moves on rather than silently flipping board state. When there's
|
|
178
171
|
nothing to prompt, the section prints "(no content)".
|
|
179
172
|
|
|
180
|
-
**After the panel — open-crossings reminder (LUM-448).** Once the
|
|
173
|
+
**After the panel — open-crossings reminder (LUM-448).** Once the three sections
|
|
181
174
|
finish, `session wrap` prints a one-shot read-only reminder **if** the bound
|
|
182
175
|
task has ≥1 OPEN (undispositioned) boundary crossing: `⚠ N open boundary
|
|
183
176
|
crossing(s) on LUM-N still undispositioned:` then a line per crossing `• [SEVERITY]
|
|
184
|
-
CATEGORY` and a web pointer. When
|
|
185
|
-
it prints **nothing** (truly silent, not a "(no content)"
|
|
186
|
-
adds no wrap-up noise. **
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
177
|
+
CATEGORY` and a web pointer. When the read genuinely comes back empty — or the
|
|
178
|
+
session is unbound — it prints **nothing** (truly silent, not a "(no content)"
|
|
179
|
+
line), so a clean task adds no wrap-up noise. **But a crossings-check failure is
|
|
180
|
+
not silent (LUM-480):** if the read errors (network / server), it prints
|
|
181
|
+
`⚠ Could not check boundary crossings on LUM-N (network/server error) — unable
|
|
182
|
+
to confirm whether any are still undispositioned`, so a failed safety check never
|
|
183
|
+
masquerades as "0 open / safe". **Awareness only:** it points at the web
|
|
184
|
+
acceptance panel; there is **no CLI path** to disposition or clear a crossing.
|
|
185
|
+
Disposition stays web + human-only (LUM-426/435/422) — an agent/CLI bearer cannot
|
|
186
|
+
clear its own crossing from the terminal.
|
|
190
187
|
|
|
191
188
|
```bash
|
|
192
189
|
lumo session wrap # interactive: preview each section, choose per-section
|
|
193
|
-
lumo session wrap --yes #
|
|
190
|
+
lumo session wrap --yes # memories kept; blocked tag NOT auto-applied (needs interactive y)
|
|
194
191
|
lumo session wrap --yes --used 1,3 # also record fragments 1 & 3 as used (the rest used=false)
|
|
195
192
|
lumo session wrap --used none # record that none of the injected fragments were used
|
|
196
|
-
lumo session wrap --dry-run # print all drafts only; never
|
|
193
|
+
lumo session wrap --dry-run # print all drafts only; never mutates, never advances watermarks
|
|
197
194
|
```
|
|
198
195
|
|
|
199
196
|
The usage vote is a two-step flow for agents: run `lumo session wrap` once to
|
|
200
197
|
see the numbered fragment list, decide which you actually used, then re-run with
|
|
201
198
|
`--used <indices>`. Re-running is safe — the other sections are watermark-guarded
|
|
202
|
-
(
|
|
199
|
+
(reviewed memories won't re-list).
|
|
203
200
|
|
|
204
201
|
- Requires `$CLAUDE_CODE_SESSION_ID` (must run inside Claude Code) and a bound
|
|
205
|
-
task (`lumo session attach <LUM-N>` first).
|
|
206
|
-
|
|
207
|
-
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
When to suggest: at the end of a working session on a bound task, to record what
|
|
219
|
-
was done as a progress comment — offer `lumo session wrap` rather than composing
|
|
220
|
-
a `task comment` by hand.
|
|
202
|
+
task (`lumo session attach <LUM-N>` first).
|
|
203
|
+
- `--yes` keeps all memories (no deletes/promotes) while advancing the
|
|
204
|
+
memory-review watermark; for the blocked-tag section it prints the suggestion
|
|
205
|
+
but does **not** apply the tag.
|
|
206
|
+
- `--dry-run` prints all drafts; never mutates memories/tags, never advances the
|
|
207
|
+
memory-review watermark.
|
|
208
|
+
- Non-TTY without `--yes`: prints the drafts and does **not** mutate or tag (safe
|
|
209
|
+
default).
|
|
210
|
+
|
|
211
|
+
When to suggest: at the end of a working session on a bound task, to review the
|
|
212
|
+
memories it sedimented, vote which injected fragments were actually used, and
|
|
213
|
+
flag the task `blocked` if it got repeatedly stuck — offer `lumo session wrap`.
|
|
@@ -132,7 +132,7 @@ identifier (`LUM-N`), prints the causal trail:
|
|
|
132
132
|
|
|
133
133
|
```bash
|
|
134
134
|
lumo task lineage LUM-42 # per-session causal trail + cost
|
|
135
|
-
lumo task lineage LUM-42 --signal # append workspace-level usage signal-health
|
|
135
|
+
lumo task lineage LUM-42 --signal # append workspace-level usage signal-health; used-vs-base merge rate uses iteration-taint fold (send-back/reopen/PR-close = negative class even if later merged); shows negative-class size per side; prints "metric cannot discriminate" when no failure outcomes exist yet
|
|
136
136
|
```
|
|
137
137
|
|
|
138
138
|
- **Totals banner** — distinct sessions, fragment count, edge count,
|
|
@@ -63,6 +63,18 @@ only).
|
|
|
63
63
|
them; the server rejects partial rounds.
|
|
64
64
|
- Criteria added during review (`REVIEW_ADDED`) appear in the contract and
|
|
65
65
|
are picked up automatically by the next round.
|
|
66
|
+
- **Session bound to a different task (LUM-459)** → the server returns 409,
|
|
67
|
+
which the command surfaces as an error. No advisory is printed; the verify
|
|
68
|
+
round is rejected outright.
|
|
69
|
+
- **Provably-unbound session** → the server includes `bindingAdvisory: 'unbound'`
|
|
70
|
+
in the round response, and the command prints:
|
|
71
|
+
`⚠ Working unbound — this verify ran from a Claude Code session not attached to the task.`
|
|
72
|
+
The run is recorded as a `SESSION_BINDING_MISSING` boundary crossing visible in
|
|
73
|
+
`lumo task status` open crossings. Run `lumo session attach <LUM-N>` before the
|
|
74
|
+
next verify to bind the session.
|
|
75
|
+
- **Unconfirmed session binding** → `bindingAdvisory: 'unconfirmed'` causes a
|
|
76
|
+
softer advisory: `⚠ Could not confirm this session is attached to the task.`
|
|
77
|
+
Same remediation: `lumo session attach <LUM-N>`.
|
|
66
78
|
|
|
67
79
|
## Round discipline
|
|
68
80
|
|
|
@@ -125,10 +137,23 @@ what's unmet and why (the exact failure tails), and how many rounds are left.
|
|
|
125
137
|
|
|
126
138
|
- Header: task identifier/title/status + `verification round N/3` (round 0 =
|
|
127
139
|
never verified) + an escalation warning when the machine loop is exhausted.
|
|
140
|
+
- **Machine verification rollup** (LUM-470) — directly under the `Criteria`
|
|
141
|
+
header, one line `Machine verification: N machine-verified / M human override
|
|
142
|
+
(of T MACHINE criteria)` over the active MACHINE criteria, aligned with the web
|
|
143
|
+
read model (LUM-456). Printed whenever the contract has ≥1 MACHINE criterion,
|
|
144
|
+
so the terminal rollup never reads as all-human when a checkpointer actually
|
|
145
|
+
verified the work.
|
|
128
146
|
- **Criteria** — every criterion as `<glyph> <id> [TYPE] SOURCE@rN
|
|
129
147
|
statement` (✓ latest verdict passed / ✗ failed / ○ no verdict yet) with its
|
|
130
148
|
checkpointer and latest verdict line (evidence pointer on pass, failure
|
|
131
149
|
tail on fail). `REVIEW_ADDED@rN` provenance is visible per row.
|
|
150
|
+
- A passing **MACHINE** criterion's verdict line carries a machine-state tag
|
|
151
|
+
derived from the read model's `machinePassed` flag, NOT the latest verdict
|
|
152
|
+
(LUM-470): `· machine-verified` when a checkpointer actually passed it (even
|
|
153
|
+
after a human later signs the task off), or `· human override (no machine
|
|
154
|
+
pass)` when it passes only on a human sign-off with no machine run underneath.
|
|
155
|
+
This keeps the terminal honest with web — a machine-verified criterion that
|
|
156
|
+
a human co-signed no longer reads as a plain human pass.
|
|
132
157
|
- A pass can carry a **`⚠ pre-edit version`** note (LUM-457): the criterion
|
|
133
158
|
was changed after that verdict (reworded, or its checkpointer was swapped so
|
|
134
159
|
the recorded evidence ran a different command). The pass still counts as met
|
|
@@ -154,19 +179,31 @@ statement` (✓ latest verdict passed / ✗ failed / ○ no verdict yet) with it
|
|
|
154
179
|
**Read-only awareness** — this surfaces crossings detected elsewhere
|
|
155
180
|
(LUM-426/435/442); there is no CLI path to disposition or clear one.
|
|
156
181
|
Disposition stays web + human-only (LUM-426/435/422): an agent/CLI bearer
|
|
157
|
-
cannot clear its own crossing from the terminal.
|
|
182
|
+
cannot clear its own crossing from the terminal. **The check fails closed
|
|
183
|
+
(LUM-480):** if the crossings read itself errors (network / server / parse),
|
|
184
|
+
the block prints `⚠ Boundary-crossing check failed (network/server error) —
|
|
185
|
+
could not confirm whether any are undispositioned` instead of staying silent.
|
|
186
|
+
Silence means a successful read with zero open crossings, never a failed
|
|
187
|
+
check — a hiccup can no longer masquerade as "all clear".
|
|
158
188
|
|
|
159
189
|
### --json contract
|
|
160
190
|
|
|
161
191
|
`--json` emits the full read model with a top-level `version` field
|
|
162
192
|
(currently `1`). The schema is versioned: breaking shape changes bump the
|
|
163
193
|
major; additive fields don't. Pin on `version` when scripting against it.
|
|
194
|
+
Each criterion carries `machinePassed` (boolean — a checkpointer currently
|
|
195
|
+
vouches for it; LUM-456/470), and the payload carries a top-level
|
|
196
|
+
`machineVerification` aggregate `{ total, machineVerified, humanOverridden }`
|
|
197
|
+
over the active MACHINE criteria — read these, not `latestVerdict` alone, to
|
|
198
|
+
tell a machine-verified criterion from a human override.
|
|
164
199
|
The open boundary crossings ride along as an additive top-level
|
|
165
|
-
`openCrossings
|
|
200
|
+
`openCrossings` (each entry `{ id, category, severity, detail, attribution }`,
|
|
166
201
|
where `attribution` is `{ workspaceMemberId, sessionId, agent, worktreeBranch,
|
|
167
202
|
model }` with every field nullable — null = unknown, never fabricated; LUM-469;
|
|
168
|
-
the array length is the count
|
|
169
|
-
|
|
203
|
+
the array length is the count) — same read-only awareness, no write path.
|
|
204
|
+
**`openCrossings` is `null` when the crossings check failed (LUM-480)** —
|
|
205
|
+
distinct from `[]`, which is a successful read with zero open crossings. Script
|
|
206
|
+
consumers must treat `null` as "unknown / could not confirm", not "safe".
|
|
170
207
|
|
|
171
208
|
`status` reads; `verify` judges. Running status never starts a round, never
|
|
172
209
|
escalates, and never changes task state — loop rules (cap 3, IN_REVIEW on
|
|
@@ -3,7 +3,6 @@ Object.defineProperty(exports, "__esModule", { value: true });
|
|
|
3
3
|
exports.sessionWrap = sessionWrap;
|
|
4
4
|
const config_1 = require("../lib/config");
|
|
5
5
|
const wrap_panel_1 = require("../lib/wrap-panel");
|
|
6
|
-
const progress_comment_section_1 = require("./wrap/progress-comment-section");
|
|
7
6
|
const memory_review_section_1 = require("./wrap/memory-review-section");
|
|
8
7
|
const fragment_usage_section_1 = require("./wrap/fragment-usage-section");
|
|
9
8
|
const blocked_prompt_section_1 = require("./wrap/blocked-prompt-section");
|
|
@@ -11,13 +10,12 @@ const crossings_reminder_1 = require("./wrap/crossings-reminder");
|
|
|
11
10
|
/**
|
|
12
11
|
* `lumo session wrap [--yes] [--dry-run] [--used <indices>]`
|
|
13
12
|
*
|
|
14
|
-
* Session-end wrap-up panel with
|
|
15
|
-
*
|
|
16
|
-
*
|
|
17
|
-
*
|
|
18
|
-
*
|
|
19
|
-
* (LUM-
|
|
20
|
-
* prompt whether to flag the bound task with a `blocked` tag (LUM-153).
|
|
13
|
+
* Session-end wrap-up panel with three sections, run in order: (1) review the
|
|
14
|
+
* Layer1 memories this session sedimented — keep/delete/promote, deduped by a
|
|
15
|
+
* per-session watermark; (2) vote which injected context fragments were
|
|
16
|
+
* actually used (LUM-300, via `--used`); (3) if the session repeatedly hit the
|
|
17
|
+
* same failure, prompt whether to flag the bound task with a `blocked` tag
|
|
18
|
+
* (LUM-153).
|
|
21
19
|
*/
|
|
22
20
|
async function sessionWrap(options) {
|
|
23
21
|
const sessionId = process.env.CLAUDE_CODE_SESSION_ID;
|
|
@@ -32,7 +30,6 @@ async function sessionWrap(options) {
|
|
|
32
30
|
return 1;
|
|
33
31
|
}
|
|
34
32
|
const sections = [
|
|
35
|
-
new progress_comment_section_1.ProgressCommentSection({ creds, sessionId }),
|
|
36
33
|
new memory_review_section_1.MemoryReviewSection({ creds, sessionId }),
|
|
37
34
|
new fragment_usage_section_1.FragmentUsageSection({ creds, sessionId, used: options.used }),
|
|
38
35
|
new blocked_prompt_section_1.BlockedPromptSection({ creds, sessionId }),
|
|
@@ -141,7 +141,14 @@ function formatSignalHealth(h) {
|
|
|
141
141
|
lines.push(`- Distribution: used ${h.distribution.used} · null ${h.distribution.abstained} · false ${h.distribution.unused}`);
|
|
142
142
|
lines.push(`- Per-session variance: ${h.perSessionVariance.toFixed(2)} (${h.votedSessions} voted sessions)`);
|
|
143
143
|
if (h.usedMergeRate !== null && h.baseMergeRate !== null) {
|
|
144
|
-
|
|
144
|
+
if (h.baseFailedTasks === 0) {
|
|
145
|
+
// No failure outcomes exist yet, so any rate is non-discriminating by
|
|
146
|
+
// construction — say so honestly instead of printing a misleading 100%.
|
|
147
|
+
lines.push('- Used × outcome: no failure outcomes yet — metric cannot discriminate');
|
|
148
|
+
}
|
|
149
|
+
else {
|
|
150
|
+
lines.push(`- Used × outcome: merge-rate(used) ${Math.round(h.usedMergeRate * 100)}% (${h.usedResolvedTasks} resolved, ${h.usedFailedTasks} failed) vs base ${Math.round(h.baseMergeRate * 100)}% (${h.baseResolvedTasks} resolved, ${h.baseFailedTasks} failed)`);
|
|
151
|
+
}
|
|
145
152
|
}
|
|
146
153
|
else {
|
|
147
154
|
lines.push('- Used × outcome: insufficient resolved tasks');
|
|
@@ -46,6 +46,13 @@ function formatTaskStatus(data, extras = {}) {
|
|
|
46
46
|
}
|
|
47
47
|
lines.push('');
|
|
48
48
|
lines.push(`Criteria (${data.criteria.length} total, ${data.nextActions.length} unmet):`);
|
|
49
|
+
// LUM-470: honest machine-verification rollup over the active MACHINE criteria
|
|
50
|
+
// (same read model as web, LUM-456) — so the terminal rollup never reads as
|
|
51
|
+
// all-human when a checkpointer actually verified the work.
|
|
52
|
+
const mv = data.machineVerification;
|
|
53
|
+
if (mv.total > 0) {
|
|
54
|
+
lines.push(`Machine verification: ${mv.machineVerified} machine-verified / ${mv.humanOverridden} human override (of ${mv.total} MACHINE criteria)`);
|
|
55
|
+
}
|
|
49
56
|
for (const c of data.criteria) {
|
|
50
57
|
const glyph = c.latestVerdict == null
|
|
51
58
|
? '○'
|
|
@@ -81,7 +88,16 @@ function formatTaskStatus(data, extras = {}) {
|
|
|
81
88
|
const evidencePart = v.evidencePointer
|
|
82
89
|
? ` · ${(0, sanitize_1.sanitizeField)(v.evidencePointer)}`
|
|
83
90
|
: '';
|
|
84
|
-
|
|
91
|
+
// LUM-470: tag a passing MACHINE criterion by the read model's
|
|
92
|
+
// machinePassed flag, not the latest verdict — a criterion a checkpointer
|
|
93
|
+
// verified reads as machine-verified even after a human signs off, and a
|
|
94
|
+
// pass with no machine run underneath reads as a human override.
|
|
95
|
+
const machineTag = c.verifierType === 'MACHINE'
|
|
96
|
+
? c.machinePassed
|
|
97
|
+
? ' · machine-verified'
|
|
98
|
+
: ' · human override (no machine pass)'
|
|
99
|
+
: '';
|
|
100
|
+
lines.push(` ✓ ${v.verdict}@r${v.round}${evidencePart}${machineTag}`);
|
|
85
101
|
// LUM-457: a pass that vouches for a pre-edit version of the criterion —
|
|
86
102
|
// render-only downgrade, the criterion still counts met.
|
|
87
103
|
if (c.verdictStale || c.checkMismatch) {
|
|
@@ -146,7 +162,17 @@ function formatTaskStatus(data, extras = {}) {
|
|
|
146
162
|
* a crossing from the terminal (LUM-426/435/422).
|
|
147
163
|
*/
|
|
148
164
|
function pushOpenCrossings(lines, extras) {
|
|
149
|
-
const
|
|
165
|
+
const result = extras.openCrossings;
|
|
166
|
+
if (!result)
|
|
167
|
+
return;
|
|
168
|
+
// LUM-480: a failed check is NOT "0 open / safe" — say so explicitly rather
|
|
169
|
+
// than rendering an empty (implicitly-clear) block.
|
|
170
|
+
if (result.status === 'error') {
|
|
171
|
+
lines.push('');
|
|
172
|
+
lines.push('⚠ Boundary-crossing check failed (network/server error) — could not confirm whether any are undispositioned.');
|
|
173
|
+
return;
|
|
174
|
+
}
|
|
175
|
+
const open = result.crossings;
|
|
150
176
|
if (open.length === 0)
|
|
151
177
|
return;
|
|
152
178
|
lines.push('');
|
|
@@ -224,20 +250,24 @@ async function taskStatus(identifier, options = {}) {
|
|
|
224
250
|
}
|
|
225
251
|
const data = (await res.json());
|
|
226
252
|
// Read-only awareness (LUM-448): surface the task's OPEN boundary crossings
|
|
227
|
-
// via the existing LUM-435 endpoint.
|
|
228
|
-
//
|
|
229
|
-
// never
|
|
230
|
-
//
|
|
231
|
-
|
|
253
|
+
// via the existing LUM-435 endpoint. fetchOpenCrossings returns a result that
|
|
254
|
+
// distinguishes a check FAILURE from a genuine 0-open read (LUM-480), so this
|
|
255
|
+
// supplementary safety signal never masquerades a hiccup as "all clear" — yet
|
|
256
|
+
// it still never throws, so it can't block the primary acceptance status. The
|
|
257
|
+
// resolved taskId (the identifier the status was fetched for) is the key here.
|
|
258
|
+
const crossingsResult = await (0, open_crossings_1.fetchOpenCrossings)(base, creds.token, taskId);
|
|
232
259
|
if (options.json) {
|
|
233
260
|
// JSON.stringify escapes control chars (e.g. ESC is emitted as \u001b), so the payload is safe
|
|
234
261
|
// to emit raw — and consumers get byte-faithful server data.
|
|
235
|
-
//
|
|
262
|
+
// openCrossings rides alongside as an additive field: an array on success
|
|
263
|
+
// (count = length), or `null` when the check failed (LUM-480) — distinct
|
|
264
|
+
// from `[]`, which is a successful read with zero open crossings.
|
|
265
|
+
const openCrossings = crossingsResult.status === 'ok' ? crossingsResult.crossings : null;
|
|
236
266
|
process.stdout.write(JSON.stringify({ ...data, openCrossings }, null, 2) + '\n');
|
|
237
267
|
return;
|
|
238
268
|
}
|
|
239
269
|
process.stdout.write(formatTaskStatus(data, {
|
|
240
|
-
openCrossings,
|
|
270
|
+
openCrossings: crossingsResult,
|
|
241
271
|
dispositionUrl: (0, open_crossings_1.dispositionUrl)(base, creds.workspaceSlug ?? 'lumo', data.task.identifier),
|
|
242
272
|
}));
|
|
243
273
|
}
|
|
@@ -181,6 +181,14 @@ async function verify(identifier, options = {}) {
|
|
|
181
181
|
}
|
|
182
182
|
const outcome = (await res.json());
|
|
183
183
|
process.stdout.write(`\nRound ${outcome.round}/${outcome.maxRounds} recorded.\n`);
|
|
184
|
+
if (outcome.bindingAdvisory === 'unbound') {
|
|
185
|
+
process.stdout.write('⚠ Working unbound — this verify ran from a Claude Code session not attached to the task. ' +
|
|
186
|
+
'Run `lumo session attach <LUM-N>` to bind (recorded as a boundary crossing).\n');
|
|
187
|
+
}
|
|
188
|
+
else if (outcome.bindingAdvisory === 'unconfirmed') {
|
|
189
|
+
process.stdout.write('⚠ Could not confirm this session is attached to the task. ' +
|
|
190
|
+
'If you are working with Claude Code, run `lumo session attach <LUM-N>`.\n');
|
|
191
|
+
}
|
|
184
192
|
if (outcome.allPassed) {
|
|
185
193
|
process.stdout.write(`✓ All MACHINE criteria passed — task is now ${outcome.taskStatus}.\n` +
|
|
186
194
|
`Stop here: human adjudication (and any HUMAN criteria) take over from this point.\n`);
|
|
@@ -7,13 +7,20 @@ const sanitize_1 = require("../../lib/sanitize");
|
|
|
7
7
|
const open_crossings_1 = require("../../lib/open-crossings");
|
|
8
8
|
/**
|
|
9
9
|
* Build the wrap-up reminder for a task's OPEN boundary crossings (LUM-448).
|
|
10
|
-
* Returns the reminder string when there is ≥1 open crossing, and `null`
|
|
11
|
-
*
|
|
12
|
-
* noise at wrap time.
|
|
13
|
-
*
|
|
14
|
-
* the
|
|
10
|
+
* Returns the reminder string when there is ≥1 open crossing, and `null` only on
|
|
11
|
+
* a genuine empty read — the caller prints nothing on null, so a clean task
|
|
12
|
+
* makes NO noise at wrap time. A check FAILURE (LUM-480) is NOT silent: it
|
|
13
|
+
* returns a warning so a failed safety check never reads as "0 open / safe".
|
|
14
|
+
* Read-only awareness: the reminder points at the human-only web disposition
|
|
15
|
+
* panel and offers no way to clear a crossing from the terminal (LUM-426/435/422).
|
|
15
16
|
*/
|
|
16
|
-
function formatCrossingReminder(taskIdentifier,
|
|
17
|
+
function formatCrossingReminder(taskIdentifier, result, url) {
|
|
18
|
+
if (result.status === 'error') {
|
|
19
|
+
return (`⚠ Could not check boundary crossings on ${taskIdentifier} ` +
|
|
20
|
+
`(network/server error) — unable to confirm whether any are still ` +
|
|
21
|
+
`undispositioned. Review in the web panel: ${url}\n`);
|
|
22
|
+
}
|
|
23
|
+
const open = result.crossings;
|
|
17
24
|
if (open.length === 0)
|
|
18
25
|
return null;
|
|
19
26
|
const n = open.length;
|
|
@@ -28,14 +35,15 @@ function formatCrossingReminder(taskIdentifier, open, url) {
|
|
|
28
35
|
}
|
|
29
36
|
/**
|
|
30
37
|
* Resolve the session's bound task and surface its OPEN boundary crossings as a
|
|
31
|
-
* wrap-up reminder (LUM-448), or `null` when the session is unbound or
|
|
32
|
-
*
|
|
33
|
-
*
|
|
38
|
+
* wrap-up reminder (LUM-448), or `null` when the session is unbound or the read
|
|
39
|
+
* genuinely came back empty. A crossings-check failure yields a warning, not
|
|
40
|
+
* silence (LUM-480). Pure read — `fetchOpenCrossings` hits only the LUM-435 GET
|
|
41
|
+
* endpoint and there is no disposition write path here.
|
|
34
42
|
*/
|
|
35
43
|
async function openCrossingReminder(creds) {
|
|
36
44
|
const taskIdentifier = await (0, resolve_bound_task_1.resolveBoundTaskIdentifier)(creds.apiUrl, creds.token);
|
|
37
45
|
if (!taskIdentifier)
|
|
38
46
|
return null;
|
|
39
|
-
const
|
|
40
|
-
return formatCrossingReminder(taskIdentifier,
|
|
47
|
+
const result = await (0, open_crossings_1.fetchOpenCrossings)(creds.apiUrl, creds.token, taskIdentifier);
|
|
48
|
+
return formatCrossingReminder(taskIdentifier, result, (0, open_crossings_1.dispositionUrl)(creds.apiUrl, creds.workspaceSlug, taskIdentifier));
|
|
41
49
|
}
|
package/dist/cli/src/index.js
CHANGED
|
@@ -247,9 +247,9 @@ session
|
|
|
247
247
|
.action(wrap(() => (0, session_status_1.sessionStatus)()));
|
|
248
248
|
session
|
|
249
249
|
.command('wrap')
|
|
250
|
-
.description(
|
|
251
|
-
.option('-y, --yes', '
|
|
252
|
-
.option('--dry-run', 'Print the
|
|
250
|
+
.description('Session-end wrap-up: review the memories sedimented this session, vote which injected context fragments were actually used, and optionally flag the bound task blocked.')
|
|
251
|
+
.option('-y, --yes', 'Keep all memories without prompting (agent-friendly); does not auto-apply the blocked tag')
|
|
252
|
+
.option('--dry-run', 'Print the section drafts but do not mutate memories/tags or advance watermarks')
|
|
253
253
|
.option('--used <indices>', 'Mark which injected context fragments you actually used (1-based indices, comma/space separated; "none" for all-unused). Omit to skip recording.')
|
|
254
254
|
.action(wrap(options => (0, session_wrap_1.sessionWrap)(options)));
|
|
255
255
|
const task = program
|
|
@@ -32,9 +32,13 @@ function normalizeSeverity(s) {
|
|
|
32
32
|
* way to clear a crossing — disposition stays web + human-only
|
|
33
33
|
* (LUM-426/435/422).
|
|
34
34
|
*
|
|
35
|
-
*
|
|
36
|
-
*
|
|
37
|
-
*
|
|
35
|
+
* Fails *closed*, not open (LUM-480): any transport / non-ok HTTP / parse
|
|
36
|
+
* failure returns `{ status: 'error', reason }` so the caller can say "check
|
|
37
|
+
* failed — could not confirm" instead of mistaking a hiccup for "0 open / all
|
|
38
|
+
* clear". A successful read returns `{ status: 'ok', crossings }`; the list is
|
|
39
|
+
* empty only when the server genuinely reports no open crossings. Either way the
|
|
40
|
+
* supplementary safety signal never throws into the caller's primary output (the
|
|
41
|
+
* acceptance status, the wrap-up panel) — failure is a value, not an exception.
|
|
38
42
|
*/
|
|
39
43
|
async function fetchOpenCrossings(apiUrl, token, taskIdentifier) {
|
|
40
44
|
const url = `${(0, api_1.trimTrailingSlash)(apiUrl)}/api/tasks/${encodeURIComponent(taskIdentifier)}/boundary-crossings`;
|
|
@@ -42,20 +46,23 @@ async function fetchOpenCrossings(apiUrl, token, taskIdentifier) {
|
|
|
42
46
|
try {
|
|
43
47
|
res = await fetch(url, { headers: { Authorization: `Bearer ${token}` } });
|
|
44
48
|
}
|
|
45
|
-
catch {
|
|
46
|
-
return
|
|
49
|
+
catch (err) {
|
|
50
|
+
return {
|
|
51
|
+
status: 'error',
|
|
52
|
+
reason: err instanceof Error ? err.message : 'network error',
|
|
53
|
+
};
|
|
47
54
|
}
|
|
48
55
|
if (!res.ok)
|
|
49
|
-
return
|
|
56
|
+
return { status: 'error', reason: `HTTP ${res.status}` };
|
|
50
57
|
let data;
|
|
51
58
|
try {
|
|
52
59
|
data = (await res.json());
|
|
53
60
|
}
|
|
54
61
|
catch {
|
|
55
|
-
return
|
|
62
|
+
return { status: 'error', reason: 'invalid response body' };
|
|
56
63
|
}
|
|
57
64
|
const rows = Array.isArray(data.crossings) ? data.crossings : [];
|
|
58
|
-
|
|
65
|
+
const crossings = rows
|
|
59
66
|
.filter(c => c.disposition == null)
|
|
60
67
|
.map(c => ({
|
|
61
68
|
id: c.id,
|
|
@@ -65,6 +72,7 @@ async function fetchOpenCrossings(apiUrl, token, taskIdentifier) {
|
|
|
65
72
|
attribution: normalizeAttribution(c.attribution),
|
|
66
73
|
}))
|
|
67
74
|
.sort((a, b) => SEVERITY_RANK[b.severity] - SEVERITY_RANK[a.severity]);
|
|
75
|
+
return { status: 'ok', crossings };
|
|
68
76
|
}
|
|
69
77
|
/**
|
|
70
78
|
* The web deep link where a HUMAN dispositions crossings. Disposition is
|
package/package.json
CHANGED
|
@@ -1,81 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.ProgressCommentSection = void 0;
|
|
4
|
-
exports.formatProgressBody = formatProgressBody;
|
|
5
|
-
const sanitize_1 = require("../../lib/sanitize");
|
|
6
|
-
const line_prompt_1 = require("../../lib/line-prompt");
|
|
7
|
-
const editor_1 = require("../../lib/editor");
|
|
8
|
-
const progress_comment_api_1 = require("../../lib/progress-comment-api");
|
|
9
|
-
const HEADER = 'Session progress';
|
|
10
|
-
/** Join turn summaries into a bulleted progress comment body under a header. */
|
|
11
|
-
function formatProgressBody(summaries) {
|
|
12
|
-
return [HEADER, ...summaries.map(s => `- ${s}`)].join('\n');
|
|
13
|
-
}
|
|
14
|
-
/**
|
|
15
|
-
* Wrap-panel section that drafts a progress comment from the session's
|
|
16
|
-
* unposted turnSummaries and posts it after y/e/s confirmation. Holds its own
|
|
17
|
-
* draft + body state between prepare() and run().
|
|
18
|
-
*/
|
|
19
|
-
class ProgressCommentSection {
|
|
20
|
-
deps;
|
|
21
|
-
title = 'Progress comment';
|
|
22
|
-
draft = null;
|
|
23
|
-
body = '';
|
|
24
|
-
constructor(deps) {
|
|
25
|
-
this.deps = deps;
|
|
26
|
-
}
|
|
27
|
-
async prepare() {
|
|
28
|
-
this.draft = await (0, progress_comment_api_1.fetchProgressDraft)(this.deps.creds, this.deps.sessionId);
|
|
29
|
-
if (!this.draft.taskIdentifier || this.draft.summaries.length === 0) {
|
|
30
|
-
return false;
|
|
31
|
-
}
|
|
32
|
-
this.body = formatProgressBody(this.draft.summaries.map(s => s.turnSummary));
|
|
33
|
-
return true;
|
|
34
|
-
}
|
|
35
|
-
async run(opts) {
|
|
36
|
-
const draft = this.draft;
|
|
37
|
-
if (!draft || !draft.watermark)
|
|
38
|
-
return;
|
|
39
|
-
// Preview: sanitize the server free-text before it hits the terminal.
|
|
40
|
-
process.stdout.write(`Will post to ${draft.taskIdentifier} "${(0, sanitize_1.sanitizeField)(draft.taskTitle ?? '')}":\n`);
|
|
41
|
-
process.stdout.write(`${(0, sanitize_1.sanitizeField)(this.body)}\n`);
|
|
42
|
-
if (opts.dryRun) {
|
|
43
|
-
process.stdout.write('(dry-run, not posted)\n');
|
|
44
|
-
return;
|
|
45
|
-
}
|
|
46
|
-
if (opts.yes) {
|
|
47
|
-
await this.post(draft.watermark, this.body);
|
|
48
|
-
return;
|
|
49
|
-
}
|
|
50
|
-
const choice = (await (0, line_prompt_1.promptLine)('[y] post [e] edit [s] skip > ')).toLowerCase();
|
|
51
|
-
if (choice === 's' || choice === '') {
|
|
52
|
-
process.stdout.write('Skipped.\n');
|
|
53
|
-
return;
|
|
54
|
-
}
|
|
55
|
-
if (choice === 'e') {
|
|
56
|
-
const edited = (await (0, editor_1.editInEditor)(this.body)).trim();
|
|
57
|
-
if (edited.length === 0) {
|
|
58
|
-
process.stdout.write('Empty body — skipped.\n');
|
|
59
|
-
return;
|
|
60
|
-
}
|
|
61
|
-
process.stdout.write(`${(0, sanitize_1.sanitizeField)(edited)}\n`);
|
|
62
|
-
const confirm = (await (0, line_prompt_1.promptLine)('[y] post [s] skip > ')).toLowerCase();
|
|
63
|
-
if (confirm !== 'y') {
|
|
64
|
-
process.stdout.write('Skipped.\n');
|
|
65
|
-
return;
|
|
66
|
-
}
|
|
67
|
-
await this.post(draft.watermark, edited);
|
|
68
|
-
return;
|
|
69
|
-
}
|
|
70
|
-
if (choice === 'y') {
|
|
71
|
-
await this.post(draft.watermark, this.body);
|
|
72
|
-
return;
|
|
73
|
-
}
|
|
74
|
-
process.stdout.write('Unrecognized choice — skipped.\n');
|
|
75
|
-
}
|
|
76
|
-
async post(watermark, body) {
|
|
77
|
-
const { commentId } = await (0, progress_comment_api_1.postProgressComment)(this.deps.creds, this.deps.sessionId, { body, watermark });
|
|
78
|
-
process.stdout.write(`Posted progress comment (comment ${commentId})\n`);
|
|
79
|
-
}
|
|
80
|
-
}
|
|
81
|
-
exports.ProgressCommentSection = ProgressCommentSection;
|
|
@@ -1,47 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.fetchProgressDraft = fetchProgressDraft;
|
|
4
|
-
exports.postProgressComment = postProgressComment;
|
|
5
|
-
const api_1 = require("./api");
|
|
6
|
-
function base(creds) {
|
|
7
|
-
return (0, api_1.trimTrailingSlash)((0, api_1.resolveAuthedApiUrl)(creds.apiUrl));
|
|
8
|
-
}
|
|
9
|
-
/** GET the unposted progress draft for the session. Throws on transport / non-200. */
|
|
10
|
-
async function fetchProgressDraft(creds, sessionId) {
|
|
11
|
-
const url = `${base(creds)}/api/sessions/${encodeURIComponent(sessionId)}/turn-summaries`;
|
|
12
|
-
const res = await fetch(url, {
|
|
13
|
-
headers: { Authorization: `Bearer ${creds.token}` },
|
|
14
|
-
});
|
|
15
|
-
if (res.status === 401)
|
|
16
|
-
throw new Error('API key invalid or revoked. Run `lumo auth login`.');
|
|
17
|
-
if (!res.ok)
|
|
18
|
-
throw new Error(`progress draft fetch failed (HTTP ${res.status})`);
|
|
19
|
-
return (await res.json());
|
|
20
|
-
}
|
|
21
|
-
/** POST the (possibly edited) body + watermark. Throws the server message on non-201. */
|
|
22
|
-
async function postProgressComment(creds, sessionId, payload) {
|
|
23
|
-
const url = `${base(creds)}/api/sessions/${encodeURIComponent(sessionId)}/progress-comment`;
|
|
24
|
-
const res = await fetch(url, {
|
|
25
|
-
method: 'POST',
|
|
26
|
-
headers: {
|
|
27
|
-
Authorization: `Bearer ${creds.token}`,
|
|
28
|
-
'Content-Type': 'application/json',
|
|
29
|
-
},
|
|
30
|
-
body: JSON.stringify(payload),
|
|
31
|
-
});
|
|
32
|
-
if (res.status === 401)
|
|
33
|
-
throw new Error('API key invalid or revoked. Run `lumo auth login`.');
|
|
34
|
-
if (res.status !== 201) {
|
|
35
|
-
let serverMsg = null;
|
|
36
|
-
try {
|
|
37
|
-
const errBody = (await res.json());
|
|
38
|
-
if (typeof errBody.error === 'string')
|
|
39
|
-
serverMsg = errBody.error;
|
|
40
|
-
}
|
|
41
|
-
catch {
|
|
42
|
-
// body wasn't JSON
|
|
43
|
-
}
|
|
44
|
-
throw new Error(serverMsg ?? `progress comment failed (HTTP ${res.status})`);
|
|
45
|
-
}
|
|
46
|
-
return (await res.json());
|
|
47
|
-
}
|