wotann 0.5.95 → 0.5.97
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +68 -24
- package/dist/orchestration/proof-bundles.d.ts +8 -0
- package/dist/orchestration/proof-bundles.js +2 -0
- package/dist/security/approval-binding.d.ts +52 -0
- package/dist/security/approval-binding.js +57 -0
- package/dist/security/human-approval.d.ts +2 -0
- package/dist/security/human-approval.js +15 -24
- package/dist/ui/components/v3/AppV3.d.ts +10 -1
- package/dist/ui/components/v3/AppV3.js +34 -5
- package/dist/ui/components/v3/Transcript.d.ts +21 -1
- package/dist/ui/components/v3/Transcript.js +18 -58
- package/dist/ui/components/v3/TranscriptRow.d.ts +45 -0
- package/dist/ui/components/v3/TranscriptRow.js +102 -0
- package/dist/ui/inline-render.d.ts +28 -0
- package/dist/ui/inline-render.js +35 -0
- package/dist/verification/reproduction/autonomous-gate.d.ts +52 -0
- package/dist/verification/reproduction/autonomous-gate.js +71 -0
- package/dist/verification/reproduction/checkout-prep.d.ts +48 -0
- package/dist/verification/reproduction/checkout-prep.js +78 -0
- package/dist/verification/reproduction/diff-checker.d.ts +26 -0
- package/dist/verification/reproduction/diff-checker.js +33 -0
- package/dist/verification/reproduction/enforcement.d.ts +14 -0
- package/dist/verification/reproduction/enforcement.js +30 -0
- package/dist/verification/reproduction/exec-runner.d.ts +15 -0
- package/dist/verification/reproduction/exec-runner.js +47 -0
- package/dist/verification/reproduction/index.d.ts +10 -0
- package/dist/verification/reproduction/index.js +10 -0
- package/dist/verification/reproduction/mutation-gate.d.ts +42 -0
- package/dist/verification/reproduction/mutation-gate.js +43 -0
- package/dist/verification/reproduction/proof-artifact.d.ts +16 -0
- package/dist/verification/reproduction/proof-artifact.js +22 -0
- package/dist/verification/reproduction/replay-runner.d.ts +37 -0
- package/dist/verification/reproduction/replay-runner.js +28 -0
- package/dist/verification/reproduction/reproduce.d.ts +34 -0
- package/dist/verification/reproduction/reproduce.js +31 -0
- package/dist/verification/reproduction/verdict.d.ts +39 -0
- package/dist/verification/reproduction/verdict.js +40 -0
- package/package.json +1 -1
- package/dist/ui/opentui-chat.d.ts +0 -19
- package/dist/ui/opentui-chat.js +0 -285
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
export { decideReproductionVerdict, } from "./verdict.js";
|
|
2
|
+
export { checkDiff, DEFAULT_PROTECTED_PATTERNS, } from "./diff-checker.js";
|
|
3
|
+
export { runReplay, } from "./replay-runner.js";
|
|
4
|
+
export { buildProofArtifact } from "./proof-artifact.js";
|
|
5
|
+
export { gateMutation, runMutationGate, DEFAULT_MUTATION_THRESHOLD, } from "./mutation-gate.js";
|
|
6
|
+
export { enforceReproductionVerdict, } from "./enforcement.js";
|
|
7
|
+
export { reproduceRun, } from "./reproduce.js";
|
|
8
|
+
export { buildExecReplayRunner } from "./exec-runner.js";
|
|
9
|
+
export { prepareVerifierCheckout, buildExecGitRunner, } from "./checkout-prep.js";
|
|
10
|
+
export { reproduceAutonomousRun, runWorkspaceReproduction, } from "./autonomous-gate.js";
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Mutation gate (BenchJack V6 defense): a green test run is worthless if the
|
|
3
|
+
* tests have no real assertions ("100% coverage, 0% mutation score"). Mutate
|
|
4
|
+
* the changed code; if the agent's OWN tests don't kill enough mutants,
|
|
5
|
+
* downgrade a "reproduced" verdict to "weak-tests".
|
|
6
|
+
*
|
|
7
|
+
* The gate DECISION is pure. Running the mutation tool (Stryker/mutmut) inside
|
|
8
|
+
* the verifier box is the injected `MutationRunner` (production wiring deferred,
|
|
9
|
+
* same DI pattern as replay-runner / vm-isolation).
|
|
10
|
+
*/
|
|
11
|
+
export interface MutationResult {
|
|
12
|
+
readonly killed: number;
|
|
13
|
+
readonly total: number;
|
|
14
|
+
}
|
|
15
|
+
export interface MutationGateResult {
|
|
16
|
+
readonly weakTests: boolean;
|
|
17
|
+
readonly score: number;
|
|
18
|
+
readonly threshold: number;
|
|
19
|
+
readonly reason?: string;
|
|
20
|
+
}
|
|
21
|
+
export declare const DEFAULT_MUTATION_THRESHOLD = 0.6;
|
|
22
|
+
/**
|
|
23
|
+
* Pure. `total <= 0` means no mutants were generated — the tests do not
|
|
24
|
+
* exercise the changed code at all — which is the worst case, so it is `weak`.
|
|
25
|
+
*/
|
|
26
|
+
export declare function gateMutation(result: MutationResult, threshold?: number): MutationGateResult;
|
|
27
|
+
/** Injected runner (DI like replay-runner). Production wires Stryker/mutmut; tests stub. */
|
|
28
|
+
export interface MutationRunner {
|
|
29
|
+
readonly probe: () => Promise<{
|
|
30
|
+
ok: boolean;
|
|
31
|
+
reason?: string;
|
|
32
|
+
}>;
|
|
33
|
+
readonly run: (dir: string, changedFiles: readonly string[]) => Promise<MutationResult>;
|
|
34
|
+
}
|
|
35
|
+
/**
|
|
36
|
+
* Honest stub: a failed probe means the mutation tool is unavailable. Mutation
|
|
37
|
+
* testing is a BONUS downgrade (the reproduction channel carries enforcement),
|
|
38
|
+
* so "unavailable" does NOT set `weakTests` — it just records why it was skipped.
|
|
39
|
+
*/
|
|
40
|
+
export declare function runMutationGate(dir: string, changedFiles: readonly string[], runner: MutationRunner, threshold?: number): Promise<MutationGateResult & {
|
|
41
|
+
readonly unavailable?: string;
|
|
42
|
+
}>;
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
export const DEFAULT_MUTATION_THRESHOLD = 0.6;
/**
 * Pure decision: turn raw mutant counts into a weak-tests flag.
 *
 * `total <= 0` means no mutants were generated — the tests do not exercise the
 * changed code at all — which is the worst case, so it is `weak`.
 *
 * Non-numeric input (NaN / undefined counts, an infinite `killed`, or a NaN
 * threshold) is also `weak`. Every ordered comparison against NaN is false, so
 * without the explicit guard such a result would fall through to the passing
 * branch — a false PASS, which is exactly what this gate exists to prevent.
 *
 * @param result    Mutation counts: `killed` out of `total` generated mutants.
 * @param threshold Minimum acceptable `killed / total` ratio.
 * @returns `weakTests`, the computed `score`, the `threshold` that was applied
 *          and, whenever the tests are judged weak, a human-readable `reason`.
 */
export function gateMutation(result, threshold = DEFAULT_MUTATION_THRESHOLD) {
    if (result.total <= 0) {
        return {
            weakTests: true,
            score: 0,
            threshold,
            reason: "no mutants generated — tests do not exercise the changed code",
        };
    }
    const score = result.killed / result.total;
    // Fail closed: a score or threshold we cannot order numerically must never pass.
    if (!Number.isFinite(score) || Number.isNaN(threshold)) {
        return {
            weakTests: true,
            score: 0,
            threshold,
            reason: "invalid mutation result — killed/total or threshold is not a finite number",
        };
    }
    if (score < threshold) {
        return {
            weakTests: true,
            score,
            threshold,
            reason: `mutation score ${score.toFixed(2)} < threshold ${threshold}`,
        };
    }
    return { weakTests: false, score, threshold };
}
|
|
26
|
+
/**
 * Probe the injected mutation runner and, when it is usable, gate on the
 * counts it reports for `changedFiles` inside `dir`.
 *
 * Mutation testing is a BONUS downgrade (the reproduction channel carries
 * enforcement), so a failed probe does NOT set `weakTests`; the result only
 * records why the gate was skipped in `unavailable`.
 */
export async function runMutationGate(dir, changedFiles, runner, threshold = DEFAULT_MUTATION_THRESHOLD) {
    const availability = await runner.probe();
    if (availability.ok) {
        const counts = await runner.run(dir, changedFiles);
        return gateMutation(counts, threshold);
    }
    // Tool missing: report the skip, keep the verdict untouched.
    const detail = availability.reason ? `:${availability.reason}` : "";
    return {
        weakTests: false,
        score: 0,
        threshold,
        unavailable: `mutation:unavailable${detail}`,
    };
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import { type ChainExport } from "../../security/hash-audit-chain.js";
|
|
2
|
+
import type { ReproductionResult, ReproductionVerdict, ClaimedChecks, ObservedChecks } from "./verdict.js";
|
|
3
|
+
/**
|
|
4
|
+
* The verdict is a first-class, hash-chained, harness-signed PROOF ARTIFACT —
|
|
5
|
+
* not a bare boolean. `chainExport` is tamper-evident (SHA-256 linked); any
|
|
6
|
+
* post-hoc edit to the recorded verdict/observed breaks `HashAuditChain.verify()`.
|
|
7
|
+
*/
|
|
8
|
+
export interface ProofArtifact {
|
|
9
|
+
readonly verdict: ReproductionVerdict;
|
|
10
|
+
readonly diffCid: string;
|
|
11
|
+
readonly claimed: ClaimedChecks;
|
|
12
|
+
readonly observed: ObservedChecks;
|
|
13
|
+
readonly contradictions: readonly string[];
|
|
14
|
+
readonly chainExport: ChainExport;
|
|
15
|
+
}
|
|
16
|
+
export declare function buildProofArtifact(result: ReproductionResult, diffText: string, actor?: string): ProofArtifact;
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import { HashAuditChain } from "../../security/hash-audit-chain.js";
|
|
2
|
+
import { cidOf } from "../../core/content-cid.js";
|
|
3
|
+
/**
 * Wrap a reproduction result in a proof artifact: the result's fields, the
 * content id of the diff text, and the export of a fresh audit chain holding a
 * single "reproduction.verdict" entry attributed to `actor`.
 *
 * `infraError` is written into the chain entry when present, but is not a
 * field of the returned artifact.
 */
export function buildProofArtifact(result, diffText, actor = "wotann-verifier") {
    const { verdict, claimed, observed, contradictions } = result;
    const diffCid = cidOf(diffText);
    const recorded = { verdict, diffCid, claimed, observed, contradictions };
    const chain = new HashAuditChain();
    const entry = result.infraError
        ? { ...recorded, infraError: result.infraError }
        : { ...recorded };
    chain.append("reproduction.verdict", actor, entry);
    return { ...recorded, chainExport: chain.exportChain() };
}
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import type { ObservedChecks } from "./verdict.js";
|
|
2
|
+
/**
|
|
3
|
+
* Structural runner (mirrors VmRunner in computer-use/vm-isolation.ts). Tests
|
|
4
|
+
* inject an in-memory stub; production wires this to run argv INSIDE the
|
|
5
|
+
* verifier box (container/sandbox = the separate trust boundary). Argv-only,
|
|
6
|
+
* never a shell string — no interpolation.
|
|
7
|
+
*/
|
|
8
|
+
export interface ReplayRunner {
|
|
9
|
+
readonly probe: () => Promise<{
|
|
10
|
+
ok: boolean;
|
|
11
|
+
reason?: string;
|
|
12
|
+
}>;
|
|
13
|
+
readonly runInDir: (dir: string, argv: readonly string[]) => Promise<{
|
|
14
|
+
exitCode: number;
|
|
15
|
+
stdout: string;
|
|
16
|
+
stderr: string;
|
|
17
|
+
}>;
|
|
18
|
+
}
|
|
19
|
+
export interface ReplayCommands {
|
|
20
|
+
readonly test?: readonly string[];
|
|
21
|
+
readonly typecheck?: readonly string[];
|
|
22
|
+
readonly lint?: readonly string[];
|
|
23
|
+
}
|
|
24
|
+
export interface ReplayInput {
|
|
25
|
+
/** The clean, applied-diff checkout in the verifier box (separate trust boundary). */
|
|
26
|
+
readonly checkoutDir: string;
|
|
27
|
+
readonly commands: ReplayCommands;
|
|
28
|
+
}
|
|
29
|
+
/**
|
|
30
|
+
* Re-run the claimed commands in the verifier box and report the HARNESS's own
|
|
31
|
+
* pass/fail (exit code 0 = pass). NEVER reads the agent's self-reported result.
|
|
32
|
+
* Honest stub: a failed probe yields all-null observed + infraError, so the
|
|
33
|
+
* verdict logic emits `infra-error` and never auto-passes.
|
|
34
|
+
*/
|
|
35
|
+
export declare function runReplay(input: ReplayInput, runner: ReplayRunner): Promise<ObservedChecks & {
|
|
36
|
+
readonly infraError?: string;
|
|
37
|
+
}>;
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
/**
 * Replay the configured commands inside the verifier checkout and report what
 * the HARNESS itself saw: exit code 0 is a pass, anything else a fail, and a
 * missing or empty argv is `null` (not run). The agent's self-reported result
 * is never consulted.
 *
 * When the runner's probe fails, every check is `null` and `infraError`
 * carries the reason, so the verdict logic emits `infra-error` rather than
 * auto-passing.
 */
export async function runReplay(input, runner) {
    const availability = await runner.probe();
    if (!availability.ok) {
        const detail = availability.reason ? `:${availability.reason}` : "";
        return {
            testsPass: null,
            typecheckPass: null,
            lintPass: null,
            infraError: `replay:unavailable${detail}`,
        };
    }
    const { test, typecheck, lint } = input.commands;
    const outcomes = [];
    // Strictly sequential, in test → typecheck → lint order.
    for (const argv of [test, typecheck, lint]) {
        if (!argv || argv.length === 0) {
            outcomes.push(null);
            continue;
        }
        const { exitCode } = await runner.runInDir(input.checkoutDir, argv);
        outcomes.push(exitCode === 0);
    }
    const [testsPass, typecheckPass, lintPass] = outcomes;
    return { testsPass, typecheckPass, lintPass };
}
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
import { type TestAuthorship } from "./diff-checker.js";
|
|
2
|
+
import { type ReplayRunner, type ReplayCommands } from "./replay-runner.js";
|
|
3
|
+
import { type MutationRunner } from "./mutation-gate.js";
|
|
4
|
+
import { type ClaimedChecks, type ReproductionResult } from "./verdict.js";
|
|
5
|
+
import { type ProofArtifact } from "./proof-artifact.js";
|
|
6
|
+
export interface ReproduceInput {
|
|
7
|
+
readonly claimed: ClaimedChecks;
|
|
8
|
+
/** Repo-relative POSIX paths the agent's diff touched. */
|
|
9
|
+
readonly changedPaths: readonly string[];
|
|
10
|
+
/** The clean, applied-diff checkout in the verifier box (separate trust boundary). */
|
|
11
|
+
readonly checkoutDir: string;
|
|
12
|
+
readonly commands: ReplayCommands;
|
|
13
|
+
/** The diff text (for the proof artifact's content identity). */
|
|
14
|
+
readonly diffText: string;
|
|
15
|
+
readonly authorship?: TestAuthorship;
|
|
16
|
+
/** Changed source files to mutation-test (Phase-B bonus gate). */
|
|
17
|
+
readonly mutationFiles?: readonly string[];
|
|
18
|
+
}
|
|
19
|
+
export interface ReproduceRunners {
|
|
20
|
+
readonly replay: ReplayRunner;
|
|
21
|
+
/** Optional — when present and the reproduction is clean, gates weak-tests. */
|
|
22
|
+
readonly mutation?: MutationRunner;
|
|
23
|
+
}
|
|
24
|
+
export interface ReproduceOutput {
|
|
25
|
+
readonly result: ReproductionResult;
|
|
26
|
+
readonly proof: ProofArtifact;
|
|
27
|
+
}
|
|
28
|
+
/**
|
|
29
|
+
* Compose the reproduction library into one call: tamper-check the diff, replay
|
|
30
|
+
* the claimed commands in the verifier box (capturing the harness's OWN result),
|
|
31
|
+
* optionally mutation-gate a clean reproduction, decide the verdict, and emit
|
|
32
|
+
* the hash-chained proof artifact. Pure orchestration over injected runners.
|
|
33
|
+
*/
|
|
34
|
+
export declare function reproduceRun(input: ReproduceInput, runners: ReproduceRunners): Promise<ReproduceOutput>;
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import { checkDiff } from "./diff-checker.js";
|
|
2
|
+
import { runReplay } from "./replay-runner.js";
|
|
3
|
+
import { runMutationGate } from "./mutation-gate.js";
|
|
4
|
+
import { decideReproductionVerdict, } from "./verdict.js";
|
|
5
|
+
import { buildProofArtifact } from "./proof-artifact.js";
|
|
6
|
+
/**
 * One-call composition of the reproduction library, in this order:
 * tamper-check the changed paths, replay the claimed commands in the verifier
 * checkout, optionally run the mutation gate, decide the verdict, and build
 * the proof artifact from that verdict and the diff text.
 *
 * The mutation gate is a BONUS downgrade: it only runs when a mutation runner
 * was supplied and the run is neither tampered nor an infra error.
 */
export async function reproduceRun(input, runners) {
    const { tampered } = checkDiff(input.changedPaths, undefined, input.authorship);
    const observed = await runReplay({ checkoutDir: input.checkoutDir, commands: input.commands }, runners.replay);
    const { infraError } = observed;
    const gateApplies = Boolean(runners.mutation) && !tampered && !infraError;
    const weakTests = gateApplies
        ? (await runMutationGate(input.checkoutDir, input.mutationFiles ?? [], runners.mutation)).weakTests
        : false;
    const result = decideReproductionVerdict({
        claimed: input.claimed,
        observed,
        tampered,
        weakTests,
        infraError,
    });
    return { result, proof: buildProofArtifact(result, input.diffText) };
}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Reproduction verdict — the harness's OWN judgment, produced by re-running
|
|
3
|
+
* the claimed work in a separate trust boundary. NEVER the agent's self-report
|
|
4
|
+
* (that is the V7 violation this whole module exists to defeat).
|
|
5
|
+
*/
|
|
6
|
+
export type ReproductionVerdict = "reproduced" | "contradicted" | "weak-tests" | "tamper" | "infra-error";
|
|
7
|
+
export interface ClaimedChecks {
|
|
8
|
+
readonly testsPass: boolean;
|
|
9
|
+
readonly typecheckPass: boolean;
|
|
10
|
+
readonly lintPass: boolean;
|
|
11
|
+
}
|
|
12
|
+
/** `null` = the check was not run / unavailable, so it cannot contradict a claim. */
|
|
13
|
+
export interface ObservedChecks {
|
|
14
|
+
readonly testsPass: boolean | null;
|
|
15
|
+
readonly typecheckPass: boolean | null;
|
|
16
|
+
readonly lintPass: boolean | null;
|
|
17
|
+
}
|
|
18
|
+
export interface ReproductionInput {
|
|
19
|
+
readonly claimed: ClaimedChecks;
|
|
20
|
+
readonly observed: ObservedChecks;
|
|
21
|
+
readonly tampered: boolean;
|
|
22
|
+
/** Set by the Phase-B mutation gate; Phase A never sets it. */
|
|
23
|
+
readonly weakTests?: boolean;
|
|
24
|
+
/** Set when the replay could not run at all. */
|
|
25
|
+
readonly infraError?: string;
|
|
26
|
+
}
|
|
27
|
+
export interface ReproductionResult {
|
|
28
|
+
readonly verdict: ReproductionVerdict;
|
|
29
|
+
readonly claimed: ClaimedChecks;
|
|
30
|
+
readonly observed: ObservedChecks;
|
|
31
|
+
readonly contradictions: readonly string[];
|
|
32
|
+
readonly infraError?: string;
|
|
33
|
+
}
|
|
34
|
+
/**
|
|
35
|
+
* Pure decision. Precedence (a false PASS is the moat-killer; a false BLOCK is
|
|
36
|
+
* recoverable, so we bias toward blocking): tamper > infra-error > contradicted
|
|
37
|
+
* > weak-tests > reproduced.
|
|
38
|
+
*/
|
|
39
|
+
export declare function decideReproductionVerdict(input: ReproductionInput): ReproductionResult;
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
const CHECKS = [
    { name: "tests", claimedKey: "testsPass", observedKey: "testsPass" },
    { name: "typecheck", claimedKey: "typecheckPass", observedKey: "typecheckPass" },
    { name: "lint", claimedKey: "lintPass", observedKey: "lintPass" },
];
/** Apply the verdict precedence: tamper > infra-error > contradicted > weak-tests > reproduced. */
function pickVerdict(input, contradictionCount) {
    if (input.tampered)
        return "tamper";
    if (input.infraError)
        return "infra-error";
    if (contradictionCount > 0)
        return "contradicted";
    if (input.weakTests)
        return "weak-tests";
    return "reproduced";
}
/**
 * Pure decision over claimed vs. harness-observed checks.
 *
 * A contradiction is recorded only when the agent claimed a PASS and the
 * harness observed a FAIL; an observed `null` ("not run") never contradicts.
 * Precedence is biased toward blocking (a false PASS is the moat-killer, a
 * false BLOCK is recoverable): tamper > infra-error > contradicted >
 * weak-tests > reproduced.
 *
 * Contradictions are always reported, whichever verdict wins; `infraError` is
 * echoed back only when it is set.
 */
export function decideReproductionVerdict(input) {
    const { claimed, observed, infraError } = input;
    const contradictions = CHECKS
        .filter((check) => claimed[check.claimedKey] === true && observed[check.observedKey] === false)
        .map((check) => `${check.name}: agent claimed pass, harness observed fail`);
    const outcome = {
        verdict: pickVerdict(input, contradictions.length),
        claimed,
        observed,
        contradictions,
    };
    return infraError ? { ...outcome, infraError } : outcome;
}
|
package/package.json
CHANGED
|
@@ -1,19 +0,0 @@
|
|
|
1
|
-
import { type CliRenderer } from "@opentui/core";
|
|
2
|
-
import { type Root } from "@opentui/react";
|
|
3
|
-
import type { ProviderName, ProviderStatus } from "../core/types.js";
|
|
4
|
-
import type { WotannRuntime } from "../core/runtime.js";
|
|
5
|
-
export interface OpenTuiChatOptions {
|
|
6
|
-
readonly version: string;
|
|
7
|
-
readonly providers: readonly ProviderStatus[];
|
|
8
|
-
readonly initialProvider?: ProviderName;
|
|
9
|
-
readonly initialModel?: string;
|
|
10
|
-
readonly runtime: WotannRuntime;
|
|
11
|
-
readonly fullscreen?: boolean;
|
|
12
|
-
}
|
|
13
|
-
export interface OpenTuiChatHandle {
|
|
14
|
-
readonly renderer: CliRenderer;
|
|
15
|
-
readonly root: Root;
|
|
16
|
-
readonly waitUntilExit: Promise<void>;
|
|
17
|
-
stop(): Promise<void>;
|
|
18
|
-
}
|
|
19
|
-
export declare function mountOpenTuiChat(options: OpenTuiChatOptions): Promise<OpenTuiChatHandle>;
|
package/dist/ui/opentui-chat.js
DELETED
|
@@ -1,285 +0,0 @@
|
|
|
1
|
-
import React, { useCallback, useEffect, useMemo, useRef, useState } from "react";
|
|
2
|
-
import { createCliRenderer } from "@opentui/core";
|
|
3
|
-
import { createRoot, useKeyboard } from "@opentui/react";
|
|
4
|
-
import { runAgent } from "../core/runtime-agent-loop.js";
|
|
5
|
-
import { buildAgentToolContext } from "../core/agent-tool-context.js";
|
|
6
|
-
import { AGENT_TOOL_DEFINITIONS, executeAgentTool } from "../tools/agent-tools.js";
|
|
7
|
-
export async function mountOpenTuiChat(options) {
|
|
8
|
-
const screenMode = options.fullscreen === false ? "main-screen" : "alternate-screen";
|
|
9
|
-
const renderer = await createCliRenderer({
|
|
10
|
-
screenMode,
|
|
11
|
-
exitOnCtrlC: false,
|
|
12
|
-
useMouse: true,
|
|
13
|
-
enableMouseMovement: true,
|
|
14
|
-
targetFps: 30,
|
|
15
|
-
maxFps: 60,
|
|
16
|
-
clearOnShutdown: options.fullscreen !== false,
|
|
17
|
-
useKittyKeyboard: {
|
|
18
|
-
disambiguate: true,
|
|
19
|
-
alternateKeys: true,
|
|
20
|
-
events: false,
|
|
21
|
-
allKeysAsEscapes: false,
|
|
22
|
-
reportText: true,
|
|
23
|
-
},
|
|
24
|
-
});
|
|
25
|
-
const root = createRoot(renderer);
|
|
26
|
-
let done = false;
|
|
27
|
-
let resolveExit;
|
|
28
|
-
const waitUntilExit = new Promise((resolve) => {
|
|
29
|
-
resolveExit = resolve;
|
|
30
|
-
});
|
|
31
|
-
const stop = async () => {
|
|
32
|
-
if (done)
|
|
33
|
-
return;
|
|
34
|
-
done = true;
|
|
35
|
-
root.unmount();
|
|
36
|
-
renderer.destroy();
|
|
37
|
-
resolveExit?.();
|
|
38
|
-
};
|
|
39
|
-
root.render(React.createElement(OpenTuiChatApp, { ...options, onExit: () => void stop() }));
|
|
40
|
-
return { renderer, root, waitUntilExit, stop };
|
|
41
|
-
}
|
|
42
|
-
function OpenTuiChatApp(props) {
|
|
43
|
-
const providerSummary = useMemo(() => summarizeProviders(props.providers), [props.providers]);
|
|
44
|
-
const [draft, setDraft] = useState("");
|
|
45
|
-
const [isStreaming, setIsStreaming] = useState(false);
|
|
46
|
-
const [messages, setMessages] = useState(() => [
|
|
47
|
-
{
|
|
48
|
-
id: "system-welcome",
|
|
49
|
-
role: "system",
|
|
50
|
-
content: `WOTANN ${props.version} native OpenTUI renderer ready. ` +
|
|
51
|
-
"Type a prompt, /help, /clear, or /quit.",
|
|
52
|
-
},
|
|
53
|
-
]);
|
|
54
|
-
const messagesRef = useRef(messages);
|
|
55
|
-
const queueRef = useRef([]);
|
|
56
|
-
const abortRef = useRef(null);
|
|
57
|
-
const submitPromptRef = useRef(async () => { });
|
|
58
|
-
useEffect(() => {
|
|
59
|
-
messagesRef.current = messages;
|
|
60
|
-
}, [messages]);
|
|
61
|
-
const appendMessage = useCallback((message) => {
|
|
62
|
-
setMessages((prev) => [...prev, message]);
|
|
63
|
-
}, []);
|
|
64
|
-
const appendAssistantDelta = useCallback((id, content, model, provider) => {
|
|
65
|
-
setMessages((prev) => {
|
|
66
|
-
const existing = prev.find((message) => message.id === id);
|
|
67
|
-
if (!existing) {
|
|
68
|
-
return [...prev, { id, role: "assistant", content, model, provider }];
|
|
69
|
-
}
|
|
70
|
-
return prev.map((message) => message.id === id
|
|
71
|
-
? {
|
|
72
|
-
...message,
|
|
73
|
-
content: `${message.content}${content}`,
|
|
74
|
-
...(model ? { model } : {}),
|
|
75
|
-
...(provider ? { provider } : {}),
|
|
76
|
-
}
|
|
77
|
-
: message);
|
|
78
|
-
});
|
|
79
|
-
}, []);
|
|
80
|
-
const runQueued = useCallback(() => {
|
|
81
|
-
const [next, ...rest] = queueRef.current;
|
|
82
|
-
queueRef.current = rest;
|
|
83
|
-
if (next) {
|
|
84
|
-
void submitPromptRef.current(next);
|
|
85
|
-
}
|
|
86
|
-
}, []);
|
|
87
|
-
const submitPrompt = useCallback(async (rawPrompt) => {
|
|
88
|
-
const prompt = rawPrompt.trim();
|
|
89
|
-
if (!prompt)
|
|
90
|
-
return;
|
|
91
|
-
if (prompt === "/quit" || prompt === "/exit") {
|
|
92
|
-
props.onExit();
|
|
93
|
-
return;
|
|
94
|
-
}
|
|
95
|
-
if (prompt === "/clear") {
|
|
96
|
-
setMessages([]);
|
|
97
|
-
return;
|
|
98
|
-
}
|
|
99
|
-
if (prompt === "/help") {
|
|
100
|
-
appendMessage({
|
|
101
|
-
id: `system-${Date.now()}`,
|
|
102
|
-
role: "system",
|
|
103
|
-
content: "Commands: /help, /clear, /quit. Enter submits. Esc aborts an active turn. " +
|
|
104
|
-
"OpenTUI provides native scroll, mouse, keyboard, and alternate-screen rendering.",
|
|
105
|
-
});
|
|
106
|
-
return;
|
|
107
|
-
}
|
|
108
|
-
if (isStreaming) {
|
|
109
|
-
queueRef.current = [...queueRef.current, prompt];
|
|
110
|
-
appendMessage({
|
|
111
|
-
id: `queued-${Date.now()}`,
|
|
112
|
-
role: "system",
|
|
113
|
-
content: `Queued next turn: ${prompt}`,
|
|
114
|
-
});
|
|
115
|
-
return;
|
|
116
|
-
}
|
|
117
|
-
const userMessage = {
|
|
118
|
-
id: `user-${Date.now()}`,
|
|
119
|
-
role: "user",
|
|
120
|
-
content: prompt,
|
|
121
|
-
};
|
|
122
|
-
const turnContext = messagesRef.current;
|
|
123
|
-
const abortController = new AbortController();
|
|
124
|
-
abortRef.current = abortController;
|
|
125
|
-
setIsStreaming(true);
|
|
126
|
-
appendMessage(userMessage);
|
|
127
|
-
const assistantId = `assistant-${Date.now()}`;
|
|
128
|
-
let failed = false;
|
|
129
|
-
try {
|
|
130
|
-
for await (const event of runAgent({
|
|
131
|
-
prompt,
|
|
132
|
-
context: turnContext,
|
|
133
|
-
model: props.initialModel || undefined,
|
|
134
|
-
provider: props.initialProvider || undefined,
|
|
135
|
-
tools: AGENT_TOOL_DEFINITIONS,
|
|
136
|
-
signal: abortController.signal,
|
|
137
|
-
query: (queryOptions) => props.runtime.query(queryOptions),
|
|
138
|
-
executeTool: (name, input) => executeAgentTool(name, input, buildAgentToolContext(props.runtime, {
|
|
139
|
-
workingDir: props.runtime.getWorkingDir(),
|
|
140
|
-
permissionMode: props.runtime.getPermissionMode(),
|
|
141
|
-
})),
|
|
142
|
-
})) {
|
|
143
|
-
if (abortController.signal.aborted)
|
|
144
|
-
break;
|
|
145
|
-
if ("kind" in event) {
|
|
146
|
-
if (event.kind === "tool_result") {
|
|
147
|
-
appendMessage({
|
|
148
|
-
id: `tool-${Date.now()}`,
|
|
149
|
-
role: "tool",
|
|
150
|
-
toolName: event.toolName,
|
|
151
|
-
toolCallId: event.toolCallId,
|
|
152
|
-
content: `${event.toolName}: ${previewToolResult(event.result)}`,
|
|
153
|
-
});
|
|
154
|
-
}
|
|
155
|
-
continue;
|
|
156
|
-
}
|
|
157
|
-
if (event.type === "text" && event.content.length > 0) {
|
|
158
|
-
appendAssistantDelta(assistantId, event.content, event.model, event.provider);
|
|
159
|
-
}
|
|
160
|
-
else if (event.type === "error") {
|
|
161
|
-
failed = true;
|
|
162
|
-
appendMessage({
|
|
163
|
-
id: `error-${Date.now()}`,
|
|
164
|
-
role: "system",
|
|
165
|
-
content: `Runtime error: ${event.content || "query failed"}`,
|
|
166
|
-
});
|
|
167
|
-
}
|
|
168
|
-
}
|
|
169
|
-
}
|
|
170
|
-
catch (err) {
|
|
171
|
-
failed = true;
|
|
172
|
-
appendMessage({
|
|
173
|
-
id: `error-${Date.now()}`,
|
|
174
|
-
role: "system",
|
|
175
|
-
content: `Runtime error: ${err instanceof Error ? err.message : String(err)}`,
|
|
176
|
-
});
|
|
177
|
-
}
|
|
178
|
-
finally {
|
|
179
|
-
abortRef.current = null;
|
|
180
|
-
setIsStreaming(false);
|
|
181
|
-
if (!failed && abortController.signal.aborted) {
|
|
182
|
-
appendMessage({
|
|
183
|
-
id: `abort-${Date.now()}`,
|
|
184
|
-
role: "system",
|
|
185
|
-
content: "Turn aborted.",
|
|
186
|
-
});
|
|
187
|
-
}
|
|
188
|
-
runQueued();
|
|
189
|
-
}
|
|
190
|
-
}, [
|
|
191
|
-
appendAssistantDelta,
|
|
192
|
-
appendMessage,
|
|
193
|
-
isStreaming,
|
|
194
|
-
props,
|
|
195
|
-
runQueued,
|
|
196
|
-
]);
|
|
197
|
-
useEffect(() => {
|
|
198
|
-
submitPromptRef.current = submitPrompt;
|
|
199
|
-
}, [submitPrompt]);
|
|
200
|
-
useKeyboard((key) => {
|
|
201
|
-
if (key.name === "escape" && abortRef.current) {
|
|
202
|
-
abortRef.current.abort();
|
|
203
|
-
return;
|
|
204
|
-
}
|
|
205
|
-
if ((key.name === "c" && key.ctrl) || key.name === "q") {
|
|
206
|
-
props.onExit();
|
|
207
|
-
}
|
|
208
|
-
});
|
|
209
|
-
const transcript = messages.map(formatMessage).join("\n\n");
|
|
210
|
-
const status = isStreaming
|
|
211
|
-
? "streaming - Esc aborts"
|
|
212
|
-
: queueRef.current.length > 0
|
|
213
|
-
? `${queueRef.current.length} queued`
|
|
214
|
-
: "ready";
|
|
215
|
-
return React.createElement("box", {
|
|
216
|
-
id: "wotann-opentui-root",
|
|
217
|
-
flexDirection: "column",
|
|
218
|
-
width: "100%",
|
|
219
|
-
height: "100%",
|
|
220
|
-
padding: 1,
|
|
221
|
-
backgroundColor: "#05070d",
|
|
222
|
-
gap: 1,
|
|
223
|
-
}, React.createElement("box", {
|
|
224
|
-
id: "wotann-opentui-header",
|
|
225
|
-
border: true,
|
|
226
|
-
borderColor: "#6ee7f9",
|
|
227
|
-
paddingX: 1,
|
|
228
|
-
height: 5,
|
|
229
|
-
flexDirection: "column",
|
|
230
|
-
}, React.createElement("text", {
|
|
231
|
-
content: `WOTANN ${props.version} | OpenTUI native renderer | ${status}`,
|
|
232
|
-
fg: "#67e8f9",
|
|
233
|
-
}), React.createElement("text", {
|
|
234
|
-
content: providerSummary,
|
|
235
|
-
fg: "#a7f3d0",
|
|
236
|
-
})), React.createElement("scrollbox", {
|
|
237
|
-
id: "wotann-opentui-transcript",
|
|
238
|
-
flexGrow: 1,
|
|
239
|
-
border: true,
|
|
240
|
-
borderColor: "#334155",
|
|
241
|
-
paddingX: 1,
|
|
242
|
-
scrollY: true,
|
|
243
|
-
stickyScroll: true,
|
|
244
|
-
stickyStart: "bottom",
|
|
245
|
-
}, React.createElement("text", {
|
|
246
|
-
content: transcript || "No messages yet.",
|
|
247
|
-
fg: "#e5e7eb",
|
|
248
|
-
})), React.createElement("box", {
|
|
249
|
-
id: "wotann-opentui-composer",
|
|
250
|
-
border: true,
|
|
251
|
-
borderColor: isStreaming ? "#f59e0b" : "#22c55e",
|
|
252
|
-
paddingX: 1,
|
|
253
|
-
height: 4,
|
|
254
|
-
}, React.createElement("input", {
|
|
255
|
-
id: "wotann-opentui-input",
|
|
256
|
-
value: draft,
|
|
257
|
-
placeholder: isStreaming ? "Streaming... type to queue next prompt" : "Ask WOTANN...",
|
|
258
|
-
focused: true,
|
|
259
|
-
onInput: setDraft,
|
|
260
|
-
onChange: setDraft,
|
|
261
|
-
onSubmit: (value) => {
|
|
262
|
-
setDraft("");
|
|
263
|
-
void submitPrompt(value);
|
|
264
|
-
},
|
|
265
|
-
})));
|
|
266
|
-
}
|
|
267
|
-
function summarizeProviders(providers) {
|
|
268
|
-
const available = providers.filter((provider) => provider.available);
|
|
269
|
-
if (available.length === 0) {
|
|
270
|
-
return "No provider configured. Run wotann init or set a provider API key.";
|
|
271
|
-
}
|
|
272
|
-
return `Providers: ${available.map((provider) => provider.label || provider.provider).join(", ")}`;
|
|
273
|
-
}
|
|
274
|
-
function formatMessage(message) {
|
|
275
|
-
const label = message.role === "assistant" && message.provider
|
|
276
|
-
? `assistant:${message.provider}${message.model ? `/${message.model}` : ""}`
|
|
277
|
-
: message.toolName
|
|
278
|
-
? `tool:${message.toolName}`
|
|
279
|
-
: message.role;
|
|
280
|
-
return `${label}> ${message.content}`;
|
|
281
|
-
}
|
|
282
|
-
function previewToolResult(result) {
|
|
283
|
-
const oneLine = result.replace(/\s+/g, " ").trim();
|
|
284
|
-
return oneLine.length > 240 ? `${oneLine.slice(0, 237)}...` : oneLine;
|
|
285
|
-
}
|