ultimate-pi 0.14.0 → 0.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92) hide show
  1. package/.agents/skills/harness-debate-plan/SKILL.md +41 -61
  2. package/.agents/skills/harness-governor/SKILL.md +11 -0
  3. package/.agents/skills/harness-orchestration/SKILL.md +5 -3
  4. package/.agents/skills/harness-plan/SKILL.md +11 -9
  5. package/.pi/agents/harness/adversary.md +1 -1
  6. package/.pi/agents/harness/evaluator.md +1 -1
  7. package/.pi/agents/harness/executor.md +1 -1
  8. package/.pi/agents/harness/incident-recorder.md +1 -1
  9. package/.pi/agents/harness/meta-optimizer.md +1 -1
  10. package/.pi/agents/harness/planning/decompose.md +8 -35
  11. package/.pi/agents/harness/planning/execution-plan-author.md +27 -15
  12. package/.pi/agents/harness/planning/hypothesis-validator.md +23 -6
  13. package/.pi/agents/harness/planning/hypothesis.md +4 -27
  14. package/.pi/agents/harness/planning/implementation-researcher.md +43 -0
  15. package/.pi/agents/harness/planning/plan-adversary.md +20 -5
  16. package/.pi/agents/harness/planning/plan-evaluator.md +28 -6
  17. package/.pi/agents/harness/planning/review-integrator.md +23 -10
  18. package/.pi/agents/harness/planning/scout-graphify.md +4 -23
  19. package/.pi/agents/harness/planning/scout-semantic.md +3 -18
  20. package/.pi/agents/harness/planning/scout-structure.md +3 -18
  21. package/.pi/agents/harness/planning/sprint-contract-auditor.md +22 -6
  22. package/.pi/agents/harness/planning/stack-researcher.md +21 -11
  23. package/.pi/agents/harness/tie-breaker.md +1 -1
  24. package/.pi/agents/harness/trace-librarian.md +1 -1
  25. package/.pi/extensions/budget-guard.ts +33 -19
  26. package/.pi/extensions/harness-debate-tools.ts +280 -19
  27. package/.pi/extensions/harness-live-widget.ts +39 -159
  28. package/.pi/extensions/harness-plan-approval.ts +47 -5
  29. package/.pi/extensions/harness-run-context.ts +96 -2
  30. package/.pi/extensions/harness-subagent-submit.ts +195 -0
  31. package/.pi/extensions/lib/debate-bus-core.ts +108 -17
  32. package/.pi/extensions/lib/debate-bus-state.ts +6 -0
  33. package/.pi/extensions/lib/harness-subagent-policy.ts +45 -0
  34. package/.pi/extensions/lib/harness-subagent-submit-pipeline.ts +82 -0
  35. package/.pi/extensions/lib/harness-subagent-submit-registry.ts +172 -0
  36. package/.pi/extensions/lib/harness-subagents-bridge.ts +42 -0
  37. package/.pi/extensions/lib/plan-approval/plan-review.ts +56 -0
  38. package/.pi/extensions/lib/plan-approval/types.ts +1 -0
  39. package/.pi/extensions/lib/plan-debate-eligibility.ts +214 -0
  40. package/.pi/extensions/lib/plan-debate-focus.ts +151 -0
  41. package/.pi/extensions/lib/plan-debate-gate.ts +88 -34
  42. package/.pi/extensions/lib/plan-debate-lane.ts +15 -0
  43. package/.pi/extensions/lib/plan-debate-lanes.ts +44 -0
  44. package/.pi/extensions/lib/plan-debate-round-status.ts +63 -20
  45. package/.pi/extensions/lib/plan-messenger.ts +93 -17
  46. package/.pi/extensions/policy-gate.ts +1 -1
  47. package/.pi/harness/README.md +1 -1
  48. package/.pi/harness/agents.manifest.json +25 -21
  49. package/.pi/harness/docs/adrs/0034-darwin-plan-research-pipeline.md +1 -3
  50. package/.pi/harness/docs/adrs/0035-plan-phase-review-gate.md +13 -5
  51. package/.pi/harness/docs/adrs/0036-implementation-research-and-selective-debate.md +51 -0
  52. package/.pi/harness/docs/adrs/0037-subagent-submit-tools.md +31 -0
  53. package/.pi/harness/docs/adrs/0038-budget-telemetry-only.md +23 -0
  54. package/.pi/harness/docs/adrs/README.md +4 -0
  55. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/artifacts/implementation-research.yaml +28 -0
  56. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/artifacts/review-round-r1.yaml +24 -0
  57. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/artifacts/review-round-r2.yaml +25 -0
  58. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/plan-packet.yaml +196 -0
  59. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/plan-review.md +14 -0
  60. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/research-brief.yaml +62 -0
  61. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/implementation-research.yaml +28 -0
  62. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r2.yaml +24 -0
  63. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r3.yaml +24 -0
  64. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/research-brief.yaml +29 -0
  65. package/.pi/harness/evals/smoke/smoke-harness-plan.mjs +97 -16
  66. package/.pi/harness/specs/harness-executor-handoff.schema.json +19 -0
  67. package/.pi/harness/specs/harness-human-required.schema.json +16 -0
  68. package/.pi/harness/specs/plan-implementation-research-brief.schema.json +128 -0
  69. package/.pi/harness/specs/plan-review-round-draft.schema.json +1 -1
  70. package/.pi/harness/specs/plan-scout-findings.schema.json +19 -0
  71. package/.pi/harness/specs/round-result.schema.json +15 -2
  72. package/.pi/lib/harness-agent-output.ts +45 -0
  73. package/.pi/lib/harness-budget-enforce.ts +18 -0
  74. package/.pi/lib/harness-schema-validate.ts +89 -0
  75. package/.pi/lib/harness-spawn-parse.ts +86 -0
  76. package/.pi/lib/harness-subagent-submit-path.ts +41 -0
  77. package/.pi/lib/harness-ui-state.ts +107 -2
  78. package/.pi/prompts/harness-auto.md +2 -2
  79. package/.pi/prompts/harness-plan.md +94 -42
  80. package/.pi/prompts/harness-run.md +2 -2
  81. package/.pi/prompts/planning-rubrics.md +31 -0
  82. package/.pi/scripts/harness-verify.mjs +2 -0
  83. package/.pi/scripts/harness_web/__pycache__/__init__.cpython-314.pyc +0 -0
  84. package/.pi/scripts/harness_web/__pycache__/config.cpython-314.pyc +0 -0
  85. package/.pi/scripts/harness_web/__pycache__/output.cpython-314.pyc +0 -0
  86. package/.pi/scripts/harness_web/__pycache__/scrape.cpython-314.pyc +0 -0
  87. package/.pi/scripts/harness_web/__pycache__/search.cpython-314.pyc +0 -0
  88. package/.pi/scripts/harness_web/__pycache__/search_ddg.cpython-314.pyc +0 -0
  89. package/.pi/scripts/harness_web/__pycache__/search_searxng.cpython-314.pyc +0 -0
  90. package/CHANGELOG.md +21 -0
  91. package/package.json +4 -2
  92. package/vendor/pi-subagents/src/subagents.ts +29 -3
@@ -0,0 +1,128 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://ultimate-pi.local/.pi/harness/specs/plan-implementation-research-brief.schema.json",
4
+ "title": "PlanImplementationResearchBrief",
5
+ "type": "object",
6
+ "additionalProperties": false,
7
+ "required": [
8
+ "schema_version",
9
+ "problem_framing",
10
+ "sub_problems",
11
+ "internal_references",
12
+ "external_references",
13
+ "solution_patterns",
14
+ "similar_implementations",
15
+ "recommended_approach",
16
+ "anti_patterns",
17
+ "open_questions"
18
+ ],
19
+ "properties": {
20
+ "schema_version": { "type": "string", "const": "1.0.0" },
21
+ "problem_framing": { "type": "string", "minLength": 1 },
22
+ "sub_problems": {
23
+ "type": "array",
24
+ "items": { "type": "string", "minLength": 1 }
25
+ },
26
+ "internal_references": {
27
+ "type": "array",
28
+ "items": { "$ref": "#/$defs/internal_reference" }
29
+ },
30
+ "external_references": {
31
+ "type": "array",
32
+ "items": { "$ref": "#/$defs/external_reference" }
33
+ },
34
+ "solution_patterns": {
35
+ "type": "array",
36
+ "items": { "$ref": "#/$defs/solution_pattern" }
37
+ },
38
+ "similar_implementations": {
39
+ "type": "array",
40
+ "items": { "$ref": "#/$defs/similar_implementation" }
41
+ },
42
+ "recommended_approach": { "$ref": "#/$defs/recommended_approach" },
43
+ "anti_patterns": {
44
+ "type": "array",
45
+ "items": { "type": "string", "minLength": 1 }
46
+ },
47
+ "open_questions": {
48
+ "type": "array",
49
+ "items": { "type": "string", "minLength": 1 }
50
+ },
51
+ "deep_research_recommended": { "type": "boolean" }
52
+ },
53
+ "$defs": {
54
+ "internal_reference": {
55
+ "type": "object",
56
+ "additionalProperties": false,
57
+ "required": ["path", "relevance", "reuse_signal"],
58
+ "properties": {
59
+ "path": { "type": "string" },
60
+ "relevance": { "type": "string" },
61
+ "reuse_signal": {
62
+ "type": "string",
63
+ "enum": ["high", "med", "low", "none"]
64
+ }
65
+ }
66
+ },
67
+ "external_reference": {
68
+ "type": "object",
69
+ "additionalProperties": false,
70
+ "required": ["url", "source_type", "summary", "evidence_grade"],
71
+ "properties": {
72
+ "url": { "type": "string" },
73
+ "source_type": { "type": "string" },
74
+ "summary": { "type": "string" },
75
+ "evidence_grade": {
76
+ "type": "string",
77
+ "enum": ["primary", "secondary", "anecdotal"]
78
+ }
79
+ }
80
+ },
81
+ "solution_pattern": {
82
+ "type": "object",
83
+ "additionalProperties": false,
84
+ "required": ["name", "provenance", "fit", "tradeoffs", "risks"],
85
+ "properties": {
86
+ "name": { "type": "string" },
87
+ "provenance": { "type": "string" },
88
+ "fit": { "type": "string" },
89
+ "tradeoffs": {
90
+ "type": "object",
91
+ "required": ["pros", "cons"],
92
+ "properties": {
93
+ "pros": { "type": "array", "items": { "type": "string" } },
94
+ "cons": { "type": "array", "items": { "type": "string" } }
95
+ }
96
+ },
97
+ "risks": { "type": "array", "items": { "type": "string" } }
98
+ }
99
+ },
100
+ "similar_implementation": {
101
+ "type": "object",
102
+ "additionalProperties": false,
103
+ "required": ["name", "what_it_solves", "gap_vs_us"],
104
+ "properties": {
105
+ "name": { "type": "string" },
106
+ "what_it_solves": { "type": "string" },
107
+ "gap_vs_us": { "type": "string" }
108
+ }
109
+ },
110
+ "recommended_approach": {
111
+ "type": "object",
112
+ "additionalProperties": false,
113
+ "required": ["summary", "recommended_approach_confidence"],
114
+ "properties": {
115
+ "summary": { "type": "string", "minLength": 1 },
116
+ "recommended_approach_confidence": {
117
+ "type": "string",
118
+ "enum": ["low", "med", "high"]
119
+ },
120
+ "confidence_rationale": { "type": "string" },
121
+ "evidence_refs": {
122
+ "type": "array",
123
+ "items": { "type": "string" }
124
+ }
125
+ }
126
+ }
127
+ }
128
+ }
@@ -13,7 +13,7 @@
13
13
  ],
14
14
  "properties": {
15
15
  "schema_version": { "type": "string", "const": "1.0.0" },
16
- "round_index": { "type": "integer", "minimum": 1, "maximum": 4 },
16
+ "round_index": { "type": "integer", "minimum": 1, "maximum": 12 },
17
17
  "debate_round_focus": {
18
18
  "type": "string",
19
19
  "enum": ["spec", "wbs", "schedule", "quality"]
@@ -0,0 +1,19 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://ultimate-pi.local/.pi/harness/specs/plan-scout-findings.schema.json",
4
+ "title": "PlanScoutFindings",
5
+ "type": "object",
6
+ "additionalProperties": true,
7
+ "required": ["schema_version", "lane", "summary"],
8
+ "properties": {
9
+ "schema_version": { "type": "string", "const": "1.0.0" },
10
+ "lane": {
11
+ "type": "string",
12
+ "enum": ["graphify", "structure", "semantic"]
13
+ },
14
+ "scout_lane": { "type": "string" },
15
+ "summary": { "type": "string", "minLength": 1 },
16
+ "key_paths": { "type": "array", "items": { "type": "string" } },
17
+ "findings": { "type": "array" }
18
+ }
19
+ }
@@ -39,7 +39,7 @@
39
39
  "round_index": {
40
40
  "type": "integer",
41
41
  "minimum": 1,
42
- "maximum": 6
42
+ "maximum": 12
43
43
  },
44
44
  "participants": {
45
45
  "type": "array",
@@ -104,7 +104,9 @@
104
104
  "additionalProperties": false,
105
105
  "required": [
106
106
  "name",
107
+ "min_focus_rounds",
107
108
  "max_rounds",
109
+ "max_exchanges_per_round",
108
110
  "round_token_cap",
109
111
  "debate_global_cap"
110
112
  ],
@@ -113,8 +115,19 @@
113
115
  "type": "string",
114
116
  "enum": ["aggressive", "plan"]
115
117
  },
118
+ "min_focus_rounds": {
119
+ "type": "integer",
120
+ "minimum": 1
121
+ },
116
122
  "max_rounds": {
117
- "type": "integer"
123
+ "type": "integer",
124
+ "minimum": 1,
125
+ "maximum": 12
126
+ },
127
+ "max_exchanges_per_round": {
128
+ "type": "integer",
129
+ "minimum": 1,
130
+ "maximum": 6
118
131
  },
119
132
  "round_token_cap": {
120
133
  "type": "integer"
@@ -21,6 +21,51 @@ export function extractJsonBlock(text: string): string | null {
21
21
  return null;
22
22
  }
23
23
 
24
/** Minimal shape of one content part; only `toolCall` parts are inspected. */
export interface ToolCallPartLike {
  type?: string;
  name?: string;
  arguments?: Record<string, unknown>;
}

/** Minimal shape of one transcript message. */
export interface MessageLike {
  role?: string;
  content?: ToolCallPartLike[] | unknown;
}

/**
 * Last matching submit_* tool call in subprocess messages (chain-safe).
 *
 * Only assistant messages whose `content` is an array are scanned. A part
 * counts when its `type` is `"toolCall"`, its `name` is one of `toolNames`
 * and `arguments.document` is a plain (non-array) object. Later matches
 * overwrite earlier ones, so the final submission wins.
 *
 * @param messages Transcript messages, oldest first.
 * @param toolNames One tool name or a list; each is trimmed before matching.
 * @returns The last matching tool name and document, or `null` if none.
 */
export function extractLastSubmitCall(
  messages: MessageLike[],
  toolNames: string | string[],
): { toolName: string; document: Record<string, unknown> } | null {
  const allowed = new Set(
    (Array.isArray(toolNames) ? toolNames : [toolNames]).map((n) => n.trim()),
  );
  let last: { toolName: string; document: Record<string, unknown> } | null =
    null;
  for (const msg of messages) {
    // Transcripts are untyped at runtime: tolerate null/undefined messages.
    if (!msg || msg.role !== "assistant" || !Array.isArray(msg.content)) {
      continue;
    }
    for (const rawPart of msg.content as unknown[]) {
      // Content arrays may mix strings, nulls and objects; only objects can
      // be tool calls, and dereferencing a null part would throw.
      if (rawPart === null || typeof rawPart !== "object") continue;
      const part = rawPart as ToolCallPartLike;
      if (part.type !== "toolCall" || !part.name) continue;
      if (!allowed.has(part.name)) continue;
      const doc = part.arguments?.document;
      if (doc && typeof doc === "object" && !Array.isArray(doc)) {
        last = {
          toolName: part.name,
          document: doc as Record<string, unknown>,
        };
      }
    }
  }
  return last;
}
61
+
62
/**
 * Variant of `extractLastSubmitCall` that accepts a readonly list of tool
 * names. The list is copied into a fresh array before delegating.
 */
export function extractLastSubmitCallForAgent(
  messages: MessageLike[],
  agentToolNames: readonly string[],
): { toolName: string; document: Record<string, unknown> } | null {
  const mutableNames = Array.from(agentToolNames);
  return extractLastSubmitCall(messages, mutableNames);
}
68
+
24
69
  export function parseHarnessAgentJson<T extends Record<string, unknown>>(
25
70
  text: string,
26
71
  ): { ok: true; value: T } | { ok: false; error: string } {
@@ -0,0 +1,18 @@
1
+ /**
2
+ * Central switch for harness token/debate budget enforcement.
3
+ * Default: telemetry-only (HARNESS_BUDGET_ENFORCE off).
4
+ */
5
+
6
/**
 * Reports whether budget enforcement is switched on via the
 * `HARNESS_BUDGET_ENFORCE` environment variable.
 *
 * Accepted "on" values are `1`, `true` and `on`, compared case-insensitively
 * and ignoring surrounding whitespace (so a stray space or `\r` from a .env
 * file does not silently disable the flag). Unset or any other value is off.
 */
export function isHarnessBudgetEnforceOn(): boolean {
  const raw = (process.env.HARNESS_BUDGET_ENFORCE ?? "off")
    .trim()
    .toLowerCase();
  return raw === "1" || raw === "true" || raw === "on";
}
10
+
11
/**
 * When false, soft-limit and debate telemetry must not block UI or gates.
 *
 * True only when enforcement is on and at least one of
 * `HARNESS_BUDGET_HARD_STOP` / `HARNESS_DEBATE_HARD_STOP` is exactly `"true"`.
 */
export function shouldEmitBlockingBudgetExhausted(): boolean {
  const enforcing = isHarnessBudgetEnforceOn();
  if (!enforcing) {
    return false;
  }
  const hardStopFlags = [
    process.env.HARNESS_BUDGET_HARD_STOP,
    process.env.HARNESS_DEBATE_HARD_STOP,
  ];
  return hardStopFlags.some((flag) => flag === "true");
}
@@ -0,0 +1,89 @@
1
+ /**
2
+ * JSON Schema validation for harness submit tools (Ajv draft 2020-12, offline).
3
+ */
4
+
5
+ import { appendFile, readFile } from "node:fs/promises";
6
+ import { join } from "node:path";
7
+ import Ajv2020 from "ajv/dist/2020";
8
+ import addFormats from "ajv-formats";
9
+
10
/** Call signature this module relies on for a compiled schema validator. */
type ValidateFn = (data: unknown) => boolean;

/** Compiled validators, reused across calls. */
const compileCache = new Map<string, ValidateFn>();

/**
 * Destination file for debug log lines. Opt-in: taken from the
 * `HARNESS_SCHEMA_DEBUG_LOG` environment variable (trimmed) instead of a
 * machine-specific absolute path. Empty when the variable is unset.
 */
const DEBUG_LOG_PATH = process.env.HARNESS_SCHEMA_DEBUG_LOG?.trim() ?? "";
15
+
16
/** Lazily created Ajv instance shared by every schema compile in this module. */
let ajvSingleton: InstanceType<typeof Ajv2020> | null = null;

/**
 * Returns the shared Ajv (draft 2020-12) instance, creating it on first use
 * with `allErrors` on, `strict` and `validateSchema` off, and the
 * `ajv-formats` plugin applied.
 */
function getAjv(): InstanceType<typeof Ajv2020> {
  if (ajvSingleton) {
    return ajvSingleton;
  }
  const created = new Ajv2020({
    allErrors: true,
    strict: false,
    validateSchema: false,
  });
  // Publish the instance before registering formats, as the original did.
  ajvSingleton = created;
  addFormats(created);
  return created;
}
29
+
30
/**
 * Best-effort debug trace: appends one JSON line to `DEBUG_LOG_PATH`.
 * Any failure (serialization or file append) is swallowed so logging can
 * never affect validation.
 *
 * @param hypothesisId Tag identifying the debugging hypothesis.
 * @param message Short event description.
 * @param data Extra fields stored with the event.
 */
async function debugLog(
  hypothesisId: string,
  message: string,
  data: Record<string, unknown>,
): Promise<void> {
  // #region agent log
  try {
    const entry = {
      sessionId: "2ca12b",
      hypothesisId,
      location: "harness-schema-validate.ts",
      message,
      data,
      timestamp: Date.now(),
    };
    const line = `${JSON.stringify(entry)}\n`;
    await appendFile(DEBUG_LOG_PATH, line);
  } catch {
    /* ignore */
  }
  // #endregion
}
53
+
54
/**
 * Validates `document` against a JSON Schema file from `specsDir`.
 *
 * Compiled validators are cached per `specsDir:schemaFile`. Every failure
 * mode is reported through the result object: an unreadable or malformed
 * schema file yields `schema load failed: …`, a compile error yields
 * `schema compile failed: …`, and an invalid document yields one
 * `<instancePath> <message>` string per Ajv error.
 *
 * @param specsDir Directory containing the schema files.
 * @param schemaFile Schema file name, joined onto `specsDir`.
 * @param document Value to validate.
 * @returns `{ ok: true }` when valid, else `{ ok: false, errors }`.
 */
export async function validateAgainstHarnessSchema(
  specsDir: string,
  schemaFile: string,
  document: unknown,
): Promise<{ ok: true } | { ok: false; errors: string[] }> {
  const cacheKey = `${specsDir}:${schemaFile}`;
  let validate = compileCache.get(cacheKey);
  if (!validate) {
    let schema: Record<string, unknown>;
    try {
      // Reading and parsing are guarded so a missing or corrupt schema is
      // reported like a compile failure instead of rejecting the promise.
      const raw = await readFile(join(specsDir, schemaFile), "utf-8");
      schema = JSON.parse(raw) as Record<string, unknown>;
    } catch (err) {
      const msg = err instanceof Error ? err.message : String(err);
      await debugLog("H3", "schema load failed", { schemaFile, error: msg });
      return { ok: false, errors: [`schema load failed: ${msg}`] };
    }
    try {
      const ajv = getAjv();
      const compiled = ajv.compile(schema);
      validate = compiled;
      compileCache.set(cacheKey, compiled);
      await debugLog("H3", "schema compile ok", { schemaFile });
    } catch (err) {
      const msg = err instanceof Error ? err.message : String(err);
      await debugLog("H3", "schema compile failed", { schemaFile, error: msg });
      return { ok: false, errors: [`schema compile failed: ${msg}`] };
    }
  }
  const ok = validate(document);
  if (ok) return { ok: true };
  // Ajv attaches the errors of the most recent call to the validator function
  // itself; read them right away, before any other validation can run.
  const errors = (
    (
      validate as {
        errors?: Array<{ instancePath?: string; message?: string }>;
      }
    ).errors ?? []
  ).map((e: { instancePath?: string; message?: string }) =>
    `${e.instancePath || "/"} ${e.message ?? "invalid"}`.trim(),
  );
  return { ok: false, errors };
}
@@ -0,0 +1,86 @@
1
+ /**
2
+ * Parse HarnessSpawnContext embedded in subagent task strings.
3
+ */
4
+
5
/**
 * Finds the `HarnessSpawnContext =` marker. The lookahead makes the match end
 * immediately before the opening brace, so the JSON object itself can be
 * extracted with the balanced-brace scanner rather than a lazy regex.
 */
const SPAWN_CTX_EQ_RE = /HarnessSpawnContext\s*=\s*(?=\{)/;

/** Fields recognised in a spawn context; all are optional strings. */
export interface ParsedSpawnContext {
  run_id?: string;
  run_dir?: string;
  agent?: string;
  plan_packet_path?: string;
}

/**
 * Returns the substring of `s` that starts at the `{` at index `start` and
 * ends at its matching `}`. Braces inside double-quoted strings are ignored
 * and backslash escapes inside strings are honoured.
 *
 * @returns The balanced object text, or `null` when `s[start]` is not `{` or
 *   the object never closes.
 */
function extractBalancedJsonObject(s: string, start: number): string | null {
  if (s[start] !== "{") return null;
  let depth = 0;
  let inString = false;
  let escaped = false;
  for (let i = start; i < s.length; i++) {
    const ch = s[i];
    if (inString) {
      if (escaped) escaped = false;
      else if (ch === "\\") escaped = true;
      else if (ch === '"') inString = false;
      continue;
    }
    if (ch === '"') {
      inString = true;
      continue;
    }
    if (ch === "{") depth++;
    else if (ch === "}") {
      depth--;
      if (depth === 0) return s.slice(start, i + 1);
    }
  }
  return null;
}

/**
 * Picks the known string fields out of an arbitrary parsed value.
 *
 * @returns The context, or `null` when the value is not an object or carries
 *   neither `run_id` nor `run_dir`.
 */
function normalizeSpawnContext(parsed: unknown): ParsedSpawnContext | null {
  if (!parsed || typeof parsed !== "object") return null;
  const o = parsed as Record<string, unknown>;
  const run_id = typeof o.run_id === "string" ? o.run_id : undefined;
  const run_dir = typeof o.run_dir === "string" ? o.run_dir : undefined;
  const agent = typeof o.agent === "string" ? o.agent : undefined;
  const plan_packet_path =
    typeof o.plan_packet_path === "string" ? o.plan_packet_path : undefined;
  if (!run_id && !run_dir) return null;
  return { run_id, run_dir, agent, plan_packet_path };
}

/**
 * Extracts a spawn context embedded in a subagent task string.
 *
 * Two forms are recognised, in order:
 * 1. `HarnessSpawnContext = { … }` anywhere in the text. The object is taken
 *    by brace matching, so nested objects, braces inside strings, and any
 *    character following the closing brace are all handled.
 * 2. A JSON object starting at the first `{` of the task, either wrapping a
 *    `HarnessSpawnContext` object or carrying a string `run_id` itself.
 *
 * @param task Raw task text.
 * @returns The parsed context, or `null` when none is found.
 */
export function parseSpawnContextFromTask(
  task: string,
): ParsedSpawnContext | null {
  const marker = SPAWN_CTX_EQ_RE.exec(task);
  if (marker) {
    const braceIndex = marker.index + marker[0].length;
    const blob = extractBalancedJsonObject(task, braceIndex);
    if (blob) {
      try {
        return normalizeSpawnContext(JSON.parse(blob));
      } catch {
        // fall through to JSON-object forms
      }
    }
  }

  const firstBrace = task.indexOf("{");
  if (firstBrace >= 0) {
    const blob = extractBalancedJsonObject(task, firstBrace);
    if (blob) {
      try {
        const outer = JSON.parse(blob) as Record<string, unknown>;
        if (
          outer.HarnessSpawnContext &&
          typeof outer.HarnessSpawnContext === "object"
        ) {
          return normalizeSpawnContext(outer.HarnessSpawnContext);
        }
        if (typeof outer.run_id === "string") {
          return normalizeSpawnContext(outer);
        }
      } catch {
        // ignore
      }
    }
  }

  return null;
}
@@ -0,0 +1,41 @@
1
+ /**
2
+ * Resolve and guard harness run directories for subagent submit tools.
3
+ */
4
+
5
import { realpath } from "node:fs/promises";
import { join, resolve, sep } from "node:path";
7
+
8
/** Directory that holds all harness runs: `<projectRoot>/.pi/harness/runs`. */
export function harnessRunsRoot(projectRoot: string): string {
  return join(projectRoot, ".pi", "harness", "runs");
}

/**
 * True when `child` lies strictly below `parent`. Both paths must already be
 * absolute and normalized; the platform separator is used for the boundary.
 */
function isStrictlyInside(child: string, parent: string): boolean {
  const prefix = parent.endsWith(sep) ? parent : `${parent}${sep}`;
  return child.startsWith(prefix);
}

/**
 * Resolves the directory a subagent may write to for a given run.
 *
 * The expected directory is `<runs root>/<runId>`. `runId` is rejected when
 * it is blank or when it would resolve outside the runs root (for example
 * via `..` segments). If `runDirEnv` is given it is resolved against
 * `projectRoot` and must be the expected directory or a descendant of it,
 * compared after symlinks are resolved.
 *
 * @returns `{ ok: true, runDir }` with the real path, or `{ ok: false, error }`
 *   when the id is invalid, the directory is missing, or the requested
 *   directory escapes the run.
 */
export async function resolveGuardedRunDir(opts: {
  projectRoot: string;
  runId: string;
  runDirEnv?: string;
}): Promise<{ ok: true; runDir: string } | { ok: false; error: string }> {
  const { projectRoot, runId } = opts;
  if (!runId.trim()) {
    return { ok: false, error: "run_id is required" };
  }
  const runsRoot = resolve(harnessRunsRoot(projectRoot));
  const expected = resolve(join(runsRoot, runId));
  // Lexical check first: the run id itself must not climb out of the runs
  // root, otherwise every later comparison is made against an escaped path.
  if (!isStrictlyInside(expected, runsRoot)) {
    return {
      ok: false,
      error: `run_id must name a directory under ${runsRoot}`,
    };
  }
  const requested = opts.runDirEnv?.trim()
    ? resolve(projectRoot, opts.runDirEnv)
    : expected;
  try {
    const candidate = await realpath(requested);
    const expectedReal = await realpath(expected);
    if (
      candidate !== expectedReal &&
      !isStrictlyInside(candidate, expectedReal)
    ) {
      return {
        ok: false,
        error: `run_dir must stay under ${expectedReal}`,
      };
    }
    return { ok: true, runDir: candidate };
  } catch {
    // realpath throws when either directory does not exist.
    return { ok: false, error: `run directory not found for run_id=${runId}` };
  }
}
@@ -1,4 +1,5 @@
1
1
  import type { ExtensionContext } from "@earendil-works/pi-coding-agent";
2
+ import { shouldEmitBlockingBudgetExhausted } from "./harness-budget-enforce.js";
2
3
 
3
4
  export type HarnessPhase =
4
5
  | "plan"
@@ -133,6 +134,9 @@ const RELEVANT_CUSTOM_TYPES = new Set([
133
134
  "harness-consensus-packet",
134
135
  "harness-round-result",
135
136
  "harness-budget-exhausted",
137
+ "harness-budget-soft-limit",
138
+ "harness-budget-telemetry",
139
+ "harness-debate-budget-telemetry",
136
140
  "harness-review-integrity",
137
141
  "harness-test-integrity-flag",
138
142
  "harness-run-trace",
@@ -189,7 +193,7 @@ function deriveFlowSubstate(state: HarnessUiState): HarnessFlowSubstate {
189
193
  return "idle";
190
194
  }
191
195
 
192
- function createStateFromEntries(entries: unknown[]): HarnessUiState {
196
+ export function createStateFromEntries(entries: unknown[]): HarnessUiState {
193
197
  const latest = pickLatestCustomEntries(entries);
194
198
  const state: HarnessUiState = {
195
199
  ...DEFAULT_STATE,
@@ -212,7 +216,7 @@ function createStateFromEntries(entries: unknown[]): HarnessUiState {
212
216
  const budget = latest.get("harness-budget-exhausted") as
213
217
  | BudgetExhaustedLike
214
218
  | undefined;
215
- if (budget) {
219
+ if (budget && shouldEmitBlockingBudgetExhausted()) {
216
220
  state.budgetExhausted = true;
217
221
  state.budgetReason =
218
222
  typeof budget.exhaustion_reason === "string"
@@ -223,6 +227,15 @@ function createStateFromEntries(entries: unknown[]): HarnessUiState {
223
227
  const cap = asNumber(budget.caps?.debate_global_cap);
224
228
  if (cap != null) state.debateBudgetCap = cap;
225
229
  }
230
+ const telemetry = latest.get("harness-budget-telemetry") as
231
+ | BudgetExhaustedLike
232
+ | undefined;
233
+ if (telemetry && !state.budgetExhausted) {
234
+ const budgetUsed = asNumber(telemetry.budget_used);
235
+ if (budgetUsed != null) state.debateBudgetUsed = budgetUsed;
236
+ const cap = asNumber(telemetry.caps?.debate_global_cap);
237
+ if (cap != null) state.debateBudgetCap = cap;
238
+ }
226
239
 
227
240
  const testIntegrity = latest.get("harness-test-integrity-flag") as
228
241
  | TestIntegrityLike
@@ -299,6 +312,98 @@ function createStateFromEntries(entries: unknown[]): HarnessUiState {
299
312
  return state;
300
313
  }
301
314
 
315
/** Visual emphasis attached to a status hint. */
export type HarnessStatusSeverity =
  | "accent" | "warning" | "error" | "success" | "muted";

/** Harness phases in the order they are listed for next-phase lookup. */
export const HARNESS_PHASE_ORDER: readonly HarnessPhase[] = [
  "plan", "execute", "evaluate", "adversary", "merge",
] as const;
329
+
330
/**
 * Short display label for a phase: `plan` and `merge` keep their own name,
 * `execute` → `build`, `evaluate` → `eval`, `adversary` → `review`.
 */
export function formatHarnessPhaseLabel(phase: HarnessPhase): string {
  switch (phase) {
    // These two phases are shown under their own identifier.
    case "plan":
    case "merge":
      return phase;
    case "execute":
      return "build";
    case "evaluate":
      return "eval";
    case "adversary":
      return "review";
  }
}
344
+
345
/**
 * Phase that follows `phase` in `HARNESS_PHASE_ORDER`.
 *
 * @returns The next phase, or `null` when `phase` is the last entry or is not
 *   in the list.
 */
export function nextHarnessPhase(phase: HarnessPhase): HarnessPhase | null {
  const position = HARNESS_PHASE_ORDER.indexOf(phase);
  const lastPosition = HARNESS_PHASE_ORDER.length - 1;
  const hasSuccessor = position >= 0 && position < lastPosition;
  if (!hasSuccessor) {
    return null;
  }
  const successor = HARNESS_PHASE_ORDER[position + 1];
  return successor ?? null;
}
350
+
351
/**
 * Shortens `command` to at most `maxLen` characters for the status line.
 *
 * Longer commands are cut and end in `...`. When `maxLen` leaves no room for
 * the ellipsis (3 or fewer) the command is hard-cut instead, so the result
 * never exceeds `maxLen`; a non-positive `maxLen` yields an empty string.
 *
 * @param command Text to shorten.
 * @param maxLen Maximum length of the result (default 40).
 */
function truncateStatusCommand(command: string, maxLen = 40): string {
  if (command.length <= maxLen) return command;
  const ellipsis = "...";
  if (maxLen <= ellipsis.length) {
    // A negative slice end would count from the end of the string; clamp it.
    return command.slice(0, Math.max(0, maxLen));
  }
  return `${command.slice(0, maxLen - ellipsis.length)}${ellipsis}`;
}
355
+
356
/**
 * Chooses the single status-line hint for the current UI state.
 *
 * Checks run in priority order and the first match wins: exhausted budget,
 * high-severity test integrity flag, policy block, waiting on a human, a
 * recommended next command, plan approval state, policy pass results, and
 * finally a generic label for the current phase.
 *
 * @param state Current harness UI state.
 * @returns Hint text and the severity to render it with.
 */
export function deriveHarnessStatusHint(state: HarnessUiState): {
  text: string;
  severity: HarnessStatusSeverity;
} {
  const hint = (text: string, severity: HarnessStatusSeverity) => ({
    text,
    severity,
  });

  // Hard failures take precedence over everything else.
  if (state.budgetExhausted) return hint("Budget limit reached", "error");
  if (state.testIntegritySeverity === "high") {
    return hint("Test integrity issue", "error");
  }
  if (state.policyDecision === "block") {
    return hint("Blocked — fix issues first", "error");
  }

  const awaitingHuman =
    state.policyDecision === "human_required" ||
    state.flowSubstate === "human-required";
  if (awaitingHuman) return hint("Waiting for your input", "warning");

  if (state.nextRecommendedCommand) {
    const shortened = truncateStatusCommand(state.nextRecommendedCommand);
    return hint(`Next: ${shortened}`, "accent");
  }

  if (state.phase === "plan") {
    return state.planApproved
      ? hint("Plan approved", "success")
      : hint("Approve plan to continue", "warning");
  }

  if (state.policyDecision === "pass") return hint("Checks passed", "success");
  if (state.policyDecision === "conditional_pass") {
    return hint("Passed with notes", "warning");
  }

  // No specific signal: describe what the current phase is doing.
  if (state.phase === "execute") return hint("Implementing changes", "accent");
  if (state.phase === "evaluate") return hint("Running checks", "accent");
  if (state.phase === "adversary") return hint("Review gate", "accent");
  if (state.phase === "merge") return hint("Ready to finish", "accent");
  return hint("Planning", "muted");
}
406
+
302
407
  export class HarnessUiStateStore {
303
408
  private lastEntriesLen = -1;
304
409
  private cachedState: HarnessUiState = {
@@ -1,6 +1,6 @@
1
1
  ---
2
2
  description: Full strict harness pipeline with locked governance decisions.
3
- argument-hint: "\"<task>\" [--quick] [--risk low|med|high] [--budget <amount>]"
3
+ argument-hint: "\"<task>\" [--quick] [--risk low|med|high]"
4
4
  ---
5
5
 
6
6
  # harness-auto
@@ -10,7 +10,7 @@ Pipeline orchestrator — one session, sequential phase handoffs. Invoke **harne
10
10
  ## Step 0 — Parse arguments
11
11
 
12
12
  - required task (quoted or first token)
13
- - optional: `--quick`, `--risk`, `--budget`
13
+ - optional: `--quick`, `--risk` (`--budget` reserved/no-op)
14
14
 
15
15
  If task missing:
16
16