ultimate-pi 0.16.0 → 0.18.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agents/skills/harness-context/SKILL.md +13 -6
- package/.agents/skills/harness-debate-plan/SKILL.md +37 -20
- package/.agents/skills/harness-eval/SKILL.md +6 -21
- package/.agents/skills/harness-governor/SKILL.md +4 -3
- package/.agents/skills/harness-orchestration/SKILL.md +39 -51
- package/.agents/skills/harness-plan/SKILL.md +23 -12
- package/.agents/skills/harness-review/SKILL.md +52 -0
- package/.agents/skills/harness-sentrux-setup/SKILL.md +13 -1
- package/.agents/skills/harness-steer/SKILL.md +14 -0
- package/.pi/agents/harness/adversary.md +3 -10
- package/.pi/agents/harness/evaluator.md +3 -12
- package/.pi/agents/harness/executor.md +12 -14
- package/.pi/agents/harness/planning/decompose.md +7 -4
- package/.pi/agents/harness/planning/hypothesis-validator.md +2 -0
- package/.pi/agents/harness/planning/hypothesis.md +4 -2
- package/.pi/agents/harness/planning/implementation-researcher.md +1 -1
- package/.pi/agents/harness/planning/plan-adversary.md +2 -0
- package/.pi/agents/harness/planning/plan-evaluator.md +2 -0
- package/.pi/agents/harness/planning/plan-synthesizer.md +25 -0
- package/.pi/agents/harness/planning/planning-context.md +48 -0
- package/.pi/agents/harness/planning/review-integrator.md +2 -0
- package/.pi/agents/harness/planning/scout-graphify.md +3 -1
- package/.pi/agents/harness/planning/scout-semantic.md +3 -1
- package/.pi/agents/harness/planning/scout-structure.md +3 -1
- package/.pi/agents/harness/planning/sprint-contract-auditor.md +2 -0
- package/.pi/agents/harness/sentrux-steward.md +51 -0
- package/.pi/extensions/00-posthog-network-bootstrap.ts +11 -0
- package/.pi/extensions/harness-debate-tools.ts +12 -3
- package/.pi/extensions/harness-live-widget.ts +27 -1
- package/.pi/extensions/harness-plan-approval.ts +62 -56
- package/.pi/extensions/harness-run-context.ts +553 -84
- package/.pi/extensions/harness-subagent-submit.ts +43 -33
- package/.pi/extensions/harness-telemetry.ts +29 -4
- package/.pi/extensions/lib/debate-bus-core.ts +15 -9
- package/.pi/extensions/lib/harness-artifact-gate.ts +182 -0
- package/.pi/extensions/lib/harness-posthog.ts +9 -5
- package/.pi/extensions/lib/harness-spawn-topology.ts +188 -0
- package/.pi/extensions/lib/harness-subagent-auth.ts +105 -19
- package/.pi/extensions/lib/harness-subagent-policy.ts +37 -19
- package/.pi/extensions/lib/harness-subagent-precheck.ts +35 -9
- package/.pi/extensions/lib/harness-subagent-submit-pipeline.ts +66 -2
- package/.pi/extensions/lib/harness-subagent-submit-registry.ts +21 -3
- package/.pi/extensions/lib/harness-subagents-bridge.ts +91 -28
- package/.pi/extensions/lib/harness-subprocess-bootstrap.ts +73 -0
- package/.pi/extensions/lib/plan-approval/create-plan.ts +2 -3
- package/.pi/extensions/lib/plan-approval/resolve-disk.ts +102 -0
- package/.pi/extensions/lib/plan-approval/schema.ts +22 -8
- package/.pi/extensions/lib/plan-approval/types.ts +1 -1
- package/.pi/extensions/lib/plan-approval/validate.ts +2 -2
- package/.pi/extensions/lib/plan-approval-readiness.ts +241 -0
- package/.pi/extensions/lib/plan-debate-eligibility.ts +67 -7
- package/.pi/extensions/lib/plan-debate-focus.ts +21 -9
- package/.pi/extensions/lib/plan-debate-gate.ts +101 -17
- package/.pi/extensions/lib/plan-debate-lanes.ts +57 -3
- package/.pi/extensions/lib/plan-debate-round-status.ts +18 -7
- package/.pi/extensions/lib/plan-messenger.ts +4 -0
- package/.pi/extensions/lib/plan-review-gate.ts +59 -0
- package/.pi/extensions/lib/posthog-client.ts +76 -0
- package/.pi/extensions/policy-gate.ts +24 -19
- package/.pi/extensions/trace-recorder.ts +1 -0
- package/.pi/harness/agents.manifest.json +24 -16
- package/.pi/harness/corpus/cron.example +8 -0
- package/.pi/harness/corpus/graphify-kb-updater.config.json +159 -0
- package/.pi/harness/corpus/systemd/graphify-kb-updater.env.template +4 -0
- package/.pi/harness/corpus/systemd/graphify-kb-updater.service +17 -0
- package/.pi/harness/corpus/systemd/graphify-kb-updater.timer +11 -0
- package/.pi/harness/docs/adrs/0001-harness-constitution.md +2 -1
- package/.pi/harness/docs/adrs/0006-sentrux-dual-layer.md +7 -6
- package/.pi/harness/docs/adrs/0009-sentrux-rules-lifecycle.md +6 -1
- package/.pi/harness/docs/adrs/0031-harness-run-context.md +1 -1
- package/.pi/harness/docs/adrs/0032-harness-command-orchestration.md +7 -0
- package/.pi/harness/docs/adrs/0034-darwin-plan-research-pipeline.md +3 -3
- package/.pi/harness/docs/adrs/0036-implementation-research-and-selective-debate.md +8 -5
- package/.pi/harness/docs/adrs/0039-harness-post-run-review-gate.md +47 -0
- package/.pi/harness/docs/adrs/0040-practice-grounded-orchestration.md +40 -0
- package/.pi/harness/docs/adrs/0041-intelligent-planning-reconnaissance.md +39 -0
- package/.pi/harness/docs/adrs/0042-agent-native-orchestration.md +35 -0
- package/.pi/harness/docs/adrs/0043-path-first-harness-tools.md +38 -0
- package/.pi/harness/docs/adrs/0044-harness-steer-loop.md +36 -0
- package/.pi/harness/docs/adrs/README.md +10 -0
- package/.pi/harness/docs/graphify-kb-updater-runbook.md +157 -0
- package/.pi/harness/docs/practice-map.md +110 -0
- package/.pi/harness/env.harness.template +5 -3
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med-fast/artifacts/implementation-research.yaml +28 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med-fast/artifacts/review-round-consolidated.yaml +25 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med-fast/plan-packet.yaml +196 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med-fast/plan-review.md +14 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med-fast/research-brief.yaml +62 -0
- package/.pi/harness/evals/smoke/sentrux-stub.json +1 -1
- package/.pi/harness/evals/smoke/smoke-harness-plan.mjs +43 -17
- package/.pi/harness/specs/README.md +1 -1
- package/.pi/harness/specs/harness-run-context.schema.json +11 -0
- package/.pi/harness/specs/harness-spawn-context.schema.json +14 -0
- package/.pi/harness/specs/plan-execution-plan.schema.json +39 -1
- package/.pi/harness/specs/plan-packet.schema.json +4 -0
- package/.pi/harness/specs/plan-phase-status.schema.json +17 -0
- package/.pi/harness/specs/plan-phase-waiver.schema.json +25 -0
- package/.pi/harness/specs/plan-planning-context.schema.json +50 -0
- package/.pi/harness/specs/plan-review-round-draft.schema.json +1 -1
- package/.pi/harness/specs/repair-brief.schema.json +45 -0
- package/.pi/harness/specs/review-outcome.schema.json +46 -0
- package/.pi/harness/specs/sentrux-manifest-proposal.schema.json +80 -0
- package/.pi/harness/specs/sentrux-signal.schema.json +43 -0
- package/.pi/harness/specs/steer-state.schema.json +20 -0
- package/.pi/lib/harness-context-mode-policy.ts +256 -0
- package/.pi/lib/harness-repair-brief.ts +145 -0
- package/.pi/lib/harness-run-context.ts +591 -32
- package/.pi/lib/harness-ui-state.ts +87 -9
- package/.pi/model-router.example.json +13 -4
- package/.pi/prompts/harness-auto.md +9 -9
- package/.pi/prompts/harness-critic.md +3 -30
- package/.pi/prompts/harness-eval.md +4 -37
- package/.pi/prompts/harness-plan.md +139 -57
- package/.pi/prompts/harness-review.md +150 -15
- package/.pi/prompts/harness-run.md +62 -10
- package/.pi/prompts/harness-sentrux-steward.md +55 -0
- package/.pi/prompts/harness-setup.md +4 -4
- package/.pi/prompts/harness-steer.md +30 -0
- package/.pi/scripts/graphify-kb-updater.mjs +358 -0
- package/.pi/scripts/harness-generate-model-router.mjs +118 -36
- package/.pi/scripts/harness-model-router-routing.test.mjs +97 -0
- package/.pi/scripts/harness-sync-model-router.mjs +15 -2
- package/.pi/scripts/harness-verify.mjs +51 -6
- package/.pi/scripts/harness-web-policy-guard.mjs +68 -0
- package/.pi/scripts/validate-plan-dag.mjs +3 -3
- package/AGENTS.md +1 -0
- package/CHANGELOG.md +22 -0
- package/package.json +5 -4
- package/vendor/pi-model-router/UPSTREAM_PIN.md +3 -1
- package/vendor/pi-model-router/extensions/commands.ts +4 -4
- package/vendor/pi-model-router/extensions/index.ts +21 -0
- package/vendor/pi-model-router/extensions/provider.ts +130 -79
- package/vendor/pi-model-router/extensions/routing.ts +148 -0
- package/vendor/pi-model-router/extensions/state.ts +3 -0
- package/vendor/pi-model-router/extensions/types.ts +9 -0
- package/vendor/pi-model-router/extensions/ui.ts +16 -2
- package/.pi/prompts/git-sync.md +0 -124
|
@@ -1,23 +1,32 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Resolve concrete LLM credentials for harness subagent subprocesses.
|
|
3
3
|
*
|
|
4
|
-
* Parent sessions often use `router
|
|
4
|
+
* Parent sessions often use `router/<profile>` (pi-model-router). Subagents run with
|
|
5
5
|
* `--no-extensions`, so they cannot use the logical router provider — they need
|
|
6
6
|
* a real provider/model plus that provider's API key.
|
|
7
|
+
*
|
|
8
|
+
* Session-locked routing: subprocess model is chosen once from agent system prompt
|
|
9
|
+
* complexity (same analysis as parent session lock), not from per-turn parent tier.
|
|
7
10
|
*/
|
|
8
11
|
|
|
9
12
|
import { existsSync, readFileSync } from "node:fs";
|
|
10
13
|
import { join } from "node:path";
|
|
14
|
+
import { resolveTierFromPrompt } from "../../../vendor/pi-model-router/extensions/routing.js";
|
|
15
|
+
import type {
|
|
16
|
+
RouterProfile,
|
|
17
|
+
RouterTier,
|
|
18
|
+
RoutingRule,
|
|
19
|
+
} from "../../../vendor/pi-model-router/extensions/types.js";
|
|
11
20
|
import type { AgentConfig } from "../../../vendor/pi-subagents/src/agents.js";
|
|
12
21
|
|
|
13
22
|
const ROUTER_SENTINEL_KEY = "pi-model-router";
|
|
14
23
|
const SENTINEL_API_KEYS = new Set([ROUTER_SENTINEL_KEY, "<authenticated>"]);
|
|
15
24
|
|
|
16
|
-
type RouterTier = "high" | "medium" | "low";
|
|
17
|
-
|
|
18
25
|
/** Subset of `.pi/model-router.json` that this module reads. */
interface ModelRouterJson {
	/** Router profiles keyed by profile id. */
	profiles?: Record<string, RouterProfile>;
	/** Profile id tried when the requested profile is missing or incomplete. */
	defaultProfile?: string;
	/** Routing rules handed unchanged to `resolveTierFromPrompt`. */
	rules?: RoutingRule[];
	/** Bias handed to `resolveTierFromPrompt`; the caller substitutes 0.5 when absent. */
	phaseBias?: number;
}
|
|
22
31
|
|
|
23
32
|
export function isUsableApiKey(key: string | undefined): key is string {
|
|
@@ -35,7 +44,34 @@ export function parseModelRef(
|
|
|
35
44
|
return { provider, modelId };
|
|
36
45
|
}
|
|
37
46
|
|
|
38
|
-
|
|
47
|
+
/**
 * Planning subagents that should prefer low/medium router tier for latency.
 * Entries are full agent names, matched exactly.
 */
const ROUTINE_PLANNING_AGENT_NAMES: readonly string[] = [
	"harness/planning/plan-evaluator",
	"harness/planning/plan-adversary",
	"harness/planning/review-integrator",
	"harness/planning/hypothesis-validator",
	"harness/planning/sprint-contract-auditor",
	"harness/planning/planning-context",
	"harness/planning/scout-structure",
	"harness/planning/scout-semantic",
	"harness/planning/decompose",
	"harness/planning/hypothesis",
	"harness/planning/stack-research",
	"harness/planning/plan-validator",
];
const ROUTINE_PLANNING_AGENT_PATHS = new Set<string>(
	ROUTINE_PLANNING_AGENT_NAMES,
);

/**
 * Whether `agentName` is one of the routine planning subagents.
 *
 * @param agentName - Full agent name, e.g. `harness/planning/decompose`.
 * @returns `true` only on an exact match against the routine-planning set.
 */
export function isRoutinePlanningAgent(agentName: string): boolean {
	const listed = ROUTINE_PLANNING_AGENT_PATHS.has(agentName);
	return listed;
}
|
|
66
|
+
|
|
67
|
+
export function thinkingToRouterTier(
|
|
68
|
+
thinking?: string,
|
|
69
|
+
agentName?: string,
|
|
70
|
+
): RouterTier {
|
|
71
|
+
if (agentName && isRoutinePlanningAgent(agentName)) {
|
|
72
|
+
if (thinking === "high" || thinking === "xhigh") return "medium";
|
|
73
|
+
return "low";
|
|
74
|
+
}
|
|
39
75
|
if (thinking === "high" || thinking === "xhigh") return "high";
|
|
40
76
|
if (thinking === "off" || thinking === "minimal" || thinking === "low") {
|
|
41
77
|
return "low";
|
|
@@ -43,6 +79,64 @@ export function thinkingToRouterTier(thinking?: string): RouterTier {
|
|
|
43
79
|
return "medium";
|
|
44
80
|
}
|
|
45
81
|
|
|
82
|
+
/**
 * Read and parse `<cwd>/.pi/model-router.json`.
 *
 * @param cwd - Directory that contains the `.pi` folder.
 * @returns The parsed config object, or `undefined` when the file is missing,
 *   cannot be read, is not valid JSON, or does not hold a JSON object.
 */
function loadModelRouterConfig(cwd: string): ModelRouterJson | undefined {
	const path = join(cwd, ".pi", "model-router.json");
	if (!existsSync(path)) return undefined;

	let parsed: unknown;
	try {
		parsed = JSON.parse(readFileSync(path, "utf8"));
	} catch {
		// Best-effort: an unreadable or malformed config is treated as absent.
		return undefined;
	}

	// `null`, arrays and scalars are valid JSON but not a config object; returning
	// them would break the declared `ModelRouterJson | undefined` contract.
	if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
		return undefined;
	}
	return parsed as ModelRouterJson;
}
|
|
91
|
+
|
|
92
|
+
/**
 * Pick the first usable router profile for `profileId`.
 *
 * Lookup order: the requested id, the config's `defaultProfile` (or "auto"
 * when unset), "auto", then "opencode-go". A profile is usable only when its
 * high, medium and low tiers each name a model.
 *
 * @param config - Parsed model-router config.
 * @param profileId - Requested profile id.
 * @returns The matched id with its profile, or `undefined` when the config has
 *   no profiles or no candidate is complete.
 */
function resolveRouterProfileEntry(
	config: ModelRouterJson,
	profileId: string,
): { profileId: string; profile: RouterProfile } | undefined {
	const { profiles, defaultProfile } = config;
	if (!profiles) return undefined;

	// A Set keeps insertion order and drops repeated ids in one step.
	const lookupOrder = new Set<string>([
		profileId,
		defaultProfile ?? "auto",
		"auto",
		"opencode-go",
	]);

	for (const candidateId of lookupOrder) {
		if (!candidateId) continue;
		const profile = profiles[candidateId];
		if (
			!profile?.high?.model ||
			!profile.medium?.model ||
			!profile.low?.model
		) {
			continue;
		}
		return { profileId: candidateId, profile };
	}
	return undefined;
}
|
|
115
|
+
|
|
116
|
+
/**
 * Tier from agent system prompt (+ optional task line) for session model lock.
 *
 * When `.pi/model-router.json` loads and yields a usable profile, the tier is
 * computed by `resolveTierFromPrompt`. Otherwise it falls back to
 * `thinkingToRouterTier(agent.thinking, agent.name)`.
 *
 * @param cwd - Project directory holding `.pi/model-router.json`.
 * @param profileId - Requested router profile id.
 * @param agent - Subagent config (system prompt, thinking level, name).
 * @param taskSnippet - Optional task text; trimmed before use.
 * @returns The router tier to use for the subagent.
 */
export function resolveSubagentRouterTier(
	cwd: string,
	profileId: string,
	agent: AgentConfig,
	taskSnippet?: string,
): RouterTier {
	const config = loadModelRouterConfig(cwd);
	const entry = config
		? resolveRouterProfileEntry(config, profileId)
		: undefined;

	if (!config || !entry) {
		return thinkingToRouterTier(agent.thinking, agent.name);
	}

	const systemPrompt = agent.systemPrompt ?? "";
	const taskLine = taskSnippet?.trim() ?? "";
	return resolveTierFromPrompt(
		systemPrompt,
		taskLine,
		entry.profileId,
		entry.profile,
		config.rules,
		config.phaseBias ?? 0.5,
	);
}
|
|
139
|
+
|
|
46
140
|
/** Map router profile tier → concrete `provider/model` from `.pi/model-router.json`. */
|
|
47
141
|
export function resolveRouterConcreteModelRef(
|
|
48
142
|
cwd: string,
|
|
@@ -51,19 +145,10 @@ export function resolveRouterConcreteModelRef(
|
|
|
51
145
|
): string | undefined {
|
|
52
146
|
const path = join(cwd, ".pi", "model-router.json");
|
|
53
147
|
if (!existsSync(path)) return undefined;
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
return undefined;
|
|
59
|
-
}
|
|
60
|
-
const profiles = raw.profiles;
|
|
61
|
-
if (!profiles) return undefined;
|
|
62
|
-
const profile =
|
|
63
|
-
profiles[profileId] ??
|
|
64
|
-
profiles[raw.defaultProfile ?? "auto"] ??
|
|
65
|
-
profiles.auto;
|
|
66
|
-
const model = profile?.[tier]?.model;
|
|
148
|
+
const raw = loadModelRouterConfig(cwd);
|
|
149
|
+
if (!raw) return undefined;
|
|
150
|
+
const entry = resolveRouterProfileEntry(raw, profileId);
|
|
151
|
+
const model = entry?.profile[tier]?.model;
|
|
67
152
|
return typeof model === "string" && model.includes("/") ? model : undefined;
|
|
68
153
|
}
|
|
69
154
|
|
|
@@ -83,6 +168,7 @@ export function resolveConcreteSubagentModel(
|
|
|
83
168
|
cwd: string,
|
|
84
169
|
parentModel: { provider: string; id: string } | undefined,
|
|
85
170
|
agent: AgentConfig,
|
|
171
|
+
taskSnippet?: string,
|
|
86
172
|
): ConcreteSubagentModel | undefined {
|
|
87
173
|
if (agent.model && !agent.model.startsWith("router/")) {
|
|
88
174
|
const parsed = parseModelRef(agent.model);
|
|
@@ -109,7 +195,7 @@ export function resolveConcreteSubagentModel(
|
|
|
109
195
|
agentIsRouter && agent.model
|
|
110
196
|
? agent.model.slice("router/".length)
|
|
111
197
|
: (parentModel?.id ?? "auto");
|
|
112
|
-
const tier =
|
|
198
|
+
const tier = resolveSubagentRouterTier(cwd, profileId, agent, taskSnippet);
|
|
113
199
|
const concrete = resolveRouterConcreteModelRef(cwd, profileId, tier);
|
|
114
200
|
if (!concrete) return undefined;
|
|
115
201
|
const parsed = parseModelRef(concrete);
|
|
@@ -2,6 +2,11 @@
|
|
|
2
2
|
* Per-agent tool policy for harness/* subagents (defense in depth with frontmatter).
|
|
3
3
|
*/
|
|
4
4
|
|
|
5
|
+
import {
|
|
6
|
+
evaluateContextModeMutation,
|
|
7
|
+
isMutatingBash,
|
|
8
|
+
} from "../../lib/harness-context-mode-policy.js";
|
|
9
|
+
import type { HarnessPhase } from "../../lib/harness-run-context.js";
|
|
5
10
|
import {
|
|
6
11
|
isSubmitToolName,
|
|
7
12
|
SUBMIT_TOOLS_BY_AGENT,
|
|
@@ -24,6 +29,9 @@ export type HarnessAgentKind =
|
|
|
24
29
|
|
|
25
30
|
const MUTATING_TOOLS = new Set(["write", "edit"]);
|
|
26
31
|
|
|
32
|
+
/** Planning agents must use submit_* → canonical artifacts/*.yaml, not JSON dumps. */
|
|
33
|
+
const PLANNING_ARTIFACT_JSON_WRITE = /artifacts\/[^\s'"`;]+\.json\b/i;
|
|
34
|
+
|
|
27
35
|
const PLANNING_BASH_DENY_PATTERNS = [
|
|
28
36
|
/\bgraphify\s+update\b/i,
|
|
29
37
|
/\bgraphify\s+extract\b/i,
|
|
@@ -37,21 +45,6 @@ const PLANNING_BASH_DENY_PATTERNS = [
|
|
|
37
45
|
/\buv\s+tool\s+install\b.*cocoindex/i,
|
|
38
46
|
];
|
|
39
47
|
|
|
40
|
-
const BASH_MUTATION_PATTERNS = [
|
|
41
|
-
/\brm\s+-/i,
|
|
42
|
-
/\bmv\s+/i,
|
|
43
|
-
/\bcp\s+/i,
|
|
44
|
-
/\btouch\s+/i,
|
|
45
|
-
/\bmkdir\s+/i,
|
|
46
|
-
/\btee\s+/i,
|
|
47
|
-
/\bgit\s+(add|commit|push|reset|checkout|merge|rebase|cherry-pick|apply)\b/i,
|
|
48
|
-
/\bnpm\s+(install|uninstall|ci)\b/i,
|
|
49
|
-
/\bpnpm\s+(add|install|remove)\b/i,
|
|
50
|
-
/\byarn\s+(add|install|remove)\b/i,
|
|
51
|
-
/\bsed\s+-i\b/i,
|
|
52
|
-
/\bperl\s+-i\b/i,
|
|
53
|
-
];
|
|
54
|
-
|
|
55
48
|
const READ_ONLY_KINDS = new Set<HarnessAgentKind>([
|
|
56
49
|
"planner",
|
|
57
50
|
"evaluator",
|
|
@@ -92,10 +85,6 @@ export function classifyHarnessAgent(agentType: string): HarnessAgentKind {
|
|
|
92
85
|
}
|
|
93
86
|
}
|
|
94
87
|
|
|
95
|
-
function isMutatingBash(command: string): boolean {
|
|
96
|
-
return BASH_MUTATION_PATTERNS.some((pattern) => pattern.test(command));
|
|
97
|
-
}
|
|
98
|
-
|
|
99
88
|
export function isHarnessPackageAgent(agentType: string): boolean {
|
|
100
89
|
return agentType.startsWith("harness/");
|
|
101
90
|
}
|
|
@@ -174,6 +163,17 @@ export function evaluateHarnessSubagentToolCall(
|
|
|
174
163
|
|
|
175
164
|
if (toolName === "bash") {
|
|
176
165
|
const command = String(input?.command ?? "");
|
|
166
|
+
if (
|
|
167
|
+
kind === "planner" &&
|
|
168
|
+
command &&
|
|
169
|
+
PLANNING_ARTIFACT_JSON_WRITE.test(command)
|
|
170
|
+
) {
|
|
171
|
+
return {
|
|
172
|
+
action: "block",
|
|
173
|
+
reason:
|
|
174
|
+
"harness-subagent-policy: artifacts must be YAML only — use submit_* (e.g. submit_hypothesis_brief → artifacts/hypothesis.yaml), not bash writes to .json.",
|
|
175
|
+
};
|
|
176
|
+
}
|
|
177
177
|
if (command && isMutatingBash(command)) {
|
|
178
178
|
return {
|
|
179
179
|
action: "block",
|
|
@@ -193,6 +193,24 @@ export function evaluateHarnessSubagentToolCall(
|
|
|
193
193
|
}
|
|
194
194
|
}
|
|
195
195
|
|
|
196
|
+
const ctxPhase =
|
|
197
|
+
(harnessSubagentPhaseHint(agentType) as HarnessPhase | null) ?? "plan";
|
|
198
|
+
const ctxDecision = evaluateContextModeMutation(
|
|
199
|
+
toolName,
|
|
200
|
+
input ?? {},
|
|
201
|
+
ctxPhase,
|
|
202
|
+
{ aborted: false, readOnlyAgent: true },
|
|
203
|
+
);
|
|
204
|
+
if (ctxDecision.blocked) {
|
|
205
|
+
return {
|
|
206
|
+
action: "block",
|
|
207
|
+
reason: ctxDecision.reason.replace(
|
|
208
|
+
/^policy-gate:/,
|
|
209
|
+
"harness-subagent-policy:",
|
|
210
|
+
),
|
|
211
|
+
};
|
|
212
|
+
}
|
|
213
|
+
|
|
196
214
|
return { action: "allow" };
|
|
197
215
|
}
|
|
198
216
|
|
|
@@ -6,8 +6,11 @@ import {
|
|
|
6
6
|
type AgentConfig,
|
|
7
7
|
agentAllowsMutatingTools,
|
|
8
8
|
} from "../../../vendor/pi-subagents/src/agents.js";
|
|
9
|
-
import
|
|
10
|
-
|
|
9
|
+
import {
|
|
10
|
+
type HarnessPhase,
|
|
11
|
+
inferHarnessPhase,
|
|
12
|
+
} from "../../lib/harness-run-context.js";
|
|
13
|
+
import { validateHarnessSpawnTopology } from "./harness-spawn-topology.js";
|
|
11
14
|
import { classifyHarnessAgent } from "./harness-subagent-policy.js";
|
|
12
15
|
|
|
13
16
|
export interface SubagentTaskRef {
|
|
@@ -19,6 +22,11 @@ export interface PrecheckResult {
|
|
|
19
22
|
message?: string;
|
|
20
23
|
}
|
|
21
24
|
|
|
25
|
+
/** Optional context accepted by `precheckHarnessSubagentSpawn`. */
export interface PrecheckOptions {
	/** Forwarded to spawn-topology validation as `projectRoot`. */
	projectRoot?: string;
	/** Forwarded to spawn-topology validation as `runId`; may be `null`. */
	runId?: string | null;
}
|
|
29
|
+
|
|
22
30
|
function collectAgents(params: {
|
|
23
31
|
agent?: string;
|
|
24
32
|
tasks?: SubagentTaskRef[];
|
|
@@ -40,7 +48,7 @@ function resolveAgent(
|
|
|
40
48
|
return agents.find((a) => a.name === name);
|
|
41
49
|
}
|
|
42
50
|
|
|
43
|
-
export function precheckHarnessSubagentSpawn(
|
|
51
|
+
export async function precheckHarnessSubagentSpawn(
|
|
44
52
|
params: {
|
|
45
53
|
agent?: string;
|
|
46
54
|
tasks?: SubagentTaskRef[];
|
|
@@ -49,7 +57,8 @@ export function precheckHarnessSubagentSpawn(
|
|
|
49
57
|
},
|
|
50
58
|
agents: AgentConfig[],
|
|
51
59
|
phase: HarnessPhase,
|
|
52
|
-
|
|
60
|
+
opts?: PrecheckOptions,
|
|
61
|
+
): Promise<PrecheckResult> {
|
|
53
62
|
const names = collectAgents(params);
|
|
54
63
|
const mutating = names.filter((n) => {
|
|
55
64
|
const cfg = resolveAgent(agents, n);
|
|
@@ -67,7 +76,17 @@ export function precheckHarnessSubagentSpawn(
|
|
|
67
76
|
};
|
|
68
77
|
}
|
|
69
78
|
|
|
70
|
-
|
|
79
|
+
const parallelEvalAdversary =
|
|
80
|
+
(params.tasks?.length ?? 0) === 2 &&
|
|
81
|
+
params.tasks?.some((t) => t.agent === "harness/evaluator") &&
|
|
82
|
+
params.tasks?.some((t) => t.agent === "harness/adversary") &&
|
|
83
|
+
phase === "evaluate";
|
|
84
|
+
|
|
85
|
+
if (
|
|
86
|
+
(params.tasks?.length ?? 0) > 1 &&
|
|
87
|
+
mutating.length > 1 &&
|
|
88
|
+
!parallelEvalAdversary
|
|
89
|
+
) {
|
|
71
90
|
return {
|
|
72
91
|
ok: false,
|
|
73
92
|
message:
|
|
@@ -76,12 +95,19 @@ export function precheckHarnessSubagentSpawn(
|
|
|
76
95
|
};
|
|
77
96
|
}
|
|
78
97
|
|
|
98
|
+
const parallelTaskCount = params.tasks?.length ?? (params.agent ? 1 : 0);
|
|
99
|
+
const topology = await validateHarnessSpawnTopology(names, phase, {
|
|
100
|
+
parallelTaskCount,
|
|
101
|
+
projectRoot: opts?.projectRoot,
|
|
102
|
+
runId: opts?.runId,
|
|
103
|
+
});
|
|
104
|
+
if (!topology.ok) {
|
|
105
|
+
return topology;
|
|
106
|
+
}
|
|
107
|
+
|
|
79
108
|
for (const name of names) {
|
|
80
109
|
if (!name.startsWith("harness/")) continue;
|
|
81
|
-
|
|
82
|
-
if (kind === "planner" && phase !== "plan") {
|
|
83
|
-
// allowed — planning agents can run in plan only ideally
|
|
84
|
-
}
|
|
110
|
+
classifyHarnessAgent(name);
|
|
85
111
|
}
|
|
86
112
|
|
|
87
113
|
return { ok: true };
|
|
@@ -2,8 +2,8 @@
|
|
|
2
2
|
* Shared write pipeline for harness subagent submit tools.
|
|
3
3
|
*/
|
|
4
4
|
|
|
5
|
-
import { mkdir } from "node:fs/promises";
|
|
6
|
-
import { dirname, join } from "node:path";
|
|
5
|
+
import { mkdir, readFile } from "node:fs/promises";
|
|
6
|
+
import { dirname, join, resolve } from "node:path";
|
|
7
7
|
import { validateAgainstHarnessSchema } from "../../lib/harness-schema-validate.js";
|
|
8
8
|
import { resolveGuardedRunDir } from "../../lib/harness-subagent-submit-path.js";
|
|
9
9
|
import { writeYamlFile } from "../../lib/harness-yaml.js";
|
|
@@ -24,6 +24,54 @@ export interface SubmitPipelineResult {
|
|
|
24
24
|
human_required?: boolean;
|
|
25
25
|
}
|
|
26
26
|
|
|
27
|
+
/**
 * True when `target` equals `root` or lies beneath it.
 * Both arguments must already be absolute, normalized paths.
 */
function isPathWithinDir(root: string, target: string): boolean {
	let current = target;
	for (;;) {
		if (current === root) return true;
		const parent = dirname(current);
		// dirname() is a fixed point at the filesystem root: nothing left to climb.
		if (parent === current) return false;
		current = parent;
	}
}

/**
 * Resolve the document for a submit_* tool call.
 *
 * An inline `document` wins. Otherwise `source_path` is read as YAML, resolved
 * relative to `runDir` (one leading `/` is stripped first).
 *
 * @param opts.projectRoot - Project root (not used by this function).
 * @param opts.runDir - Active run directory; `source_path` must stay inside it.
 * @param opts.document - Inline document, returned as-is when it is an object.
 * @param opts.source_path - Run-relative path of a YAML file to load.
 * @returns `{ ok: true, document }` on success, otherwise `{ ok: false,
 *   validation_errors }`. Read and parse failures are reported, never thrown.
 */
export async function loadSubmitDocument(opts: {
	projectRoot: string;
	runDir: string;
	document?: Record<string, unknown>;
	source_path?: string;
}): Promise<
	| { ok: true; document: Record<string, unknown> }
	| { ok: false; validation_errors: string[] }
> {
	if (opts.document && typeof opts.document === "object") {
		return { ok: true, document: opts.document };
	}
	const rel = opts.source_path?.trim();
	if (!rel) {
		return {
			ok: false,
			validation_errors: ["submit_* requires document or source_path"],
		};
	}
	const runRoot = resolve(opts.runDir);
	const abs = resolve(runRoot, rel.replace(/^\//, ""));
	// Compare path segments, not string prefixes: a plain startsWith() would let
	// `../run10/x.yaml` escape a run directory named `run1`.
	if (!isPathWithinDir(runRoot, abs)) {
		return {
			ok: false,
			validation_errors: [
				"source_path must stay under the active run directory",
			],
		};
	}
	try {
		const raw = await readFile(abs, "utf-8");
		const { parse } = await import("yaml");
		const doc = parse(raw) as Record<string, unknown>;
		if (!doc || typeof doc !== "object") {
			return {
				ok: false,
				validation_errors: ["source_path did not parse to an object"],
			};
		}
		return { ok: true, document: doc };
	} catch (e) {
		const msg = e instanceof Error ? e.message : String(e);
		return {
			ok: false,
			validation_errors: [`source_path read failed: ${msg}`],
		};
	}
}
|
|
74
|
+
|
|
27
75
|
export async function executeSubmitPipeline(opts: {
|
|
28
76
|
projectRoot: string;
|
|
29
77
|
specsDir: string;
|
|
@@ -56,6 +104,22 @@ export async function executeSubmitPipeline(opts: {
|
|
|
56
104
|
await mkdir(dirname(absPath), { recursive: true });
|
|
57
105
|
await writeYamlFile(absPath, opts.document);
|
|
58
106
|
|
|
107
|
+
if (opts.spec.toolName === "submit_executor_handoff") {
|
|
108
|
+
const rollback = opts.document.rollback_refs;
|
|
109
|
+
if (rollback && typeof rollback === "object" && !Array.isArray(rollback)) {
|
|
110
|
+
const rollbackPath = join(
|
|
111
|
+
runResolved.runDir,
|
|
112
|
+
"artifacts",
|
|
113
|
+
"executor-rollback.yaml",
|
|
114
|
+
);
|
|
115
|
+
await mkdir(dirname(rollbackPath), { recursive: true });
|
|
116
|
+
await writeYamlFile(rollbackPath, {
|
|
117
|
+
schema_version: "1.0.0",
|
|
118
|
+
...(rollback as Record<string, unknown>),
|
|
119
|
+
});
|
|
120
|
+
}
|
|
121
|
+
}
|
|
122
|
+
|
|
59
123
|
let laneResult: ApplyDebateLaneResult | undefined;
|
|
60
124
|
if (opts.spec.debateLane) {
|
|
61
125
|
laneResult = await applyDebateLaneFromDoc({
|
|
@@ -22,6 +22,12 @@ function roundPath(prefix: string, doc: Record<string, unknown>): string {
|
|
|
22
22
|
}
|
|
23
23
|
|
|
24
24
|
export const SUBMIT_TOOL_SPECS: readonly SubmitToolSpec[] = [
|
|
25
|
+
{
|
|
26
|
+
toolName: "submit_planning_context",
|
|
27
|
+
agents: ["harness/planning/planning-context"],
|
|
28
|
+
schemaFile: "plan-planning-context.schema.json",
|
|
29
|
+
artifactPath: "artifacts/planning-context.yaml",
|
|
30
|
+
},
|
|
25
31
|
{
|
|
26
32
|
toolName: "submit_scout_findings",
|
|
27
33
|
agents: [
|
|
@@ -42,13 +48,16 @@ export const SUBMIT_TOOL_SPECS: readonly SubmitToolSpec[] = [
|
|
|
42
48
|
},
|
|
43
49
|
{
|
|
44
50
|
toolName: "submit_decomposition_brief",
|
|
45
|
-
agents: ["harness/planning/decompose"],
|
|
51
|
+
agents: ["harness/planning/decompose", "harness/planning/plan-synthesizer"],
|
|
46
52
|
schemaFile: "plan-decomposition-brief.schema.json",
|
|
47
53
|
artifactPath: "artifacts/decomposition.yaml",
|
|
48
54
|
},
|
|
49
55
|
{
|
|
50
56
|
toolName: "submit_hypothesis_brief",
|
|
51
|
-
agents: [
|
|
57
|
+
agents: [
|
|
58
|
+
"harness/planning/hypothesis",
|
|
59
|
+
"harness/planning/plan-synthesizer",
|
|
60
|
+
],
|
|
52
61
|
schemaFile: "plan-hypothesis-brief.schema.json",
|
|
53
62
|
artifactPath: "artifacts/hypothesis.yaml",
|
|
54
63
|
},
|
|
@@ -66,7 +75,10 @@ export const SUBMIT_TOOL_SPECS: readonly SubmitToolSpec[] = [
|
|
|
66
75
|
},
|
|
67
76
|
{
|
|
68
77
|
toolName: "submit_execution_plan_brief",
|
|
69
|
-
agents: [
|
|
78
|
+
agents: [
|
|
79
|
+
"harness/planning/execution-plan-author",
|
|
80
|
+
"harness/planning/plan-synthesizer",
|
|
81
|
+
],
|
|
70
82
|
schemaFile: "plan-execution-plan-brief.schema.json",
|
|
71
83
|
artifactPath: "artifacts/execution-plan-draft.yaml",
|
|
72
84
|
},
|
|
@@ -129,6 +141,12 @@ export const SUBMIT_TOOL_SPECS: readonly SubmitToolSpec[] = [
|
|
|
129
141
|
artifactPath: "artifacts/human-required.yaml",
|
|
130
142
|
humanRequired: true,
|
|
131
143
|
},
|
|
144
|
+
{
|
|
145
|
+
toolName: "submit_sentrux_manifest_proposal",
|
|
146
|
+
agents: ["harness/sentrux-steward"],
|
|
147
|
+
schemaFile: "sentrux-manifest-proposal.schema.json",
|
|
148
|
+
artifactPath: "artifacts/sentrux-manifest-proposal.yaml",
|
|
149
|
+
},
|
|
132
150
|
] as const;
|
|
133
151
|
|
|
134
152
|
export const SUBMIT_TOOLS_BY_AGENT: Readonly<
|
|
@@ -13,6 +13,11 @@ import {
|
|
|
13
13
|
type HarnessSubagentsOptions,
|
|
14
14
|
type SpawnAuthForward,
|
|
15
15
|
} from "../../../vendor/pi-subagents/src/subagents.js";
|
|
16
|
+
import {
|
|
17
|
+
getLatestRunContext,
|
|
18
|
+
getRunIdFromSession,
|
|
19
|
+
type HarnessPhase,
|
|
20
|
+
} from "../../lib/harness-run-context.js";
|
|
16
21
|
import { parseSpawnContextFromTask } from "../../lib/harness-spawn-parse.js";
|
|
17
22
|
import { harnessSubagentSubmitExtensionPath } from "../harness-subagent-submit.js";
|
|
18
23
|
import { refreshHarnessCocoindexIndex } from "./harness-cocoindex-refresh.js";
|
|
@@ -35,6 +40,51 @@ import {
|
|
|
35
40
|
|
|
36
41
|
const spawnBudget = createSpawnBudgetState();
|
|
37
42
|
let lastSessionId = "harness";
|
|
43
|
+
let spawnGroupCounter = 0;
|
|
44
|
+
/** Shape of the value held in the module-level `pendingSpawnTelemetry` slot. */
type PendingSpawnTelemetry = {
	/** Run identifier under the `harness_`-prefixed key. */
	harness_run_id: string;
	/** Run identifier under the unprefixed key. */
	run_id: string;
	/** Plan identifier. */
	harness_plan_id: string;
	/** Harness phase recorded for the spawn. */
	harness_phase: HarnessPhase;
	/** Agent ids included in the spawn request. */
	agent_ids: string[];
	/** Identifier of the spawn group. */
	spawn_group_id: string;
};
|
|
52
|
+
let pendingSpawnTelemetry: PendingSpawnTelemetry | null = null;
|
|
53
|
+
|
|
54
|
+
/**
 * Collect the distinct `harness/*` agent names referenced by a spawn request.
 *
 * Looks at `params.agent`, each entry of `params.chain` and `params.tasks`,
 * and `params.aggregator.agent`. `params` is unvalidated input, so anything
 * that is not the expected shape is ignored rather than thrown on.
 *
 * @param params - Raw spawn request parameters.
 * @returns Sorted, de-duplicated agent names that start with `harness/`.
 */
function collectHarnessAgentIds(params: Record<string, unknown>): string[] {
	const found = new Set<string>();

	// Record `holder.agent` when it is a harness agent name.
	const addFrom = (holder: unknown): void => {
		if (typeof holder !== "object" || holder === null) return;
		const agent = (holder as { agent?: unknown }).agent;
		if (typeof agent === "string" && agent.startsWith("harness/")) {
			found.add(agent);
		}
	};

	addFrom(params);
	for (const key of ["chain", "tasks"] as const) {
		const list = params[key];
		// A non-array here (e.g. a number) would make `for…of` throw.
		if (!Array.isArray(list)) continue;
		for (const item of list) addFrom(item);
	}
	addFrom(params.aggregator);

	return Array.from(found).sort();
}
|
|
83
|
+
|
|
84
|
+
/**
 * Produce the next spawn-group id for a session.
 *
 * Increments the module-level `spawnGroupCounter` and returns
 * `<sessionId>-<Date.now()>-<counter>`.
 *
 * @param sessionId - Session identifier used as the id prefix.
 * @returns The new spawn-group id.
 */
function nextSpawnGroupId(sessionId: string): string {
	const sequence = ++spawnGroupCounter;
	return [sessionId, Date.now(), sequence].join("-");
}
|
|
38
88
|
|
|
39
89
|
async function resolveHarnessSpawnAuth(
|
|
40
90
|
ctx: ExtensionContext,
|
|
@@ -68,32 +118,6 @@ export function createHarnessSubagentsExtension(
|
|
|
68
118
|
resolveSubprocessEnv: (task, agent) => {
|
|
69
119
|
if (!agent.name.startsWith("harness/")) return undefined;
|
|
70
120
|
const ctx = parseSpawnContextFromTask(task);
|
|
71
|
-
// #region agent log
|
|
72
|
-
fetch(
|
|
73
|
-
"http://127.0.0.1:7928/ingest/a5d40896-34cb-4f12-97db-df7ada0b22f0",
|
|
74
|
-
{
|
|
75
|
-
method: "POST",
|
|
76
|
-
headers: {
|
|
77
|
-
"Content-Type": "application/json",
|
|
78
|
-
"X-Debug-Session-Id": "2ca12b",
|
|
79
|
-
},
|
|
80
|
-
body: JSON.stringify({
|
|
81
|
-
sessionId: "2ca12b",
|
|
82
|
-
hypothesisId: "H1",
|
|
83
|
-
location: "harness-subagents-bridge.ts:resolveSubprocessEnv",
|
|
84
|
-
message: "parsed spawn context for subprocess env",
|
|
85
|
-
data: {
|
|
86
|
-
agent: agent.name,
|
|
87
|
-
hasCtx: Boolean(ctx?.run_id),
|
|
88
|
-
run_id: ctx?.run_id ?? null,
|
|
89
|
-
run_dir: ctx?.run_dir ?? null,
|
|
90
|
-
taskPrefix: task.slice(0, 160),
|
|
91
|
-
},
|
|
92
|
-
timestamp: Date.now(),
|
|
93
|
-
}),
|
|
94
|
-
},
|
|
95
|
-
).catch(() => {});
|
|
96
|
-
// #endregion
|
|
97
121
|
if (!ctx?.run_id) return undefined;
|
|
98
122
|
return {
|
|
99
123
|
HARNESS_RUN_ID: ctx.run_id,
|
|
@@ -111,16 +135,23 @@ export function createHarnessSubagentsExtension(
|
|
|
111
135
|
const { harnessCount } = countHarnessAgentsInRequest(
|
|
112
136
|
params as Parameters<typeof countHarnessAgentsInRequest>[0],
|
|
113
137
|
);
|
|
138
|
+
pendingSpawnTelemetry = null;
|
|
114
139
|
if (harnessCount > 0) {
|
|
115
140
|
const budget = checkHarnessSpawnBudget(spawnBudget, harnessCount);
|
|
116
141
|
if (!budget.ok) {
|
|
117
142
|
return { ok: false, message: budget.message };
|
|
118
143
|
}
|
|
119
|
-
const
|
|
120
|
-
const
|
|
144
|
+
const entries = ctx.sessionManager.getEntries();
|
|
145
|
+
const runCtx = getLatestRunContext(entries);
|
|
146
|
+
const phase = inferPhaseForPrecheck(entries);
|
|
147
|
+
const pre = await precheckHarnessSubagentSpawn(
|
|
121
148
|
params as Parameters<typeof precheckHarnessSubagentSpawn>[0],
|
|
122
149
|
agents,
|
|
123
150
|
phase,
|
|
151
|
+
{
|
|
152
|
+
projectRoot: ctx.cwd,
|
|
153
|
+
runId: runCtx?.run_id ?? null,
|
|
154
|
+
},
|
|
124
155
|
);
|
|
125
156
|
if (!pre.ok) {
|
|
126
157
|
return { ok: false, message: pre.message };
|
|
@@ -133,6 +164,18 @@ export function createHarnessSubagentsExtension(
|
|
|
133
164
|
return { ok: false, message: refreshMsg };
|
|
134
165
|
}
|
|
135
166
|
}
|
|
167
|
+
const runId =
|
|
168
|
+
runCtx?.run_id ??
|
|
169
|
+
getRunIdFromSession(entries, lastSessionId) ??
|
|
170
|
+
lastSessionId;
|
|
171
|
+
pendingSpawnTelemetry = {
|
|
172
|
+
harness_run_id: runId,
|
|
173
|
+
run_id: runId,
|
|
174
|
+
harness_plan_id: runCtx?.plan_id ?? "plan-unknown",
|
|
175
|
+
harness_phase: phase,
|
|
176
|
+
agent_ids: collectHarnessAgentIds(params as Record<string, unknown>),
|
|
177
|
+
spawn_group_id: nextSpawnGroupId(lastSessionId),
|
|
178
|
+
};
|
|
136
179
|
}
|
|
137
180
|
return { ok: true };
|
|
138
181
|
},
|
|
@@ -142,6 +185,16 @@ export function createHarnessSubagentsExtension(
|
|
|
142
185
|
captureHarnessEvent(lastSessionId, "harness_subagent_spawned", {
|
|
143
186
|
active_after: spawnBudget.active,
|
|
144
187
|
spawn_count: harnessCount,
|
|
188
|
+
harness_run_id: pendingSpawnTelemetry?.harness_run_id ?? lastSessionId,
|
|
189
|
+
run_id: pendingSpawnTelemetry?.run_id ?? lastSessionId,
|
|
190
|
+
harness_plan_id:
|
|
191
|
+
pendingSpawnTelemetry?.harness_plan_id ?? "plan-unknown",
|
|
192
|
+
harness_phase: pendingSpawnTelemetry?.harness_phase ?? "plan",
|
|
193
|
+
agent_ids: pendingSpawnTelemetry?.agent_ids ?? [],
|
|
194
|
+
agent_count: pendingSpawnTelemetry?.agent_ids.length ?? harnessCount,
|
|
195
|
+
spawn_group_id:
|
|
196
|
+
pendingSpawnTelemetry?.spawn_group_id ??
|
|
197
|
+
nextSpawnGroupId(lastSessionId),
|
|
145
198
|
});
|
|
146
199
|
},
|
|
147
200
|
onSpawnEnd: (harnessCount) => {
|
|
@@ -154,7 +207,17 @@ export function createHarnessSubagentsExtension(
|
|
|
154
207
|
mode,
|
|
155
208
|
duration_ms: durationMs,
|
|
156
209
|
agent_count: agents.length,
|
|
210
|
+
agent_ids: agents,
|
|
211
|
+
harness_run_id: pendingSpawnTelemetry?.harness_run_id ?? lastSessionId,
|
|
212
|
+
run_id: pendingSpawnTelemetry?.run_id ?? lastSessionId,
|
|
213
|
+
harness_plan_id:
|
|
214
|
+
pendingSpawnTelemetry?.harness_plan_id ?? "plan-unknown",
|
|
215
|
+
harness_phase: pendingSpawnTelemetry?.harness_phase ?? "plan",
|
|
216
|
+
spawn_group_id:
|
|
217
|
+
pendingSpawnTelemetry?.spawn_group_id ??
|
|
218
|
+
nextSpawnGroupId(lastSessionId),
|
|
157
219
|
});
|
|
220
|
+
pendingSpawnTelemetry = null;
|
|
158
221
|
},
|
|
159
222
|
};
|
|
160
223
|
|