@bluecopa/harness 0.1.0-snapshot.129 → 0.1.0-snapshot.130
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -2601,7 +2601,7 @@ function createProcess(request, config) {
|
|
|
2601
2601
|
}
|
|
2602
2602
|
}
|
|
2603
2603
|
};
|
|
2604
|
-
const seedPromise =
|
|
2604
|
+
const seedPromise = buildSessionContext(config.episodeStore, config.taskId, request.expectedFiles);
|
|
2605
2605
|
const startTime = Date.now();
|
|
2606
2606
|
void (async () => {
|
|
2607
2607
|
const traceMarkers = [];
|
|
@@ -2945,37 +2945,70 @@ async function getNextEpisodeIndex(store, taskId) {
|
|
|
2945
2945
|
const episodes = await store.getEpisodesByTask(taskId);
|
|
2946
2946
|
return episodes.length;
|
|
2947
2947
|
}
|
|
2948
|
-
async function
|
|
2949
|
-
|
|
2950
|
-
|
|
2951
|
-
|
|
2952
|
-
|
|
2953
|
-
|
|
2954
|
-
|
|
2955
|
-
|
|
2956
|
-
|
|
2957
|
-
|
|
2958
|
-
|
|
2959
|
-
]
|
|
2960
|
-
|
|
2961
|
-
const structuredOutput = JSON.stringify(episode.structuredOutput, null, 2);
|
|
2962
|
-
const truncated = structuredOutput.length > 2e3 ? structuredOutput.slice(0, 2e3) + "\n... [truncated]" : structuredOutput;
|
|
2963
|
-
blocks.push(`Structured output:
|
|
2964
|
-
${truncated}`);
|
|
2965
|
-
}
|
|
2966
|
-
if (episode.artifactKeys && episode.artifactKeys.length > 0) {
|
|
2967
|
-
blocks.push(`Artifact handles:
|
|
2968
|
-
${episode.artifactKeys.join(", ")}`);
|
|
2948
|
+
/**
 * Build the seed-context messages for a thread from the episodes stored for a task.
 *
 * Episodes are split into three groups:
 *  - the five most recent episodes, each rendered in full and tagged "recent";
 *  - older episodes whose read/modified files overlap `expectedFiles`, each
 *    rendered in full and tagged "file-overlap";
 *  - all remaining older episodes, collapsed into one grouped summary message.
 *
 * Messages are emitted in that reverse order: grouped summary first, then
 * file-overlap episodes, then recent episodes.
 *
 * @param {{ getEpisodesByTask: (taskId: string) => Promise<object[]> }} episodeStore
 *   Store queried for the task's episodes.
 * @param {string} taskId - Task whose episodes are loaded.
 * @param {string[] | null | undefined} expectedFiles - Paths the new thread is
 *   expected to touch; may be omitted.
 * @returns {Promise<Array<{ role: "system", content: string }>>} Seed messages,
 *   or an empty array when the task has no episodes.
 */
async function buildSessionContext(episodeStore, taskId, expectedFiles) {
  const RECENT_EPISODE_COUNT = 5;
  const MAX_SUMMARY_FILES = 10;
  const MAX_SUMMARY_ACTIONS = 5;
  const MAX_ACTION_CHARS = 40;

  const episodes = await episodeStore.getEpisodesByTask(taskId);
  if (episodes.length === 0) return [];

  // Compare paths in one canonical form: trimmed, forward slashes, no leading "./".
  const normalizePath = (file) => file.trim().replace(/\\/g, "/").replace(/^\.\//, "");

  // Materialize the expected paths once; the suffix checks below need to iterate them.
  const expectedPaths = [...new Set((expectedFiles ?? []).map(normalizePath))];
  const expectedSet = new Set(expectedPaths);

  // A file overlaps when it equals an expected path, or when either path is a
  // "/"-delimited suffix of the other (relative vs. more fully qualified).
  const touchesExpected = (file) =>
    expectedSet.has(file) ||
    expectedPaths.some((expected) => file.endsWith(`/${expected}`) || expected.endsWith(`/${file}`));

  const overlappingIds = new Set();
  for (const ep of episodes) {
    const epFiles = [...(ep.filesModified ?? []), ...(ep.filesRead ?? [])].map(normalizePath);
    if (epFiles.some((file) => touchesExpected(file))) {
      overlappingIds.add(ep.id);
    }
  }

  const recentEpisodes = episodes.slice(-RECENT_EPISODE_COUNT);
  const recentIds = new Set(recentEpisodes.map((e) => e.id));
  const oldEpisodes = episodes.filter((e) => !recentIds.has(e.id) && !overlappingIds.has(e.id));
  // Recent episodes are rendered in full anyway, so they are not repeated as overlaps.
  const overlapEpisodes = episodes.filter((e) => overlappingIds.has(e.id) && !recentIds.has(e.id));

  const messages = [];
  if (oldEpisodes.length > 0) {
    const successCount = oldEpisodes.filter((e) => e.success).length;
    // Guard missing filesModified the same way the overlap scan does, and base
    // the "..." marker on the deduplicated list that is actually displayed.
    const modifiedFiles = [...new Set(oldEpisodes.flatMap((e) => e.filesModified ?? []))];
    // A missing threadAction becomes an empty entry instead of throwing.
    const actions = oldEpisodes.map((e) => String(e.threadAction ?? "").slice(0, MAX_ACTION_CHARS));
    const groupSummary = [
      `Grouped summary of ${oldEpisodes.length} earlier episodes:`,
      ` Successful: ${successCount}`,
      ` Failed: ${oldEpisodes.length - successCount}`,
      ` Files modified: ${modifiedFiles.slice(0, MAX_SUMMARY_FILES).join(", ")}${modifiedFiles.length > MAX_SUMMARY_FILES ? "..." : ""}`,
      ` Actions: ${actions.slice(0, MAX_SUMMARY_ACTIONS).join("; ")}${actions.length > MAX_SUMMARY_ACTIONS ? "..." : ""}`
    ].join("\n");
    messages.push({ role: "system", content: groupSummary });
  }
  for (const episode of overlapEpisodes) {
    messages.push({
      role: "system",
      content: formatEpisodeDetail(episode, "file-overlap")
    });
  }
  for (const episode of recentEpisodes) {
    messages.push({
      role: "system",
      content: formatEpisodeDetail(episode, "recent")
    });
  }
  return messages;
}
|
|
2992
|
+
/**
 * Render one episode as a plain-text block for inclusion in seed context.
 *
 * The result always contains a header line, the action, and the summary. It
 * adds the pretty-printed structured output (truncated when too long) and the
 * artifact handles when the episode has them, and ends with a ReadEpisode hint.
 * Sections are separated by blank lines.
 *
 * @param {object} episode - Episode record; reads `index`, `id`, `success`,
 *   `threadAction`, `summary`, `structuredOutput` and `artifactKeys`.
 * @param {string} tag - Label shown in the header (callers pass "recent" or
 *   "file-overlap").
 * @param {number} [maxStructuredChars=2000] - Maximum number of UTF-16 code
 *   units of structured-output JSON kept before truncation.
 * @returns {string} The formatted episode text.
 */
function formatEpisodeDetail(episode, tag, maxStructuredChars = 2e3) {
  const blocks = [
    `Episode E${episode.index} [${episode.id}] (${tag}, ${episode.success ? "ok" : "failed"}):`,
    `Action: ${episode.threadAction}`,
    `Summary:
${episode.summary}`
  ];
  if (episode.structuredOutput && Object.keys(episode.structuredOutput).length > 0) {
    const structuredOutput = JSON.stringify(episode.structuredOutput, null, 2);
    let truncated = structuredOutput;
    if (structuredOutput.length > maxStructuredChars) {
      let end = maxStructuredChars;
      // Do not cut between the two halves of a surrogate pair: if the last kept
      // code unit is a high surrogate, drop it so no lone surrogate is emitted.
      const lastUnit = structuredOutput.charCodeAt(end - 1);
      if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
        end -= 1;
      }
      truncated = `${structuredOutput.slice(0, end)}\n... [truncated]`;
    }
    blocks.push(`Structured output:
${truncated}`);
  }
  if (episode.artifactKeys && episode.artifactKeys.length > 0) {
    blocks.push(`Artifact handles:
${episode.artifactKeys.join(", ")}`);
  }
  blocks.push("Use ReadEpisode for more detail if needed.");
  return blocks.join("\n\n");
}
|
|
2979
3012
|
function normalizeSeedContext(ctx) {
|
|
2980
3013
|
if (!ctx) return [];
|
|
2981
3014
|
if (typeof ctx === "string") {
|
|
@@ -3484,6 +3517,39 @@ var ProcessManager = class {
|
|
|
3484
3517
|
const existingId = this.actionIndex.get(normalizeAction(action));
|
|
3485
3518
|
return existingId ? this.processes.get(existingId) : void 0;
|
|
3486
3519
|
}
|
|
3520
|
+
/**
|
|
3521
|
+
* Auto-inject contextEpisodeIds for file ownership.
|
|
3522
|
+
* Returns a new request with any missing episode IDs added.
|
|
3523
|
+
* This eliminates the need for the LLM to track episode IDs manually.
|
|
3524
|
+
*/
|
|
3525
|
+
autoInjectFileOwnership(request) {
|
|
3526
|
+
if (!isExclusiveRequest(request)) return request;
|
|
3527
|
+
const currentOwnedFiles = ownedFiles(request);
|
|
3528
|
+
const currentContextEpisodeIds = new Set(request.contextEpisodeIds ?? []);
|
|
3529
|
+
const injectedIds = [];
|
|
3530
|
+
for (const proc of this.processes.values()) {
|
|
3531
|
+
if (proc.status !== "completed" || !proc.result) continue;
|
|
3532
|
+
const procRequest = this.processRequests.get(proc.id);
|
|
3533
|
+
if (!isExclusiveRequest(procRequest)) continue;
|
|
3534
|
+
const procEpisodeId = proc.result.episode.id;
|
|
3535
|
+
if (currentContextEpisodeIds.has(procEpisodeId)) continue;
|
|
3536
|
+
const procOwnedFiles = /* @__PURE__ */ new Set([
|
|
3537
|
+
...ownedFiles(procRequest),
|
|
3538
|
+
...(proc.result.episode.filesModified ?? []).map(normalizeExpectedPath)
|
|
3539
|
+
]);
|
|
3540
|
+
const hasOverlap = currentOwnedFiles.some(
|
|
3541
|
+
(currentFile) => [...procOwnedFiles].some((ownedFile) => pathsOverlap(currentFile, ownedFile))
|
|
3542
|
+
);
|
|
3543
|
+
if (hasOverlap) {
|
|
3544
|
+
injectedIds.push(procEpisodeId);
|
|
3545
|
+
}
|
|
3546
|
+
}
|
|
3547
|
+
if (injectedIds.length === 0) return request;
|
|
3548
|
+
return {
|
|
3549
|
+
...request,
|
|
3550
|
+
contextEpisodeIds: [...request.contextEpisodeIds ?? [], ...injectedIds]
|
|
3551
|
+
};
|
|
3552
|
+
}
|
|
3487
3553
|
findDispatchConflict(request) {
|
|
3488
3554
|
const existing = this.findByAction(request.action ?? "");
|
|
3489
3555
|
if (existing && (existing.status === "running" || existing.status === "pending")) {
|
|
@@ -3494,7 +3560,7 @@ var ProcessManager = class {
|
|
|
3494
3560
|
}
|
|
3495
3561
|
const currentExclusive = isExclusiveRequest(request);
|
|
3496
3562
|
const currentOwnedFiles = ownedFiles(request);
|
|
3497
|
-
|
|
3563
|
+
new Set(request.contextEpisodeIds ?? []);
|
|
3498
3564
|
for (const proc of this.processes.values()) {
|
|
3499
3565
|
if (proc.status !== "running" && proc.status !== "pending") continue;
|
|
3500
3566
|
const procRequest = this.processRequests.get(proc.id);
|
|
@@ -3511,30 +3577,6 @@ var ProcessManager = class {
|
|
|
3511
3577
|
return { kind: "exclusive_running", process: proc };
|
|
3512
3578
|
}
|
|
3513
3579
|
}
|
|
3514
|
-
if (currentExclusive) {
|
|
3515
|
-
for (const proc of this.processes.values()) {
|
|
3516
|
-
if (proc.status !== "completed" || !proc.result) continue;
|
|
3517
|
-
const procRequest = this.processRequests.get(proc.id);
|
|
3518
|
-
if (!isExclusiveRequest(procRequest)) continue;
|
|
3519
|
-
const procEpisodeId = proc.result.episode.id;
|
|
3520
|
-
if (currentContextEpisodeIds.has(procEpisodeId)) continue;
|
|
3521
|
-
const procOwnedFiles = /* @__PURE__ */ new Set([
|
|
3522
|
-
...ownedFiles(procRequest),
|
|
3523
|
-
...(proc.result.episode.filesModified ?? []).map(normalizeExpectedPath)
|
|
3524
|
-
]);
|
|
3525
|
-
const overlappingFiles = currentOwnedFiles.filter(
|
|
3526
|
-
(currentFile) => [...procOwnedFiles].some((ownedFile) => pathsOverlap(currentFile, ownedFile))
|
|
3527
|
-
);
|
|
3528
|
-
if (overlappingFiles.length > 0) {
|
|
3529
|
-
return {
|
|
3530
|
-
kind: "missing_context_for_edit_retry",
|
|
3531
|
-
process: proc,
|
|
3532
|
-
files: overlappingFiles,
|
|
3533
|
-
episodeId: procEpisodeId
|
|
3534
|
-
};
|
|
3535
|
-
}
|
|
3536
|
-
}
|
|
3537
|
-
}
|
|
3538
3580
|
return null;
|
|
3539
3581
|
}
|
|
3540
3582
|
validateDispatchRequest(request) {
|
|
@@ -3618,13 +3660,6 @@ var ProcessManager = class {
|
|
|
3618
3660
|
message: `FILE OWNERSHIP CONFLICT \u2014 process ${conflict.process.id} currently owns ${conflict.files.join(", ")}. Wait for it to finish before dispatching overlapping work.`
|
|
3619
3661
|
};
|
|
3620
3662
|
}
|
|
3621
|
-
if (conflict.kind === "missing_context_for_edit_retry") {
|
|
3622
|
-
return {
|
|
3623
|
-
allow: false,
|
|
3624
|
-
reason: `missing contextEpisodeIds for follow-up edit on files owned by episode ${conflict.episodeId}`,
|
|
3625
|
-
message: `EDIT RETRY MUST CHAIN CONTEXT \u2014 overlapping files ${conflict.files.join(", ")} were already edited in process ${conflict.process.id} (episodeId: ${conflict.episodeId}). Dispatch exactly one follow-up edit thread with contextEpisodeIds: ["${conflict.episodeId}"] instead of starting a fresh edit thread.`
|
|
3626
|
-
};
|
|
3627
|
-
}
|
|
3628
3663
|
return {
|
|
3629
3664
|
allow: false,
|
|
3630
3665
|
reason: `exclusive edit/verify conflict with process ${conflict.process.id}`,
|
|
@@ -3875,15 +3910,6 @@ var ProcessManager = class {
|
|
|
3875
3910
|
const { loopConfig } = this.config;
|
|
3876
3911
|
return async (action) => {
|
|
3877
3912
|
if (action.name === "ReadEpisode") {
|
|
3878
|
-
const allowedEpisodeIds = new Set(request.contextEpisodeIds ?? []);
|
|
3879
|
-
const requestedEpisodeId = String(action.args.id ?? "");
|
|
3880
|
-
if (!allowedEpisodeIds.has(requestedEpisodeId)) {
|
|
3881
|
-
return {
|
|
3882
|
-
success: false,
|
|
3883
|
-
output: "",
|
|
3884
|
-
error: `ReadEpisode is limited to contextEpisodeIds for this thread. Allowed episode IDs: ${[...allowedEpisodeIds].join(", ") || "(none)"}`
|
|
3885
|
-
};
|
|
3886
|
-
}
|
|
3887
3913
|
const output = await renderEpisodeReadResult({
|
|
3888
3914
|
episodeStore: loopConfig.episodeStore,
|
|
3889
3915
|
args: action.args,
|
|
@@ -4064,7 +4090,7 @@ var OrchestratorTurnRunner = class {
|
|
|
4064
4090
|
const toolCallId = call.toolCallId ?? randomUUID();
|
|
4065
4091
|
this.config.trace({ type: "tool_call", tool: call.toolName });
|
|
4066
4092
|
if (call.toolName === "Thread") {
|
|
4067
|
-
const
|
|
4093
|
+
const rawRequest = this.toProcessRequest(call.args);
|
|
4068
4094
|
if (exclusiveThreadIndex !== -1 && index !== exclusiveThreadIndex) {
|
|
4069
4095
|
const resultText2 = "SAME-TURN EDIT SERIALIZATION \u2014 a bounded edit or verification thread is already being dispatched in this turn. Let that single owner thread read, edit, and verify before dispatching any other threads.";
|
|
4070
4096
|
toolResultMessages.push({
|
|
@@ -4072,9 +4098,10 @@ var OrchestratorTurnRunner = class {
|
|
|
4072
4098
|
content: resultText2,
|
|
4073
4099
|
toolResults: [{ toolCallId, toolName: "Thread", result: resultText2 }]
|
|
4074
4100
|
});
|
|
4075
|
-
yield { type: "thread_rejected", action:
|
|
4101
|
+
yield { type: "thread_rejected", action: rawRequest.action ?? "", reason: "same-turn exclusive edit/verify work must run alone" };
|
|
4076
4102
|
continue;
|
|
4077
4103
|
}
|
|
4104
|
+
const request = this.config.processManager.autoInjectFileOwnership(rawRequest);
|
|
4078
4105
|
const validation = this.config.processManager.validateDispatchRequest(request);
|
|
4079
4106
|
if (!validation.allow) {
|
|
4080
4107
|
toolResultMessages.push({
|
|
@@ -4445,9 +4472,7 @@ ${failureSummary}`;
|
|
|
4445
4472
|
(toolResult) => toolResult.toolName === "DownloadRawFile" && !toolResult.isError
|
|
4446
4473
|
)
|
|
4447
4474
|
);
|
|
4448
|
-
const hints = [
|
|
4449
|
-
`To give a dependent thread the full data from this thread, pass contextEpisodeIds: ["${episode.id}"]`
|
|
4450
|
-
];
|
|
4475
|
+
const hints = [];
|
|
4451
4476
|
const structuredOutputText = this.formatStructuredOutput(episode.structuredOutput);
|
|
4452
4477
|
if (structuredOutputText) {
|
|
4453
4478
|
hints.push("Prefer the structured output above as the canonical handoff artifact for dependent threads.");
|
|
@@ -4478,7 +4503,7 @@ ${structuredOutputText}` : null,
|
|
|
4478
4503
|
buildRetryHint(process2) {
|
|
4479
4504
|
const files = process2.result?.episode.filesModified?.filter(Boolean) ?? [];
|
|
4480
4505
|
const fileList = files.length > 0 ? files.join(", ") : "(same file scope)";
|
|
4481
|
-
return `If you retry this edit, dispatch exactly one follow-up edit thread for ${fileList}
|
|
4506
|
+
return `If you retry this edit, dispatch exactly one follow-up edit thread for ${fileList}. File ownership is auto-chained.`;
|
|
4482
4507
|
}
|
|
4483
4508
|
formatStructuredOutput(structuredOutput) {
|
|
4484
4509
|
if (!structuredOutput || Object.keys(structuredOutput).length === 0) {
|
|
@@ -4515,7 +4540,7 @@ var DEFAULT_ORCHESTRATOR_PROMPT = `You are an orchestrator agent. You accomplish
|
|
|
4515
4540
|
|
|
4516
4541
|
Dispatch when the task requires tool use (search, code, file I/O, web access). Examples:
|
|
4517
4542
|
- "Research X and Y" \u2192 2 parallel research threads
|
|
4518
|
-
- "Read file A, then modify it" \u2192 1 read thread, then 1 write thread
|
|
4543
|
+
- "Read file A, then modify it" \u2192 1 read thread, then 1 write thread
|
|
4519
4544
|
- "Create a report with data from 3 sources" \u2192 3 parallel data threads, then 1 synthesis thread
|
|
4520
4545
|
|
|
4521
4546
|
## When NOT to dispatch
|
|
@@ -4541,13 +4566,11 @@ Dispatch at most 3 threads per turn. If more are needed, dispatch the first batc
|
|
|
4541
4566
|
Research and read-only work may parallelize.
|
|
4542
4567
|
Do not split one small edit into multiple edit threads.
|
|
4543
4568
|
For a bounded edit, dispatch exactly one editor thread that reads, edits, and verifies the change.
|
|
4544
|
-
|
|
4569
|
+
File ownership is auto-chained by the harness \u2014 you don't need to track episode IDs manually.
|
|
4545
4570
|
|
|
4546
4571
|
## Context passing
|
|
4547
4572
|
|
|
4548
|
-
Threads automatically receive the user's current message and
|
|
4549
|
-
|
|
4550
|
-
Use contextEpisodeIds to chain dependent threads: research threads first (parallel), then implementation threads with their episodeIds.
|
|
4573
|
+
Threads automatically receive the user's current message, attachment metadata, and compressed session history as seed context. You do NOT need to copy-paste URLs, file contents, or pasted data into the action text \u2014 threads can read this directly from their context.
|
|
4551
4574
|
|
|
4552
4575
|
## Completion
|
|
4553
4576
|
|