omnius 1.0.352 → 1.0.354
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +223 -10
- package/npm-shrinkwrap.json +2 -2
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -567440,6 +567440,81 @@ var init_adversaryStream = __esm({
|
|
|
567440
567440
|
}
|
|
567441
567441
|
});
|
|
567442
567442
|
|
|
567443
|
+
// packages/orchestrator/dist/completion-resolution-verifier.js
|
|
567444
|
+
/**
 * Builds the system prompt for the completion-verifier model.
 *
 * The prompt is assembled from four sections (role, strictness rules,
 * required JSON response shape, decision rule) separated by blank lines.
 *
 * @returns {string} The newline-joined system prompt.
 */
function resolutionSystemPrompt() {
  // Who the verifier is and the single question it answers.
  const role = [
    "You are a COMPLETION VERIFIER. A coding agent wants to mark a task complete.",
    "Your only job: decide whether the ACTIONS IT TOOK actually accomplish the ORIGINAL request."
  ];
  // What counts as proof, and the specific false-success patterns to reject.
  const strictness = [
    "Judge from the ORIGINAL REQUEST and the ACTIONS + EVIDENCE only. The agent's own",
    "summary is just a CLAIM — never treat the claim as proof. The agent has a documented",
    "bias toward declaring success prematurely, so be strict:",
    " - An edit is not a fix unless the evidence shows the failing thing now passes.",
    " - 'started' / a PID / a log line is not 'running' or 'working'.",
    " - exit code 0 on an unrelated command does not resolve the request.",
    " - Doing PART of the request, or adjacent work, is NOT resolution.",
    " - If the request had multiple parts, EVERY part must be addressed."
  ];
  // The JSON object shape that parseResolutionVerdict reads back.
  const responseFormat = [
    "Respond with ONLY a JSON object, no prose, no code fences:",
    '{"resolved": true|false,',
    ' "confidence": 0.0-1.0,',
    ' "missing": ["each concrete part of the ORIGINAL request not yet accomplished"],',
    ' "rationale": "2-3 sentences citing the specific actions/evidence (or their absence)"}'
  ];
  // Tie-breaker: doubt resolves to "not resolved".
  const decisionRule = [
    "Set resolved=true ONLY when the actions+evidence directly and fully accomplish the",
    "original request. When in doubt, resolved=false and name what is missing."
  ];
  // Sections are separated by exactly one empty line.
  return [role, strictness, responseFormat, decisionRule]
    .map((section) => section.join("\n"))
    .join("\n\n");
}
|
|
567468
|
+
/**
 * Builds the user-turn prompt for the completion verifier.
 *
 * Each input field is truncated to a fixed character budget and replaced by a
 * placeholder when it is empty after truncation.
 *
 * @param {{originalGoal: string, actionsDigest: string, evidenceDigest: string, proposedSummary: string}} i2
 *   Verifier inputs; every field must be a string (a missing field throws).
 * @returns {string} The newline-joined prompt.
 */
function buildResolutionPrompt(i2) {
  // Truncate to `limit` characters; fall back to `placeholder` when nothing is left.
  const clip = (text, limit, placeholder) => text.slice(0, limit) || placeholder;

  const goal = clip(i2.originalGoal, 2e3, "(empty)");
  const actions = clip(i2.actionsDigest, 3e3, "(none recorded)");
  const evidence = clip(i2.evidenceDigest, 2e3, "(none recorded)");
  // The claim is passed last and explicitly labelled as untrusted.
  const claim = clip(i2.proposedSummary, 1500, "(empty)");

  const parts = [];
  parts.push("ORIGINAL REQUEST (what the user actually asked for):", goal, "");
  parts.push("ACTIONS THE AGENT TOOK:", actions, "");
  parts.push("GROUNDED EVIDENCE (outcomes, tests, files):", evidence, "");
  parts.push("THE AGENT'S COMPLETION CLAIM (audit this — do NOT trust it as evidence):", claim, "");
  parts.push("Does the work above directly and fully accomplish the ORIGINAL REQUEST? Return ONLY the JSON object.");
  return parts.join("\n");
}
|
|
567485
|
+
/**
 * Parses the verifier model's raw reply into a normalized verdict.
 *
 * Tolerates a leading/trailing Markdown code fence and surrounding prose by
 * parsing the span from the first "{" to the last "}".
 *
 * @param {unknown} raw - Model reply text. Anything that is not a non-empty
 *   string yields null instead of throwing.
 * @returns {{resolved: boolean, confidence: number, missing: string[], rationale: string} | null}
 *   The verdict, or null when no JSON object could be extracted/parsed.
 *   - resolved: true only when the JSON value is the boolean true.
 *   - confidence: clamped to [0, 1]; 0.6 when absent or not numeric.
 *   - missing: at most 12 non-blank strings.
 *   - rationale: at most 800 characters.
 */
function parseResolutionVerdict(raw) {
  // Non-string content (null, arrays, numbers) cannot carry a verdict.
  if (typeof raw !== "string" || !raw)
    return null;
  const text2 = raw.trim().replace(/^```(?:json)?/i, "").replace(/```$/i, "").trim();
  const start2 = text2.indexOf("{");
  const end = text2.lastIndexOf("}");
  if (start2 < 0 || end <= start2)
    return null;
  let obj;
  try {
    obj = JSON.parse(text2.slice(start2, end + 1));
  } catch {
    return null;
  }
  // Accept only real numbers and non-blank numeric strings. Number() would
  // coerce null, "", false and [] to 0, which the caller reads as "low
  // confidence" and uses to let completion through; treat those as absent.
  const rawConfidence = obj["confidence"];
  let confidence2 = NaN;
  if (typeof rawConfidence === "number") {
    confidence2 = rawConfidence;
  } else if (typeof rawConfidence === "string" && rawConfidence.trim() !== "") {
    confidence2 = Number(rawConfidence);
  }
  if (!Number.isFinite(confidence2))
    confidence2 = 0.6;
  confidence2 = Math.min(1, Math.max(0, confidence2));
  const missingRaw = obj["missing"];
  const missing = Array.isArray(missingRaw) ? missingRaw.map((m2) => String(m2)).filter((m2) => m2.trim()).slice(0, 12) : [];
  return {
    // Strict check: the string "true" or 1 does not count as resolved.
    resolved: obj["resolved"] === true,
    confidence: confidence2,
    missing,
    rationale: String(obj["rationale"] ?? "").slice(0, 800)
  };
}
|
|
567512
|
+
// Bundler-generated initializer for
// packages/orchestrator/dist/completion-resolution-verifier.js, wrapped by
// `__esm` (defined elsewhere in this bundle). The module body has no
// top-level statements of its own, so the callback only carries the
// strict-mode directive. Kept as `var` to match the other init_* bindings.
var init_completion_resolution_verifier = __esm({
  "packages/orchestrator/dist/completion-resolution-verifier.js": function() {
    "use strict";
  }
});
|
|
567517
|
+
|
|
567443
567518
|
// packages/orchestrator/dist/evidenceBranch.js
|
|
567444
567519
|
function queryTerms(query) {
|
|
567445
567520
|
return [
|
|
@@ -569606,6 +569681,7 @@ var init_agenticRunner = __esm({
|
|
|
569606
569681
|
init_context_fabric();
|
|
569607
569682
|
init_evidenceLedger();
|
|
569608
569683
|
init_adversaryStream();
|
|
569684
|
+
init_completion_resolution_verifier();
|
|
569609
569685
|
init_evidenceBranch();
|
|
569610
569686
|
init_resolution_memory();
|
|
569611
569687
|
init_contextEngine();
|
|
@@ -569923,6 +569999,9 @@ var init_agenticRunner = __esm({
|
|
|
569923
569999
|
// because the configured verify command failed. Bounded by
|
|
569924
570000
|
// OMNIUS_COMPLETION_VERIFY_MAX to avoid an endless verify→fix→verify loop.
|
|
569925
570001
|
_completionVerifyRejections = 0;
|
|
570002
|
+
// Holds issued by the inference-driven resolution gate since it last let a
|
|
570003
|
+
// completion through. Not diagnostics-only: _runResolutionGate stops gating once this reaches 3 (separate from the same-summary completion-hold counter / REG-54).
|
|
570004
|
+
_resolutionGateRejections = 0;
|
|
569926
570005
|
_lastBackwardPassVerdict = null;
|
|
569927
570006
|
_lastBackwardPassCritique = null;
|
|
569928
570007
|
// Run-local completion contract inferred from the user's ask/context before
|
|
@@ -571733,6 +571812,119 @@ ${input.answerText ?? ""}`.toLowerCase().trim();
|
|
|
571733
571812
|
* up auto-blocking and surfaces a status event so the caller can take
|
|
571734
571813
|
* a different path (eg. surface to user). max cycles enforced here.
|
|
571735
571814
|
*/
|
|
571815
|
+
/**
|
|
571816
|
+
* Inference-driven, original-request-anchored completion gate. Runs ONE
|
|
571817
|
+
* grounded LLM call (native /api/chat via _auxInferenceBackend) asking whether
|
|
571818
|
+
* the actions taken actually resolve the ORIGINAL request. Returns
|
|
571819
|
+
* { proceed:false, feedback, reason } to HOLD task_complete when not resolved.
|
|
571820
|
+
* Bounded: after OMNIUS_COMPLETION_HOLD_MAX holds of the same summary the
|
|
571821
|
+
* existing REG-54 escape finishes the run as incomplete_verification.
|
|
571822
|
+
*
|
|
571823
|
+
* Disabled by OMNIUS_DISABLE_RESOLUTION_GATE=1, by disableAdversaryCritic, by
|
|
571824
|
+
* a missing backend, or when the run took no file/shell actions. Fails OPEN on
|
|
571825
|
+
* any inference error — never hard-stalls a run because the verifier hiccuped.
|
|
571826
|
+
*
|
|
571827
|
+
* Anti-reward-hacking: the verdict is judged from originalGoal + actions +
|
|
571828
|
+
* evidence; proposedSummary is passed only as "the claim under audit".
|
|
571829
|
+
*/
|
|
571830
|
+
async _runResolutionGate(turn, proposedSummary, toolCallLog) {
|
|
571831
|
+
if (process.env["OMNIUS_DISABLE_RESOLUTION_GATE"] === "1")
|
|
571832
|
+
return { proceed: true };
|
|
571833
|
+
if (this.options.disableAdversaryCritic === true)
|
|
571834
|
+
return { proceed: true };
|
|
571835
|
+
if (!this.backend || typeof this.backend.chatCompletion !== "function")
|
|
571836
|
+
return { proceed: true };
|
|
571837
|
+
if (this._resolutionGateRejections >= 3)
|
|
571838
|
+
return { proceed: true };
|
|
571839
|
+
if (/^\s*BLOCKED\b/i.test(proposedSummary))
|
|
571840
|
+
return { proceed: true };
|
|
571841
|
+
const originalGoal = (this._taskState.originalGoal || this._taskState.goal || "").trim();
|
|
571842
|
+
if (!originalGoal)
|
|
571843
|
+
return { proceed: true };
|
|
571844
|
+
const actionable = toolCallLog.some((e2) => e2.mutated || e2.name === "shell" || e2.name === "file_write" || e2.name === "file_edit");
|
|
571845
|
+
if (!actionable)
|
|
571846
|
+
return { proceed: true };
|
|
571847
|
+
const filesChanged = [...this._taskState.modifiedFiles.entries()].map(([p2, action]) => ` - ${action} ${p2}`).slice(0, 40);
|
|
571848
|
+
const shellLines = toolCallLog.filter((e2) => e2.name === "shell").slice(-12).map((e2) => ` - shell: ${e2.success ? "ok" : "FAIL"} — ${(e2.outputPreview || "").slice(0, 120)}`);
|
|
571849
|
+
const actionsDigest = [
|
|
571850
|
+
filesChanged.length ? `Files changed (${filesChanged.length}):
|
|
571851
|
+
${filesChanged.join("\n")}` : "Files changed: none",
|
|
571852
|
+
shellLines.length ? `Recent commands:
|
|
571853
|
+
${shellLines.join("\n")}` : "Commands run: none"
|
|
571854
|
+
].join("\n");
|
|
571855
|
+
const wf = this._worldFacts;
|
|
571856
|
+
const evidenceParts = [];
|
|
571857
|
+
if (wf?.lastTest?.summary) {
|
|
571858
|
+
evidenceParts.push(`Last test outcome: ${wf.lastTest.passed ? "PASSED" : "FAILED"} — ${wf.lastTest.summary.slice(0, 200)}`);
|
|
571859
|
+
}
|
|
571860
|
+
const failCount = toolCallLog.filter((e2) => e2.success === false).length;
|
|
571861
|
+
evidenceParts.push(`Failed tool calls this run: ${failCount}`);
|
|
571862
|
+
const evidenceDigest = evidenceParts.join("\n");
|
|
571863
|
+
let verdict = null;
|
|
571864
|
+
try {
|
|
571865
|
+
const backend = this._auxInferenceBackend();
|
|
571866
|
+
for (let attempt = 0; attempt < 2 && !verdict; attempt++) {
|
|
571867
|
+
const resp = await backend.chatCompletion({
|
|
571868
|
+
messages: [
|
|
571869
|
+
{ role: "system", content: resolutionSystemPrompt() },
|
|
571870
|
+
{
|
|
571871
|
+
role: "user",
|
|
571872
|
+
content: buildResolutionPrompt({
|
|
571873
|
+
originalGoal,
|
|
571874
|
+
actionsDigest,
|
|
571875
|
+
evidenceDigest,
|
|
571876
|
+
proposedSummary
|
|
571877
|
+
})
|
|
571878
|
+
}
|
|
571879
|
+
],
|
|
571880
|
+
tools: [],
|
|
571881
|
+
temperature: 0,
|
|
571882
|
+
maxTokens: 700,
|
|
571883
|
+
timeoutMs: 3e4
|
|
571884
|
+
});
|
|
571885
|
+
verdict = parseResolutionVerdict(resp.choices?.[0]?.message?.content ?? "");
|
|
571886
|
+
}
|
|
571887
|
+
} catch {
|
|
571888
|
+
verdict = null;
|
|
571889
|
+
}
|
|
571890
|
+
if (!verdict)
|
|
571891
|
+
return { proceed: true };
|
|
571892
|
+
if (verdict.resolved || verdict.confidence < 0.5) {
|
|
571893
|
+
this._resolutionGateRejections = 0;
|
|
571894
|
+
return { proceed: true };
|
|
571895
|
+
}
|
|
571896
|
+
this._resolutionGateRejections++;
|
|
571897
|
+
const missing = verdict.missing.length ? verdict.missing.map((m2) => ` • ${m2}`).join("\n") : " • (verifier did not enumerate specific gaps; re-read the original request)";
|
|
571898
|
+
const feedback = [
|
|
571899
|
+
`[COMPLETION BLOCKED — actions do not yet resolve the original request]`,
|
|
571900
|
+
`Original request: ${originalGoal.slice(0, 300)}`,
|
|
571901
|
+
`Verifier rationale: ${verdict.rationale}`,
|
|
571902
|
+
`Still unaddressed:`,
|
|
571903
|
+
missing,
|
|
571904
|
+
``,
|
|
571905
|
+
`Do the work that actually accomplishes the unaddressed items above, then re-verify.`,
|
|
571906
|
+
`Do NOT call task_complete again until each item is done AND its result is evidenced.`
|
|
571907
|
+
].join("\n");
|
|
571908
|
+
const reason = `task resolution not verified (${verdict.missing.length} item(s) unaddressed)`;
|
|
571909
|
+
this.emit({
|
|
571910
|
+
type: "adversary_reaction",
|
|
571911
|
+
adversary: {
|
|
571912
|
+
class: "false_success",
|
|
571913
|
+
shortText: `Completion blocked — ${verdict.missing.length} request item(s) unresolved`,
|
|
571914
|
+
confidence: verdict.confidence,
|
|
571915
|
+
details: feedback
|
|
571916
|
+
},
|
|
571917
|
+
turn,
|
|
571918
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
571919
|
+
});
|
|
571920
|
+
this.emit({
|
|
571921
|
+
type: "status",
|
|
571922
|
+
content: `Resolution gate HELD task_complete: ${reason}`,
|
|
571923
|
+
turn,
|
|
571924
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
571925
|
+
});
|
|
571926
|
+
return { proceed: false, feedback, reason };
|
|
571927
|
+
}
|
|
571736
571928
|
/**
|
|
571737
571929
|
* Completion compile/verify gate (opt-in). Runs a configured shell command
|
|
571738
571930
|
* (e.g. a typecheck/build) before `task_complete` is accepted, but ONLY when
|
|
@@ -571841,6 +572033,13 @@ ${input.answerText ?? ""}`.toLowerCase().trim();
|
|
|
571841
572033
|
const _verify = await this._runCompletionVerifyGate(turn);
|
|
571842
572034
|
if (!_verify.proceed)
|
|
571843
572035
|
return _verify;
|
|
572036
|
+
const _resolution = await this._runResolutionGate(turn, proposedSummary, toolCallLog);
|
|
572037
|
+
if (!_resolution.proceed) {
|
|
572038
|
+
return {
|
|
572039
|
+
proceed: false,
|
|
572040
|
+
feedback: _resolution.feedback ?? "Completion not resolved."
|
|
572041
|
+
};
|
|
572042
|
+
}
|
|
571844
572043
|
if (this._completionLedger && proposedSummary) {
|
|
571845
572044
|
const _newClaims = deriveClaimsFromProposedText({
|
|
571846
572045
|
text: proposedSummary,
|
|
@@ -573918,27 +574117,34 @@ ${blob}
|
|
|
573918
574117
|
}
|
|
573919
574118
|
if (toolResultIndices.length <= keepResults)
|
|
573920
574119
|
return;
|
|
573921
|
-
const
|
|
574120
|
+
const callMetaById = /* @__PURE__ */ new Map();
|
|
573922
574121
|
for (const m2 of messages2) {
|
|
573923
574122
|
const calls = m2.tool_calls;
|
|
573924
574123
|
if (m2.role === "assistant" && Array.isArray(calls)) {
|
|
573925
574124
|
for (const c8 of calls) {
|
|
573926
|
-
|
|
574125
|
+
const name10 = c8?.function?.name;
|
|
574126
|
+
if (!name10 || !c8.id)
|
|
574127
|
+
continue;
|
|
574128
|
+
const rawArgs = c8.function?.arguments || "{}";
|
|
574129
|
+
let path12;
|
|
574130
|
+
if (name10 === "file_read") {
|
|
573927
574131
|
try {
|
|
573928
|
-
const a2 = JSON.parse(
|
|
574132
|
+
const a2 = JSON.parse(rawArgs);
|
|
573929
574133
|
const p2 = a2.path ?? a2.file ?? a2.file_path;
|
|
573930
574134
|
if (p2)
|
|
573931
|
-
|
|
574135
|
+
path12 = String(p2);
|
|
573932
574136
|
} catch {
|
|
573933
574137
|
}
|
|
573934
574138
|
}
|
|
574139
|
+
callMetaById.set(c8.id, { name: name10, path: path12, argsPreview: rawArgs.slice(0, 120) });
|
|
573935
574140
|
}
|
|
573936
574141
|
}
|
|
573937
574142
|
}
|
|
573938
|
-
const
|
|
574143
|
+
const metaForResult = (idx) => {
|
|
573939
574144
|
const tcid = messages2[idx].tool_call_id;
|
|
573940
|
-
return tcid ?
|
|
574145
|
+
return tcid ? callMetaById.get(tcid) : void 0;
|
|
573941
574146
|
};
|
|
574147
|
+
const pathForResult = (idx) => metaForResult(idx)?.path;
|
|
573942
574148
|
const latestIdxForPath = /* @__PURE__ */ new Map();
|
|
573943
574149
|
for (const idx of toolResultIndices) {
|
|
573944
574150
|
const p2 = pathForResult(idx);
|
|
@@ -573968,10 +574174,16 @@ ${blob}
|
|
|
573968
574174
|
} catch {
|
|
573969
574175
|
}
|
|
573970
574176
|
}
|
|
573971
|
-
|
|
573972
|
-
|
|
573973
|
-
|
|
573974
|
-
|
|
574177
|
+
const meta = metaForResult(idx);
|
|
574178
|
+
let stub;
|
|
574179
|
+
if (p2) {
|
|
574180
|
+
stub = `[Earlier read of ${p2} cleared — its content is preserved in your ACTIVE CONTEXT FRAME (Evidence already gathered). Do NOT re-read it; use the frame.]`;
|
|
574181
|
+
} else {
|
|
574182
|
+
const firstLine = content.split("\n").find((l2) => l2.trim())?.trim().slice(0, 140) ?? "";
|
|
574183
|
+
const failed = /\berror\b|\bfail|✗|exit code [1-9]|traceback/i.test(content);
|
|
574184
|
+
stub = `[${meta?.name ?? "tool"}(${meta?.argsPreview ?? ""}) result compacted to save context — ${failed ? "FAILED" : "ok"}${firstLine ? `: ${firstLine}` : ""}. Re-run ONLY if the underlying state has since changed.]`;
|
|
574185
|
+
}
|
|
574186
|
+
messages2[idx] = { ...msg, content: stub };
|
|
573975
574187
|
cleared++;
|
|
573976
574188
|
}
|
|
573977
574189
|
if (idleGapMs > IDLE_THRESHOLD_MS && !this._thinkingClearLatched) {
|
|
@@ -574947,6 +575159,7 @@ Respond with your assessment, then take action.`;
|
|
|
574947
575159
|
this._fileWritesThisRun = 0;
|
|
574948
575160
|
this._backwardPassCyclesUsed = 0;
|
|
574949
575161
|
this._completionVerifyRejections = 0;
|
|
575162
|
+
this._resolutionGateRejections = 0;
|
|
574950
575163
|
this._lastBackwardPassVerdict = null;
|
|
574951
575164
|
this._lastBackwardPassCritique = null;
|
|
574952
575165
|
this._completionContract = null;
|
package/npm-shrinkwrap.json
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "omnius",
|
|
3
|
-
"version": "1.0.
|
|
3
|
+
"version": "1.0.354",
|
|
4
4
|
"lockfileVersion": 3,
|
|
5
5
|
"requires": true,
|
|
6
6
|
"packages": {
|
|
7
7
|
"": {
|
|
8
8
|
"name": "omnius",
|
|
9
|
-
"version": "1.0.
|
|
9
|
+
"version": "1.0.354",
|
|
10
10
|
"bundleDependencies": [
|
|
11
11
|
"image-to-ascii"
|
|
12
12
|
],
|
package/package.json
CHANGED