omnius 1.0.366 → 1.0.368
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +240 -64
- package/npm-shrinkwrap.json +2 -2
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -558608,6 +558608,19 @@ function isFailedVerificationEvidence(entry) {
|
|
|
558608
558608
|
const text2 = `${entry.toolName ?? ""} ${entry.summary}`.toLowerCase();
|
|
558609
558609
|
return /\b(test|tests|verify|verification|build|compile|tsc|vitest|jest|pytest|go test|cargo test)\b/.test(text2);
|
|
558610
558610
|
}
|
|
558611
|
+
/**
 * Classify a ledger evidence entry into a coarse verification family.
 *
 * The entry's tool name and summary are joined, lower-cased and matched
 * against keyword lists. The family is used to decide whether a later
 * successful verification of the same family supersedes an earlier failure.
 *
 * @param {{ toolName?: string, summary?: string }} entry Evidence entry.
 * @returns {"test" | "typecheck" | "build" | "verify" | null} The family, or
 *   null when no verification keyword is present.
 */
function verificationFamily(entry) {
  const text2 = `${entry.toolName ?? ""} ${entry.summary ?? ""}`.toLowerCase();
  const testPattern = /\b(vitest|jest|pytest|go test|cargo test|npm test|pnpm test|yarn test|node\s+tests?|test|tests|spec)\b/;
  // File names such as `src/foo.test.ts` or `bar.spec.js` contain the words
  // "test"/"spec" without the command being a test run (e.g. a `tsc` error
  // that points at a test file). Ignore them for the first, highest-priority
  // test check so typecheck/build output is not misfiled as "test".
  const withoutTestFileNames = text2.replace(/[\w./\\-]*\.(?:test|spec)\.[a-z0-9]+/g, " ");
  if (testPattern.test(withoutTestFileNames)) {
    return "test";
  }
  if (/\b(typecheck|tsc)\b/.test(text2))
    return "typecheck";
  if (/\b(build|built|compile|compiled)\b/.test(text2))
    return "build";
  // Only test-file names mention tests and nothing more specific matched:
  // keep the previous classification (e.g. `node foo.test.js`).
  if (testPattern.test(text2))
    return "test";
  if (/\b(verify|verified|verification)\b/.test(text2))
    return "verify";
  return null;
}
|
|
558611
558624
|
function isStaleEditEvidence(entry) {
|
|
558612
558625
|
if (entry.success !== false)
|
|
558613
558626
|
return false;
|
|
@@ -558618,13 +558631,24 @@ function finalizeCompletionLedgerTruth(ledger) {
|
|
|
558618
558631
|
let unresolved = [...ledger.unresolved];
|
|
558619
558632
|
let lastMutation = -1;
|
|
558620
558633
|
let lastVerification = -1;
|
|
558634
|
+
const lastSuccessfulVerificationByFamily = /* @__PURE__ */ new Map();
|
|
558621
558635
|
ledger.evidence.forEach((entry, index) => {
|
|
558622
558636
|
if (isMutationEvidence(entry))
|
|
558623
558637
|
lastMutation = index;
|
|
558624
|
-
if (isSuccessfulVerificationEvidence(entry))
|
|
558638
|
+
if (isSuccessfulVerificationEvidence(entry)) {
|
|
558625
558639
|
lastVerification = index;
|
|
558640
|
+
const family = verificationFamily(entry);
|
|
558641
|
+
if (family)
|
|
558642
|
+
lastSuccessfulVerificationByFamily.set(family, index);
|
|
558643
|
+
}
|
|
558644
|
+
});
|
|
558645
|
+
ledger.evidence.forEach((entry, index) => {
|
|
558626
558646
|
if (isFailedVerificationEvidence(entry)) {
|
|
558627
|
-
|
|
558647
|
+
const family = verificationFamily(entry);
|
|
558648
|
+
const resolvedByLaterSuccess = family !== null && (lastSuccessfulVerificationByFamily.get(family) ?? -1) > index;
|
|
558649
|
+
if (!resolvedByLaterSuccess) {
|
|
558650
|
+
unresolved = appendUnresolved(unresolved, `Verification failed or did not prove success: ${entry.summary}`, entry.id);
|
|
558651
|
+
}
|
|
558628
558652
|
}
|
|
558629
558653
|
if (isStaleEditEvidence(entry)) {
|
|
558630
558654
|
unresolved = appendUnresolved(unresolved, `Stale edit failure remains unresolved: ${entry.summary}`, entry.id);
|
|
@@ -567594,6 +567618,9 @@ function adversarySystemPrompt() {
|
|
|
567594
567618
|
" • started ≠ running — a PID or a log line is not a liveness probe.",
|
|
567595
567619
|
" • exit code 0 / 'build complete' ≠ success — the specific artifact must exist.",
|
|
567596
567620
|
" • an edit ≠ a fix — the failing command must be re-run and pass.",
|
|
567621
|
+
" • file_write/file_read tool outcomes ARE first-class evidence for narrow file existence/content claims.",
|
|
567622
|
+
" Do not demand shell/cat proof when a successful file_write is followed by file_read evidence for the same path.",
|
|
567623
|
+
" • If a shell integrity check is truly needed, demand a newline-safe command such as: printf '\n---sha256---\n'; sha256sum <path>. Do not use brittle cat <path> && sha256sum <path> as the default.",
|
|
567597
567624
|
" • simulation / mock / placeholder ≠ real.",
|
|
567598
567625
|
" • partial progress ≠ done.",
|
|
567599
567626
|
"Mixed results are the norm: do NOT let one success excuse an unproven completion claim.",
|
|
@@ -567614,7 +567641,11 @@ function adversarySystemPrompt() {
|
|
|
567614
567641
|
].join("\n");
|
|
567615
567642
|
}
|
|
567616
567643
|
function buildObservationPrompt(obs, recentLedger) {
|
|
567617
|
-
const outcomes = obs.recentToolOutcomes.slice(-8).map((o2) =>
|
|
567644
|
+
const outcomes = obs.recentToolOutcomes.slice(-8).map((o2) => {
|
|
567645
|
+
const target = o2.path ? ` path=${o2.path}` : "";
|
|
567646
|
+
const evidence = o2.evidence ? ` | evidence: ${o2.evidence.slice(0, 220)}` : "";
|
|
567647
|
+
return ` - ${o2.tool}: ${o2.succeeded ? "OK" : "FAIL"}${target} — ${o2.preview.slice(0, 160)}${evidence}`;
|
|
567648
|
+
}).join("\n");
|
|
567618
567649
|
const priorDoubts = recentLedger.slice(-3).filter((e2) => e2.verdict !== "ok").map((e2) => ` - turn ${e2.turn}: ${e2.verdict} — demanded: ${e2.demand}`).join("\n");
|
|
567619
567650
|
if (obs.loopSignal) {
|
|
567620
567651
|
const ls2 = obs.loopSignal;
|
|
@@ -567669,6 +567700,47 @@ ${priorDoubts}` : "",
|
|
|
567669
567700
|
"Audit this. Return ONLY the JSON object."
|
|
567670
567701
|
].join("\n");
|
|
567671
567702
|
}
|
|
567703
|
+
/**
 * Collect the paths for which the recent tool outcomes contain a successful
 * file mutation that was FOLLOWED by a successful file_read of the same path.
 *
 * Outcomes are scanned in array order (oldest first). A read that happened
 * before the last successful write says nothing about the written content,
 * so it does not count as proof.
 *
 * @param {{ recentToolOutcomes: Array<{ tool: string, succeeded: boolean, path?: string }> }} obs
 * @returns {Set<string>} Paths whose latest successful write has a later successful read.
 */
function deterministicFileProofPaths(obs) {
  const lastWriteIndex = /* @__PURE__ */ new Map();
  const lastReadIndex = /* @__PURE__ */ new Map();
  (obs.recentToolOutcomes ?? []).forEach((outcome, index) => {
    // Failed outcomes and outcomes without a target path prove nothing.
    if (!outcome.succeeded || !outcome.path)
      return;
    if (/^(file_write|file_edit|file_patch|batch_edit)$/.test(outcome.tool)) {
      lastWriteIndex.set(outcome.path, index);
    }
    if (outcome.tool === "file_read")
      lastReadIndex.set(outcome.path, index);
  });
  const proven = /* @__PURE__ */ new Set();
  for (const [path12, writeIndex] of lastWriteIndex) {
    if ((lastReadIndex.get(path12) ?? -1) > writeIndex)
      proven.add(path12);
  }
  return proven;
}

/**
 * Report whether any recent outcome failed for a tool other than the file
 * tools and task_complete.
 *
 * @param {{ recentToolOutcomes: Array<{ tool: string, succeeded: boolean }> }} obs
 * @returns {boolean}
 */
function hasRecentNonFileFailure(obs) {
  return (obs.recentToolOutcomes ?? []).some((outcome) => !outcome.succeeded && !/^(file_write|file_edit|file_patch|batch_edit|file_read|task_complete)$/.test(outcome.tool));
}

/**
 * Heuristic: does the assistant text talk about a file existence/content claim?
 *
 * @param {string} text2 Assistant message text.
 * @returns {boolean} True when one of the file-related keywords is present.
 */
function isNarrowFileCompletionClaim(text2) {
  return /\b(file|path|created|wrote|written|exists|content|reads?back|verified by read|single line)\b/i.test(text2);
}

/**
 * Decide whether a non-"ok" critique merely demands file proof that the
 * recent tool outcomes already provide.
 *
 * @param {object} obs Observation (claimsCompletion, assistantText, recentToolOutcomes).
 * @param {{ class: string, shortText?: string, details?: string, demand?: string }} critique2
 * @returns {boolean} True when the critique is contradicted by write-then-read evidence.
 */
function critiqueContradictedByFileProof(obs, critique2) {
  if (critique2.class === "ok")
    return false;
  // Same preconditions as the audit-skip check: completion claim, no
  // non-file failure, narrow file claim, at least one proven path.
  if (!hasDeterministicFileCompletionProof(obs))
    return false;
  const text2 = [critique2.shortText, critique2.details, critique2.demand].map((part) => part ?? "").join("\n");
  // `\bcat\b` (not `cat\b`) so words such as "Tomcat" or "concat" do not
  // count as a demand for shell `cat` proof.
  return /tool output|file creation|file exists|file content|\bcat\b|sha256|hash|unverified|unproven|readback|read back/i.test(text2);
}

/**
 * True when the observation is a completion claim about a file, no non-file
 * tool failed recently, and at least one path has write-then-read proof.
 *
 * @param {object} obs Observation (claimsCompletion, assistantText, recentToolOutcomes).
 * @returns {boolean}
 */
function hasDeterministicFileCompletionProof(obs) {
  return Boolean(obs.claimsCompletion) && !hasRecentNonFileFailure(obs) && isNarrowFileCompletionClaim(obs.assistantText) && deterministicFileProofPaths(obs).size > 0;
}
|
|
567672
567744
|
function parseAdversaryCritique(raw) {
|
|
567673
567745
|
if (!raw)
|
|
567674
567746
|
return null;
|
|
@@ -567739,7 +567811,7 @@ var init_adversaryStream = __esm({
|
|
|
567739
567811
|
*/
|
|
567740
567812
|
shouldAudit(obs) {
|
|
567741
567813
|
if (obs.claimsCompletion)
|
|
567742
|
-
return
|
|
567814
|
+
return !hasDeterministicFileCompletionProof(obs);
|
|
567743
567815
|
if (obs.loopSignal)
|
|
567744
567816
|
return true;
|
|
567745
567817
|
if (obs.failingApproach)
|
|
@@ -567787,6 +567859,15 @@ var init_adversaryStream = __esm({
|
|
|
567787
567859
|
}
|
|
567788
567860
|
if (!critique2)
|
|
567789
567861
|
return null;
|
|
567862
|
+
if (critiqueContradictedByFileProof(obs, critique2)) {
|
|
567863
|
+
critique2 = {
|
|
567864
|
+
class: "ok",
|
|
567865
|
+
shortText: "file proof present",
|
|
567866
|
+
confidence: 0.05,
|
|
567867
|
+
details: "The recent file_write/file_read outcomes provide deterministic evidence for this narrow file existence/content claim.",
|
|
567868
|
+
demand: ""
|
|
567869
|
+
};
|
|
567870
|
+
}
|
|
567790
567871
|
this.ledger.push({
|
|
567791
567872
|
ts: Date.now(),
|
|
567792
567873
|
turn: obs.turn,
|
|
@@ -568759,6 +568840,16 @@ function classifyShellIntent(cmd) {
|
|
|
568759
568840
|
} else {
|
|
568760
568841
|
verbToken = first2;
|
|
568761
568842
|
}
|
|
568843
|
+
if (isRunner && verbToken === "run" && tokens.length >= 3) {
|
|
568844
|
+
const scriptToken = tokens[2].toLowerCase().replace(/^["']|["']$/g, "");
|
|
568845
|
+
const scriptRow = VERB_TABLE[scriptToken];
|
|
568846
|
+
if (scriptRow) {
|
|
568847
|
+
return { klass: scriptRow.klass, verb: scriptRow.canonical, tool: first2 };
|
|
568848
|
+
}
|
|
568849
|
+
if (/\b(test|spec|vitest|jest|pytest|check|verify|lint|typecheck)\b/i.test(scriptToken)) {
|
|
568850
|
+
return { klass: "verify", verb: "test", tool: first2 };
|
|
568851
|
+
}
|
|
568852
|
+
}
|
|
568762
568853
|
const row = VERB_TABLE[verbToken];
|
|
568763
568854
|
if (!row)
|
|
568764
568855
|
return { klass: "other", verb: verbToken, tool: first2 };
|
|
@@ -568774,7 +568865,7 @@ function verifyShellOutcome(command, ctx3, result) {
|
|
|
568774
568865
|
if (!result.success) {
|
|
568775
568866
|
return { trustworthy: true, intentBucket: baseBucket, outcomeClass: "broken" };
|
|
568776
568867
|
}
|
|
568777
|
-
if (intent.klass === "read") {
|
|
568868
|
+
if (intent.klass === "read" || intent.klass === "verify") {
|
|
568778
568869
|
return { trustworthy: true, intentBucket: baseBucket, outcomeClass: "verified" };
|
|
568779
568870
|
}
|
|
568780
568871
|
const mtimeCheck = checkMutateMtimeDelta(intent, cwd4, ctx3, result);
|
|
@@ -569112,13 +569203,17 @@ var init_postActionVerifier = __esm({
|
|
|
569112
569203
|
build: { canonical: "build", klass: "mutate" },
|
|
569113
569204
|
compile: { canonical: "build", klass: "mutate" },
|
|
569114
569205
|
make: { canonical: "build", klass: "mutate" },
|
|
569115
|
-
// ──
|
|
569116
|
-
test: { canonical: "test", klass: "
|
|
569117
|
-
t: { canonical: "test", klass: "
|
|
569118
|
-
spec: { canonical: "test", klass: "
|
|
569119
|
-
vitest: { canonical: "test", klass: "
|
|
569120
|
-
jest: { canonical: "test", klass: "
|
|
569121
|
-
pytest: { canonical: "test", klass: "
|
|
569206
|
+
// ── verification family (read-only by contract for the verifier) ──────────
|
|
569207
|
+
test: { canonical: "test", klass: "verify" },
|
|
569208
|
+
t: { canonical: "test", klass: "verify" },
|
|
569209
|
+
spec: { canonical: "test", klass: "verify" },
|
|
569210
|
+
vitest: { canonical: "test", klass: "verify" },
|
|
569211
|
+
jest: { canonical: "test", klass: "verify" },
|
|
569212
|
+
pytest: { canonical: "test", klass: "verify" },
|
|
569213
|
+
check: { canonical: "check", klass: "verify" },
|
|
569214
|
+
verify: { canonical: "verify", klass: "verify" },
|
|
569215
|
+
lint: { canonical: "lint", klass: "verify" },
|
|
569216
|
+
typecheck: { canonical: "typecheck", klass: "verify" },
|
|
569122
569217
|
// ── publish / scaffold / generate (mutate) ────────────────────────────────
|
|
569123
569218
|
publish: { canonical: "publish", klass: "mutate" },
|
|
569124
569219
|
init: { canonical: "init", klass: "mutate" },
|
|
@@ -571855,6 +571950,37 @@ Pick the SMALLEST concrete deliverable from the spec — typically the project e
|
|
|
571855
571950
|
return false;
|
|
571856
571951
|
return true;
|
|
571857
571952
|
}
|
|
571953
|
+
_toolEvidencePreview(result, displayOutput, max = 500) {
|
|
571954
|
+
const modelVisible = result.llmContent ?? result.output ?? displayOutput ?? "";
|
|
571955
|
+
const failurePrefix = result.success === false && result.error ? `Error: ${result.error}` : "";
|
|
571956
|
+
const combined = failurePrefix && modelVisible && !String(modelVisible).startsWith(failurePrefix) ? `${failurePrefix}
|
|
571957
|
+
${modelVisible}` : modelVisible || result.error || displayOutput || "";
|
|
571958
|
+
return String(combined).slice(0, max);
|
|
571959
|
+
}
|
|
571960
|
+
_recordCompletionToolEvidenceFromResult(input) {
|
|
571961
|
+
if (!this._completionLedger || input.toolName === "task_complete")
|
|
571962
|
+
return;
|
|
571963
|
+
const realFileMutation = input.realFileMutation ?? this._isRealProjectMutation(input.toolName, input.result);
|
|
571964
|
+
const realMutationPaths = input.realMutationPaths ?? (realFileMutation ? this._extractToolTargetPaths(input.toolName, input.args, input.result) : []);
|
|
571965
|
+
this._completionLedger = recordToolEvidence(this._completionLedger, {
|
|
571966
|
+
name: input.toolName,
|
|
571967
|
+
success: input.result.success,
|
|
571968
|
+
outputPreview: (input.outputPreview ?? this._toolEvidencePreview(input.result)).toString().slice(0, 500),
|
|
571969
|
+
argsKey: input.argsKey.slice(0, 300)
|
|
571970
|
+
});
|
|
571971
|
+
if (realFileMutation && realMutationPaths.length > 0) {
|
|
571972
|
+
for (const filePath of realMutationPaths) {
|
|
571973
|
+
this._completionLedger = recordCompletionEvidence(this._completionLedger, {
|
|
571974
|
+
kind: "file_change",
|
|
571975
|
+
toolName: input.toolName,
|
|
571976
|
+
success: true,
|
|
571977
|
+
summary: `file change: ${filePath}`,
|
|
571978
|
+
rawRef: `file://${filePath}`
|
|
571979
|
+
});
|
|
571980
|
+
}
|
|
571981
|
+
}
|
|
571982
|
+
this._saveCompletionLedgerSafe();
|
|
571983
|
+
}
|
|
571858
571984
|
_isAtomicBatchEditAbort(toolName, result) {
|
|
571859
571985
|
if (toolName !== "batch_edit" || !result || result.success !== false) {
|
|
571860
571986
|
return false;
|
|
@@ -572548,10 +572674,20 @@ ${_checks}`
|
|
|
572548
572674
|
const maxCycles = parseInt(process.env["OMNIUS_BACKWARD_PASS_MAX_CYCLES"] || "2", 10) || 2;
|
|
572549
572675
|
if (this._backwardPassCyclesUsed >= maxCycles) {
|
|
572550
572676
|
const lastCritique2 = this._lastBackwardPassCritique;
|
|
572551
|
-
const
|
|
572677
|
+
const evidenceAfterCritique = lastCritique2 ? toolCallLog.slice(lastCritique2.toolLogLength) : [];
|
|
572678
|
+
const freshVerification = evidenceAfterCritique.filter((entry) => {
|
|
572679
|
+
if (entry.success !== true)
|
|
572680
|
+
return false;
|
|
572681
|
+
const text2 = `${entry.name} ${entry.argsKey} ${entry.outputPreview ?? ""}`.toLowerCase();
|
|
572682
|
+
return /\b(test|tests|vitest|jest|pytest|go test|cargo test|npm test|pnpm test|yarn test|typecheck|tsc|build|verify|verification)\b/.test(text2) && /\b(pass|passed|success|exit:?\s*0|ok|0 failures|0 failed)\b/.test(text2);
|
|
572683
|
+
}).slice(-3);
|
|
572684
|
+
const concern = freshVerification.length > 0 ? [
|
|
572685
|
+
"Prior reviewer concern may be stale; fresh verification evidence was recorded after that critique.",
|
|
572686
|
+
...freshVerification.map((entry) => `${entry.name}: ${(entry.outputPreview ?? entry.argsKey).slice(0, 220)}`)
|
|
572687
|
+
].join(" ") : lastCritique2?.rationale?.trim() || this._lastBackwardPassVerdict || "unspecified";
|
|
572552
572688
|
this._completionCaveat = [
|
|
572553
572689
|
`[COMPLETION CAVEAT] Backward-pass review did not fully approve after ${this._backwardPassCyclesUsed}/${maxCycles} cycle(s).`,
|
|
572554
|
-
|
|
572690
|
+
`${freshVerification.length > 0 ? "Reviewer reconciliation note" : "Unresolved reviewer concern"}: ${concern.slice(0, 600)}`,
|
|
572555
572691
|
"The work was completed with deliverables present; treat the above as follow-up rather than a blocker."
|
|
572556
572692
|
].join("\n");
|
|
572557
572693
|
this.emit({
|
|
@@ -576216,6 +576352,9 @@ TASK: ${scrubbedTask}` : scrubbedTask;
|
|
|
576216
576352
|
backend: this._auxInferenceBackend(),
|
|
576217
576353
|
persistPath,
|
|
576218
576354
|
onCritique: (critique2, sourceTurn) => {
|
|
576355
|
+
if (completed || this._completionIncompleteVerification || this.aborted) {
|
|
576356
|
+
return;
|
|
576357
|
+
}
|
|
576219
576358
|
if (this._adversaryMode === "skillcoach" || this._adversaryMode === "both") {
|
|
576220
576359
|
this.pendingUserMessages.push(AdversaryStream.formatInjection(critique2));
|
|
576221
576360
|
}
|
|
@@ -578872,7 +579011,9 @@ Use the saved fact to continue the promised synthesis or next concrete step, or
|
|
|
578872
579011
|
recentToolOutcomes: this._adversaryToolOutcomes.slice(-8).map((o2) => ({
|
|
578873
579012
|
tool: o2.tool,
|
|
578874
579013
|
succeeded: o2.succeeded,
|
|
578875
|
-
preview: o2.preview
|
|
579014
|
+
preview: o2.preview,
|
|
579015
|
+
path: o2.path,
|
|
579016
|
+
evidence: o2.evidence
|
|
578876
579017
|
})),
|
|
578877
579018
|
claimsCompletion: false,
|
|
578878
579019
|
loopSignal: {
|
|
@@ -580099,7 +580240,7 @@ ${bookkeepingGuidance}` : bookkeepingGuidance;
|
|
|
580099
580240
|
currentLogEntry.success = result.success;
|
|
580100
580241
|
currentLogEntry.mutated = realFileMutation;
|
|
580101
580242
|
currentLogEntry.mutatedFiles = realMutationPaths;
|
|
580102
|
-
currentLogEntry.outputPreview = (result
|
|
580243
|
+
currentLogEntry.outputPreview = this._toolEvidencePreview(result, output);
|
|
580103
580244
|
}
|
|
580104
580245
|
this._toolEvents.push({
|
|
580105
580246
|
name: tc.name,
|
|
@@ -580440,26 +580581,15 @@ Then use file_read on individual FILES inside it.`);
|
|
|
580440
580581
|
lastFailureHandoffTurn = turn;
|
|
580441
580582
|
}
|
|
580442
580583
|
}
|
|
580443
|
-
|
|
580444
|
-
|
|
580445
|
-
|
|
580446
|
-
|
|
580447
|
-
|
|
580448
|
-
|
|
580449
|
-
|
|
580450
|
-
|
|
580451
|
-
|
|
580452
|
-
this._completionLedger = recordCompletionEvidence(this._completionLedger, {
|
|
580453
|
-
kind: "file_change",
|
|
580454
|
-
toolName: tc.name,
|
|
580455
|
-
success: true,
|
|
580456
|
-
summary: `file change: ${filePath2}`,
|
|
580457
|
-
rawRef: `file://${filePath2}`
|
|
580458
|
-
});
|
|
580459
|
-
}
|
|
580460
|
-
}
|
|
580461
|
-
this._saveCompletionLedgerSafe();
|
|
580462
|
-
}
|
|
580584
|
+
this._recordCompletionToolEvidenceFromResult({
|
|
580585
|
+
toolName: tc.name,
|
|
580586
|
+
argsKey: tc.arguments ? JSON.stringify(tc.arguments) : "",
|
|
580587
|
+
args: tc.arguments,
|
|
580588
|
+
result,
|
|
580589
|
+
outputPreview: this._toolEvidencePreview(result, output),
|
|
580590
|
+
realFileMutation,
|
|
580591
|
+
realMutationPaths
|
|
580592
|
+
});
|
|
580463
580593
|
this._onTypedEvent?.({
|
|
580464
580594
|
type: "tool_call_finished",
|
|
580465
580595
|
runId: this._sessionId ?? "unknown",
|
|
@@ -581555,6 +581685,15 @@ Full content available via: repl_exec(code="data = retrieve('${handleId}')") or
|
|
|
581555
581685
|
} else {
|
|
581556
581686
|
output = modelContent2;
|
|
581557
581687
|
}
|
|
581688
|
+
const bfRealFileMutation = this._isRealProjectMutation(tc.name, result);
|
|
581689
|
+
const bfRealMutationPaths = bfRealFileMutation ? this._extractToolTargetPaths(tc.name, tc.arguments, result) : [];
|
|
581690
|
+
const bfLogEntry = toolCallLog[toolCallLog.length - 1];
|
|
581691
|
+
if (bfLogEntry) {
|
|
581692
|
+
bfLogEntry.success = result.success;
|
|
581693
|
+
bfLogEntry.mutated = bfRealFileMutation;
|
|
581694
|
+
bfLogEntry.mutatedFiles = bfRealMutationPaths;
|
|
581695
|
+
bfLogEntry.outputPreview = this._toolEvidencePreview(result, output);
|
|
581696
|
+
}
|
|
581558
581697
|
this.emit({
|
|
581559
581698
|
type: "tool_result",
|
|
581560
581699
|
toolName: tc.name,
|
|
@@ -581564,14 +581703,15 @@ Full content available via: repl_exec(code="data = retrieve('${handleId}')") or
|
|
|
581564
581703
|
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
581565
581704
|
});
|
|
581566
581705
|
this._trackDecomp2(tc, result, turn);
|
|
581567
|
-
|
|
581568
|
-
|
|
581569
|
-
|
|
581570
|
-
|
|
581571
|
-
|
|
581572
|
-
|
|
581573
|
-
|
|
581574
|
-
|
|
581706
|
+
this._recordCompletionToolEvidenceFromResult({
|
|
581707
|
+
toolName: tc.name,
|
|
581708
|
+
argsKey: bfArgsKey,
|
|
581709
|
+
args: tc.arguments,
|
|
581710
|
+
result,
|
|
581711
|
+
outputPreview: this._toolEvidencePreview(result, output),
|
|
581712
|
+
realFileMutation: bfRealFileMutation,
|
|
581713
|
+
realMutationPaths: bfRealMutationPaths
|
|
581714
|
+
});
|
|
581575
581715
|
const enoentTools2 = /* @__PURE__ */ new Set([
|
|
581576
581716
|
"file_read",
|
|
581577
581717
|
"list_directory",
|
|
@@ -584050,6 +584190,39 @@ ${trimmedNew}`;
|
|
|
584050
584190
|
* Generates typed self-reflections on task failure and injects them
|
|
584051
584191
|
* into the next attempt's context for active learning. */
|
|
584052
584192
|
_reflectionBuffer = null;
|
|
584193
|
+
buildAdversaryToolOutcomeEvidence(toolName, toolArgs, content, succeeded) {
|
|
584194
|
+
const pathValue = toolArgs?.["path"] ?? toolArgs?.["file"] ?? toolArgs?.["filePath"] ?? toolArgs?.["file_path"];
|
|
584195
|
+
const path12 = typeof pathValue === "string" && pathValue.trim() ? pathValue.trim() : void 0;
|
|
584196
|
+
const compact3 = content.replace(/\s+/g, " ").trim();
|
|
584197
|
+
const snippet = compact3.slice(0, 160);
|
|
584198
|
+
const digest3 = _createHash("sha256").update(content).digest("hex").slice(0, 16);
|
|
584199
|
+
const lineCount = content.length > 0 ? content.split("\n").length : 0;
|
|
584200
|
+
if (toolName === "file_read") {
|
|
584201
|
+
const evidence = path12 ? `file_read confirmed path=${path12}; lines=${lineCount}; sha256=${digest3}` : `file_read confirmed content; lines=${lineCount}; sha256=${digest3}`;
|
|
584202
|
+
return {
|
|
584203
|
+
path: path12,
|
|
584204
|
+
evidence,
|
|
584205
|
+
preview: `${evidence}; content="${snippet}"`
|
|
584206
|
+
};
|
|
584207
|
+
}
|
|
584208
|
+
if (/^(file_write|file_edit|file_patch|batch_edit)$/.test(toolName)) {
|
|
584209
|
+
const evidence = path12 ? `${toolName} succeeded for path=${path12}; output_sha256=${digest3}` : `${toolName} succeeded; output_sha256=${digest3}`;
|
|
584210
|
+
return {
|
|
584211
|
+
path: path12,
|
|
584212
|
+
evidence,
|
|
584213
|
+
preview: `${evidence}; output="${snippet}"`
|
|
584214
|
+
};
|
|
584215
|
+
}
|
|
584216
|
+
if (toolName === "shell") {
|
|
584217
|
+
const command = typeof toolArgs?.["command"] === "string" ? String(toolArgs["command"]) : void 0;
|
|
584218
|
+
return {
|
|
584219
|
+
path: command,
|
|
584220
|
+
evidence: succeeded ? `shell exited successfully; stdout_sha256=${digest3}` : `shell failed; output_sha256=${digest3}`,
|
|
584221
|
+
preview: snippet || content.slice(0, 160)
|
|
584222
|
+
};
|
|
584223
|
+
}
|
|
584224
|
+
return { preview: snippet || content.slice(0, 160) };
|
|
584225
|
+
}
|
|
584053
584226
|
/**
|
|
584054
584227
|
* Adversary: post-turn meta-analysis.
|
|
584055
584228
|
*
|
|
@@ -584063,29 +584236,10 @@ ${trimmedNew}`;
|
|
|
584063
584236
|
*/
|
|
584064
584237
|
adversaryObserve(messages2, turn) {
|
|
584065
584238
|
const recent = messages2.slice(-6);
|
|
584066
|
-
if (this._adversaryStream) {
|
|
584067
|
-
const lastAssistantMsg = [...recent].reverse().find((m2) => m2.role === "assistant" && typeof m2.content === "string");
|
|
584068
|
-
const assistantText = typeof lastAssistantMsg?.content === "string" ? lastAssistantMsg.content.replace(/<think>[\s\S]*?<\/think>/g, "").trim() : "";
|
|
584069
|
-
if (assistantText) {
|
|
584070
|
-
this._adversaryStream.observe({
|
|
584071
|
-
turn,
|
|
584072
|
-
assistantText,
|
|
584073
|
-
recentToolOutcomes: this._adversaryToolOutcomes.slice(-8).map((o2) => ({
|
|
584074
|
-
tool: o2.tool,
|
|
584075
|
-
succeeded: o2.succeeded,
|
|
584076
|
-
preview: o2.preview
|
|
584077
|
-
})),
|
|
584078
|
-
claimsCompletion: /task.?complete|all tests pass|\bdone\b|\bcomplete(d)?\b/i.test(assistantText)
|
|
584079
|
-
});
|
|
584080
|
-
void this._adversaryStream.tick().catch(() => {
|
|
584081
|
-
});
|
|
584082
|
-
}
|
|
584083
|
-
}
|
|
584084
584239
|
for (const msg of recent) {
|
|
584085
584240
|
if (msg.role === "tool" && typeof msg.content === "string") {
|
|
584086
584241
|
const isError2 = msg.content.startsWith("Error:") || /^(FAIL|ERR!|TypeError)/i.test(msg.content);
|
|
584087
584242
|
const succeeded = !isError2;
|
|
584088
|
-
const preview = msg.content.slice(0, 80);
|
|
584089
584243
|
let toolName = "unknown";
|
|
584090
584244
|
let toolArgs;
|
|
584091
584245
|
if (msg.tool_call_id) {
|
|
@@ -584112,6 +584266,7 @@ ${trimmedNew}`;
|
|
|
584112
584266
|
return o2.turn === turn && o2.tool === toolName && o2.fingerprint === fingerprint;
|
|
584113
584267
|
});
|
|
584114
584268
|
if (!alreadySeen) {
|
|
584269
|
+
const outcomeEvidence = this.buildAdversaryToolOutcomeEvidence(toolName, toolArgs, msg.content, succeeded);
|
|
584115
584270
|
this._adversaryToolOutcomes.push({
|
|
584116
584271
|
turn,
|
|
584117
584272
|
tool: toolName,
|
|
@@ -584119,13 +584274,34 @@ ${trimmedNew}`;
|
|
|
584119
584274
|
argsKey,
|
|
584120
584275
|
fingerprint,
|
|
584121
584276
|
succeeded,
|
|
584122
|
-
|
|
584277
|
+
...outcomeEvidence
|
|
584123
584278
|
});
|
|
584124
584279
|
}
|
|
584125
584280
|
}
|
|
584126
584281
|
}
|
|
584127
584282
|
while (this._adversaryToolOutcomes.length > 20)
|
|
584128
584283
|
this._adversaryToolOutcomes.shift();
|
|
584284
|
+
if (this._adversaryStream && !this._completionIncompleteVerification) {
|
|
584285
|
+
const lastAssistantMsg = [...recent].reverse().find((m2) => m2.role === "assistant" && typeof m2.content === "string");
|
|
584286
|
+
const assistantText = typeof lastAssistantMsg?.content === "string" ? lastAssistantMsg.content.replace(/<think>[\s\S]*?<\/think>/g, "").trim() : "";
|
|
584287
|
+
if (assistantText) {
|
|
584288
|
+
const claimsCompletion = /task.?complete|all tests pass|\bdone\b|\bcomplete(d)?\b/i.test(assistantText);
|
|
584289
|
+
this._adversaryStream.observe({
|
|
584290
|
+
turn,
|
|
584291
|
+
assistantText,
|
|
584292
|
+
recentToolOutcomes: this._adversaryToolOutcomes.slice(-8).map((o2) => ({
|
|
584293
|
+
tool: o2.tool,
|
|
584294
|
+
succeeded: o2.succeeded,
|
|
584295
|
+
preview: o2.preview,
|
|
584296
|
+
path: o2.path,
|
|
584297
|
+
evidence: o2.evidence
|
|
584298
|
+
})),
|
|
584299
|
+
claimsCompletion
|
|
584300
|
+
});
|
|
584301
|
+
void this._adversaryStream.tick().catch(() => {
|
|
584302
|
+
});
|
|
584303
|
+
}
|
|
584304
|
+
}
|
|
584129
584305
|
for (const [key, val] of this._adversaryRecentFlags) {
|
|
584130
584306
|
if (turn - val.lastTurn > _AgenticRunner.ADVERSARY_FLAG_TTL)
|
|
584131
584307
|
this._adversaryRecentFlags.delete(key);
|
package/npm-shrinkwrap.json
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "omnius",
|
|
3
|
-
"version": "1.0.
|
|
3
|
+
"version": "1.0.368",
|
|
4
4
|
"lockfileVersion": 3,
|
|
5
5
|
"requires": true,
|
|
6
6
|
"packages": {
|
|
7
7
|
"": {
|
|
8
8
|
"name": "omnius",
|
|
9
|
-
"version": "1.0.
|
|
9
|
+
"version": "1.0.368",
|
|
10
10
|
"bundleDependencies": [
|
|
11
11
|
"image-to-ascii"
|
|
12
12
|
],
|
package/package.json
CHANGED