omnius 1.0.383 → 1.0.385
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +231 -50
- package/npm-shrinkwrap.json +2 -2
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -8602,12 +8602,14 @@ __export(vision_exports, {
|
|
|
8602
8602
|
MOONDREAM3_PREVIEW_HF_MODEL: () => MOONDREAM3_PREVIEW_HF_MODEL,
|
|
8603
8603
|
VisionTool: () => VisionTool,
|
|
8604
8604
|
analyzeImageWithVision: () => analyzeImageWithVision,
|
|
8605
|
+
callOllamaVision: () => callOllamaVision,
|
|
8605
8606
|
formatVisionPointResult: () => formatVisionPointResult,
|
|
8606
8607
|
getVisionPointDiagnostics: () => getVisionPointDiagnostics,
|
|
8607
8608
|
locateImagePoints: () => locateImagePoints,
|
|
8608
8609
|
normalizeVisionModelName: () => normalizeVisionModelName,
|
|
8609
8610
|
resetMoondreamClient: () => resetMoondreamClient,
|
|
8610
8611
|
resolveHuggingFaceVisionModelCandidates: () => resolveHuggingFaceVisionModelCandidates,
|
|
8612
|
+
resolveInstalledOllamaVisionModelAlias: () => resolveInstalledOllamaVisionModelAlias,
|
|
8611
8613
|
resolveOllamaVisionModelCandidates: () => resolveOllamaVisionModelCandidates
|
|
8612
8614
|
});
|
|
8613
8615
|
import { mkdirSync as mkdirSync9, readFileSync as readFileSync12, existsSync as existsSync14, statSync as statSync7, unlinkSync as unlinkSync2, writeFileSync as writeFileSync10 } from "node:fs";
|
|
@@ -8940,7 +8942,8 @@ function resolveOllamaVisionModelCandidates(options2 = {}) {
|
|
|
8940
8942
|
ollamaVisionModelName(options2.preferredModel || ""),
|
|
8941
8943
|
process.env["OLLAMA_VISION_MODEL"] || "",
|
|
8942
8944
|
options2.activeModelHasVision && options2.activeModel ? options2.activeModel : "",
|
|
8943
|
-
DEFAULT_OLLAMA_VISION_MODEL
|
|
8945
|
+
DEFAULT_OLLAMA_VISION_MODEL,
|
|
8946
|
+
`${DEFAULT_OLLAMA_VISION_MODEL}:latest`
|
|
8944
8947
|
].map((entry) => entry.trim()).filter(Boolean);
|
|
8945
8948
|
return [...new Set(candidates)];
|
|
8946
8949
|
}
|
|
@@ -9191,6 +9194,26 @@ async function callOllamaVision(ollamaHost, model, prompt, imageBase64, timeoutM
|
|
|
9191
9194
|
if (!res.ok && shouldAutoPullOllamaVisionModel(model)) {
|
|
9192
9195
|
const errText = await res.text().catch(() => "");
|
|
9193
9196
|
if (res.status === 404 || /not found|does not exist/i.test(errText)) {
|
|
9197
|
+
const installedAlias = await resolveInstalledOllamaVisionModelAlias(ollamaHost, model, timeoutMs);
|
|
9198
|
+
if (installedAlias && installedAlias !== model) {
|
|
9199
|
+
res = await fetch(`${ollamaHost}/api/generate`, {
|
|
9200
|
+
method: "POST",
|
|
9201
|
+
headers: { "Content-Type": "application/json" },
|
|
9202
|
+
body: JSON.stringify({
|
|
9203
|
+
model: installedAlias,
|
|
9204
|
+
prompt,
|
|
9205
|
+
images: [imageBase64],
|
|
9206
|
+
stream: false,
|
|
9207
|
+
think: false,
|
|
9208
|
+
options: { temperature: 0 }
|
|
9209
|
+
}),
|
|
9210
|
+
signal: AbortSignal.timeout(timeoutMs)
|
|
9211
|
+
});
|
|
9212
|
+
if (res.ok) {
|
|
9213
|
+
const data2 = await res.json();
|
|
9214
|
+
return typeof data2.response === "string" && data2.response.trim() ? data2.response : null;
|
|
9215
|
+
}
|
|
9216
|
+
}
|
|
9194
9217
|
try {
|
|
9195
9218
|
ensureDiskSpaceForOllamaVisionModel(model);
|
|
9196
9219
|
pullOllamaVisionModel(model);
|
|
@@ -9217,6 +9240,33 @@ async function callOllamaVision(ollamaHost, model, prompt, imageBase64, timeoutM
|
|
|
9217
9240
|
const data = await res.json();
|
|
9218
9241
|
return typeof data.response === "string" && data.response.trim() ? data.response : null;
|
|
9219
9242
|
}
|
|
9243
|
+
/**
 * Find the name under which a requested Ollama vision model is installed.
 *
 * Fetches `${ollamaHost}/api/tags` and compares the request against the `name`
 * of every listed model. An exact match wins. A request without a `:tag` then
 * falls back to `<name>:latest`, and finally to the first listed model whose
 * name starts with `<name>:`.
 *
 * @param {string} ollamaHost - Ollama base URL; `/api/tags` is appended as-is.
 * @param {string} model - Requested model name, with or without a `:tag` suffix.
 * @param {number} [timeoutMs=5000] - Caller timeout; the probe clamps it to 1000-5000 ms.
 * @returns {Promise<string|null>} The installed name to use, or `null` when the
 *   request is blank, the listing cannot be fetched or parsed, or nothing matches.
 */
async function resolveInstalledOllamaVisionModelAlias(ollamaHost, model, timeoutMs = 5e3) {
  // A non-string model cannot name an installed tag; treat it as a blank request
  // rather than throwing before the best-effort lookup starts.
  const requested = typeof model === "string" ? model.trim() : "";
  if (!requested) {
    return null;
  }

  let installedNames;
  try {
    const res = await fetch(`${ollamaHost}/api/tags`, {
      signal: AbortSignal.timeout(Math.min(Math.max(timeoutMs, 1e3), 5e3)),
    });
    if (!res.ok) {
      return null;
    }
    const data = await res.json();
    const listed = Array.isArray(data?.models) ? data.models : [];
    // Optional chaining keeps one malformed (null) entry from discarding the whole listing.
    installedNames = listed
      .map((entry) => (typeof entry?.name === "string" ? entry.name.trim() : ""))
      .filter(Boolean);
  } catch {
    // Best-effort probe: network, timeout and JSON failures all mean "no alias known".
    return null;
  }

  if (installedNames.includes(requested)) {
    return requested;
  }
  // An explicitly tagged request is never remapped to a different tag.
  if (requested.includes(":")) {
    return null;
  }
  const latest = `${requested}:latest`;
  if (installedNames.includes(latest)) {
    return latest;
  }
  // Last resort: any installed tag of the same model, in listing order.
  return installedNames.find((name) => name.startsWith(`${requested}:`)) ?? null;
}
|
|
9220
9270
|
function shouldAutoPullOllamaVisionModel(model) {
|
|
9221
9271
|
if (!envFlag2(process.env["OMNIUS_OLLAMA_VISION_AUTO_PULL"], true))
|
|
9222
9272
|
return false;
|
|
@@ -295276,6 +295326,21 @@ function getTodoSessionId() {
|
|
|
295276
295326
|
return envSession;
|
|
295277
295327
|
return "default";
|
|
295278
295328
|
}
|
|
295329
|
+
/**
 * Reject large todo lists that were submitted as a flat checklist.
 *
 * Lists with fewer than 20 entries are always accepted. A larger list is
 * accepted only when at least one entry's `parentId` refers to the `id` of an
 * entry in the same list. Both sides are compared after trimming, so an id
 * padded with whitespace still matches its children.
 *
 * @param {Array<{id?: unknown, parentId?: unknown}>} todos - Incoming todo entries.
 * @returns {string|null} An error message describing the required nested shape,
 *   or `null` when the list is acceptable (or is not an array at all).
 */
function validateLargeTaskDecomposition(todos) {
  if (!Array.isArray(todos) || todos.length < 20) {
    return null;
  }

  const trimmedOrEmpty = (value) => (typeof value === "string" ? value.trim() : "");
  const knownIds = new Set();
  const referencedParentIds = new Set();
  for (const todo of todos) {
    // `todo?.` tolerates null/undefined entries instead of throwing on them.
    const id = trimmedOrEmpty(todo?.id);
    if (id) {
      knownIds.add(id);
    }
    const parentId = trimmedOrEmpty(todo?.parentId);
    if (parentId) {
      referencedParentIds.add(parentId);
    }
  }

  // One resolvable parent link is enough to treat the list as a tree.
  for (const parentId of referencedParentIds) {
    if (knownIds.has(parentId)) {
      return null;
    }
  }

  return [
    "Large todo lists (20+ items) must be decomposed into a nested tree with stable ids and parentId links.",
    "Create parent objectives and child leaf tasks instead of a flat checklist.",
    'Canonical shape: todo_write({"todos":[{"id":"group-1","content":"Steps 01-08","status":"in_progress"},{"id":"step-01","parentId":"group-1","content":"Complete step 01 and verify evidence","status":"in_progress"}]})'
  ].join(" ");
}
|
|
295279
295344
|
function normalizeIncomingTodos(args) {
|
|
295280
295345
|
const repairNotes = [];
|
|
295281
295346
|
const record = args;
|
|
@@ -295462,6 +295527,15 @@ Mark tasks complete IMMEDIATELY after finishing — don't batch. Never mark comp
|
|
|
295462
295527
|
declaredArtifacts: Array.isArray(entry["declaredArtifacts"]) ? entry["declaredArtifacts"].filter((x) => typeof x === "string") : void 0
|
|
295463
295528
|
});
|
|
295464
295529
|
}
|
|
295530
|
+
const decompositionError = validateLargeTaskDecomposition(incoming);
|
|
295531
|
+
if (decompositionError) {
|
|
295532
|
+
return {
|
|
295533
|
+
success: false,
|
|
295534
|
+
output: "",
|
|
295535
|
+
error: decompositionError,
|
|
295536
|
+
durationMs: performance.now() - start2
|
|
295537
|
+
};
|
|
295538
|
+
}
|
|
295465
295539
|
const sessionId = typeof args["session_id"] === "string" && args["session_id"].trim() ? args["session_id"].trim() : typeof args["sessionId"] === "string" && args["sessionId"].trim() ? args["sessionId"].trim() : getTodoSessionId();
|
|
295466
295540
|
const oldTodos = readTodos(sessionId);
|
|
295467
295541
|
const canonicalize2 = (todos) => JSON.stringify(todos.map((t2) => ({
|
|
@@ -547133,6 +547207,23 @@ function summarizeProcessFailure(stdout, stderr) {
|
|
|
547133
547207
|
}
|
|
547134
547208
|
return parts.join("\n").slice(0, 2200);
|
|
547135
547209
|
}
|
|
547210
|
+
/**
 * Render an object-recognition result as human-readable text.
 *
 * When `extra_labels` is a non-empty array, the output leads with the CLIP
 * candidate label scores and appends the recognized matches as a second
 * section (only if there are any). Otherwise it lists the recognized matches,
 * or reports that nothing was recognized.
 *
 * @param {object} result - Recognition payload; reads `matches`, `extra_labels`
 *   and `recognized_count`.
 * @returns {string} The formatted report.
 */
function formatObjectRecognitionResult(result) {
  // Tolerate a missing payload: it formats the same as an empty one.
  const payload = result ?? {};
  const toPercent = (value) => `${(value * 100).toFixed(0)}%`;

  // `m?.recognized` drops null entries instead of throwing on them.
  const matches = (Array.isArray(payload.matches) ? payload.matches : []).filter((m) => m?.recognized);
  const matchLines = matches.map(
    (m) => ` ${m.label}: ${toPercent(m.blended_score)} (image=${toPercent(m.image_similarity)}, text=${toPercent(m.text_similarity)})`
  );

  const extraLabels = Array.isArray(payload.extra_labels) ? payload.extra_labels : [];
  if (extraLabels.length > 0) {
    const extraLines = extraLabels.map((s) => ` ${s.label}: ${toPercent(s.score)}`);
    const sections = [`CLIP candidate label scores:\n${extraLines.join("\n")}`];
    if (matches.length > 0) {
      sections.push(`Persistent visual memory matches above threshold:\n${matchLines.join("\n")}`);
    }
    return sections.join("\n\n");
  }

  if (matches.length === 0) {
    return "No taught objects recognized in this image.";
  }
  // Fall back to the number of listed matches so the header never reads "undefined".
  const recognizedCount = payload.recognized_count ?? matches.length;
  return `Recognized ${recognizedCount} object(s):\n${matchLines.join("\n")}`;
}
|
|
547136
547227
|
var VMEM_DIR, VENV_DIR2, VENV_PY, VENV_PIP2, VISUAL_MEMORY_ACTIONS, VisualMemoryTool;
|
|
547137
547228
|
var init_visual_memory = __esm({
|
|
547138
547229
|
"packages/execution/dist/tools/visual-memory.js"() {
|
|
@@ -547668,18 +547759,7 @@ print(json.dumps({
|
|
|
547668
547759
|
const payload = JSON.stringify(result);
|
|
547669
547760
|
return { success: true, output: payload, llmContent: payload, durationMs: performance.now() - start2 };
|
|
547670
547761
|
}
|
|
547671
|
-
|
|
547672
|
-
const lines = matches.map((m2) => ` ${m2.label}: ${(m2.blended_score * 100).toFixed(0)}% (image=${(m2.image_similarity * 100).toFixed(0)}%, text=${(m2.text_similarity * 100).toFixed(0)}%)`);
|
|
547673
|
-
let output = matches.length > 0 ? `Recognized ${result.recognized_count} object(s):
|
|
547674
|
-
${lines.join("\n")}` : "No taught objects recognized in this image.";
|
|
547675
|
-
if (result.extra_labels) {
|
|
547676
|
-
const extraLines = result.extra_labels.map((s2) => ` ${s2.label}: ${(s2.score * 100).toFixed(0)}%`);
|
|
547677
|
-
output += `
|
|
547678
|
-
|
|
547679
|
-
CLIP label scores:
|
|
547680
|
-
${extraLines.join("\n")}`;
|
|
547681
|
-
}
|
|
547682
|
-
return { success: true, output, durationMs: performance.now() - start2 };
|
|
547762
|
+
return { success: true, output: formatObjectRecognitionResult(result), durationMs: performance.now() - start2 };
|
|
547683
547763
|
}
|
|
547684
547764
|
// =========================================================================
|
|
547685
547765
|
// Memory Management
|
|
@@ -569521,8 +569601,8 @@ var init_focusSupervisor = __esm({
|
|
|
569521
569601
|
const directive = this.setDirective({
|
|
569522
569602
|
turn: input.turn,
|
|
569523
569603
|
state: ignoredManyTimes ? "verify_or_block" : "single_next_action",
|
|
569524
|
-
reason: ignoredManyTimes ? `model ignored ${this.ignoredDirectiveStreak} focus directives;
|
|
569525
|
-
requiredNextAction:
|
|
569604
|
+
reason: ignoredManyTimes ? `model ignored ${this.ignoredDirectiveStreak} focus directives; take the required recovery action before trying another variant` : `model ignored prior directive ${prior.id}; ${prior.reason}`,
|
|
569605
|
+
requiredNextAction: prior.requiredNextAction,
|
|
569526
569606
|
forbiddenActionFamilies: unique2([
|
|
569527
569607
|
...prior.forbiddenActionFamilies,
|
|
569528
569608
|
family
|
|
@@ -569626,7 +569706,7 @@ var init_focusSupervisor = __esm({
|
|
|
569626
569706
|
turn: input.turn,
|
|
569627
569707
|
state: "forced_replan",
|
|
569628
569708
|
reason: `same ${input.toolName} failure family repeated ${next.count} times: ${next.sample}`,
|
|
569629
|
-
requiredNextAction: input.toolName === "shell" ? "
|
|
569709
|
+
requiredNextAction: input.toolName === "shell" ? "edit_different_target" : "update_todos",
|
|
569630
569710
|
forbiddenActionFamilies: [actionFamily(input.toolName, input.args)]
|
|
569631
569711
|
});
|
|
569632
569712
|
}
|
|
@@ -572951,6 +573031,7 @@ ${parts.join("\n")}
|
|
|
572951
573031
|
memoryPrefix: options2?.memoryPrefix ?? "",
|
|
572952
573032
|
memoryPrefixHash: options2?.memoryPrefixHash ?? "",
|
|
572953
573033
|
stateDir: options2?.stateDir ?? "",
|
|
573034
|
+
surface: options2?.surface ?? "tui",
|
|
572954
573035
|
artifactMode: options2?.artifactMode ?? "user-task",
|
|
572955
573036
|
disablePersistentMemory: options2?.disablePersistentMemory ?? false,
|
|
572956
573037
|
disableCodebaseMap: options2?.disableCodebaseMap ?? false,
|
|
@@ -573302,16 +573383,21 @@ ${parts.join("\n")}
|
|
|
573302
573383
|
// -------------------------------------------------------------------------
|
|
573303
573384
|
/** Return the surface identifier configured for this runner (`options.surface`). */
|
|
573304
573385
|
_inferSurface() {
|
|
573305
|
-
|
|
573306
|
-
|
|
573307
|
-
|
|
573308
|
-
|
|
573309
|
-
|
|
573310
|
-
|
|
573311
|
-
|
|
573312
|
-
if (
|
|
573313
|
-
return "
|
|
573314
|
-
|
|
573386
|
+
return this.options.surface;
|
|
573387
|
+
}
|
|
573388
|
+
_isTelegramSurface() {
|
|
573389
|
+
return this.options.surface === "telegram-public" || this.options.surface === "telegram-admin";
|
|
573390
|
+
}
|
|
573391
|
+
stickyDynamicContextForActiveSurface() {
|
|
573392
|
+
const ctx3 = this._stickyDynamicContext.trim();
|
|
573393
|
+
if (!ctx3)
|
|
573394
|
+
return "";
|
|
573395
|
+
if (this._isTelegramSurface())
|
|
573396
|
+
return ctx3;
|
|
573397
|
+
const voiceSoul = this.extractDynamicMarkdownBlock(ctx3, "## Voice Soul Context", 6e3);
|
|
573398
|
+
if (!voiceSoul)
|
|
573399
|
+
return "";
|
|
573400
|
+
return /Telegram|telegram|Public Telegram|Admin Capability/i.test(voiceSoul) ? "" : voiceSoul;
|
|
573315
573401
|
}
|
|
573316
573402
|
/**
|
|
573317
573403
|
* Build structured context via the context engine.
|
|
@@ -587072,10 +587158,11 @@ ${postCompactRestore.join("\n")}`);
|
|
|
587072
587158
|
[Ephemeral skill-pack restore — current run only, do not persist]
|
|
587073
587159
|
${this._ephemeralSkillPackContext}
|
|
587074
587160
|
Use skill_extract for targeted skill unpacking; do not load full skills into the main context unless necessary.` : "";
|
|
587075
|
-
const
|
|
587161
|
+
const scopedStickyDynamicContext = this.stickyDynamicContextForActiveSurface();
|
|
587162
|
+
const stickyDynamicContextReminder = scopedStickyDynamicContext ? `
|
|
587076
587163
|
|
|
587077
587164
|
[Sticky dynamic context restore — surface/persona anchors]
|
|
587078
|
-
${
|
|
587165
|
+
${scopedStickyDynamicContext}` : "";
|
|
587079
587166
|
const compactionMsg = {
|
|
587080
587167
|
role: "system",
|
|
587081
587168
|
// WO-CE-03: XML tags for structural clarity on small/medium models
|
|
@@ -587092,7 +587179,7 @@ ${fullSummary}
|
|
|
587092
587179
|
this.persistCheckpoint(fullSummary);
|
|
587093
587180
|
let narrowedHead = [...head];
|
|
587094
587181
|
const EVIDENCE_RULE_COMPACT = `EVIDENCE RULE (PRIORITY 0): never claim something works or is true unless a tool result you saw this turn proves it. A command succeeding only means it ran — not that the intended effect happened; verify the end-state directly before claiming it. A negative, empty, or error result means failed or absent — report it, never explain it away with an untested theory. Never describe how you got a result (tool, command, or source) unless you actually used it. Do not assert relationships the output does not show. Say "I could not verify X" when it is unproven — that is the correct answer, not a guess.`;
|
|
587095
|
-
const telegramPersonaHead = /Telegram|Voice Soul Context|Public Telegram voice profile/.test(
|
|
587182
|
+
const telegramPersonaHead = this._isTelegramSurface() && /Telegram|Voice Soul Context|Public Telegram voice profile/.test(scopedStickyDynamicContext) ? `You are Omnius replying through Telegram. Your visible assistant text is sent to Telegram; keep it concise, scoped, and user-facing. Do not emit scratch notes, router decisions, internal status, or no_reply text. Use available tools when needed and call task_complete when the Telegram run is complete.
|
|
587096
587183
|
|
|
587097
587184
|
${EVIDENCE_RULE_COMPACT}
|
|
587098
587185
|
|
|
@@ -587247,7 +587334,12 @@ ${content.slice(0, 8e3)}
|
|
|
587247
587334
|
while (trimmedRecent.length > 1 && trimmedRecent[0]?.role === "tool") {
|
|
587248
587335
|
trimmedRecent = trimmedRecent.slice(1);
|
|
587249
587336
|
}
|
|
587250
|
-
result = [
|
|
587337
|
+
result = [
|
|
587338
|
+
...narrowedHead,
|
|
587339
|
+
compactionMsg,
|
|
587340
|
+
...stickyToKeep,
|
|
587341
|
+
...trimmedRecent
|
|
587342
|
+
];
|
|
587251
587343
|
}
|
|
587252
587344
|
if (trimmedRecent.length < filteredRecent.length) {
|
|
587253
587345
|
this.emit({
|
|
@@ -680834,6 +680926,7 @@ ${conversationStream}`
|
|
|
680834
680926
|
);
|
|
680835
680927
|
const requestTimeoutMs = config.timeoutMs ?? 3e5;
|
|
680836
680928
|
const runner = new AgenticRunner(backend, {
|
|
680929
|
+
surface: isAdminDM || isAdminGroup ? "telegram-admin" : "telegram-public",
|
|
680837
680930
|
// Admin DMs are operator-directed work sessions. A hard turn cap turns
|
|
680838
680931
|
// active tool progress into a false "completed" Telegram panel when the
|
|
680839
680932
|
// model has not reached task_complete yet. Public/group runs stay bounded.
|
|
@@ -719721,6 +719814,7 @@ Only tools allowed by this profile are visible and executable.`
|
|
|
719721
719814
|
].filter(Boolean).join("");
|
|
719722
719815
|
}
|
|
719723
719816
|
const runner = new AgenticRunner(backend, {
|
|
719817
|
+
surface: "tui",
|
|
719724
719818
|
maxTurns: realtimeEnabled ? Math.min(effectiveMaxTurns, 8) : effectiveMaxTurns,
|
|
719725
719819
|
maxTokens: realtimeEnabled ? 512 : 16384,
|
|
719726
719820
|
temperature: realtimeEnabled ? 0.6 : 0,
|
|
@@ -722715,6 +722809,7 @@ Respond to the scoped Telegram target when complete.`
|
|
|
722715
722809
|
}
|
|
722716
722810
|
const modelTier2 = getModelTier(currentConfig.model);
|
|
722717
722811
|
const runner = new AgenticRunner(backend, {
|
|
722812
|
+
surface: "background",
|
|
722718
722813
|
// 0 = unlimited; halt only on task_complete or abort. Background
|
|
722719
722814
|
// prompts may legitimately need many turns; an arbitrary cap stalls
|
|
722720
722815
|
// them mid-task.
|
|
@@ -728675,11 +728770,17 @@ var init_serve2 = __esm({
|
|
|
728675
728770
|
// packages/cli/src/commands/eval.ts
|
|
728676
728771
|
var eval_exports = {};
|
|
728677
728772
|
__export(eval_exports, {
|
|
728678
|
-
|
|
728773
|
+
createTempEvalRepo: () => createTempEvalRepo,
|
|
728774
|
+
evalCommand: () => evalCommand,
|
|
728775
|
+
expectedStatusesForEvalTask: () => expectedStatusesForEvalTask
|
|
728679
728776
|
});
|
|
728680
728777
|
import { tmpdir as tmpdir23 } from "node:os";
|
|
728681
728778
|
import { mkdirSync as mkdirSync106, writeFileSync as writeFileSync90 } from "node:fs";
|
|
728682
728779
|
import { join as join178 } from "node:path";
|
|
728780
|
+
/**
 * Choose which run statuses count as a pass for an eval task.
 *
 * Outside live mode the task's `expectedStatuses` are used unchanged. In live
 * mode an explicit `liveExpectedStatuses` list wins; without one, the regular
 * list is used with "needs_human_decision" removed.
 *
 * @param {{expectedStatuses: string[], liveExpectedStatuses?: string[]}} task - Eval task definition.
 * @param {boolean} live - Whether the eval runs in live mode.
 * @returns {string[]} Statuses that count as a pass.
 */
function expectedStatusesForEvalTask(task, live) {
  if (live) {
    const liveOverride = task.liveExpectedStatuses;
    return liveOverride == null
      ? task.expectedStatuses.filter((status) => status !== "needs_human_decision")
      : liveOverride;
  }
  return task.expectedStatuses;
}
|
|
728683
728784
|
async function evalCommand(opts, config) {
|
|
728684
728785
|
const suiteName = opts.suite ?? "basic";
|
|
728685
728786
|
const suite = SUITES[suiteName];
|
|
@@ -728693,6 +728794,10 @@ async function evalCommand(opts, config) {
|
|
|
728693
728794
|
printKeyValue("Suite", suiteName, 2);
|
|
728694
728795
|
printKeyValue("Tasks", String(suite.length), 2);
|
|
728695
728796
|
printKeyValue("Mode", modeLabel, 2);
|
|
728797
|
+
if (useLive) {
|
|
728798
|
+
printKeyValue("Live pass statuses", "success, partial_success", 2);
|
|
728799
|
+
printInfo("Live eval treats needs_human_decision as a failure for concrete coding tasks.");
|
|
728800
|
+
}
|
|
728696
728801
|
const evalRepoRoot = opts.repoPath ?? createTempEvalRepo();
|
|
728697
728802
|
let rawBackend;
|
|
728698
728803
|
if (useLive) {
|
|
@@ -728751,22 +728856,27 @@ async function evalCommand(opts, config) {
|
|
|
728751
728856
|
let result;
|
|
728752
728857
|
try {
|
|
728753
728858
|
const report2 = await loop.run(task.request, evalRepoRoot);
|
|
728754
|
-
const
|
|
728859
|
+
const expectedStatuses = expectedStatusesForEvalTask(task, useLive);
|
|
728860
|
+
const passed2 = expectedStatuses.includes(report2.status);
|
|
728755
728861
|
result = {
|
|
728756
728862
|
task,
|
|
728757
728863
|
status: report2.status,
|
|
728864
|
+
expectedStatuses,
|
|
728758
728865
|
passed: passed2,
|
|
728759
728866
|
durationMs: Date.now() - start2
|
|
728760
728867
|
};
|
|
728761
728868
|
if (passed2) {
|
|
728762
728869
|
spinner.succeed(`[${task.id}] PASS (${report2.status})`);
|
|
728763
728870
|
} else {
|
|
728764
|
-
spinner.fail(
|
|
728871
|
+
spinner.fail(
|
|
728872
|
+
`[${task.id}] FAIL (got: ${report2.status}; expected: ${expectedStatuses.join(", ")})`
|
|
728873
|
+
);
|
|
728765
728874
|
}
|
|
728766
728875
|
} catch (err) {
|
|
728767
728876
|
result = {
|
|
728768
728877
|
task,
|
|
728769
728878
|
status: "error",
|
|
728879
|
+
expectedStatuses: expectedStatusesForEvalTask(task, useLive),
|
|
728770
728880
|
passed: false,
|
|
728771
728881
|
durationMs: Date.now() - start2,
|
|
728772
728882
|
error: err instanceof Error ? err.message : String(err)
|
|
@@ -728792,7 +728902,7 @@ async function evalCommand(opts, config) {
|
|
|
728792
728902
|
const icon = r2.passed ? "PASS" : "FAIL";
|
|
728793
728903
|
printKeyValue(
|
|
728794
728904
|
`${r2.task.id} [${icon}]`,
|
|
728795
|
-
`${r2.status} (${formatDuration(r2.durationMs)})`,
|
|
728905
|
+
`${r2.status} (${formatDuration(r2.durationMs)}; expected ${r2.expectedStatuses.join(", ")})`,
|
|
728796
728906
|
2
|
|
728797
728907
|
);
|
|
728798
728908
|
if (r2.error) {
|
|
@@ -728810,13 +728920,79 @@ async function evalCommand(opts, config) {
|
|
|
728810
728920
|
/**
 * Create a throwaway repository for eval runs and seed it with fixtures.
 *
 * The directory lives under the OS temp dir and is named
 * `omnius-eval-<timestamp>-<pid>-<random>`. The pid and random suffix keep two
 * calls in the same millisecond (or two concurrent processes) from silently
 * sharing one directory, since a recursive mkdir does not fail on an existing path.
 *
 * Seeded content: a `package.json` whose `test` script runs the four test
 * files, five small modules under `src/`, and four assertion scripts under
 * `tests/`. Note that tests/auth.test.js and tests/users.test.js do not pass
 * against the seeded sources (null users throw; `paginateUsers` is not
 * exported) — presumably on purpose, as acceptance evidence for eval tasks.
 *
 * @returns {string} Absolute path of the created repository.
 */
function createTempEvalRepo() {
  const uniqueSuffix = `${Date.now()}-${process.pid}-${Math.random().toString(36).slice(2, 10)}`;
  const dir = join178(tmpdir23(), `omnius-eval-${uniqueSuffix}`);
  for (const subdir of ["", "src", "tests"]) {
    mkdirSync106(join178(dir, subdir), { recursive: true });
  }

  writeEvalFile(dir, "package.json", JSON.stringify({
    name: "eval-repo",
    version: "0.0.0",
    type: "module",
    scripts: { test: "node tests/auth.test.js && node tests/users.test.js && node tests/db.test.js && node tests/payment.test.js" }
  }, null, 2));

  // Fixture files as [relative path, lines]; written in this order.
  const fixtures = [
    ["src/auth.js", [
      "export function authenticateUser(user) {",
      " return user.active;",
      "}"
    ]],
    ["src/users.js", [
      "export function listUsers(users) {",
      " return users.slice();",
      "}"
    ]],
    ["src/db.js", [
      "export function getConnection() {",
      ' return Promise.resolve({ id: "primary", open: true });',
      "}"
    ]],
    ["src/payment.js", [
      "export function applyDiscount(amount, percent) {",
      " return amount - amount * (percent / 100);",
      "}",
      "",
      "export function addTax(amount, taxRate) {",
      " return amount + amount * taxRate;",
      "}"
    ]],
    ["src/api.js", [
      "export function health() {",
      " return { ok: true };",
      "}",
      "",
      "export function version() {",
      ' return "0.0.0";',
      "}"
    ]],
    ["tests/auth.test.js", [
      "import assert from 'node:assert/strict';",
      "import { authenticateUser } from '../src/auth.js';",
      "assert.equal(authenticateUser({ id: 'u1', active: true }), true);",
      "assert.equal(authenticateUser({ id: 'u2', active: false }), false);",
      "assert.equal(authenticateUser(null), false);",
      "assert.equal(authenticateUser(undefined), false);"
    ]],
    ["tests/users.test.js", [
      "import assert from 'node:assert/strict';",
      "import { paginateUsers } from '../src/users.js';",
      "const users = ['a', 'b', 'c', 'd', 'e'];",
      "assert.deepEqual(paginateUsers(users, 1, 2), { items: ['a', 'b'], page: 1, pageSize: 2, totalPages: 3, totalItems: 5 });",
      "assert.deepEqual(paginateUsers(users, 3, 2).items, ['e']);"
    ]],
    ["tests/db.test.js", [
      "import assert from 'node:assert/strict';",
      "import { getConnection } from '../src/db.js';",
      "const conn = await getConnection();",
      "assert.deepEqual(conn, { id: 'primary', open: true });"
    ]],
    ["tests/payment.test.js", [
      "import assert from 'node:assert/strict';",
      "import { applyDiscount, addTax } from '../src/payment.js';",
      "assert.equal(applyDiscount(100, 15), 85);",
      "assert.equal(addTax(100, 0.0825), 108.25);"
    ]]
  ];
  for (const [relativePath, lines] of fixtures) {
    writeEvalFile(dir, relativePath, lines.join("\n"));
  }
  return dir;
}
|
|
728993
|
+
/**
 * Write one fixture file into an eval repository.
 *
 * Trailing whitespace is trimmed from `content` and exactly one newline is
 * appended. The file's parent directory is created first, so a nested
 * `relativePath` does not fail with ENOENT when its folder is missing.
 *
 * @param {string} root - Repository root directory.
 * @param {string} relativePath - File path relative to `root`.
 * @param {string} content - File body.
 * @returns {void}
 */
function writeEvalFile(root, relativePath, content) {
  const target = join178(root, relativePath);
  // join(target, "..") normalizes to the parent directory of the target file.
  mkdirSync106(join178(target, ".."), { recursive: true });
  writeFileSync90(target, `${content.trimEnd()}\n`, "utf8");
}
|
|
728820
728996
|
var BASIC_SUITE, FULL_SUITE, SUITES;
|
|
728821
728997
|
var init_eval = __esm({
|
|
728822
728998
|
"packages/cli/src/commands/eval.ts"() {
|
|
@@ -728828,21 +729004,24 @@ var init_eval = __esm({
|
|
|
728828
729004
|
BASIC_SUITE = [
|
|
728829
729005
|
{
|
|
728830
729006
|
id: "eval-001",
|
|
728831
|
-
description: "
|
|
728832
|
-
request: "
|
|
728833
|
-
expectedStatuses: ["success", "partial_success", "needs_human_decision"]
|
|
729007
|
+
description: "Boundary bug fix",
|
|
729008
|
+
request: "In src/auth.js, fix authenticateUser so null or undefined users return false instead of throwing. Use tests/auth.test.js as the acceptance evidence.",
|
|
729009
|
+
expectedStatuses: ["success", "partial_success", "needs_human_decision"],
|
|
729010
|
+
liveExpectedStatuses: ["success", "partial_success"]
|
|
728834
729011
|
},
|
|
728835
729012
|
{
|
|
728836
729013
|
id: "eval-002",
|
|
728837
729014
|
description: "Feature addition request",
|
|
728838
|
-
request: "
|
|
728839
|
-
expectedStatuses: ["success", "partial_success", "needs_human_decision"]
|
|
729015
|
+
request: "In src/users.js, add paginateUsers(users, page, pageSize) with 1-based page indexing, stable slicing, and totalPages metadata. Use tests/users.test.js as the acceptance evidence.",
|
|
729016
|
+
expectedStatuses: ["success", "partial_success", "needs_human_decision"],
|
|
729017
|
+
liveExpectedStatuses: ["success", "partial_success"]
|
|
728840
729018
|
},
|
|
728841
729019
|
{
|
|
728842
729020
|
id: "eval-003",
|
|
728843
729021
|
description: "Refactor request",
|
|
728844
|
-
request: "
|
|
728845
|
-
expectedStatuses: ["success", "partial_success", "needs_human_decision"]
|
|
729022
|
+
request: "In src/db.js, refactor getConnection to async/await while preserving the exported API behavior covered by tests/db.test.js.",
|
|
729023
|
+
expectedStatuses: ["success", "partial_success", "needs_human_decision"],
|
|
729024
|
+
liveExpectedStatuses: ["success", "partial_success"]
|
|
728846
729025
|
}
|
|
728847
729026
|
];
|
|
728848
729027
|
FULL_SUITE = [
|
|
@@ -728850,14 +729029,16 @@ var init_eval = __esm({
|
|
|
728850
729029
|
{
|
|
728851
729030
|
id: "eval-004",
|
|
728852
729031
|
description: "Test generation request",
|
|
728853
|
-
request: "
|
|
728854
|
-
expectedStatuses: ["success", "partial_success", "needs_human_decision"]
|
|
729032
|
+
request: "Add missing unit coverage for src/payment.js discount and tax behavior in tests/payment.test.js without changing production semantics.",
|
|
729033
|
+
expectedStatuses: ["success", "partial_success", "needs_human_decision"],
|
|
729034
|
+
liveExpectedStatuses: ["success", "partial_success"]
|
|
728855
729035
|
},
|
|
728856
729036
|
{
|
|
728857
729037
|
id: "eval-005",
|
|
728858
729038
|
description: "Documentation request",
|
|
728859
|
-
request: "Add JSDoc comments to
|
|
728860
|
-
expectedStatuses: ["success", "partial_success", "needs_human_decision"]
|
|
729039
|
+
request: "Add concise JSDoc comments to the exported functions in src/api.js while preserving behavior.",
|
|
729040
|
+
expectedStatuses: ["success", "partial_success", "needs_human_decision"],
|
|
729041
|
+
liveExpectedStatuses: ["success", "partial_success"]
|
|
728861
729042
|
}
|
|
728862
729043
|
];
|
|
728863
729044
|
SUITES = {
|
package/npm-shrinkwrap.json
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "omnius",
|
|
3
|
-
"version": "1.0.
|
|
3
|
+
"version": "1.0.385",
|
|
4
4
|
"lockfileVersion": 3,
|
|
5
5
|
"requires": true,
|
|
6
6
|
"packages": {
|
|
7
7
|
"": {
|
|
8
8
|
"name": "omnius",
|
|
9
|
-
"version": "1.0.
|
|
9
|
+
"version": "1.0.385",
|
|
10
10
|
"bundleDependencies": [
|
|
11
11
|
"image-to-ascii"
|
|
12
12
|
],
|
package/package.json
CHANGED