omnius 1.0.384 → 1.0.386
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +277 -36
- package/npm-shrinkwrap.json +2 -2
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -8602,12 +8602,14 @@ __export(vision_exports, {
|
|
|
8602
8602
|
MOONDREAM3_PREVIEW_HF_MODEL: () => MOONDREAM3_PREVIEW_HF_MODEL,
|
|
8603
8603
|
VisionTool: () => VisionTool,
|
|
8604
8604
|
analyzeImageWithVision: () => analyzeImageWithVision,
|
|
8605
|
+
callOllamaVision: () => callOllamaVision,
|
|
8605
8606
|
formatVisionPointResult: () => formatVisionPointResult,
|
|
8606
8607
|
getVisionPointDiagnostics: () => getVisionPointDiagnostics,
|
|
8607
8608
|
locateImagePoints: () => locateImagePoints,
|
|
8608
8609
|
normalizeVisionModelName: () => normalizeVisionModelName,
|
|
8609
8610
|
resetMoondreamClient: () => resetMoondreamClient,
|
|
8610
8611
|
resolveHuggingFaceVisionModelCandidates: () => resolveHuggingFaceVisionModelCandidates,
|
|
8612
|
+
resolveInstalledOllamaVisionModelAlias: () => resolveInstalledOllamaVisionModelAlias,
|
|
8611
8613
|
resolveOllamaVisionModelCandidates: () => resolveOllamaVisionModelCandidates
|
|
8612
8614
|
});
|
|
8613
8615
|
import { mkdirSync as mkdirSync9, readFileSync as readFileSync12, existsSync as existsSync14, statSync as statSync7, unlinkSync as unlinkSync2, writeFileSync as writeFileSync10 } from "node:fs";
|
|
@@ -8940,7 +8942,8 @@ function resolveOllamaVisionModelCandidates(options2 = {}) {
|
|
|
8940
8942
|
ollamaVisionModelName(options2.preferredModel || ""),
|
|
8941
8943
|
process.env["OLLAMA_VISION_MODEL"] || "",
|
|
8942
8944
|
options2.activeModelHasVision && options2.activeModel ? options2.activeModel : "",
|
|
8943
|
-
DEFAULT_OLLAMA_VISION_MODEL
|
|
8945
|
+
DEFAULT_OLLAMA_VISION_MODEL,
|
|
8946
|
+
`${DEFAULT_OLLAMA_VISION_MODEL}:latest`
|
|
8944
8947
|
].map((entry) => entry.trim()).filter(Boolean);
|
|
8945
8948
|
return [...new Set(candidates)];
|
|
8946
8949
|
}
|
|
@@ -9191,6 +9194,26 @@ async function callOllamaVision(ollamaHost, model, prompt, imageBase64, timeoutM
|
|
|
9191
9194
|
if (!res.ok && shouldAutoPullOllamaVisionModel(model)) {
|
|
9192
9195
|
const errText = await res.text().catch(() => "");
|
|
9193
9196
|
if (res.status === 404 || /not found|does not exist/i.test(errText)) {
|
|
9197
|
+
const installedAlias = await resolveInstalledOllamaVisionModelAlias(ollamaHost, model, timeoutMs);
|
|
9198
|
+
if (installedAlias && installedAlias !== model) {
|
|
9199
|
+
res = await fetch(`${ollamaHost}/api/generate`, {
|
|
9200
|
+
method: "POST",
|
|
9201
|
+
headers: { "Content-Type": "application/json" },
|
|
9202
|
+
body: JSON.stringify({
|
|
9203
|
+
model: installedAlias,
|
|
9204
|
+
prompt,
|
|
9205
|
+
images: [imageBase64],
|
|
9206
|
+
stream: false,
|
|
9207
|
+
think: false,
|
|
9208
|
+
options: { temperature: 0 }
|
|
9209
|
+
}),
|
|
9210
|
+
signal: AbortSignal.timeout(timeoutMs)
|
|
9211
|
+
});
|
|
9212
|
+
if (res.ok) {
|
|
9213
|
+
const data2 = await res.json();
|
|
9214
|
+
return typeof data2.response === "string" && data2.response.trim() ? data2.response : null;
|
|
9215
|
+
}
|
|
9216
|
+
}
|
|
9194
9217
|
try {
|
|
9195
9218
|
ensureDiskSpaceForOllamaVisionModel(model);
|
|
9196
9219
|
pullOllamaVisionModel(model);
|
|
@@ -9217,6 +9240,33 @@ async function callOllamaVision(ollamaHost, model, prompt, imageBase64, timeoutM
|
|
|
9217
9240
|
const data = await res.json();
|
|
9218
9241
|
return typeof data.response === "string" && data.response.trim() ? data.response : null;
|
|
9219
9242
|
}
|
|
9243
|
+
/**
 * Find the name under which a requested Ollama model is actually installed.
 *
 * Fetches `${ollamaHost}/api/tags` and resolves `model` in this order:
 *   1. the exact requested name;
 *   2. `<model>:latest` (only when the request carries no tag);
 *   3. the first installed name that starts with `<model>:` (only when the
 *      request carries no tag).
 *
 * The lookup is best-effort: a non-string or blank model, a non-OK response,
 * a network/timeout error or an unparsable body all yield `null` instead of
 * throwing.
 *
 * @param {string} ollamaHost - Base URL of the Ollama server.
 * @param {string} model - Requested model name, with or without a tag.
 * @param {number} [timeoutMs=5000] - Request budget; clamped to 1000..5000 ms.
 * @returns {Promise<string|null>} Installed model name, or null when none matches.
 */
async function resolveInstalledOllamaVisionModelAlias(ollamaHost, model, timeoutMs = 5e3) {
  // A non-string model used to throw here, outside the try block.
  const requested = typeof model === "string" ? model.trim() : "";
  if (!requested) {
    return null;
  }
  // A NaN budget would make AbortSignal.timeout throw and silently disable the lookup.
  const rawBudget = Number(timeoutMs);
  const budgetMs = Number.isFinite(rawBudget) ? Math.min(Math.max(rawBudget, 1e3), 5e3) : 5e3;
  try {
    const res = await fetch(`${ollamaHost}/api/tags`, {
      signal: AbortSignal.timeout(budgetMs)
    });
    if (!res.ok) {
      return null;
    }
    const data = await res.json();
    // Tolerate null/malformed entries so one bad row cannot hide a valid alias.
    const names = (Array.isArray(data?.models) ? data.models : [])
      .map((entry) => (typeof entry?.name === "string" ? entry.name.trim() : ""))
      .filter(Boolean);
    if (names.includes(requested)) {
      return requested;
    }
    if (requested.includes(":")) {
      // An explicit tag was requested; never substitute a different tag.
      return null;
    }
    const latest = `${requested}:latest`;
    if (names.includes(latest)) {
      return latest;
    }
    return names.find((installed) => installed.startsWith(`${requested}:`)) ?? null;
  } catch {
    return null;
  }
}
|
|
9220
9270
|
function shouldAutoPullOllamaVisionModel(model) {
|
|
9221
9271
|
if (!envFlag2(process.env["OMNIUS_OLLAMA_VISION_AUTO_PULL"], true))
|
|
9222
9272
|
return false;
|
|
@@ -295276,6 +295326,50 @@ function getTodoSessionId() {
|
|
|
295276
295326
|
return envSession;
|
|
295277
295327
|
return "default";
|
|
295278
295328
|
}
|
|
295329
|
+
/**
 * Flatten a tree of todo records into a flat list linked by `parentId`.
 *
 * Each object item is shallow-copied without its `children` / `subtasks`
 * keys, followed (depth-first) by its flattened descendants. Descendants get
 * `parentId` set to the trimmed `id` of their nearest ancestor that has a
 * non-blank string id. Non-object items (strings, arrays, null, ...) are
 * passed through untouched so the caller can repair or reject them.
 *
 * @param {unknown[]} items - Todo items, possibly nested.
 * @param {string[]} [repairNotes] - Collector that receives a note describing
 *   each kind of repair performed (each distinct note is recorded once).
 * @param {string} [parentId] - Id inherited by items lacking their own parentId.
 * @returns {unknown[]} The flattened items in depth-first order.
 */
function flattenNestedTodoItems(items, repairNotes = [], parentId) {
  const flattened = [];
  if (!Array.isArray(items)) {
    return flattened;
  }
  const hasText = (value) => typeof value === "string" && value.trim().length > 0;
  // The same repair applies to many parents; record each note only once.
  const addNote = (note) => {
    if (!repairNotes.includes(note)) {
      repairNotes.push(note);
    }
  };
  for (const item of items) {
    if (!item || typeof item !== "object" || Array.isArray(item)) {
      flattened.push(item);
      continue;
    }
    const { children, subtasks, ...todo } = item;
    // Merge both aliases: previously `subtasks` were dropped whenever
    // `children` was an array, even an empty one.
    const nested = [
      ...(Array.isArray(children) ? children : []),
      ...(Array.isArray(subtasks) ? subtasks : [])
    ];
    // A blank parentId carries no link, so it must not block inheritance.
    if (parentId && !hasText(todo["parentId"])) {
      todo["parentId"] = parentId;
    }
    flattened.push(todo);
    if (nested.length === 0) {
      continue;
    }
    const id = hasText(todo["id"]) ? todo["id"].trim() : void 0;
    if (id) {
      addNote("flattened nested children/subtasks into parentId-linked todos");
      flattened.push(...flattenNestedTodoItems(nested, repairNotes, id));
    } else {
      // Without a stable id the children fall back to the nearest known ancestor.
      addNote("left nested children unattached because parent todo had no stable id");
      flattened.push(...flattenNestedTodoItems(nested, repairNotes, parentId));
    }
  }
  return flattened;
}
|
|
295358
|
+
/**
 * Require large todo lists to be structured as a parent/child tree.
 *
 * Lists shorter than `minLargeListSize` are always accepted. A larger list is
 * accepted only when at least one todo has a `parentId` that (after trimming)
 * matches the trimmed `id` of a different todo in the same list.
 *
 * @param {Array<{id?: unknown, parentId?: unknown}>} todos - Flattened todos.
 * @param {number} [minLargeListSize=20] - Size at which decomposition becomes mandatory.
 * @returns {string|null} An instructional error message, or null when the list is acceptable.
 */
function validateLargeTaskDecomposition(todos, minLargeListSize = 20) {
  if (!Array.isArray(todos) || todos.length < minLargeListSize) {
    return null;
  }
  const textOf = (value) => (typeof value === "string" ? value.trim() : "");
  // Ids are trimmed so they compare equal to the (trimmed) parent ids below.
  const knownIds = new Set(todos.map((todo) => textOf(todo?.id)).filter(Boolean));
  const hasLinkedChild = todos.some((todo) => {
    const parent = textOf(todo?.parentId);
    // A todo pointing at itself is not a decomposition.
    return parent !== "" && parent !== textOf(todo?.id) && knownIds.has(parent);
  });
  if (hasLinkedChild) {
    return null;
  }
  return [
    `Large todo lists (${minLargeListSize}+ items) must be decomposed into a nested tree with stable ids and parentId links.`,
    "Create parent objectives and child leaf tasks instead of a flat checklist.",
    'Canonical shape: todo_write({"todos":[{"id":"group-1","content":"Steps 01-08","status":"in_progress"},{"id":"step-01","parentId":"group-1","content":"Complete step 01 and verify evidence","status":"in_progress"}]})'
  ].join(" ");
}
|
|
295279
295373
|
function normalizeIncomingTodos(args) {
|
|
295280
295374
|
const repairNotes = [];
|
|
295281
295375
|
const record = args;
|
|
@@ -295385,6 +295479,16 @@ Mark tasks complete IMMEDIATELY after finishing — don't batch. Never mark comp
|
|
|
295385
295479
|
type: "array",
|
|
295386
295480
|
items: { type: "string" },
|
|
295387
295481
|
description: `REG-38: optional list of file paths this todo is expected to produce on disk. When you mark the todo 'completed', the supervisor inspects each declared path; missing/empty/stale files trigger a rejection with a specific gap critique. Use whenever a todo has concrete deliverables (e.g. ["src/lib/foo.ts", "tests/unit/foo.test.ts"]). Generic across stacks.`
|
|
295482
|
+
},
|
|
295483
|
+
children: {
|
|
295484
|
+
type: "array",
|
|
295485
|
+
description: "Optional nested child todos. The tool flattens children into parentId-linked todos before storing.",
|
|
295486
|
+
items: { type: "object" }
|
|
295487
|
+
},
|
|
295488
|
+
subtasks: {
|
|
295489
|
+
type: "array",
|
|
295490
|
+
description: "Alias for children. Use this for decomposed child work under a parent objective.",
|
|
295491
|
+
items: { type: "object" }
|
|
295388
295492
|
}
|
|
295389
295493
|
}
|
|
295390
295494
|
}
|
|
@@ -295409,8 +295513,9 @@ Mark tasks complete IMMEDIATELY after finishing — don't batch. Never mark comp
|
|
|
295409
295513
|
}
|
|
295410
295514
|
const incoming = [];
|
|
295411
295515
|
const repairNotes = [...normalized.repairNotes];
|
|
295412
|
-
|
|
295413
|
-
|
|
295516
|
+
const flattenedTodos = flattenNestedTodoItems(normalized.todos, repairNotes);
|
|
295517
|
+
for (let index = 0; index < flattenedTodos.length; index++) {
|
|
295518
|
+
const raw = flattenedTodos[index];
|
|
295414
295519
|
if (!raw || typeof raw !== "object") {
|
|
295415
295520
|
if (typeof raw === "string" && raw.trim()) {
|
|
295416
295521
|
incoming.push({
|
|
@@ -295462,6 +295567,15 @@ Mark tasks complete IMMEDIATELY after finishing — don't batch. Never mark comp
|
|
|
295462
295567
|
declaredArtifacts: Array.isArray(entry["declaredArtifacts"]) ? entry["declaredArtifacts"].filter((x) => typeof x === "string") : void 0
|
|
295463
295568
|
});
|
|
295464
295569
|
}
|
|
295570
|
+
const decompositionError = validateLargeTaskDecomposition(incoming);
|
|
295571
|
+
if (decompositionError) {
|
|
295572
|
+
return {
|
|
295573
|
+
success: false,
|
|
295574
|
+
output: "",
|
|
295575
|
+
error: decompositionError,
|
|
295576
|
+
durationMs: performance.now() - start2
|
|
295577
|
+
};
|
|
295578
|
+
}
|
|
295465
295579
|
const sessionId = typeof args["session_id"] === "string" && args["session_id"].trim() ? args["session_id"].trim() : typeof args["sessionId"] === "string" && args["sessionId"].trim() ? args["sessionId"].trim() : getTodoSessionId();
|
|
295466
295580
|
const oldTodos = readTodos(sessionId);
|
|
295467
295581
|
const canonicalize2 = (todos) => JSON.stringify(todos.map((t2) => ({
|
|
@@ -547133,6 +547247,23 @@ function summarizeProcessFailure(stdout, stderr) {
|
|
|
547133
547247
|
}
|
|
547134
547248
|
return parts.join("\n").slice(0, 2200);
|
|
547135
547249
|
}
|
|
547250
|
+
/**
 * Render an object-recognition result as human-readable text.
 *
 * - When `result.extra_labels` is a non-empty array, the output starts with a
 *   "CLIP candidate label scores" section, followed (if any match is flagged
 *   `recognized`) by a "Persistent visual memory matches above threshold" section.
 * - Otherwise the output lists the recognized matches, or states that nothing
 *   was recognized.
 *
 * Scores are shown as whole percentages; a missing or non-numeric score is
 * shown as "n/a" rather than "NaN%". Null entries in either list are ignored.
 *
 * @param {{matches?: Array<object>, extra_labels?: Array<object>, recognized_count?: number}} result
 * @returns {string} Formatted summary.
 */
function formatObjectRecognitionResult(result) {
  const source = result ?? {};
  const percent = (value) => {
    const scaled = Number(value) * 100;
    return Number.isFinite(scaled) ? `${scaled.toFixed(0)}%` : "n/a";
  };
  const matches = (Array.isArray(source.matches) ? source.matches : []).filter((match) => match?.recognized);
  const matchLines = matches.map(
    (match) => ` ${match.label}: ${percent(match.blended_score)} (image=${percent(match.image_similarity)}, text=${percent(match.text_similarity)})`
  );
  const extraLabels = (Array.isArray(source.extra_labels) ? source.extra_labels : []).filter(
    (entry) => entry !== null && entry !== undefined
  );
  if (extraLabels.length > 0) {
    const extraLines = extraLabels.map((entry) => ` ${entry.label}: ${percent(entry.score)}`);
    const sections = [`CLIP candidate label scores:\n${extraLines.join("\n")}`];
    if (matches.length > 0) {
      sections.push(`Persistent visual memory matches above threshold:\n${matchLines.join("\n")}`);
    }
    return sections.join("\n\n");
  }
  if (matches.length === 0) {
    return "No taught objects recognized in this image.";
  }
  // Fall back to the number of listed matches when the backend omits the count.
  const count = Number.isFinite(source.recognized_count) ? source.recognized_count : matches.length;
  return `Recognized ${count} object(s):\n${matchLines.join("\n")}`;
}
|
|
547136
547267
|
var VMEM_DIR, VENV_DIR2, VENV_PY, VENV_PIP2, VISUAL_MEMORY_ACTIONS, VisualMemoryTool;
|
|
547137
547268
|
var init_visual_memory = __esm({
|
|
547138
547269
|
"packages/execution/dist/tools/visual-memory.js"() {
|
|
@@ -547668,18 +547799,7 @@ print(json.dumps({
|
|
|
547668
547799
|
const payload = JSON.stringify(result);
|
|
547669
547800
|
return { success: true, output: payload, llmContent: payload, durationMs: performance.now() - start2 };
|
|
547670
547801
|
}
|
|
547671
|
-
|
|
547672
|
-
const lines = matches.map((m2) => ` ${m2.label}: ${(m2.blended_score * 100).toFixed(0)}% (image=${(m2.image_similarity * 100).toFixed(0)}%, text=${(m2.text_similarity * 100).toFixed(0)}%)`);
|
|
547673
|
-
let output = matches.length > 0 ? `Recognized ${result.recognized_count} object(s):
|
|
547674
|
-
${lines.join("\n")}` : "No taught objects recognized in this image.";
|
|
547675
|
-
if (result.extra_labels) {
|
|
547676
|
-
const extraLines = result.extra_labels.map((s2) => ` ${s2.label}: ${(s2.score * 100).toFixed(0)}%`);
|
|
547677
|
-
output += `
|
|
547678
|
-
|
|
547679
|
-
CLIP label scores:
|
|
547680
|
-
${extraLines.join("\n")}`;
|
|
547681
|
-
}
|
|
547682
|
-
return { success: true, output, durationMs: performance.now() - start2 };
|
|
547802
|
+
return { success: true, output: formatObjectRecognitionResult(result), durationMs: performance.now() - start2 };
|
|
547683
547803
|
}
|
|
547684
547804
|
// =========================================================================
|
|
547685
547805
|
// Memory Management
|
|
@@ -576297,6 +576417,26 @@ ${contentPreview}
|
|
|
576297
576417
|
}
|
|
576298
576418
|
return true;
|
|
576299
576419
|
}
|
|
576420
|
+
_shellCommandLikelyMutatesFilesystem(rawCmd) {
|
|
576421
|
+
if (!rawCmd || typeof rawCmd !== "string")
|
|
576422
|
+
return false;
|
|
576423
|
+
const cmd = rawCmd.trim();
|
|
576424
|
+
if (!cmd)
|
|
576425
|
+
return false;
|
|
576426
|
+
if (/(^|[^&\d])(>|>>)\s*\S/.test(cmd))
|
|
576427
|
+
return true;
|
|
576428
|
+
if (/\|\s*(?:tee|dd)\b/i.test(cmd))
|
|
576429
|
+
return true;
|
|
576430
|
+
if (/\b(?:sed|gsed)\s+(?:[^\n;&|]*\s)?(?:-i|--in-place)\b/i.test(cmd))
|
|
576431
|
+
return true;
|
|
576432
|
+
if (/\bperl\s+-[A-Za-z]*i[A-Za-z]*\b/.test(cmd))
|
|
576433
|
+
return true;
|
|
576434
|
+
if (/\b(?:cp|mv|rm|mkdir|rmdir|touch|truncate|ln|install)\b/i.test(cmd))
|
|
576435
|
+
return true;
|
|
576436
|
+
if (/\b(?:python3?|node|ruby|deno|bun)\b[\s\S]{0,240}\b(?:writeFile|writeFileSync|openSync|mkdirSync|renameSync|unlinkSync|rmSync)\b/i.test(cmd))
|
|
576437
|
+
return true;
|
|
576438
|
+
return false;
|
|
576439
|
+
}
|
|
576300
576440
|
/**
|
|
576301
576441
|
* REG-5: Render the recent-failures block so the agent SEES its own error
|
|
576302
576442
|
* output before deciding what to do next. Detects same-fingerprint failure
|
|
@@ -582076,6 +582216,7 @@ Respond with EXACTLY this structure before your next tool call:
|
|
|
582076
582216
|
}
|
|
582077
582217
|
}
|
|
582078
582218
|
}
|
|
582219
|
+
const shellFilesystemMutation = tc.name === "shell" && result.success === true && this._shellCommandLikelyMutatesFilesystem(String(tc.arguments?.["command"] ?? tc.arguments?.["cmd"] ?? ""));
|
|
582079
582220
|
const realFileMutation = this._isRealProjectMutation(tc.name, result);
|
|
582080
582221
|
const realMutationPaths = realFileMutation ? this._extractToolTargetPaths(tc.name, tc.arguments, result) : [];
|
|
582081
582222
|
if (realFileMutation && this._reg61PerpetualGateActive) {
|
|
@@ -582857,6 +582998,20 @@ Respond with EXACTLY this structure before your next tool call:
|
|
|
582857
582998
|
dedupHitCount.clear();
|
|
582858
582999
|
}
|
|
582859
583000
|
}
|
|
583001
|
+
if (shellFilesystemMutation && recentToolResults.size > 0) {
|
|
583002
|
+
for (const key of Array.from(recentToolResults.keys())) {
|
|
583003
|
+
if (key.startsWith("shell:") || key.startsWith("file_read:") || key.startsWith("list_directory:") || key.startsWith("grep_search:") || key.startsWith("find_files:")) {
|
|
583004
|
+
recentToolResults.delete(key);
|
|
583005
|
+
dedupHitCount.delete(key);
|
|
583006
|
+
}
|
|
583007
|
+
}
|
|
583008
|
+
this._readCoverage.clear();
|
|
583009
|
+
this.emit({
|
|
583010
|
+
type: "status",
|
|
583011
|
+
content: "Shell filesystem mutation invalidated cached read/shell evidence",
|
|
583012
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
583013
|
+
});
|
|
583014
|
+
}
|
|
582860
583015
|
if (isFileMutation && recentToolResults.size > 0) {
|
|
582861
583016
|
for (const key of Array.from(recentToolResults.keys())) {
|
|
582862
583017
|
if (key.startsWith("shell:"))
|
|
@@ -583254,7 +583409,7 @@ Evidence: ${evidencePreview}`.slice(0, 500);
|
|
|
583254
583409
|
success: result.success,
|
|
583255
583410
|
output: result.output ?? result.llmContent ?? "",
|
|
583256
583411
|
error: result.error ?? "",
|
|
583257
|
-
mutated: realFileMutation,
|
|
583412
|
+
mutated: realFileMutation || shellFilesystemMutation,
|
|
583258
583413
|
isReadLike
|
|
583259
583414
|
});
|
|
583260
583415
|
const afterDirective = this._focusSupervisor?.snapshot().directive ?? null;
|
|
@@ -728690,11 +728845,17 @@ var init_serve2 = __esm({
|
|
|
728690
728845
|
// packages/cli/src/commands/eval.ts
|
|
728691
728846
|
var eval_exports = {};
|
|
728692
728847
|
__export(eval_exports, {
|
|
728693
|
-
|
|
728848
|
+
createTempEvalRepo: () => createTempEvalRepo,
|
|
728849
|
+
evalCommand: () => evalCommand,
|
|
728850
|
+
expectedStatusesForEvalTask: () => expectedStatusesForEvalTask
|
|
728694
728851
|
});
|
|
728695
728852
|
import { tmpdir as tmpdir23 } from "node:os";
|
|
728696
728853
|
import { mkdirSync as mkdirSync106, writeFileSync as writeFileSync90 } from "node:fs";
|
|
728697
728854
|
import { join as join178 } from "node:path";
|
|
728855
|
+
/**
 * Resolve which final statuses count as a pass for an eval task.
 *
 * In non-live mode the task's `expectedStatuses` apply. In live mode the
 * task's `liveExpectedStatuses` apply when present; otherwise
 * `expectedStatuses` with "needs_human_decision" removed.
 *
 * Always returns a fresh array, so callers that store or modify the result
 * cannot alter the shared suite definition.
 *
 * @param {{expectedStatuses?: string[], liveExpectedStatuses?: string[]}} task - Eval task definition.
 * @param {boolean} live - Whether the eval runs against a live backend.
 * @returns {string[]} Statuses that count as passing.
 */
function expectedStatusesForEvalTask(task, live) {
  const baseline = Array.isArray(task?.expectedStatuses) ? task.expectedStatuses : [];
  if (!live) {
    return [...baseline];
  }
  const liveOverride = task?.liveExpectedStatuses;
  if (liveOverride !== null && liveOverride !== undefined) {
    return [...liveOverride];
  }
  return baseline.filter((status) => status !== "needs_human_decision");
}
|
|
728698
728859
|
async function evalCommand(opts, config) {
|
|
728699
728860
|
const suiteName = opts.suite ?? "basic";
|
|
728700
728861
|
const suite = SUITES[suiteName];
|
|
@@ -728708,6 +728869,10 @@ async function evalCommand(opts, config) {
|
|
|
728708
728869
|
printKeyValue("Suite", suiteName, 2);
|
|
728709
728870
|
printKeyValue("Tasks", String(suite.length), 2);
|
|
728710
728871
|
printKeyValue("Mode", modeLabel, 2);
|
|
728872
|
+
if (useLive) {
|
|
728873
|
+
printKeyValue("Live pass statuses", "success, partial_success", 2);
|
|
728874
|
+
printInfo("Live eval treats needs_human_decision as a failure for concrete coding tasks.");
|
|
728875
|
+
}
|
|
728711
728876
|
const evalRepoRoot = opts.repoPath ?? createTempEvalRepo();
|
|
728712
728877
|
let rawBackend;
|
|
728713
728878
|
if (useLive) {
|
|
@@ -728766,22 +728931,27 @@ async function evalCommand(opts, config) {
|
|
|
728766
728931
|
let result;
|
|
728767
728932
|
try {
|
|
728768
728933
|
const report2 = await loop.run(task.request, evalRepoRoot);
|
|
728769
|
-
const
|
|
728934
|
+
const expectedStatuses = expectedStatusesForEvalTask(task, useLive);
|
|
728935
|
+
const passed2 = expectedStatuses.includes(report2.status);
|
|
728770
728936
|
result = {
|
|
728771
728937
|
task,
|
|
728772
728938
|
status: report2.status,
|
|
728939
|
+
expectedStatuses,
|
|
728773
728940
|
passed: passed2,
|
|
728774
728941
|
durationMs: Date.now() - start2
|
|
728775
728942
|
};
|
|
728776
728943
|
if (passed2) {
|
|
728777
728944
|
spinner.succeed(`[${task.id}] PASS (${report2.status})`);
|
|
728778
728945
|
} else {
|
|
728779
|
-
spinner.fail(
|
|
728946
|
+
spinner.fail(
|
|
728947
|
+
`[${task.id}] FAIL (got: ${report2.status}; expected: ${expectedStatuses.join(", ")})`
|
|
728948
|
+
);
|
|
728780
728949
|
}
|
|
728781
728950
|
} catch (err) {
|
|
728782
728951
|
result = {
|
|
728783
728952
|
task,
|
|
728784
728953
|
status: "error",
|
|
728954
|
+
expectedStatuses: expectedStatusesForEvalTask(task, useLive),
|
|
728785
728955
|
passed: false,
|
|
728786
728956
|
durationMs: Date.now() - start2,
|
|
728787
728957
|
error: err instanceof Error ? err.message : String(err)
|
|
@@ -728807,7 +728977,7 @@ async function evalCommand(opts, config) {
|
|
|
728807
728977
|
const icon = r2.passed ? "PASS" : "FAIL";
|
|
728808
728978
|
printKeyValue(
|
|
728809
728979
|
`${r2.task.id} [${icon}]`,
|
|
728810
|
-
`${r2.status} (${formatDuration(r2.durationMs)})`,
|
|
728980
|
+
`${r2.status} (${formatDuration(r2.durationMs)}; expected ${r2.expectedStatuses.join(", ")})`,
|
|
728811
728981
|
2
|
|
728812
728982
|
);
|
|
728813
728983
|
if (r2.error) {
|
|
@@ -728825,13 +728995,79 @@ async function evalCommand(opts, config) {
|
|
|
728825
728995
|
/**
 * Create a throwaway fixture repository for eval runs and return its path.
 *
 * The repository lives under the OS temp directory and contains a
 * package.json, five small modules under src/ and four assertion scripts
 * under tests/. The directory name combines the current time, the process id
 * and a random suffix so that two calls in the same millisecond never share
 * (and overwrite) one directory.
 *
 * @returns {string} Absolute path of the newly created repository.
 */
function createTempEvalRepo() {
  const uniqueSuffix = `${Date.now()}-${process.pid}-${Math.random().toString(36).slice(2, 10)}`;
  const dir = join178(tmpdir23(), `omnius-eval-${uniqueSuffix}`);
  for (const folder of [dir, join178(dir, "src"), join178(dir, "tests")]) {
    mkdirSync106(folder, { recursive: true });
  }
  writeEvalFile(dir, "package.json", JSON.stringify({
    name: "eval-repo",
    version: "0.0.0",
    type: "module",
    scripts: { test: "node tests/auth.test.js && node tests/users.test.js && node tests/db.test.js && node tests/payment.test.js" }
  }, null, 2));
  // Fixture sources, keyed by repo-relative path; each value is the file's lines.
  const fixtureFiles = {
    "src/auth.js": [
      "export function authenticateUser(user) {",
      " return user.active;",
      "}"
    ],
    "src/users.js": [
      "export function listUsers(users) {",
      " return users.slice();",
      "}"
    ],
    "src/db.js": [
      "export function getConnection() {",
      ' return Promise.resolve({ id: "primary", open: true });',
      "}"
    ],
    "src/payment.js": [
      "export function applyDiscount(amount, percent) {",
      " return amount - amount * (percent / 100);",
      "}",
      "",
      "export function addTax(amount, taxRate) {",
      " return amount + amount * taxRate;",
      "}"
    ],
    "src/api.js": [
      "export function health() {",
      " return { ok: true };",
      "}",
      "",
      "export function version() {",
      ' return "0.0.0";',
      "}"
    ],
    "tests/auth.test.js": [
      "import assert from 'node:assert/strict';",
      "import { authenticateUser } from '../src/auth.js';",
      "assert.equal(authenticateUser({ id: 'u1', active: true }), true);",
      "assert.equal(authenticateUser({ id: 'u2', active: false }), false);",
      "assert.equal(authenticateUser(null), false);",
      "assert.equal(authenticateUser(undefined), false);"
    ],
    "tests/users.test.js": [
      "import assert from 'node:assert/strict';",
      "import { paginateUsers } from '../src/users.js';",
      "const users = ['a', 'b', 'c', 'd', 'e'];",
      "assert.deepEqual(paginateUsers(users, 1, 2), { items: ['a', 'b'], page: 1, pageSize: 2, totalPages: 3, totalItems: 5 });",
      "assert.deepEqual(paginateUsers(users, 3, 2).items, ['e']);"
    ],
    "tests/db.test.js": [
      "import assert from 'node:assert/strict';",
      "import { getConnection } from '../src/db.js';",
      "const conn = await getConnection();",
      "assert.deepEqual(conn, { id: 'primary', open: true });"
    ],
    "tests/payment.test.js": [
      "import assert from 'node:assert/strict';",
      "import { applyDiscount, addTax } from '../src/payment.js';",
      "assert.equal(applyDiscount(100, 15), 85);",
      "assert.equal(addTax(100, 0.0825), 108.25);"
    ]
  };
  for (const [relativePath, lines] of Object.entries(fixtureFiles)) {
    writeEvalFile(dir, relativePath, lines.join("\n"));
  }
  return dir;
}
|
|
729068
|
+
/**
 * Write one fixture file into an eval repository.
 *
 * Trailing whitespace is stripped from `content` and exactly one newline is
 * appended. The file's parent directory is created when missing, so nested
 * relative paths work without a prior mkdir.
 *
 * @param {string} root - Repository root directory.
 * @param {string} relativePath - File path relative to `root`.
 * @param {string} content - File contents.
 * @returns {void}
 */
function writeEvalFile(root, relativePath, content) {
  const target = join178(root, relativePath);
  // join(target, "..") normalizes to the parent directory of the target file.
  mkdirSync106(join178(target, ".."), { recursive: true });
  writeFileSync90(target, `${content.trimEnd()}\n`, "utf8");
}
|
|
728835
729071
|
var BASIC_SUITE, FULL_SUITE, SUITES;
|
|
728836
729072
|
var init_eval = __esm({
|
|
728837
729073
|
"packages/cli/src/commands/eval.ts"() {
|
|
@@ -728843,21 +729079,24 @@ var init_eval = __esm({
|
|
|
728843
729079
|
BASIC_SUITE = [
|
|
728844
729080
|
{
|
|
728845
729081
|
id: "eval-001",
|
|
728846
|
-
description: "
|
|
728847
|
-
request: "
|
|
728848
|
-
expectedStatuses: ["success", "partial_success", "needs_human_decision"]
|
|
729082
|
+
description: "Boundary bug fix",
|
|
729083
|
+
request: "In src/auth.js, fix authenticateUser so null or undefined users return false instead of throwing. Use tests/auth.test.js as the acceptance evidence.",
|
|
729084
|
+
expectedStatuses: ["success", "partial_success", "needs_human_decision"],
|
|
729085
|
+
liveExpectedStatuses: ["success", "partial_success"]
|
|
728849
729086
|
},
|
|
728850
729087
|
{
|
|
728851
729088
|
id: "eval-002",
|
|
728852
729089
|
description: "Feature addition request",
|
|
728853
|
-
request: "
|
|
728854
|
-
expectedStatuses: ["success", "partial_success", "needs_human_decision"]
|
|
729090
|
+
request: "In src/users.js, add paginateUsers(users, page, pageSize) with 1-based page indexing, stable slicing, and totalPages metadata. Use tests/users.test.js as the acceptance evidence.",
|
|
729091
|
+
expectedStatuses: ["success", "partial_success", "needs_human_decision"],
|
|
729092
|
+
liveExpectedStatuses: ["success", "partial_success"]
|
|
728855
729093
|
},
|
|
728856
729094
|
{
|
|
728857
729095
|
id: "eval-003",
|
|
728858
729096
|
description: "Refactor request",
|
|
728859
|
-
request: "
|
|
728860
|
-
expectedStatuses: ["success", "partial_success", "needs_human_decision"]
|
|
729097
|
+
request: "In src/db.js, refactor getConnection to async/await while preserving the exported API behavior covered by tests/db.test.js.",
|
|
729098
|
+
expectedStatuses: ["success", "partial_success", "needs_human_decision"],
|
|
729099
|
+
liveExpectedStatuses: ["success", "partial_success"]
|
|
728861
729100
|
}
|
|
728862
729101
|
];
|
|
728863
729102
|
FULL_SUITE = [
|
|
@@ -728865,14 +729104,16 @@ var init_eval = __esm({
|
|
|
728865
729104
|
{
|
|
728866
729105
|
id: "eval-004",
|
|
728867
729106
|
description: "Test generation request",
|
|
728868
|
-
request: "
|
|
728869
|
-
expectedStatuses: ["success", "partial_success", "needs_human_decision"]
|
|
729107
|
+
request: "Add missing unit coverage for src/payment.js discount and tax behavior in tests/payment.test.js without changing production semantics.",
|
|
729108
|
+
expectedStatuses: ["success", "partial_success", "needs_human_decision"],
|
|
729109
|
+
liveExpectedStatuses: ["success", "partial_success"]
|
|
728870
729110
|
},
|
|
728871
729111
|
{
|
|
728872
729112
|
id: "eval-005",
|
|
728873
729113
|
description: "Documentation request",
|
|
728874
|
-
request: "Add JSDoc comments to
|
|
728875
|
-
expectedStatuses: ["success", "partial_success", "needs_human_decision"]
|
|
729114
|
+
request: "Add concise JSDoc comments to the exported functions in src/api.js while preserving behavior.",
|
|
729115
|
+
expectedStatuses: ["success", "partial_success", "needs_human_decision"],
|
|
729116
|
+
liveExpectedStatuses: ["success", "partial_success"]
|
|
728876
729117
|
}
|
|
728877
729118
|
];
|
|
728878
729119
|
SUITES = {
|
package/npm-shrinkwrap.json
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "omnius",
|
|
3
|
-
"version": "1.0.
|
|
3
|
+
"version": "1.0.386",
|
|
4
4
|
"lockfileVersion": 3,
|
|
5
5
|
"requires": true,
|
|
6
6
|
"packages": {
|
|
7
7
|
"": {
|
|
8
8
|
"name": "omnius",
|
|
9
|
-
"version": "1.0.
|
|
9
|
+
"version": "1.0.386",
|
|
10
10
|
"bundleDependencies": [
|
|
11
11
|
"image-to-ascii"
|
|
12
12
|
],
|
package/package.json
CHANGED