omnius 1.0.169 → 1.0.170

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -615859,9 +615859,10 @@ function buildRealtimeSystemPrompt(opts) {
615859
615859
  `- Produce one natural spoken turn, normally ${maxReplyWords} words or fewer.`,
615860
615860
  "- Use one sentence when possible; two short sentences only when repair or confirmation needs it.",
615861
615861
  "- Lead with the answer. Do not preface with status, analysis, summaries, or implementation narration.",
615862
- "- No markdown, bullets, tables, headings, citations, code blocks, JSON, or labels like 'Assistant:'.",
615862
+ "- No markdown, bullets, tables, headings, citations, inline code, code blocks, JSON, or labels like 'Assistant:'.",
615863
615863
  "- Sound like a person on a live call: brief acknowledgment, direct answer, one focused follow-up only if needed.",
615864
615864
  "- If the ASR text is garbled or underspecified, ask a single compact repair question.",
615865
+ "- Do not invent app modes, method names, settings, or implementation details when the caller has not supplied them.",
615865
615866
  "- Do not mention ASR, TTS, prompts, realtime mode, hidden reasoning, tools, or policy unless the caller explicitly asks.",
615866
615867
  "- If a request needs work outside this text-only exchange, say the next handoff in one short sentence."
615867
615868
  ].join("\n"),
@@ -615952,7 +615953,7 @@ function wordParts(text) {
615952
615953
  }
615953
615954
  function finalizeRealtimeReply(text, opts = {}) {
615954
615955
  const maxWords = clampInt2(opts.maxReplyWords, DEFAULT_REALTIME_MAX_REPLY_WORDS, 8, 80);
615955
- let clean5 = stripHiddenThinking(String(text ?? "")).replace(/```[\s\S]*?```/g, "").split("\n").map((line) => line.replace(/^\s*(?:[-*]+|\d+[.)])\s+/, "").trim()).filter(Boolean).join(" ").replace(/^(?:assistant|omnius|agent)\s*:\s*/i, "").replace(/\s+/g, " ").trim();
615956
+ let clean5 = stripHiddenThinking(String(text ?? "")).replace(/```[\s\S]*?```/g, "").split("\n").map((line) => line.replace(/^\s*(?:[-*]+|\d+[.)])\s+/, "").trim()).filter(Boolean).join(" ").replace(/^(?:assistant|omnius|agent)\s*:\s*/i, "").replace(/`([^`]+)`/g, "$1").replace(/\s+/g, " ").trim();
615956
615957
  if (!clean5) return "I didn't catch that. Can you say it again?";
615957
615958
  const sentences = clean5.match(/[^.!?]+[.!?]+(?=\s|$)|[^.!?]+$/g) ?? [clean5];
615958
615959
  const selected = [];
@@ -657279,6 +657280,49 @@ function bodyString(body, keys) {
657279
657280
  }
657280
657281
  return "";
657281
657282
  }
657283
+ function realtimeFallbackCacheKey(ollamaUrl, missingModel) {
657284
+ return `${ollamaUrl}
657285
+ ${missingModel}`;
657286
+ }
657287
+ function isOllamaMissingModelError(body) {
657288
+ return /model ['\"]?[^'\"]+['\"]? not found/i.test(body);
657289
+ }
657290
+ async function resolveRealtimeOllamaFallbackModel(ollamaUrl, timeoutMs, missingModel) {
657291
+ try {
657292
+ const cacheKey = realtimeFallbackCacheKey(ollamaUrl, missingModel);
657293
+ const cached = realtimeOllamaFallbackCache.get(cacheKey);
657294
+ if (cached) return cached;
657295
+ const result = await ollamaRequest(ollamaUrl, "/api/tags", "GET", void 0, Math.min(timeoutMs, 1e4));
657296
+ if (result.status >= 400) return null;
657297
+ const parsed = JSON.parse(result.body);
657298
+ const names = (parsed.models ?? []).map((entry) => typeof entry.name === "string" ? entry.name : typeof entry.model === "string" ? entry.model : "").filter(Boolean);
657299
+ if (!names.length) return null;
657300
+ const remember = (name10) => {
657301
+ realtimeOllamaFallbackCache.set(cacheKey, name10);
657302
+ return name10;
657303
+ };
657304
+ const exactLatest = `${missingModel}:latest`;
657305
+ if (names.includes(exactLatest)) return remember(exactLatest);
657306
+ const preferred = [
657307
+ "qwen3.5-9b-r10:q4km",
657308
+ "open-agents-qwen35-9b-r10-q4km:latest",
657309
+ "open-agents-qwen35-9b-r10-parsed-q4km:latest",
657310
+ "open-agents-qwen35-9b-r9-q4km:latest",
657311
+ "qwen3:8b",
657312
+ "open-agents-qwen3-8b:latest",
657313
+ "omnius-qwen36-35b:latest",
657314
+ "open-agents-qwen36:latest",
657315
+ "qwen3.6:35b"
657316
+ ];
657317
+ for (const name10 of preferred) {
657318
+ if (names.includes(name10)) return remember(name10);
657319
+ }
657320
+ const fallback = names.find((name10) => /qwen/i.test(name10) && !/embed|vision/i.test(name10)) ?? names.find((name10) => !/embed|vision|moondream/i.test(name10)) ?? null;
657321
+ return fallback ? remember(fallback) : null;
657322
+ } catch {
657323
+ return null;
657324
+ }
657325
+ }
657282
657326
  function realtimeEndpointMessages(body) {
657283
657327
  const messages2 = [];
657284
657328
  const suppliedSoul = bodyString(body, ["soul_md", "soul", "soulMd"]);
@@ -657305,13 +657349,14 @@ ${suppliedContext}` });
657305
657349
  }
657306
657350
  async function completeRealtimeTextOnly(opts) {
657307
657351
  const cfg = loadConfig();
657308
- const model = bodyString(opts.body, ["model"]) || cfg.model;
657352
+ const requestedModel = bodyString(opts.body, ["model"]);
657353
+ const model = requestedModel || opts.defaultModel || cfg.model;
657309
657354
  const route = resolveModelEndpoint(model);
657310
657355
  const limitErr = route?.endpoint ? checkEndpointRateLimit(route.endpoint) : null;
657311
657356
  if (limitErr) throw new Error(limitErr);
657312
657357
  const targetUrl = route?.endpoint.url ?? opts.ollamaUrl;
657313
- const targetType = route?.endpoint.type ?? cfg.backendType ?? "ollama";
657314
- const originalModel = route?.originalId ?? model.replace(/^[a-z]+\//, "");
657358
+ const targetType = route?.endpoint.type ?? opts.defaultBackendType ?? cfg.backendType ?? "ollama";
657359
+ let originalModel = route?.originalId ?? model.replace(/^[a-z]+\//, "");
657315
657360
  const realtimeOpts = {
657316
657361
  ...realtimeOptionsFromBody(opts.body, process.cwd(), opts.sessionId),
657317
657362
  surface: "voice_adapter"
@@ -657333,13 +657378,24 @@ async function completeRealtimeTextOnly(opts) {
657333
657378
  }
657334
657379
  const maxTokens = typeof requestBody["max_tokens"] === "number" ? requestBody["max_tokens"] : 120;
657335
657380
  const temperature = typeof requestBody["temperature"] === "number" ? requestBody["temperature"] : 0.6;
657336
- const result = await ollamaRequest(targetUrl, "/api/chat", "POST", JSON.stringify({
657337
- model: originalModel,
657381
+ if (!requestedModel) {
657382
+ originalModel = realtimeOllamaFallbackCache.get(realtimeFallbackCacheKey(targetUrl, originalModel)) ?? originalModel;
657383
+ }
657384
+ const makeOllamaChatBody = (modelName) => JSON.stringify({
657385
+ model: modelName,
657338
657386
  messages: requestBody["messages"],
657339
657387
  stream: false,
657340
657388
  think: false,
657341
657389
  options: { temperature, num_predict: maxTokens }
657342
- }), timeoutMs, route?.endpoint);
657390
+ });
657391
+ let result = await ollamaRequest(targetUrl, "/api/chat", "POST", makeOllamaChatBody(originalModel), timeoutMs, route?.endpoint);
657392
+ if (result.status >= 400 && !requestedModel && isOllamaMissingModelError(result.body)) {
657393
+ const fallbackModel = await resolveRealtimeOllamaFallbackModel(targetUrl, timeoutMs, originalModel);
657394
+ if (fallbackModel && fallbackModel !== originalModel) {
657395
+ originalModel = fallbackModel;
657396
+ result = await ollamaRequest(targetUrl, "/api/chat", "POST", makeOllamaChatBody(originalModel), timeoutMs, route?.endpoint);
657397
+ }
657398
+ }
657343
657399
  if (result.status >= 400) throw new Error(`Backend HTTP ${result.status}: ${result.body.slice(0, 300)}`);
657344
657400
  const parsed = JSON.parse(result.body);
657345
657401
  const rawReply = String(parsed?.message?.content ?? "").trim();
@@ -657354,7 +657410,7 @@ async function completeRealtimeTextOnly(opts) {
657354
657410
  }
657355
657411
  };
657356
657412
  }
657357
- async function handleRealtimeText(req2, res, ollamaUrl) {
657413
+ async function handleRealtimeText(req2, res, ollamaUrl, defaults3 = {}) {
657358
657414
  const body = await parseJsonBody(req2);
657359
657415
  if (!body || typeof body !== "object") {
657360
657416
  jsonResponse(res, 400, { error: "invalid_request", message: "Expected a JSON object." });
@@ -657367,7 +657423,14 @@ async function handleRealtimeText(req2, res, ollamaUrl) {
657367
657423
  }
657368
657424
  try {
657369
657425
  const sessionId = typeof body["session_id"] === "string" ? body["session_id"] : void 0;
657370
- const result = await completeRealtimeTextOnly({ body, messages: messages2, ollamaUrl, sessionId });
657426
+ const result = await completeRealtimeTextOnly({
657427
+ body,
657428
+ messages: messages2,
657429
+ ollamaUrl,
657430
+ defaultModel: defaults3.model,
657431
+ defaultBackendType: defaults3.backendType,
657432
+ sessionId
657433
+ });
657371
657434
  const wantsPlain = String(req2.headers["accept"] ?? "").includes("text/plain") || body["format"] === "text";
657372
657435
  if (wantsPlain) {
657373
657436
  res.writeHead(200, { "Content-Type": "text/plain; charset=utf-8", "Cache-Control": "no-store" });
@@ -660331,7 +660394,7 @@ async function handlePostCommand(res, cmd) {
660331
660394
  });
660332
660395
  }
660333
660396
  }
660334
- async function handleRequest(req2, res, ollamaUrl, verbose) {
660397
+ async function handleRequest(req2, res, ollamaUrl, verbose, runtimeDefaults = {}) {
660335
660398
  try {
660336
660399
  const _liveCfg = loadConfig();
660337
660400
  if (_liveCfg.backendUrl) ollamaUrl = _liveCfg.backendUrl;
@@ -660606,7 +660669,7 @@ async function handleRequest(req2, res, ollamaUrl, verbose) {
660606
660669
  status = 401;
660607
660670
  return;
660608
660671
  }
660609
- await handleRealtimeText(req2, res, ollamaUrl);
660672
+ await handleRealtimeText(req2, res, ollamaUrl, runtimeDefaults);
660610
660673
  return;
660611
660674
  }
660612
660675
  if (pathname === "/v1/files" && method === "GET") {
@@ -662614,13 +662677,14 @@ ${historyLines}
662614
662677
  }));
662615
662678
  }
662616
662679
  } finally {
662617
- recordMetric(method, pathname, status);
662680
+ const finalStatus = res.headersSent ? res.statusCode : status;
662681
+ recordMetric(method, pathname, finalStatus);
662618
662682
  const latencyMs = Math.round(performance.now() - startMs);
662619
662683
  logRequest({
662620
662684
  requestId,
662621
662685
  method,
662622
662686
  path: pathname,
662623
- status,
662687
+ status: finalStatus,
662624
662688
  latencyMs,
662625
662689
  user: req2._authUser ?? "anonymous",
662626
662690
  scope: req2._authScope ?? "none"
@@ -662630,7 +662694,7 @@ ${historyLines}
662630
662694
  requestId,
662631
662695
  method,
662632
662696
  path: pathname,
662633
- status,
662697
+ status: finalStatus,
662634
662698
  user: req2._authUser ?? "anonymous",
662635
662699
  scope: req2._authScope ?? "none",
662636
662700
  latencyMs: Math.round(performance.now() - startMs),
@@ -663552,7 +663616,10 @@ function startApiServer(options2 = {}) {
663552
663616
  }
663553
663617
  } catch {
663554
663618
  }
663555
- handleRequest(req2, res, ollamaUrl, verbose).catch((err) => {
663619
+ handleRequest(req2, res, ollamaUrl, verbose, {
663620
+ model: options2.model ?? config.model,
663621
+ backendType: options2.backendType ?? config.backendType
663622
+ }).catch((err) => {
663556
663623
  metrics.totalErrors++;
663557
663624
  try {
663558
663625
  jsonResponse(res, 500, {
@@ -664374,7 +664441,9 @@ async function apiServeCommand(opts, config) {
664374
664441
  port: opts.port,
664375
664442
  // Let startApiServer() parse OMNIUS_HOST env if no explicit --port
664376
664443
  verbose: opts.verbose,
664377
- ollamaUrl: config.backendUrl
664444
+ ollamaUrl: config.backendUrl,
664445
+ model: config.model,
664446
+ backendType: config.backendType
664378
664447
  });
664379
664448
  await new Promise((resolve57) => {
664380
664449
  server2.on("close", resolve57);
@@ -664429,7 +664498,7 @@ function setTimerEnabled(name10, enabled2) {
664429
664498
  return false;
664430
664499
  }
664431
664500
  }
664432
- var require4, NEXUS_DIRECTORY_ORIGIN2, NEXUS_SPONSORS_URL2, endpointRegistry, modelRouteMap, endpointUsage, _lastEndpointDiagnostics, BACKEND_TIMEOUT_DEFAULT_MS, BACKEND_TIMEOUT_MAX_MS, MODEL_LIST_TIMEOUT_DEFAULT_MS, metrics, startedAt, runningProcesses, perKeyUsage, CRON_MARKER2;
664501
+ var require4, NEXUS_DIRECTORY_ORIGIN2, NEXUS_SPONSORS_URL2, endpointRegistry, modelRouteMap, endpointUsage, _lastEndpointDiagnostics, BACKEND_TIMEOUT_DEFAULT_MS, BACKEND_TIMEOUT_MAX_MS, MODEL_LIST_TIMEOUT_DEFAULT_MS, metrics, startedAt, realtimeOllamaFallbackCache, runningProcesses, perKeyUsage, CRON_MARKER2;
664433
664502
  var init_serve = __esm({
664434
664503
  "packages/cli/src/api/serve.ts"() {
664435
664504
  "use strict";
@@ -664477,6 +664546,7 @@ var init_serve = __esm({
664477
664546
  totalErrors: 0
664478
664547
  };
664479
664548
  startedAt = Date.now();
664549
+ realtimeOllamaFallbackCache = /* @__PURE__ */ new Map();
664480
664550
  runningProcesses = /* @__PURE__ */ new Map();
664481
664551
  perKeyUsage = /* @__PURE__ */ new Map();
664482
664552
  CRON_MARKER2 = "# OMNIUS-SCHEDULED:";
@@ -1,12 +1,12 @@
1
1
  {
2
2
  "name": "omnius",
3
- "version": "1.0.169",
3
+ "version": "1.0.170",
4
4
  "lockfileVersion": 3,
5
5
  "requires": true,
6
6
  "packages": {
7
7
  "": {
8
8
  "name": "omnius",
9
- "version": "1.0.169",
9
+ "version": "1.0.170",
10
10
  "bundleDependencies": [
11
11
  "image-to-ascii"
12
12
  ],
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "omnius",
3
- "version": "1.0.169",
3
+ "version": "1.0.170",
4
4
  "description": "AI coding agent powered by open-source models (Ollama/vLLM) — interactive TUI with agentic tool-calling loop",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",