omnius 1.0.169 → 1.0.170
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +88 -18
- package/npm-shrinkwrap.json +2 -2
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -615859,9 +615859,10 @@ function buildRealtimeSystemPrompt(opts) {
|
|
|
615859
615859
|
`- Produce one natural spoken turn, normally ${maxReplyWords} words or fewer.`,
|
|
615860
615860
|
"- Use one sentence when possible; two short sentences only when repair or confirmation needs it.",
|
|
615861
615861
|
"- Lead with the answer. Do not preface with status, analysis, summaries, or implementation narration.",
|
|
615862
|
-
"- No markdown, bullets, tables, headings, citations, code blocks, JSON, or labels like 'Assistant:'.",
|
|
615862
|
+
"- No markdown, bullets, tables, headings, citations, inline code, code blocks, JSON, or labels like 'Assistant:'.",
|
|
615863
615863
|
"- Sound like a person on a live call: brief acknowledgment, direct answer, one focused follow-up only if needed.",
|
|
615864
615864
|
"- If the ASR text is garbled or underspecified, ask a single compact repair question.",
|
|
615865
|
+
"- Do not invent app modes, method names, settings, or implementation details when the caller has not supplied them.",
|
|
615865
615866
|
"- Do not mention ASR, TTS, prompts, realtime mode, hidden reasoning, tools, or policy unless the caller explicitly asks.",
|
|
615866
615867
|
"- If a request needs work outside this text-only exchange, say the next handoff in one short sentence."
|
|
615867
615868
|
].join("\n"),
|
|
@@ -615952,7 +615953,7 @@ function wordParts(text) {
|
|
|
615952
615953
|
}
|
|
615953
615954
|
function finalizeRealtimeReply(text, opts = {}) {
|
|
615954
615955
|
const maxWords = clampInt2(opts.maxReplyWords, DEFAULT_REALTIME_MAX_REPLY_WORDS, 8, 80);
|
|
615955
|
-
let clean5 = stripHiddenThinking(String(text ?? "")).replace(/```[\s\S]*?```/g, "").split("\n").map((line) => line.replace(/^\s*(?:[-*]+|\d+[.)])\s+/, "").trim()).filter(Boolean).join(" ").replace(/^(?:assistant|omnius|agent)\s*:\s*/i, "").replace(/\s+/g, " ").trim();
|
|
615956
|
+
let clean5 = stripHiddenThinking(String(text ?? "")).replace(/```[\s\S]*?```/g, "").split("\n").map((line) => line.replace(/^\s*(?:[-*]+|\d+[.)])\s+/, "").trim()).filter(Boolean).join(" ").replace(/^(?:assistant|omnius|agent)\s*:\s*/i, "").replace(/`([^`]+)`/g, "$1").replace(/\s+/g, " ").trim();
|
|
615956
615957
|
if (!clean5) return "I didn't catch that. Can you say it again?";
|
|
615957
615958
|
const sentences = clean5.match(/[^.!?]+[.!?]+(?=\s|$)|[^.!?]+$/g) ?? [clean5];
|
|
615958
615959
|
const selected = [];
|
|
@@ -657279,6 +657280,49 @@ function bodyString(body, keys) {
|
|
|
657279
657280
|
}
|
|
657280
657281
|
return "";
|
|
657281
657282
|
}
|
|
657283
|
+
function realtimeFallbackCacheKey(ollamaUrl, missingModel) {
|
|
657284
|
+
return `${ollamaUrl}
|
|
657285
|
+
${missingModel}`;
|
|
657286
|
+
}
|
|
657287
|
+
function isOllamaMissingModelError(body) {
|
|
657288
|
+
return /model ['\"]?[^'\"]+['\"]? not found/i.test(body);
|
|
657289
|
+
}
|
|
657290
|
+
async function resolveRealtimeOllamaFallbackModel(ollamaUrl, timeoutMs, missingModel) {
|
|
657291
|
+
try {
|
|
657292
|
+
const cacheKey = realtimeFallbackCacheKey(ollamaUrl, missingModel);
|
|
657293
|
+
const cached = realtimeOllamaFallbackCache.get(cacheKey);
|
|
657294
|
+
if (cached) return cached;
|
|
657295
|
+
const result = await ollamaRequest(ollamaUrl, "/api/tags", "GET", void 0, Math.min(timeoutMs, 1e4));
|
|
657296
|
+
if (result.status >= 400) return null;
|
|
657297
|
+
const parsed = JSON.parse(result.body);
|
|
657298
|
+
const names = (parsed.models ?? []).map((entry) => typeof entry.name === "string" ? entry.name : typeof entry.model === "string" ? entry.model : "").filter(Boolean);
|
|
657299
|
+
if (!names.length) return null;
|
|
657300
|
+
const remember = (name10) => {
|
|
657301
|
+
realtimeOllamaFallbackCache.set(cacheKey, name10);
|
|
657302
|
+
return name10;
|
|
657303
|
+
};
|
|
657304
|
+
const exactLatest = `${missingModel}:latest`;
|
|
657305
|
+
if (names.includes(exactLatest)) return remember(exactLatest);
|
|
657306
|
+
const preferred = [
|
|
657307
|
+
"qwen3.5-9b-r10:q4km",
|
|
657308
|
+
"open-agents-qwen35-9b-r10-q4km:latest",
|
|
657309
|
+
"open-agents-qwen35-9b-r10-parsed-q4km:latest",
|
|
657310
|
+
"open-agents-qwen35-9b-r9-q4km:latest",
|
|
657311
|
+
"qwen3:8b",
|
|
657312
|
+
"open-agents-qwen3-8b:latest",
|
|
657313
|
+
"omnius-qwen36-35b:latest",
|
|
657314
|
+
"open-agents-qwen36:latest",
|
|
657315
|
+
"qwen3.6:35b"
|
|
657316
|
+
];
|
|
657317
|
+
for (const name10 of preferred) {
|
|
657318
|
+
if (names.includes(name10)) return remember(name10);
|
|
657319
|
+
}
|
|
657320
|
+
const fallback = names.find((name10) => /qwen/i.test(name10) && !/embed|vision/i.test(name10)) ?? names.find((name10) => !/embed|vision|moondream/i.test(name10)) ?? null;
|
|
657321
|
+
return fallback ? remember(fallback) : null;
|
|
657322
|
+
} catch {
|
|
657323
|
+
return null;
|
|
657324
|
+
}
|
|
657325
|
+
}
|
|
657282
657326
|
function realtimeEndpointMessages(body) {
|
|
657283
657327
|
const messages2 = [];
|
|
657284
657328
|
const suppliedSoul = bodyString(body, ["soul_md", "soul", "soulMd"]);
|
|
@@ -657305,13 +657349,14 @@ ${suppliedContext}` });
|
|
|
657305
657349
|
}
|
|
657306
657350
|
async function completeRealtimeTextOnly(opts) {
|
|
657307
657351
|
const cfg = loadConfig();
|
|
657308
|
-
const
|
|
657352
|
+
const requestedModel = bodyString(opts.body, ["model"]);
|
|
657353
|
+
const model = requestedModel || opts.defaultModel || cfg.model;
|
|
657309
657354
|
const route = resolveModelEndpoint(model);
|
|
657310
657355
|
const limitErr = route?.endpoint ? checkEndpointRateLimit(route.endpoint) : null;
|
|
657311
657356
|
if (limitErr) throw new Error(limitErr);
|
|
657312
657357
|
const targetUrl = route?.endpoint.url ?? opts.ollamaUrl;
|
|
657313
|
-
const targetType = route?.endpoint.type ?? cfg.backendType ?? "ollama";
|
|
657314
|
-
|
|
657358
|
+
const targetType = route?.endpoint.type ?? opts.defaultBackendType ?? cfg.backendType ?? "ollama";
|
|
657359
|
+
let originalModel = route?.originalId ?? model.replace(/^[a-z]+\//, "");
|
|
657315
657360
|
const realtimeOpts = {
|
|
657316
657361
|
...realtimeOptionsFromBody(opts.body, process.cwd(), opts.sessionId),
|
|
657317
657362
|
surface: "voice_adapter"
|
|
@@ -657333,13 +657378,24 @@ async function completeRealtimeTextOnly(opts) {
|
|
|
657333
657378
|
}
|
|
657334
657379
|
const maxTokens = typeof requestBody["max_tokens"] === "number" ? requestBody["max_tokens"] : 120;
|
|
657335
657380
|
const temperature = typeof requestBody["temperature"] === "number" ? requestBody["temperature"] : 0.6;
|
|
657336
|
-
|
|
657337
|
-
|
|
657381
|
+
if (!requestedModel) {
|
|
657382
|
+
originalModel = realtimeOllamaFallbackCache.get(realtimeFallbackCacheKey(targetUrl, originalModel)) ?? originalModel;
|
|
657383
|
+
}
|
|
657384
|
+
const makeOllamaChatBody = (modelName) => JSON.stringify({
|
|
657385
|
+
model: modelName,
|
|
657338
657386
|
messages: requestBody["messages"],
|
|
657339
657387
|
stream: false,
|
|
657340
657388
|
think: false,
|
|
657341
657389
|
options: { temperature, num_predict: maxTokens }
|
|
657342
|
-
})
|
|
657390
|
+
});
|
|
657391
|
+
let result = await ollamaRequest(targetUrl, "/api/chat", "POST", makeOllamaChatBody(originalModel), timeoutMs, route?.endpoint);
|
|
657392
|
+
if (result.status >= 400 && !requestedModel && isOllamaMissingModelError(result.body)) {
|
|
657393
|
+
const fallbackModel = await resolveRealtimeOllamaFallbackModel(targetUrl, timeoutMs, originalModel);
|
|
657394
|
+
if (fallbackModel && fallbackModel !== originalModel) {
|
|
657395
|
+
originalModel = fallbackModel;
|
|
657396
|
+
result = await ollamaRequest(targetUrl, "/api/chat", "POST", makeOllamaChatBody(originalModel), timeoutMs, route?.endpoint);
|
|
657397
|
+
}
|
|
657398
|
+
}
|
|
657343
657399
|
if (result.status >= 400) throw new Error(`Backend HTTP ${result.status}: ${result.body.slice(0, 300)}`);
|
|
657344
657400
|
const parsed = JSON.parse(result.body);
|
|
657345
657401
|
const rawReply = String(parsed?.message?.content ?? "").trim();
|
|
@@ -657354,7 +657410,7 @@ async function completeRealtimeTextOnly(opts) {
|
|
|
657354
657410
|
}
|
|
657355
657411
|
};
|
|
657356
657412
|
}
|
|
657357
|
-
async function handleRealtimeText(req2, res, ollamaUrl) {
|
|
657413
|
+
async function handleRealtimeText(req2, res, ollamaUrl, defaults3 = {}) {
|
|
657358
657414
|
const body = await parseJsonBody(req2);
|
|
657359
657415
|
if (!body || typeof body !== "object") {
|
|
657360
657416
|
jsonResponse(res, 400, { error: "invalid_request", message: "Expected a JSON object." });
|
|
@@ -657367,7 +657423,14 @@ async function handleRealtimeText(req2, res, ollamaUrl) {
|
|
|
657367
657423
|
}
|
|
657368
657424
|
try {
|
|
657369
657425
|
const sessionId = typeof body["session_id"] === "string" ? body["session_id"] : void 0;
|
|
657370
|
-
const result = await completeRealtimeTextOnly({
|
|
657426
|
+
const result = await completeRealtimeTextOnly({
|
|
657427
|
+
body,
|
|
657428
|
+
messages: messages2,
|
|
657429
|
+
ollamaUrl,
|
|
657430
|
+
defaultModel: defaults3.model,
|
|
657431
|
+
defaultBackendType: defaults3.backendType,
|
|
657432
|
+
sessionId
|
|
657433
|
+
});
|
|
657371
657434
|
const wantsPlain = String(req2.headers["accept"] ?? "").includes("text/plain") || body["format"] === "text";
|
|
657372
657435
|
if (wantsPlain) {
|
|
657373
657436
|
res.writeHead(200, { "Content-Type": "text/plain; charset=utf-8", "Cache-Control": "no-store" });
|
|
@@ -660331,7 +660394,7 @@ async function handlePostCommand(res, cmd) {
|
|
|
660331
660394
|
});
|
|
660332
660395
|
}
|
|
660333
660396
|
}
|
|
660334
|
-
async function handleRequest(req2, res, ollamaUrl, verbose) {
|
|
660397
|
+
async function handleRequest(req2, res, ollamaUrl, verbose, runtimeDefaults = {}) {
|
|
660335
660398
|
try {
|
|
660336
660399
|
const _liveCfg = loadConfig();
|
|
660337
660400
|
if (_liveCfg.backendUrl) ollamaUrl = _liveCfg.backendUrl;
|
|
@@ -660606,7 +660669,7 @@ async function handleRequest(req2, res, ollamaUrl, verbose) {
|
|
|
660606
660669
|
status = 401;
|
|
660607
660670
|
return;
|
|
660608
660671
|
}
|
|
660609
|
-
await handleRealtimeText(req2, res, ollamaUrl);
|
|
660672
|
+
await handleRealtimeText(req2, res, ollamaUrl, runtimeDefaults);
|
|
660610
660673
|
return;
|
|
660611
660674
|
}
|
|
660612
660675
|
if (pathname === "/v1/files" && method === "GET") {
|
|
@@ -662614,13 +662677,14 @@ ${historyLines}
|
|
|
662614
662677
|
}));
|
|
662615
662678
|
}
|
|
662616
662679
|
} finally {
|
|
662617
|
-
|
|
662680
|
+
const finalStatus = res.headersSent ? res.statusCode : status;
|
|
662681
|
+
recordMetric(method, pathname, finalStatus);
|
|
662618
662682
|
const latencyMs = Math.round(performance.now() - startMs);
|
|
662619
662683
|
logRequest({
|
|
662620
662684
|
requestId,
|
|
662621
662685
|
method,
|
|
662622
662686
|
path: pathname,
|
|
662623
|
-
status,
|
|
662687
|
+
status: finalStatus,
|
|
662624
662688
|
latencyMs,
|
|
662625
662689
|
user: req2._authUser ?? "anonymous",
|
|
662626
662690
|
scope: req2._authScope ?? "none"
|
|
@@ -662630,7 +662694,7 @@ ${historyLines}
|
|
|
662630
662694
|
requestId,
|
|
662631
662695
|
method,
|
|
662632
662696
|
path: pathname,
|
|
662633
|
-
status,
|
|
662697
|
+
status: finalStatus,
|
|
662634
662698
|
user: req2._authUser ?? "anonymous",
|
|
662635
662699
|
scope: req2._authScope ?? "none",
|
|
662636
662700
|
latencyMs: Math.round(performance.now() - startMs),
|
|
@@ -663552,7 +663616,10 @@ function startApiServer(options2 = {}) {
|
|
|
663552
663616
|
}
|
|
663553
663617
|
} catch {
|
|
663554
663618
|
}
|
|
663555
|
-
handleRequest(req2, res, ollamaUrl, verbose).catch((err) => {
|
|
663619
|
+
handleRequest(req2, res, ollamaUrl, verbose, {
|
|
663620
|
+
model: options2.model ?? config.model,
|
|
663621
|
+
backendType: options2.backendType ?? config.backendType
|
|
663622
|
+
}).catch((err) => {
|
|
663556
663623
|
metrics.totalErrors++;
|
|
663557
663624
|
try {
|
|
663558
663625
|
jsonResponse(res, 500, {
|
|
@@ -664374,7 +664441,9 @@ async function apiServeCommand(opts, config) {
|
|
|
664374
664441
|
port: opts.port,
|
|
664375
664442
|
// Let startApiServer() parse OMNIUS_HOST env if no explicit --port
|
|
664376
664443
|
verbose: opts.verbose,
|
|
664377
|
-
ollamaUrl: config.backendUrl
|
|
664444
|
+
ollamaUrl: config.backendUrl,
|
|
664445
|
+
model: config.model,
|
|
664446
|
+
backendType: config.backendType
|
|
664378
664447
|
});
|
|
664379
664448
|
await new Promise((resolve57) => {
|
|
664380
664449
|
server2.on("close", resolve57);
|
|
@@ -664429,7 +664498,7 @@ function setTimerEnabled(name10, enabled2) {
|
|
|
664429
664498
|
return false;
|
|
664430
664499
|
}
|
|
664431
664500
|
}
|
|
664432
|
-
var require4, NEXUS_DIRECTORY_ORIGIN2, NEXUS_SPONSORS_URL2, endpointRegistry, modelRouteMap, endpointUsage, _lastEndpointDiagnostics, BACKEND_TIMEOUT_DEFAULT_MS, BACKEND_TIMEOUT_MAX_MS, MODEL_LIST_TIMEOUT_DEFAULT_MS, metrics, startedAt, runningProcesses, perKeyUsage, CRON_MARKER2;
|
|
664501
|
+
var require4, NEXUS_DIRECTORY_ORIGIN2, NEXUS_SPONSORS_URL2, endpointRegistry, modelRouteMap, endpointUsage, _lastEndpointDiagnostics, BACKEND_TIMEOUT_DEFAULT_MS, BACKEND_TIMEOUT_MAX_MS, MODEL_LIST_TIMEOUT_DEFAULT_MS, metrics, startedAt, realtimeOllamaFallbackCache, runningProcesses, perKeyUsage, CRON_MARKER2;
|
|
664433
664502
|
var init_serve = __esm({
|
|
664434
664503
|
"packages/cli/src/api/serve.ts"() {
|
|
664435
664504
|
"use strict";
|
|
@@ -664477,6 +664546,7 @@ var init_serve = __esm({
|
|
|
664477
664546
|
totalErrors: 0
|
|
664478
664547
|
};
|
|
664479
664548
|
startedAt = Date.now();
|
|
664549
|
+
realtimeOllamaFallbackCache = /* @__PURE__ */ new Map();
|
|
664480
664550
|
runningProcesses = /* @__PURE__ */ new Map();
|
|
664481
664551
|
perKeyUsage = /* @__PURE__ */ new Map();
|
|
664482
664552
|
CRON_MARKER2 = "# OMNIUS-SCHEDULED:";
|
package/npm-shrinkwrap.json
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "omnius",
|
|
3
|
-
"version": "1.0.169",
|
|
3
|
+
"version": "1.0.170",
|
|
4
4
|
"lockfileVersion": 3,
|
|
5
5
|
"requires": true,
|
|
6
6
|
"packages": {
|
|
7
7
|
"": {
|
|
8
8
|
"name": "omnius",
|
|
9
|
-
"version": "1.0.169",
|
|
9
|
+
"version": "1.0.170",
|
|
10
10
|
"bundleDependencies": [
|
|
11
11
|
"image-to-ascii"
|
|
12
12
|
],
|
package/package.json
CHANGED