omnius 1.0.168 → 1.0.170
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +253 -17
- package/docs/guides/realtime.md +23 -1
- package/docs/rest/endpoints/chat.md +4 -0
- package/npm-shrinkwrap.json +2 -2
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -615839,6 +615839,7 @@ function buildRealtimeSystemPrompt(opts) {
|
|
|
615839
615839
|
const voice = projectVoice(repoRoot);
|
|
615840
615840
|
const soulLimit = opts.maxSoulChars ?? DEFAULT_SOUL_CHARS;
|
|
615841
615841
|
const voiceLimit = opts.maxVoiceChars ?? DEFAULT_VOICE_CHARS;
|
|
615842
|
+
const maxReplyWords = clampInt2(opts.maxReplyWords, DEFAULT_REALTIME_MAX_REPLY_WORDS, 8, 80);
|
|
615842
615843
|
const sections = [
|
|
615843
615844
|
"[Omnius realtime conversation mode]",
|
|
615844
615845
|
[
|
|
@@ -615854,13 +615855,16 @@ function buildRealtimeSystemPrompt(opts) {
|
|
|
615854
615855
|
"- Listen for human cues in the provided words and conversation state; do not run local keyword classifiers."
|
|
615855
615856
|
].join("\n"),
|
|
615856
615857
|
[
|
|
615857
|
-
"
|
|
615858
|
-
|
|
615859
|
-
"-
|
|
615860
|
-
"-
|
|
615861
|
-
"-
|
|
615862
|
-
"-
|
|
615863
|
-
"- If the
|
|
615858
|
+
"Phone reply contract:",
|
|
615859
|
+
`- Produce one natural spoken turn, normally ${maxReplyWords} words or fewer.`,
|
|
615860
|
+
"- Use one sentence when possible; two short sentences only when repair or confirmation needs it.",
|
|
615861
|
+
"- Lead with the answer. Do not preface with status, analysis, summaries, or implementation narration.",
|
|
615862
|
+
"- No markdown, bullets, tables, headings, citations, inline code, code blocks, JSON, or labels like 'Assistant:'.",
|
|
615863
|
+
"- Sound like a person on a live call: brief acknowledgment, direct answer, one focused follow-up only if needed.",
|
|
615864
|
+
"- If the ASR text is garbled or underspecified, ask a single compact repair question.",
|
|
615865
|
+
"- Do not invent app modes, method names, settings, or implementation details when the caller has not supplied them.",
|
|
615866
|
+
"- Do not mention ASR, TTS, prompts, realtime mode, hidden reasoning, tools, or policy unless the caller explicitly asks.",
|
|
615867
|
+
"- If a request needs work outside this text-only exchange, say the next handoff in one short sentence."
|
|
615864
615868
|
].join("\n"),
|
|
615865
615869
|
soul ? `Project SOUL.md (${basename25(soul.path)}), compacted for realtime:
|
|
615866
615870
|
${blockText2(soul.content, soulLimit)}` : [
|
|
@@ -615873,6 +615877,7 @@ ${blockText2(soul.content, soulLimit)}` : [
|
|
|
615873
615877
|
${blockText2(voice.content, voiceLimit)}` : [
|
|
615874
615878
|
"Default realtime voice:",
|
|
615875
615879
|
"- conversational, brief, and proportional",
|
|
615880
|
+
"- phone-call natural: contractions, plain words, no written-document structure",
|
|
615876
615881
|
"- contractions are fine when natural",
|
|
615877
615882
|
"- no list formatting unless the user asks for a list"
|
|
615878
615883
|
].join("\n")
|
|
@@ -615897,6 +615902,12 @@ function realtimeOptionsFromBody(body, repoRoot, sessionId) {
|
|
|
615897
615902
|
DEFAULT_REALTIME_MAX_TOKENS,
|
|
615898
615903
|
32,
|
|
615899
615904
|
1024
|
|
615905
|
+
),
|
|
615906
|
+
maxReplyWords: clampInt2(
|
|
615907
|
+
obj["max_reply_words"] ?? body["realtime_max_reply_words"],
|
|
615908
|
+
DEFAULT_REALTIME_MAX_REPLY_WORDS,
|
|
615909
|
+
8,
|
|
615910
|
+
80
|
|
615900
615911
|
)
|
|
615901
615912
|
};
|
|
615902
615913
|
}
|
|
@@ -615931,14 +615942,47 @@ function applyRealtimeToRequestBody(body, opts) {
|
|
|
615931
615942
|
delete out["realtime_options"];
|
|
615932
615943
|
delete out["realtime_max_history_messages"];
|
|
615933
615944
|
delete out["realtime_max_tokens"];
|
|
615945
|
+
delete out["realtime_max_reply_words"];
|
|
615934
615946
|
return out;
|
|
615935
615947
|
}
|
|
615936
|
-
|
|
615948
|
+
/**
 * Removes `<think>` reasoning markup from a model reply.
 *
 * Closed `<think>...</think>` spans are deleted first (case-insensitive,
 * non-greedy). Any `<think>` tag that is still present afterwards has no
 * closing tag, so everything from it to the end of the text is dropped too.
 *
 * @param {string} text - Raw model output.
 * @returns {string} The text without thinking spans, trimmed.
 */
function stripHiddenThinking(text) {
  const closedSpan = /<think>[\s\S]*?<\/think>/gi;
  const unterminatedSpan = /<think>[\s\S]*$/gi;
  const withoutClosed = text.replace(closedSpan, "");
  const withoutUnterminated = withoutClosed.replace(unterminatedSpan, "");
  return withoutUnterminated.trim();
}
|
|
615951
|
+
/**
 * Splits text into whitespace-separated words.
 *
 * @param {string} text - Text to tokenize.
 * @returns {string[]} Non-empty tokens in order; empty for blank input.
 */
function wordParts(text) {
  const tokens = [];
  for (const token of text.trim().split(/\s+/)) {
    // Splitting an empty string yields a single "" entry; skip it.
    if (token) tokens.push(token);
  }
  return tokens;
}
|
|
615954
|
+
/**
 * Turns raw model output into a short, speakable reply.
 *
 * Steps, in order:
 *  1. Strip `<think>` spans and fenced code blocks.
 *  2. Drop list markers (`-`, `*`, `1.`, `1)`) at line starts and join lines.
 *  3. Drop a leading `Assistant:` / `Omnius:` / `Agent:` label and unwrap
 *     inline code spans.
 *  4. Keep at most two sentences; a second sentence is kept only when it fits
 *     the word budget.
 *  5. Hard-truncate to the word budget and make sure the reply ends with
 *     terminal punctuation.
 *
 * @param {unknown} text - Raw reply; null/undefined is treated as empty.
 * @param {{ maxReplyWords?: number }} [opts] - `maxReplyWords` is passed
 *   through `clampInt2` with default `DEFAULT_REALTIME_MAX_REPLY_WORDS` and
 *   bounds 8 and 80 (presumably a clamp; `clampInt2` is defined elsewhere).
 * @returns {string} The cleaned reply, or a fixed repair prompt when nothing
 *   speakable remains.
 */
function finalizeRealtimeReply(text, opts = {}) {
  const maxWords = clampInt2(opts.maxReplyWords, DEFAULT_REALTIME_MAX_REPLY_WORDS, 8, 80);
  let clean5 = stripHiddenThinking(String(text ?? ""))
    .replace(/```[\s\S]*?```/g, "")
    .split("\n")
    .map((line) => line.replace(/^\s*(?:[-*]+|\d+[.)])\s+/, "").trim())
    .filter(Boolean)
    .join(" ")
    .replace(/^(?:assistant|omnius|agent)\s*:\s*/i, "")
    .replace(/`([^`]+)`/g, "$1")
    .replace(/\s+/g, " ")
    .trim();
  if (!clean5) return "I didn't catch that. Can you say it again?";
  // A sentence ends at terminal punctuation that is followed by whitespace or
  // the end of the text. The lazy `.+?` lets a sentence contain interior dots
  // ("1.0.170", "example.com"); excluding punctuation from the sentence body
  // would silently drop everything before such a token.
  const sentences = clean5.match(/.+?[.!?]+(?=\s|$)|.+$/g) ?? [clean5];
  const selected = [];
  let words = 0;
  for (const raw of sentences) {
    const sentence = raw.trim();
    if (!sentence) continue;
    const count = wordParts(sentence).length;
    if (selected.length >= 2) break;
    // The first sentence is always kept (and truncated below if needed);
    // later sentences must fit the remaining budget.
    if (selected.length > 0 && words + count > maxWords) break;
    selected.push(sentence);
    words += count;
    if (words >= maxWords) break;
  }
  clean5 = (selected.join(" ") || clean5).trim();
  const parts = wordParts(clean5);
  if (parts.length > maxWords) {
    // Drop a separator left dangling by the cut so the appended period does
    // not produce ",." or ":.".
    clean5 = parts.slice(0, maxWords).join(" ").replace(/[,;:]+$/, "");
  }
  if (clean5 && !/[.!?]$/.test(clean5)) clean5 += ".";
  return clean5;
}
|
|
615979
|
+
var DEFAULT_REALTIME_HISTORY_MESSAGES, DEFAULT_REALTIME_MAX_TOKENS, DEFAULT_REALTIME_MAX_REPLY_WORDS, DEFAULT_SOUL_CHARS, DEFAULT_VOICE_CHARS;
|
|
615937
615980
|
var init_realtime = __esm({
|
|
615938
615981
|
"packages/cli/src/realtime.ts"() {
|
|
615939
615982
|
"use strict";
|
|
615940
|
-
DEFAULT_REALTIME_HISTORY_MESSAGES =
|
|
615941
|
-
DEFAULT_REALTIME_MAX_TOKENS =
|
|
615983
|
+
DEFAULT_REALTIME_HISTORY_MESSAGES = 8;
|
|
615984
|
+
DEFAULT_REALTIME_MAX_TOKENS = 120;
|
|
615985
|
+
DEFAULT_REALTIME_MAX_REPLY_WORDS = 36;
|
|
615942
615986
|
DEFAULT_SOUL_CHARS = 1400;
|
|
615943
615987
|
DEFAULT_VOICE_CHARS = 700;
|
|
615944
615988
|
}
|
|
@@ -657229,6 +657273,183 @@ function ollamaFormatFromOpenAIResponseFormat(value2) {
|
|
|
657229
657273
|
if (record["type"] === "object" || record["properties"] !== void 0) return record;
|
|
657230
657274
|
return void 0;
|
|
657231
657275
|
}
|
|
657276
|
+
/**
 * Returns the first non-blank string found under any of the given keys.
 *
 * @param {object} body - Request body to read from.
 * @param {Iterable<string>} keys - Candidate property names, in priority order.
 * @returns {string} The trimmed value, or "" when no key holds a non-blank string.
 */
function bodyString(body, keys) {
  for (const key of keys) {
    const candidate = body[key];
    if (typeof candidate !== "string") continue;
    const trimmed = candidate.trim();
    if (trimmed) return trimmed;
  }
  return "";
}
|
|
657283
|
+
/**
 * Builds the fallback-cache key for a backend URL and a missing model name.
 *
 * The two parts are joined with a newline.
 *
 * @param {string} ollamaUrl - Backend base URL.
 * @param {string} missingModel - Model name the backend reported as missing.
 * @returns {string} The composite cache key.
 */
function realtimeFallbackCacheKey(ollamaUrl, missingModel) {
  return `${ollamaUrl}\n${missingModel}`;
}
|
|
657287
|
+
/**
 * Reports whether a backend error body says a model was not found.
 *
 * Matches `model <name> not found`, case-insensitively, with the name
 * optionally wrapped in single or double quotes.
 *
 * @param {string} body - Error response body text.
 * @returns {boolean} True when the body matches the missing-model pattern.
 */
function isOllamaMissingModelError(body) {
  const missingModelPattern = /model ['\"]?[^'\"]+['\"]? not found/i;
  return missingModelPattern.test(body);
}
|
|
657290
|
+
/**
 * Picks an installed model to use in place of one the backend reported missing.
 *
 * Selection order:
 *  1. a previously cached choice for this (url, model) pair;
 *  2. `<missingModel>:latest`, when installed;
 *  3. the first installed entry from a fixed preference list;
 *  4. any installed name containing "qwen" that is not an embed/vision model;
 *  5. any installed name that is not an embed/vision/moondream model.
 *
 * A successful pick is stored in `realtimeOllamaFallbackCache`. The lookup is
 * best effort: any failure (HTTP status >= 400, unparsable body, thrown
 * request error) yields null.
 *
 * @param {string} ollamaUrl - Backend base URL.
 * @param {number} timeoutMs - Caller timeout; the tag listing uses at most 10 s.
 * @param {string} missingModel - Model name that was not found.
 * @returns {Promise<string|null>} The substitute model name, or null.
 */
async function resolveRealtimeOllamaFallbackModel(ollamaUrl, timeoutMs, missingModel) {
  const preferenceOrder = [
    "qwen3.5-9b-r10:q4km",
    "open-agents-qwen35-9b-r10-q4km:latest",
    "open-agents-qwen35-9b-r10-parsed-q4km:latest",
    "open-agents-qwen35-9b-r9-q4km:latest",
    "qwen3:8b",
    "open-agents-qwen3-8b:latest",
    "omnius-qwen36-35b:latest",
    "open-agents-qwen36:latest",
    "qwen3.6:35b"
  ];
  try {
    const cacheKey = realtimeFallbackCacheKey(ollamaUrl, missingModel);
    const known = realtimeOllamaFallbackCache.get(cacheKey);
    if (known) return known;

    const tagsTimeoutMs = Math.min(timeoutMs, 1e4);
    const response = await ollamaRequest(ollamaUrl, "/api/tags", "GET", void 0, tagsTimeoutMs);
    if (response.status >= 400) return null;

    // Each listed entry may carry its identifier under `name` or `model`.
    const listing = JSON.parse(response.body);
    const installed = (listing.models ?? [])
      .map((entry) => {
        if (typeof entry.name === "string") return entry.name;
        if (typeof entry.model === "string") return entry.model;
        return "";
      })
      .filter(Boolean);

    const candidates = [`${missingModel}:latest`, ...preferenceOrder];
    const choice =
      candidates.find((name10) => installed.includes(name10)) ??
      installed.find((name10) => /qwen/i.test(name10) && !/embed|vision/i.test(name10)) ??
      installed.find((name10) => !/embed|vision|moondream/i.test(name10)) ??
      null;
    if (!choice) return null;

    realtimeOllamaFallbackCache.set(cacheKey, choice);
    return choice;
  } catch {
    return null;
  }
}
|
|
657326
|
+
/**
 * Builds the chat message list for a realtime text request.
 *
 * Output order:
 *  1. a system message carrying adapter-supplied SOUL.md text
 *     (`soul_md`, `soul`, or `soulMd`), when present;
 *  2. a system message carrying live call context
 *     (`context`, `call_context`, or `adapter_context`), when present;
 *  3. entries from `body.messages` that have non-blank string content and a
 *     role of system, user, or assistant (a non-string role counts as "user");
 *  4. the latest turn (`message`, `text`, `input`, `callerText`,
 *     `caller_text`, `recent_turn`, or `asr_text`) as a user message, unless
 *     the most recent user message already has exactly that content.
 *
 * @param {object} body - Parsed request body.
 * @returns {{ role: string, content: string }[]} Messages to send to the model.
 */
function realtimeEndpointMessages(body) {
  const acceptedRoles = ["system", "user", "assistant"];
  const transcript = [];

  const soulText = bodyString(body, ["soul_md", "soul", "soulMd"]);
  const contextText = bodyString(body, ["context", "call_context", "adapter_context"]);
  if (soulText) {
    transcript.push({ role: "system", content: `SOUL.md supplied by the voice adapter:\n${soulText}` });
  }
  if (contextText) {
    transcript.push({ role: "system", content: `Live call context supplied by the adapter:\n${contextText}` });
  }

  const history = Array.isArray(body["messages"]) ? body["messages"] : [];
  for (const entry of history) {
    if (!entry || typeof entry !== "object") continue;
    const rawRole = entry["role"];
    const rawContent = entry["content"];
    const role = typeof rawRole === "string" ? rawRole : "user";
    const content = typeof rawContent === "string" ? rawContent.trim() : "";
    if (!content || !acceptedRoles.includes(role)) continue;
    transcript.push({ role, content });
  }

  const latestTurn = bodyString(body, ["message", "text", "input", "callerText", "caller_text", "recent_turn", "asr_text"]);
  if (!latestTurn) return transcript;

  // Walk backwards to the most recent user message to avoid repeating it.
  let lastUser;
  for (let index = transcript.length - 1; index >= 0; index -= 1) {
    if (transcript[index].role === "user") {
      lastUser = transcript[index];
      break;
    }
  }
  if (!lastUser || lastUser.content !== latestTurn) {
    transcript.push({ role: "user", content: latestTurn });
  }
  return transcript;
}
|
|
657350
|
+
/**
 * Runs one non-streaming realtime completion and returns the speakable reply.
 *
 * The model is taken from `opts.body.model`, then `opts.defaultModel`, then
 * the loaded config. If the model resolves to a routed endpoint, that
 * endpoint's URL and type are used; otherwise the request goes to
 * `opts.ollamaUrl` with the default backend type ("ollama" when nothing else
 * is configured).
 *
 * - "vllm" / "openai" backends: POST `/v1/chat/completions`.
 * - any other backend: POST `/api/chat` with thinking disabled. When the
 *   caller did not name a model and the backend reports the model missing, one
 *   retry is made with a model chosen by `resolveRealtimeOllamaFallbackModel`.
 *
 * @param {object} opts
 * @param {object} opts.body - Parsed request body.
 * @param {Array<{role: string, content: string}>} opts.messages - Chat messages.
 * @param {string} opts.ollamaUrl - Backend URL used when no route applies.
 * @param {string} [opts.defaultModel] - Model used when the body names none.
 * @param {string} [opts.defaultBackendType] - Backend type used when no route applies.
 * @param {string} [opts.sessionId] - Passed through to `realtimeOptionsFromBody`.
 * @returns {Promise<{reply: string, rawReply: string, model: string, usage: object}>}
 * @throws {Error} When the endpoint rate limit is hit or the backend answers
 *   with HTTP status >= 400; JSON parse errors on the backend body propagate.
 */
async function completeRealtimeTextOnly(opts) {
  const cfg = loadConfig();
  const requestedModel = bodyString(opts.body, ["model"]);
  const model = requestedModel || opts.defaultModel || cfg.model;
  const route = resolveModelEndpoint(model);
  const limitErr = route?.endpoint ? checkEndpointRateLimit(route.endpoint) : null;
  if (limitErr) throw new Error(limitErr);
  // A route may exist without an endpoint (the rate-limit check above already
  // allows for that), so the endpoint itself is optional-chained here as well;
  // otherwise such a route throws a TypeError instead of using the defaults.
  const targetUrl = route?.endpoint?.url ?? opts.ollamaUrl;
  const targetType = route?.endpoint?.type ?? opts.defaultBackendType ?? cfg.backendType ?? "ollama";
  // Without a route, drop a leading lowercase "<prefix>/" from the model id.
  let originalModel = route?.originalId ?? model.replace(/^[a-z]+\//, "");
  const realtimeOpts = {
    ...realtimeOptionsFromBody(opts.body, process.cwd(), opts.sessionId),
    surface: "voice_adapter"
  };
  const requestBody = applyRealtimeToRequestBody({
    ...opts.body,
    model: originalModel,
    messages: opts.messages,
    realtime: true,
    stream: false
  }, realtimeOpts);
  const timeoutMs = getBackendTimeoutMs(typeof opts.body["timeout_s"] === "number" ? opts.body["timeout_s"] : void 0);
  if (targetType === "vllm" || targetType === "openai") {
    const result2 = await ollamaRequest(targetUrl, "/v1/chat/completions", "POST", JSON.stringify(requestBody), timeoutMs, route?.endpoint);
    if (result2.status >= 400) throw new Error(`Backend HTTP ${result2.status}: ${result2.body.slice(0, 300)}`);
    const parsed2 = JSON.parse(result2.body);
    const rawReply2 = String(parsed2?.choices?.[0]?.message?.content ?? "").trim();
    return { reply: finalizeRealtimeReply(rawReply2, realtimeOpts), rawReply: rawReply2, model: originalModel, usage: parsed2?.usage };
  }
  const maxTokens = typeof requestBody["max_tokens"] === "number" ? requestBody["max_tokens"] : 120;
  const temperature = typeof requestBody["temperature"] === "number" ? requestBody["temperature"] : 0.6;
  if (!requestedModel) {
    // Reuse a substitute model remembered from an earlier missing-model retry.
    originalModel = realtimeOllamaFallbackCache.get(realtimeFallbackCacheKey(targetUrl, originalModel)) ?? originalModel;
  }
  const makeOllamaChatBody = (modelName) => JSON.stringify({
    model: modelName,
    messages: requestBody["messages"],
    stream: false,
    think: false,
    options: { temperature, num_predict: maxTokens }
  });
  let result = await ollamaRequest(targetUrl, "/api/chat", "POST", makeOllamaChatBody(originalModel), timeoutMs, route?.endpoint);
  // Only substitute a model when the caller did not explicitly ask for one.
  if (result.status >= 400 && !requestedModel && isOllamaMissingModelError(result.body)) {
    const fallbackModel = await resolveRealtimeOllamaFallbackModel(targetUrl, timeoutMs, originalModel);
    if (fallbackModel && fallbackModel !== originalModel) {
      originalModel = fallbackModel;
      result = await ollamaRequest(targetUrl, "/api/chat", "POST", makeOllamaChatBody(originalModel), timeoutMs, route?.endpoint);
    }
  }
  if (result.status >= 400) throw new Error(`Backend HTTP ${result.status}: ${result.body.slice(0, 300)}`);
  const parsed = JSON.parse(result.body);
  const rawReply = String(parsed?.message?.content ?? "").trim();
  return {
    reply: finalizeRealtimeReply(rawReply, realtimeOpts),
    rawReply,
    model: originalModel,
    usage: {
      prompt_tokens: parsed?.prompt_eval_count ?? 0,
      completion_tokens: parsed?.eval_count ?? 0,
      total_tokens: (parsed?.prompt_eval_count ?? 0) + (parsed?.eval_count ?? 0)
    }
  };
}
|
|
657413
|
+
/**
 * HTTP handler for the text-only realtime endpoint.
 *
 * Responses:
 *  - 400 `invalid_request` when the parsed body is not a JSON object
 *    (null, a primitive, or an array);
 *  - 400 `missing_turn` when no non-blank user message can be derived;
 *  - 200 `text/plain` (reply plus newline) when the `Accept` header contains
 *    "text/plain" or the body has `format: "text"`;
 *  - 200 JSON with reply, raw reply, model and usage otherwise;
 *  - 502 `realtime_failed` when the completion throws.
 *
 * @param {object} req2 - Incoming HTTP request.
 * @param {object} res - HTTP response.
 * @param {string} ollamaUrl - Default backend URL.
 * @param {{ model?: string, backendType?: string }} [defaults3] - Runtime defaults.
 * @returns {Promise<void>}
 */
async function handleRealtimeText(req2, res, ollamaUrl, defaults3 = {}) {
  const body = await parseJsonBody(req2);
  // `typeof []` is "object", so arrays need an explicit check; without it a
  // JSON array would be reported as a missing turn rather than a bad body.
  if (!body || typeof body !== "object" || Array.isArray(body)) {
    jsonResponse(res, 400, { error: "invalid_request", message: "Expected a JSON object." });
    return;
  }
  const messages2 = realtimeEndpointMessages(body);
  if (!messages2.some((msg) => msg.role === "user" && msg.content.trim())) {
    jsonResponse(res, 400, { error: "missing_turn", message: "Provide message, text, recent_turn, asr_text, callerText, or messages[]." });
    return;
  }
  try {
    const sessionId = typeof body["session_id"] === "string" ? body["session_id"] : void 0;
    const result = await completeRealtimeTextOnly({
      body,
      messages: messages2,
      ollamaUrl,
      defaultModel: defaults3.model,
      defaultBackendType: defaults3.backendType,
      sessionId
    });
    const wantsPlain = String(req2.headers["accept"] ?? "").includes("text/plain") || body["format"] === "text";
    if (wantsPlain) {
      res.writeHead(200, { "Content-Type": "text/plain; charset=utf-8", "Cache-Control": "no-store" });
      res.end(result.reply + "\n");
      return;
    }
    jsonResponse(res, 200, {
      reply: result.reply,
      text: result.reply,
      raw_reply: result.rawReply,
      model: result.model,
      usage: result.usage,
      realtime: true,
      mode: "voice_adapter_text_only"
    });
  } catch (err) {
    // Any completion failure is reported to the client as a gateway error.
    jsonResponse(res, 502, { error: "realtime_failed", message: err instanceof Error ? err.message : String(err) });
  }
}
|
|
657232
657453
|
function backendAuthHeaders(endpoint) {
|
|
657233
657454
|
const key = endpoint?.authKey ?? loadConfig().apiKey;
|
|
657234
657455
|
if (key) return { Authorization: `Bearer ${key}` };
|
|
@@ -660173,7 +660394,7 @@ async function handlePostCommand(res, cmd) {
|
|
|
660173
660394
|
});
|
|
660174
660395
|
}
|
|
660175
660396
|
}
|
|
660176
|
-
async function handleRequest(req2, res, ollamaUrl, verbose) {
|
|
660397
|
+
async function handleRequest(req2, res, ollamaUrl, verbose, runtimeDefaults = {}) {
|
|
660177
660398
|
try {
|
|
660178
660399
|
const _liveCfg = loadConfig();
|
|
660179
660400
|
if (_liveCfg.backendUrl) ollamaUrl = _liveCfg.backendUrl;
|
|
@@ -660443,6 +660664,14 @@ async function handleRequest(req2, res, ollamaUrl, verbose) {
|
|
|
660443
660664
|
return;
|
|
660444
660665
|
}
|
|
660445
660666
|
}
|
|
660667
|
+
if ((pathname === "/realtime" || pathname === "/v1/realtime") && method === "POST") {
|
|
660668
|
+
if (!checkAuth(req2, res, "read")) {
|
|
660669
|
+
status = 401;
|
|
660670
|
+
return;
|
|
660671
|
+
}
|
|
660672
|
+
await handleRealtimeText(req2, res, ollamaUrl, runtimeDefaults);
|
|
660673
|
+
return;
|
|
660674
|
+
}
|
|
660446
660675
|
if (pathname === "/v1/files" && method === "GET") {
|
|
660447
660676
|
const dir = urlObj.searchParams.get("path") || process.cwd();
|
|
660448
660677
|
try {
|
|
@@ -662448,13 +662677,14 @@ ${historyLines}
|
|
|
662448
662677
|
}));
|
|
662449
662678
|
}
|
|
662450
662679
|
} finally {
|
|
662451
|
-
|
|
662680
|
+
const finalStatus = res.headersSent ? res.statusCode : status;
|
|
662681
|
+
recordMetric(method, pathname, finalStatus);
|
|
662452
662682
|
const latencyMs = Math.round(performance.now() - startMs);
|
|
662453
662683
|
logRequest({
|
|
662454
662684
|
requestId,
|
|
662455
662685
|
method,
|
|
662456
662686
|
path: pathname,
|
|
662457
|
-
status,
|
|
662687
|
+
status: finalStatus,
|
|
662458
662688
|
latencyMs,
|
|
662459
662689
|
user: req2._authUser ?? "anonymous",
|
|
662460
662690
|
scope: req2._authScope ?? "none"
|
|
@@ -662464,7 +662694,7 @@ ${historyLines}
|
|
|
662464
662694
|
requestId,
|
|
662465
662695
|
method,
|
|
662466
662696
|
path: pathname,
|
|
662467
|
-
status,
|
|
662697
|
+
status: finalStatus,
|
|
662468
662698
|
user: req2._authUser ?? "anonymous",
|
|
662469
662699
|
scope: req2._authScope ?? "none",
|
|
662470
662700
|
latencyMs: Math.round(performance.now() - startMs),
|
|
@@ -663386,7 +663616,10 @@ function startApiServer(options2 = {}) {
|
|
|
663386
663616
|
}
|
|
663387
663617
|
} catch {
|
|
663388
663618
|
}
|
|
663389
|
-
handleRequest(req2, res, ollamaUrl, verbose
|
|
663619
|
+
handleRequest(req2, res, ollamaUrl, verbose, {
|
|
663620
|
+
model: options2.model ?? config.model,
|
|
663621
|
+
backendType: options2.backendType ?? config.backendType
|
|
663622
|
+
}).catch((err) => {
|
|
663390
663623
|
metrics.totalErrors++;
|
|
663391
663624
|
try {
|
|
663392
663625
|
jsonResponse(res, 500, {
|
|
@@ -664208,7 +664441,9 @@ async function apiServeCommand(opts, config) {
|
|
|
664208
664441
|
port: opts.port,
|
|
664209
664442
|
// Let startApiServer() parse OMNIUS_HOST env if no explicit --port
|
|
664210
664443
|
verbose: opts.verbose,
|
|
664211
|
-
ollamaUrl: config.backendUrl
|
|
664444
|
+
ollamaUrl: config.backendUrl,
|
|
664445
|
+
model: config.model,
|
|
664446
|
+
backendType: config.backendType
|
|
664212
664447
|
});
|
|
664213
664448
|
await new Promise((resolve57) => {
|
|
664214
664449
|
server2.on("close", resolve57);
|
|
@@ -664263,7 +664498,7 @@ function setTimerEnabled(name10, enabled2) {
|
|
|
664263
664498
|
return false;
|
|
664264
664499
|
}
|
|
664265
664500
|
}
|
|
664266
|
-
var require4, NEXUS_DIRECTORY_ORIGIN2, NEXUS_SPONSORS_URL2, endpointRegistry, modelRouteMap, endpointUsage, _lastEndpointDiagnostics, BACKEND_TIMEOUT_DEFAULT_MS, BACKEND_TIMEOUT_MAX_MS, MODEL_LIST_TIMEOUT_DEFAULT_MS, metrics, startedAt, runningProcesses, perKeyUsage, CRON_MARKER2;
|
|
664501
|
+
var require4, NEXUS_DIRECTORY_ORIGIN2, NEXUS_SPONSORS_URL2, endpointRegistry, modelRouteMap, endpointUsage, _lastEndpointDiagnostics, BACKEND_TIMEOUT_DEFAULT_MS, BACKEND_TIMEOUT_MAX_MS, MODEL_LIST_TIMEOUT_DEFAULT_MS, metrics, startedAt, realtimeOllamaFallbackCache, runningProcesses, perKeyUsage, CRON_MARKER2;
|
|
664267
664502
|
var init_serve = __esm({
|
|
664268
664503
|
"packages/cli/src/api/serve.ts"() {
|
|
664269
664504
|
"use strict";
|
|
@@ -664311,6 +664546,7 @@ var init_serve = __esm({
|
|
|
664311
664546
|
totalErrors: 0
|
|
664312
664547
|
};
|
|
664313
664548
|
startedAt = Date.now();
|
|
664549
|
+
realtimeOllamaFallbackCache = /* @__PURE__ */ new Map();
|
|
664314
664550
|
runningProcesses = /* @__PURE__ */ new Map();
|
|
664315
664551
|
perKeyUsage = /* @__PURE__ */ new Map();
|
|
664316
664552
|
CRON_MARKER2 = "# OMNIUS-SCHEDULED:";
|
package/docs/guides/realtime.md
CHANGED
|
@@ -4,6 +4,8 @@ Realtime mode is for short, natural, back-and-forth spoken conversation behind A
|
|
|
4
4
|
|
|
5
5
|
It is not a long-form coding-task mode. It trims context, reduces scaffolding, and optimizes for speakable answers.
|
|
6
6
|
|
|
7
|
+
The text-only adapter endpoint is `/realtime` (alias: `/v1/realtime`). ASR and TTS are intentionally out of scope for that route; pass the latest transcript text in, receive a short reply text out.
|
|
8
|
+
|
|
7
9
|
## Enable In The TUI
|
|
8
10
|
|
|
9
11
|
```text
|
|
@@ -14,6 +16,25 @@ It is not a long-form coding-task mode. It trims context, reduces scaffolding, a
|
|
|
14
16
|
|
|
15
17
|
## Use Through REST
|
|
16
18
|
|
|
19
|
+
Voice-adapter text endpoint:
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
curl -s http://127.0.0.1:11435/realtime \
|
|
23
|
+
-H 'content-type: application/json' \
|
|
24
|
+
-H 'accept: text/plain' \
|
|
25
|
+
-d '{
|
|
26
|
+
"soul_md": "Be direct, warm, and practical.",
|
|
27
|
+
"recent_turn": "Can you say the short version?",
|
|
28
|
+
"realtime_options": {
|
|
29
|
+
"max_reply_words": 32,
|
|
30
|
+
"max_tokens": 120
|
|
31
|
+
},
|
|
32
|
+
"format": "text"
|
|
33
|
+
}'
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
Chat-compatible endpoint:
|
|
37
|
+
|
|
17
38
|
```bash
|
|
18
39
|
curl -s http://127.0.0.1:11435/v1/chat \
|
|
19
40
|
-H 'content-type: application/json' \
|
|
@@ -57,7 +78,8 @@ Realtime mode builds a compact prompt from:
|
|
|
57
78
|
|
|
58
79
|
Realtime responses should:
|
|
59
80
|
|
|
60
|
-
- default to one
|
|
81
|
+
- default to one natural phone-call turn, usually under 36 words
|
|
82
|
+
- lead with the answer, not analysis or status
|
|
61
83
|
- ask one focused repair question when ASR text is ambiguous
|
|
62
84
|
- treat the latest user utterance as the live turn
|
|
63
85
|
- avoid long markdown, tables, verbose plans, or implementation narration unless requested
|
|
@@ -6,6 +6,8 @@
|
|
|
6
6
|
| --- | --- | --- |
|
|
7
7
|
| `GET` | `/v1/models` | List aggregated models |
|
|
8
8
|
| `POST` | `/v1/chat/completions` | OpenAI-compatible chat completions |
|
|
9
|
+
| `POST` | `/realtime` | Text-only voice-adapter brain: transcript text in, short reply text out |
|
|
10
|
+
| `POST` | `/v1/realtime` | Auth-scoped alias for `/realtime` |
|
|
9
11
|
| `POST` | `/v1/embeddings` | Generate embeddings |
|
|
10
12
|
| `POST` | `/v1/chat` | Stateful Omnius chat with optional full agent tools |
|
|
11
13
|
| `POST` | `/v1/generate` | Ollama-compatible one-shot generation |
|
|
@@ -96,6 +98,8 @@ When `realtime: true`, Omnius:
|
|
|
96
98
|
|
|
97
99
|
Use this for live voice clients, not long coding tasks.
|
|
98
100
|
|
|
101
|
+
For ASR/TTS systems that only need the text brain, use `/realtime` or `/v1/realtime` and pass the latest turn in `message`, `text`, `input`, `recent_turn`, `asr_text`, `callerText`, or `caller_text`; prior turns can be supplied in `messages[]`. Optional `soul_md` supplies adapter-local SOUL.md content, and optional `context` supplies live call context. Set `Accept: text/plain` or `format: "text"` to receive only the reply string.
|
|
102
|
+
|
|
99
103
|
## Server-Side Agent Loop
|
|
100
104
|
|
|
101
105
|
`/v1/chat/completions` can run an internal tool loop when `agent_loop: true`. This lets clients collapse multiple model/tool round trips into one daemon request. Daemon tool calls execute inline; client-owned tool calls can still be yielded in OpenAI-compatible shape.
|
package/npm-shrinkwrap.json
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "omnius",
|
|
3
|
-
"version": "1.0.
|
|
3
|
+
"version": "1.0.170",
|
|
4
4
|
"lockfileVersion": 3,
|
|
5
5
|
"requires": true,
|
|
6
6
|
"packages": {
|
|
7
7
|
"": {
|
|
8
8
|
"name": "omnius",
|
|
9
|
-
"version": "1.0.
|
|
9
|
+
"version": "1.0.170",
|
|
10
10
|
"bundleDependencies": [
|
|
11
11
|
"image-to-ascii"
|
|
12
12
|
],
|
package/package.json
CHANGED