omnius 1.0.168 → 1.0.169
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +176 -10
- package/docs/guides/realtime.md +23 -1
- package/docs/rest/endpoints/chat.md +4 -0
- package/npm-shrinkwrap.json +2 -2
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -615839,6 +615839,7 @@ function buildRealtimeSystemPrompt(opts) {
|
|
|
615839
615839
|
const voice = projectVoice(repoRoot);
|
|
615840
615840
|
const soulLimit = opts.maxSoulChars ?? DEFAULT_SOUL_CHARS;
|
|
615841
615841
|
const voiceLimit = opts.maxVoiceChars ?? DEFAULT_VOICE_CHARS;
|
|
615842
|
+
const maxReplyWords = clampInt2(opts.maxReplyWords, DEFAULT_REALTIME_MAX_REPLY_WORDS, 8, 80);
|
|
615842
615843
|
const sections = [
|
|
615843
615844
|
"[Omnius realtime conversation mode]",
|
|
615844
615845
|
[
|
|
@@ -615854,13 +615855,15 @@ function buildRealtimeSystemPrompt(opts) {
|
|
|
615854
615855
|
"- Listen for human cues in the provided words and conversation state; do not run local keyword classifiers."
|
|
615855
615856
|
].join("\n"),
|
|
615856
615857
|
[
|
|
615857
|
-
"
|
|
615858
|
-
|
|
615859
|
-
"-
|
|
615860
|
-
"-
|
|
615861
|
-
"-
|
|
615862
|
-
"-
|
|
615863
|
-
"- If the
|
|
615858
|
+
"Phone reply contract:",
|
|
615859
|
+
`- Produce one natural spoken turn, normally ${maxReplyWords} words or fewer.`,
|
|
615860
|
+
"- Use one sentence when possible; two short sentences only when repair or confirmation needs it.",
|
|
615861
|
+
"- Lead with the answer. Do not preface with status, analysis, summaries, or implementation narration.",
|
|
615862
|
+
"- No markdown, bullets, tables, headings, citations, code blocks, JSON, or labels like 'Assistant:'.",
|
|
615863
|
+
"- Sound like a person on a live call: brief acknowledgment, direct answer, one focused follow-up only if needed.",
|
|
615864
|
+
"- If the ASR text is garbled or underspecified, ask a single compact repair question.",
|
|
615865
|
+
"- Do not mention ASR, TTS, prompts, realtime mode, hidden reasoning, tools, or policy unless the caller explicitly asks.",
|
|
615866
|
+
"- If a request needs work outside this text-only exchange, say the next handoff in one short sentence."
|
|
615864
615867
|
].join("\n"),
|
|
615865
615868
|
soul ? `Project SOUL.md (${basename25(soul.path)}), compacted for realtime:
|
|
615866
615869
|
${blockText2(soul.content, soulLimit)}` : [
|
|
@@ -615873,6 +615876,7 @@ ${blockText2(soul.content, soulLimit)}` : [
|
|
|
615873
615876
|
${blockText2(voice.content, voiceLimit)}` : [
|
|
615874
615877
|
"Default realtime voice:",
|
|
615875
615878
|
"- conversational, brief, and proportional",
|
|
615879
|
+
"- phone-call natural: contractions, plain words, no written-document structure",
|
|
615876
615880
|
"- contractions are fine when natural",
|
|
615877
615881
|
"- no list formatting unless the user asks for a list"
|
|
615878
615882
|
].join("\n")
|
|
@@ -615897,6 +615901,12 @@ function realtimeOptionsFromBody(body, repoRoot, sessionId) {
|
|
|
615897
615901
|
DEFAULT_REALTIME_MAX_TOKENS,
|
|
615898
615902
|
32,
|
|
615899
615903
|
1024
|
|
615904
|
+
),
|
|
615905
|
+
maxReplyWords: clampInt2(
|
|
615906
|
+
obj["max_reply_words"] ?? body["realtime_max_reply_words"],
|
|
615907
|
+
DEFAULT_REALTIME_MAX_REPLY_WORDS,
|
|
615908
|
+
8,
|
|
615909
|
+
80
|
|
615900
615910
|
)
|
|
615901
615911
|
};
|
|
615902
615912
|
}
|
|
@@ -615931,14 +615941,47 @@ function applyRealtimeToRequestBody(body, opts) {
|
|
|
615931
615941
|
delete out["realtime_options"];
|
|
615932
615942
|
delete out["realtime_max_history_messages"];
|
|
615933
615943
|
delete out["realtime_max_tokens"];
|
|
615944
|
+
delete out["realtime_max_reply_words"];
|
|
615934
615945
|
return out;
|
|
615935
615946
|
}
|
|
615936
|
-
|
|
615947
|
+
function stripHiddenThinking(text) {
|
|
615948
|
+
return text.replace(/<think>[\s\S]*?<\/think>/gi, "").replace(/<think>[\s\S]*$/gi, "").trim();
|
|
615949
|
+
}
|
|
615950
|
+
function wordParts(text) {
|
|
615951
|
+
return text.trim().split(/\s+/).filter(Boolean);
|
|
615952
|
+
}
|
|
615953
|
+
function finalizeRealtimeReply(text, opts = {}) {
|
|
615954
|
+
const maxWords = clampInt2(opts.maxReplyWords, DEFAULT_REALTIME_MAX_REPLY_WORDS, 8, 80);
|
|
615955
|
+
let clean5 = stripHiddenThinking(String(text ?? "")).replace(/```[\s\S]*?```/g, "").split("\n").map((line) => line.replace(/^\s*(?:[-*]+|\d+[.)])\s+/, "").trim()).filter(Boolean).join(" ").replace(/^(?:assistant|omnius|agent)\s*:\s*/i, "").replace(/\s+/g, " ").trim();
|
|
615956
|
+
if (!clean5) return "I didn't catch that. Can you say it again?";
|
|
615957
|
+
const sentences = clean5.match(/[^.!?]+[.!?]+(?=\s|$)|[^.!?]+$/g) ?? [clean5];
|
|
615958
|
+
const selected = [];
|
|
615959
|
+
let words = 0;
|
|
615960
|
+
for (const raw of sentences) {
|
|
615961
|
+
const sentence = raw.trim();
|
|
615962
|
+
if (!sentence) continue;
|
|
615963
|
+
const count = wordParts(sentence).length;
|
|
615964
|
+
if (selected.length >= 2) break;
|
|
615965
|
+
if (selected.length > 0 && words + count > maxWords) break;
|
|
615966
|
+
selected.push(sentence);
|
|
615967
|
+
words += count;
|
|
615968
|
+
if (words >= maxWords) break;
|
|
615969
|
+
}
|
|
615970
|
+
clean5 = (selected.join(" ") || clean5).trim();
|
|
615971
|
+
const parts = wordParts(clean5);
|
|
615972
|
+
if (parts.length > maxWords) {
|
|
615973
|
+
clean5 = parts.slice(0, maxWords).join(" ");
|
|
615974
|
+
}
|
|
615975
|
+
if (clean5 && !/[.!?]$/.test(clean5)) clean5 += ".";
|
|
615976
|
+
return clean5;
|
|
615977
|
+
}
|
|
615978
|
+
var DEFAULT_REALTIME_HISTORY_MESSAGES, DEFAULT_REALTIME_MAX_TOKENS, DEFAULT_REALTIME_MAX_REPLY_WORDS, DEFAULT_SOUL_CHARS, DEFAULT_VOICE_CHARS;
|
|
615937
615979
|
var init_realtime = __esm({
|
|
615938
615980
|
"packages/cli/src/realtime.ts"() {
|
|
615939
615981
|
"use strict";
|
|
615940
|
-
DEFAULT_REALTIME_HISTORY_MESSAGES =
|
|
615941
|
-
DEFAULT_REALTIME_MAX_TOKENS =
|
|
615982
|
+
DEFAULT_REALTIME_HISTORY_MESSAGES = 8;
|
|
615983
|
+
DEFAULT_REALTIME_MAX_TOKENS = 120;
|
|
615984
|
+
DEFAULT_REALTIME_MAX_REPLY_WORDS = 36;
|
|
615942
615985
|
DEFAULT_SOUL_CHARS = 1400;
|
|
615943
615986
|
DEFAULT_VOICE_CHARS = 700;
|
|
615944
615987
|
}
|
|
@@ -657229,6 +657272,121 @@ function ollamaFormatFromOpenAIResponseFormat(value2) {
|
|
|
657229
657272
|
if (record["type"] === "object" || record["properties"] !== void 0) return record;
|
|
657230
657273
|
return void 0;
|
|
657231
657274
|
}
|
|
657275
|
+
function bodyString(body, keys) {
|
|
657276
|
+
for (const key of keys) {
|
|
657277
|
+
const value2 = body[key];
|
|
657278
|
+
if (typeof value2 === "string" && value2.trim()) return value2.trim();
|
|
657279
|
+
}
|
|
657280
|
+
return "";
|
|
657281
|
+
}
|
|
657282
|
+
function realtimeEndpointMessages(body) {
|
|
657283
|
+
const messages2 = [];
|
|
657284
|
+
const suppliedSoul = bodyString(body, ["soul_md", "soul", "soulMd"]);
|
|
657285
|
+
const suppliedContext = bodyString(body, ["context", "call_context", "adapter_context"]);
|
|
657286
|
+
if (suppliedSoul) messages2.push({ role: "system", content: `SOUL.md supplied by the voice adapter:
|
|
657287
|
+
${suppliedSoul}` });
|
|
657288
|
+
if (suppliedContext) messages2.push({ role: "system", content: `Live call context supplied by the adapter:
|
|
657289
|
+
${suppliedContext}` });
|
|
657290
|
+
if (Array.isArray(body["messages"])) {
|
|
657291
|
+
for (const msg of body["messages"]) {
|
|
657292
|
+
if (!msg || typeof msg !== "object") continue;
|
|
657293
|
+
const record = msg;
|
|
657294
|
+
const role = typeof record["role"] === "string" ? record["role"] : "user";
|
|
657295
|
+
const content = typeof record["content"] === "string" ? record["content"].trim() : "";
|
|
657296
|
+
if (content && (role === "system" || role === "user" || role === "assistant")) messages2.push({ role, content });
|
|
657297
|
+
}
|
|
657298
|
+
}
|
|
657299
|
+
const latestTurn = bodyString(body, ["message", "text", "input", "callerText", "caller_text", "recent_turn", "asr_text"]);
|
|
657300
|
+
if (latestTurn) {
|
|
657301
|
+
const last2 = [...messages2].reverse().find((msg) => msg.role === "user");
|
|
657302
|
+
if (!last2 || last2.content !== latestTurn) messages2.push({ role: "user", content: latestTurn });
|
|
657303
|
+
}
|
|
657304
|
+
return messages2;
|
|
657305
|
+
}
|
|
657306
|
+
async function completeRealtimeTextOnly(opts) {
|
|
657307
|
+
const cfg = loadConfig();
|
|
657308
|
+
const model = bodyString(opts.body, ["model"]) || cfg.model;
|
|
657309
|
+
const route = resolveModelEndpoint(model);
|
|
657310
|
+
const limitErr = route?.endpoint ? checkEndpointRateLimit(route.endpoint) : null;
|
|
657311
|
+
if (limitErr) throw new Error(limitErr);
|
|
657312
|
+
const targetUrl = route?.endpoint.url ?? opts.ollamaUrl;
|
|
657313
|
+
const targetType = route?.endpoint.type ?? cfg.backendType ?? "ollama";
|
|
657314
|
+
const originalModel = route?.originalId ?? model.replace(/^[a-z]+\//, "");
|
|
657315
|
+
const realtimeOpts = {
|
|
657316
|
+
...realtimeOptionsFromBody(opts.body, process.cwd(), opts.sessionId),
|
|
657317
|
+
surface: "voice_adapter"
|
|
657318
|
+
};
|
|
657319
|
+
const requestBody = applyRealtimeToRequestBody({
|
|
657320
|
+
...opts.body,
|
|
657321
|
+
model: originalModel,
|
|
657322
|
+
messages: opts.messages,
|
|
657323
|
+
realtime: true,
|
|
657324
|
+
stream: false
|
|
657325
|
+
}, realtimeOpts);
|
|
657326
|
+
const timeoutMs = getBackendTimeoutMs(typeof opts.body["timeout_s"] === "number" ? opts.body["timeout_s"] : void 0);
|
|
657327
|
+
if (targetType === "vllm" || targetType === "openai") {
|
|
657328
|
+
const result2 = await ollamaRequest(targetUrl, "/v1/chat/completions", "POST", JSON.stringify(requestBody), timeoutMs, route?.endpoint);
|
|
657329
|
+
if (result2.status >= 400) throw new Error(`Backend HTTP ${result2.status}: ${result2.body.slice(0, 300)}`);
|
|
657330
|
+
const parsed2 = JSON.parse(result2.body);
|
|
657331
|
+
const rawReply2 = String(parsed2?.choices?.[0]?.message?.content ?? "").trim();
|
|
657332
|
+
return { reply: finalizeRealtimeReply(rawReply2, realtimeOpts), rawReply: rawReply2, model: originalModel, usage: parsed2?.usage };
|
|
657333
|
+
}
|
|
657334
|
+
const maxTokens = typeof requestBody["max_tokens"] === "number" ? requestBody["max_tokens"] : 120;
|
|
657335
|
+
const temperature = typeof requestBody["temperature"] === "number" ? requestBody["temperature"] : 0.6;
|
|
657336
|
+
const result = await ollamaRequest(targetUrl, "/api/chat", "POST", JSON.stringify({
|
|
657337
|
+
model: originalModel,
|
|
657338
|
+
messages: requestBody["messages"],
|
|
657339
|
+
stream: false,
|
|
657340
|
+
think: false,
|
|
657341
|
+
options: { temperature, num_predict: maxTokens }
|
|
657342
|
+
}), timeoutMs, route?.endpoint);
|
|
657343
|
+
if (result.status >= 400) throw new Error(`Backend HTTP ${result.status}: ${result.body.slice(0, 300)}`);
|
|
657344
|
+
const parsed = JSON.parse(result.body);
|
|
657345
|
+
const rawReply = String(parsed?.message?.content ?? "").trim();
|
|
657346
|
+
return {
|
|
657347
|
+
reply: finalizeRealtimeReply(rawReply, realtimeOpts),
|
|
657348
|
+
rawReply,
|
|
657349
|
+
model: originalModel,
|
|
657350
|
+
usage: {
|
|
657351
|
+
prompt_tokens: parsed?.prompt_eval_count ?? 0,
|
|
657352
|
+
completion_tokens: parsed?.eval_count ?? 0,
|
|
657353
|
+
total_tokens: (parsed?.prompt_eval_count ?? 0) + (parsed?.eval_count ?? 0)
|
|
657354
|
+
}
|
|
657355
|
+
};
|
|
657356
|
+
}
|
|
657357
|
+
async function handleRealtimeText(req2, res, ollamaUrl) {
|
|
657358
|
+
const body = await parseJsonBody(req2);
|
|
657359
|
+
if (!body || typeof body !== "object") {
|
|
657360
|
+
jsonResponse(res, 400, { error: "invalid_request", message: "Expected a JSON object." });
|
|
657361
|
+
return;
|
|
657362
|
+
}
|
|
657363
|
+
const messages2 = realtimeEndpointMessages(body);
|
|
657364
|
+
if (!messages2.some((msg) => msg.role === "user" && msg.content.trim())) {
|
|
657365
|
+
jsonResponse(res, 400, { error: "missing_turn", message: "Provide message, text, recent_turn, asr_text, callerText, or messages[]." });
|
|
657366
|
+
return;
|
|
657367
|
+
}
|
|
657368
|
+
try {
|
|
657369
|
+
const sessionId = typeof body["session_id"] === "string" ? body["session_id"] : void 0;
|
|
657370
|
+
const result = await completeRealtimeTextOnly({ body, messages: messages2, ollamaUrl, sessionId });
|
|
657371
|
+
const wantsPlain = String(req2.headers["accept"] ?? "").includes("text/plain") || body["format"] === "text";
|
|
657372
|
+
if (wantsPlain) {
|
|
657373
|
+
res.writeHead(200, { "Content-Type": "text/plain; charset=utf-8", "Cache-Control": "no-store" });
|
|
657374
|
+
res.end(result.reply + "\n");
|
|
657375
|
+
return;
|
|
657376
|
+
}
|
|
657377
|
+
jsonResponse(res, 200, {
|
|
657378
|
+
reply: result.reply,
|
|
657379
|
+
text: result.reply,
|
|
657380
|
+
raw_reply: result.rawReply,
|
|
657381
|
+
model: result.model,
|
|
657382
|
+
usage: result.usage,
|
|
657383
|
+
realtime: true,
|
|
657384
|
+
mode: "voice_adapter_text_only"
|
|
657385
|
+
});
|
|
657386
|
+
} catch (err) {
|
|
657387
|
+
jsonResponse(res, 502, { error: "realtime_failed", message: err instanceof Error ? err.message : String(err) });
|
|
657388
|
+
}
|
|
657389
|
+
}
|
|
657232
657390
|
function backendAuthHeaders(endpoint) {
|
|
657233
657391
|
const key = endpoint?.authKey ?? loadConfig().apiKey;
|
|
657234
657392
|
if (key) return { Authorization: `Bearer ${key}` };
|
|
@@ -660443,6 +660601,14 @@ async function handleRequest(req2, res, ollamaUrl, verbose) {
|
|
|
660443
660601
|
return;
|
|
660444
660602
|
}
|
|
660445
660603
|
}
|
|
660604
|
+
if ((pathname === "/realtime" || pathname === "/v1/realtime") && method === "POST") {
|
|
660605
|
+
if (!checkAuth(req2, res, "read")) {
|
|
660606
|
+
status = 401;
|
|
660607
|
+
return;
|
|
660608
|
+
}
|
|
660609
|
+
await handleRealtimeText(req2, res, ollamaUrl);
|
|
660610
|
+
return;
|
|
660611
|
+
}
|
|
660446
660612
|
if (pathname === "/v1/files" && method === "GET") {
|
|
660447
660613
|
const dir = urlObj.searchParams.get("path") || process.cwd();
|
|
660448
660614
|
try {
|
package/docs/guides/realtime.md
CHANGED
|
@@ -4,6 +4,8 @@ Realtime mode is for short, natural, back-and-forth spoken conversation behind A
|
|
|
4
4
|
|
|
5
5
|
It is not a long-form coding-task mode. It trims context, reduces scaffolding, and optimizes for speakable answers.
|
|
6
6
|
|
|
7
|
+
The text-only adapter endpoint is `/realtime` (alias: `/v1/realtime`). ASR and TTS are intentionally out of scope for that route; pass the latest transcript text in, receive a short reply text out.
|
|
8
|
+
|
|
7
9
|
## Enable In The TUI
|
|
8
10
|
|
|
9
11
|
```text
|
|
@@ -14,6 +16,25 @@ It is not a long-form coding-task mode. It trims context, reduces scaffolding, a
|
|
|
14
16
|
|
|
15
17
|
## Use Through REST
|
|
16
18
|
|
|
19
|
+
Voice-adapter text endpoint:
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
curl -s http://127.0.0.1:11435/realtime \
|
|
23
|
+
-H 'content-type: application/json' \
|
|
24
|
+
-H 'accept: text/plain' \
|
|
25
|
+
-d '{
|
|
26
|
+
"soul_md": "Be direct, warm, and practical.",
|
|
27
|
+
"recent_turn": "Can you say the short version?",
|
|
28
|
+
"realtime_options": {
|
|
29
|
+
"max_reply_words": 32,
|
|
30
|
+
"max_tokens": 120
|
|
31
|
+
},
|
|
32
|
+
"format": "text"
|
|
33
|
+
}'
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
Chat-compatible endpoint:
|
|
37
|
+
|
|
17
38
|
```bash
|
|
18
39
|
curl -s http://127.0.0.1:11435/v1/chat \
|
|
19
40
|
-H 'content-type: application/json' \
|
|
@@ -57,7 +78,8 @@ Realtime mode builds a compact prompt from:
|
|
|
57
78
|
|
|
58
79
|
Realtime responses should:
|
|
59
80
|
|
|
60
|
-
- default to one
|
|
81
|
+
- default to one natural phone-call turn, usually under 36 words
|
|
82
|
+
- lead with the answer, not analysis or status
|
|
61
83
|
- ask one focused repair question when ASR text is ambiguous
|
|
62
84
|
- treat the latest user utterance as the live turn
|
|
63
85
|
- avoid long markdown, tables, verbose plans, or implementation narration unless requested
|
package/docs/rest/endpoints/chat.md
CHANGED
|
@@ -6,6 +6,8 @@
|
|
|
6
6
|
| --- | --- | --- |
|
|
7
7
|
| `GET` | `/v1/models` | List aggregated models |
|
|
8
8
|
| `POST` | `/v1/chat/completions` | OpenAI-compatible chat completions |
|
|
9
|
+
| `POST` | `/realtime` | Text-only voice-adapter brain: transcript text in, short reply text out |
|
|
10
|
+
| `POST` | `/v1/realtime` | Auth-scoped alias for `/realtime` |
|
|
9
11
|
| `POST` | `/v1/embeddings` | Generate embeddings |
|
|
10
12
|
| `POST` | `/v1/chat` | Stateful Omnius chat with optional full agent tools |
|
|
11
13
|
| `POST` | `/v1/generate` | Ollama-compatible one-shot generation |
|
|
@@ -96,6 +98,8 @@ When `realtime: true`, Omnius:
|
|
|
96
98
|
|
|
97
99
|
Use this for live voice clients, not long coding tasks.
|
|
98
100
|
|
|
101
|
+
For ASR/TTS systems that only need the text brain, use `/realtime` or `/v1/realtime` with `message`, `text`, `recent_turn`, `asr_text`, or `callerText`. Optional `soul_md` supplies adapter-local SOUL.md content. Set `Accept: text/plain` or `format: "text"` to receive only the reply string.
|
|
102
|
+
|
|
99
103
|
## Server-Side Agent Loop
|
|
100
104
|
|
|
101
105
|
`/v1/chat/completions` can run an internal tool loop when `agent_loop: true`. This lets clients collapse multiple model/tool round trips into one daemon request. Daemon tool calls execute inline; client-owned tool calls can still be yielded in OpenAI-compatible shape.
|
package/npm-shrinkwrap.json
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "omnius",
|
|
3
|
-
"version": "1.0.168",
|
|
3
|
+
"version": "1.0.169",
|
|
4
4
|
"lockfileVersion": 3,
|
|
5
5
|
"requires": true,
|
|
6
6
|
"packages": {
|
|
7
7
|
"": {
|
|
8
8
|
"name": "omnius",
|
|
9
|
-
"version": "1.0.168",
|
|
9
|
+
"version": "1.0.169",
|
|
10
10
|
"bundleDependencies": [
|
|
11
11
|
"image-to-ascii"
|
|
12
12
|
],
|
package/package.json
CHANGED