omnius 1.0.158 → 1.0.160

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. package/.aiwg/addons/omnius-docs/README.md +5 -0
  2. package/.aiwg/addons/omnius-docs/manifest.json +32 -0
  3. package/.aiwg/addons/omnius-docs/skills/omnius-docs/SKILL.md +48 -0
  4. package/.aiwg/addons/omnius-docs/skills/omnius-ops-docs/SKILL.md +32 -0
  5. package/.aiwg/addons/omnius-docs/skills/omnius-realtime-docs/SKILL.md +30 -0
  6. package/.aiwg/addons/omnius-docs/skills/omnius-sponsor-docs/SKILL.md +31 -0
  7. package/.aiwg/addons/omnius-docs/skills/omnius-telegram-docs/SKILL.md +30 -0
  8. package/.aiwg/addons/omnius-rest-docs/README.md +7 -0
  9. package/.aiwg/addons/omnius-rest-docs/manifest.json +24 -0
  10. package/.aiwg/addons/omnius-rest-docs/skills/omnius-rest-docs/SKILL.md +72 -0
  11. package/README.md +115 -5011
  12. package/dist/index.js +2173 -1427
  13. package/docs/.vitepress/config.mts +108 -0
  14. package/docs/agent-memory/INDEX.md +38 -0
  15. package/docs/agent-memory/index.md +14 -0
  16. package/docs/architecture/overview.md +30 -0
  17. package/docs/getting-started/first-run.md +38 -0
  18. package/docs/getting-started/install.md +58 -0
  19. package/docs/getting-started/model-providers.md +48 -0
  20. package/docs/guides/media-generation.md +88 -0
  21. package/docs/guides/realtime.md +138 -0
  22. package/docs/guides/sponsor-and-cohere.md +123 -0
  23. package/docs/guides/telegram.md +95 -0
  24. package/docs/guides/tui-workflows.md +48 -0
  25. package/docs/index.md +30 -0
  26. package/docs/operations/runtime-hygiene.md +75 -0
  27. package/docs/operations/security-and-remote-access.md +70 -0
  28. package/docs/reference/configuration.md +45 -0
  29. package/docs/reference/rest-api.md +225 -0
  30. package/docs/reference/slash-commands.md +2095 -0
  31. package/docs/rest/INDEX.md +129 -0
  32. package/docs/rest/QUICKREF.md +125 -0
  33. package/docs/rest/REST-DOCS-MANIFEST.json +27 -0
  34. package/docs/rest/auth-and-scopes.md +101 -0
  35. package/docs/rest/endpoints/aims.md +26 -0
  36. package/docs/rest/endpoints/aiwg.md +44 -0
  37. package/docs/rest/endpoints/chat.md +101 -0
  38. package/docs/rest/endpoints/config.md +53 -0
  39. package/docs/rest/endpoints/events.md +63 -0
  40. package/docs/rest/endpoints/files.md +18 -0
  41. package/docs/rest/endpoints/memory.md +42 -0
  42. package/docs/rest/endpoints/run.md +52 -0
  43. package/docs/rest/endpoints/skills.md +41 -0
  44. package/docs/rest/endpoints/tools.md +62 -0
  45. package/docs/rest/endpoints/voice-vision.md +80 -0
  46. package/docs/rest/errors-pagination-etags.md +84 -0
  47. package/docs/rest/examples/curl.md +84 -0
  48. package/docs/rest/examples/openai-sdk.md +59 -0
  49. package/docs/rest/openapi-source.md +36 -0
  50. package/npm-shrinkwrap.json +2 -2
  51. package/package.json +5 -2
@@ -0,0 +1,108 @@
1
+ import { defineConfig } from "vitepress";
2
+
3
+ export default defineConfig({
4
+ title: "Omnius",
5
+ description: "Local-first agent runtime, REST daemon, sponsor mesh, and AIWG-compatible docs.",
6
+ cleanUrls: true,
7
+ lastUpdated: true,
8
+ srcExclude: [
9
+ "HANDOFF-crl-encoder-decoder-fix.md",
10
+ "concept-relational-language.md",
11
+ "context-management-medium-models-proposal.md",
12
+ "dedup-false-positive-meta-analysis.md",
13
+ "duplicate-calls-root-cause-analysis.md",
14
+ "duplicate-calls-root-cause-deep.md",
15
+ "ephemeral-skill-pack-small-context.md",
16
+ "flowstate.md",
17
+ "memory-integration-analysis.md",
18
+ "model-capability-awareness-and-multimodal-memory-root-fix.md",
19
+ "multimodal-identity-memory-implementation.md",
20
+ "sana-and-video-generation-integration-plan.md",
21
+ "session-diary-llm-training-analysis.md",
22
+ "telegram-dmn-curiosity-outreach-scaffold.md",
23
+ "telegram-mid-horizon-download-loop-handoff.md",
24
+ "telegram-reflection-corpus-integration-plan.md",
25
+ "telegram-unified-tooling-architecture.md",
26
+ "threat-model.md",
27
+ "voice-flow-architecture.md",
28
+ "x402-remote-inference-plan.md",
29
+ "agent-memory/INDEX.md",
30
+ "research/**",
31
+ "work-orders/**",
32
+ ],
33
+ themeConfig: {
34
+ nav: [
35
+ { text: "Guide", link: "/getting-started/install" },
36
+ { text: "REST", link: "/reference/rest-api" },
37
+ { text: "Commands", link: "/reference/slash-commands" },
38
+ { text: "Operations", link: "/operations/security-and-remote-access" },
39
+ ],
40
+ sidebar: [
41
+ {
42
+ text: "Getting Started",
43
+ items: [
44
+ { text: "Install", link: "/getting-started/install" },
45
+ { text: "First Run", link: "/getting-started/first-run" },
46
+ { text: "Model Providers", link: "/getting-started/model-providers" },
47
+ ],
48
+ },
49
+ {
50
+ text: "Guides",
51
+ items: [
52
+ { text: "TUI Workflows", link: "/guides/tui-workflows" },
53
+ { text: "Sponsor And COHERE", link: "/guides/sponsor-and-cohere" },
54
+ { text: "Realtime Conversation", link: "/guides/realtime" },
55
+ { text: "Telegram", link: "/guides/telegram" },
56
+ { text: "Media Generation", link: "/guides/media-generation" },
57
+ ],
58
+ },
59
+ {
60
+ text: "REST API",
61
+ items: [
62
+ { text: "Overview", link: "/rest/INDEX" },
63
+ { text: "Quick Reference", link: "/rest/QUICKREF" },
64
+ { text: "Auth And Scopes", link: "/rest/auth-and-scopes" },
65
+ { text: "Errors, Pagination, ETags", link: "/rest/errors-pagination-etags" },
66
+ { text: "Chat", link: "/rest/endpoints/chat" },
67
+ { text: "Runs", link: "/rest/endpoints/run" },
68
+ { text: "Config", link: "/rest/endpoints/config" },
69
+ { text: "Memory", link: "/rest/endpoints/memory" },
70
+ { text: "Skills", link: "/rest/endpoints/skills" },
71
+ { text: "Tools", link: "/rest/endpoints/tools" },
72
+ { text: "Events", link: "/rest/endpoints/events" },
73
+ { text: "Files", link: "/rest/endpoints/files" },
74
+ { text: "Voice And Vision", link: "/rest/endpoints/voice-vision" },
75
+ { text: "AIWG", link: "/rest/endpoints/aiwg" },
76
+ { text: "AIMS", link: "/rest/endpoints/aims" },
77
+ { text: "Curl Examples", link: "/rest/examples/curl" },
78
+ { text: "OpenAI SDK Examples", link: "/rest/examples/openai-sdk" },
79
+ ],
80
+ },
81
+ {
82
+ text: "Reference",
83
+ items: [
84
+ { text: "REST Inventory", link: "/reference/rest-api" },
85
+ { text: "Slash Commands", link: "/reference/slash-commands" },
86
+ { text: "Configuration", link: "/reference/configuration" },
87
+ { text: "Agent Memory", link: "/agent-memory/" },
88
+ ],
89
+ },
90
+ {
91
+ text: "Operations",
92
+ items: [
93
+ { text: "Security And Remote Access", link: "/operations/security-and-remote-access" },
94
+ { text: "Runtime Hygiene", link: "/operations/runtime-hygiene" },
95
+ ],
96
+ },
97
+ {
98
+ text: "Architecture",
99
+ items: [
100
+ { text: "Overview", link: "/architecture/overview" },
101
+ ],
102
+ },
103
+ ],
104
+ search: {
105
+ provider: "local",
106
+ },
107
+ },
108
+ });
@@ -0,0 +1,38 @@
1
+ # Agent-Explorable Documentation
2
+
3
+ Omnius documentation is exposed to agents through project-local AIWG-style bundles under `.aiwg/addons/`.
4
+
5
+ ## Bundles
6
+
7
+ | Bundle | Purpose |
8
+ | --- | --- |
9
+ | `omnius-docs` | General Omnius docs entrypoint and feature guides |
10
+ | `omnius-rest-docs` | REST API docs entrypoint and endpoint-family map |
11
+
12
+ ## Agent Use Pattern
13
+
14
+ 1. Use `skill_list` or `/skills` with a focused filter.
15
+ 2. Load the matching docs skill.
16
+ 3. Open the index named by that skill.
17
+ 4. Read only the relevant guide or reference page.
18
+ 5. Treat source files and live OpenAPI as canonical when docs conflict with code.
19
+
20
+ ## Skills
21
+
22
+ ```text
23
+ omnius-docs
24
+ omnius-realtime-docs
25
+ omnius-sponsor-docs
26
+ omnius-telegram-docs
27
+ omnius-ops-docs
28
+ omnius-rest-docs
29
+ ```
30
+
31
+ ## Maintenance Rule
32
+
33
+ When adding a major feature, add:
34
+
35
+ - a human guide or reference page
36
+ - an entry in README if it affects first-run understanding
37
+ - a docs skill or trigger if agents should discover it
38
+ - validation coverage if the feature has a machine-readable surface
@@ -0,0 +1,14 @@
1
+ # Agent Memory Index
2
+
3
+ Use the Omnius docs skills when an agent needs to explore the documentation corpus instead of loading the whole docs tree.
4
+
5
+ | Skill | Use |
6
+ | --- | --- |
7
+ | `omnius-docs` | Product overview, setup, guides, operations, and architecture |
8
+ | `omnius-rest-docs` | REST endpoint families, auth, examples, OpenAPI drift checks |
9
+ | `omnius-sponsor-docs` | Sponsor, COHERE, peer sharing, usage, and media sharing |
10
+ | `omnius-telegram-docs` | Telegram bridge setup, scope, preferences, and failure feedback |
11
+ | `omnius-realtime-docs` | Short spoken dialogue mode and ASR/TTS client loops |
12
+ | `omnius-ops-docs` | Security, runtime hygiene, update/install triage |
13
+
14
+ The source index for installed agent discovery remains `docs/agent-memory/INDEX.md`.
@@ -0,0 +1,30 @@
1
+ # Architecture Overview
2
+
3
+ Omnius combines a terminal-first agent loop, REST daemon, model routing layer, tool runtime, persistent context, and peer mesh.
4
+
5
+ ## Main Surfaces
6
+
7
+ - TUI: interactive control plane and task interface.
8
+ - CLI: one-shot tasks and operational commands.
9
+ - REST daemon: HTTP/WebSocket API for automation, GUI, CI, voice, and remote clients.
10
+ - Tool runtime: file, shell, browser, memory, media, MCP, Telegram, and P2P tools.
11
+ - Model routing: local Ollama, managed Ollama pool, vLLM, OpenAI-compatible endpoints, sponsors, and COHERE peers.
12
+ - Memory and context: session context, scoped Telegram persona state, failure records, episodes, and skill indexes.
13
+
14
+ ## Agent Loop
15
+
16
+ The agent loop assembles task context, chooses tools, executes them through policy gates, observes raw output, and iterates until task completion or interruption.
17
+
18
+ Steering input should enter through an intake layer that interprets the new requirement relative to the active trajectory before interleaving it with the next model turn.
19
+
20
+ ## Skills And Docs
21
+
22
+ Omnius discovers skills from AIWG roots, project `.aiwg/skills`, project-local AIWG bundles, and `.omnius/skills`. Docs that should be available to agents should be exposed as small skills that point to structured docs rather than dumping whole manuals into context.
23
+
24
+ ## P2P Mesh
25
+
26
+ Sponsor and COHERE capabilities use the Nexus/libp2p layer to discover peers, route capacity, exchange usage signals, and expose selected models or media modalities.
27
+
28
+ ## REST Contract
29
+
30
+ The canonical API contract is generated from `packages/cli/src/api/openapi.ts`. Human docs summarize and explain the contract, but automation should validate against the source.
@@ -0,0 +1,38 @@
1
+ # First Run And Setup
2
+
3
+ The setup flow is responsible for choosing a usable model path without assuming local Ollama is the only option.
4
+
5
+ ## What Setup Probes
6
+
7
+ Setup can inspect:
8
+
9
+ - local platform and hardware
10
+ - existing Ollama availability
11
+ - configured custom endpoints
12
+ - OpenAI-compatible endpoint model lists
13
+ - optional voice, media, and tool dependencies
14
+ - unified-memory hardware constraints
15
+
16
+ ## Model And Endpoint Choice
17
+
18
+ The model picker should show the union of models from all enabled endpoints. If a sponsor endpoint, OpenRouter endpoint, or other external endpoint is selected, the next model step must list models from that endpoint as well as from every other enabled endpoint.
19
+
20
+ The setup flow should not silently fall back to local Ollama when an external endpoint was selected.
21
+
22
+ ## Sponsor Endpoint Banner
23
+
24
+ The setup banner is separate from consumer-visible sponsor endpoint labeling. Sponsor consumer headers should stay simple: a short sponsor-provided string and optional clickable link.
25
+
26
+ ## Optional Dependency Setup
27
+
28
+ Optional installers can run before the TUI launches. If elevation is required in a terminal context, the TUI should temporarily clear or suspend its drawing state, present the password prompt directly, consume the password through the normal system elevation path, then restore the TUI state.
29
+
30
+ ## Expected Outcome
31
+
32
+ After first run:
33
+
34
+ - an endpoint is selected
35
+ - a model is selected
36
+ - `.omnius/settings.json` exists if settings were persisted
37
+ - `.omnius/` is ignored by git
38
+ - the TUI can submit a prompt without re-entering setup
@@ -0,0 +1,58 @@
1
+ # Install Omnius
2
+
3
+ This guide gets a machine ready to run Omnius from npm or from the workspace.
4
+
5
+ ## Requirements
6
+
7
+ - Node.js 22 or newer
8
+ - npm 10 or newer for published CLI use
9
+ - pnpm 9 or newer for workspace development
10
+ - A local model endpoint, remote OpenAI-compatible endpoint, or sponsor/peer endpoint
11
+
12
+ ## Install From npm
13
+
14
+ ```bash
15
+ npm install -g omnius
16
+ omnius
17
+ ```
18
+
19
+ The first launch opens the setup flow, probes local capabilities, and asks which endpoint and model path to use.
20
+
21
+ ## Start The REST Daemon
22
+
23
+ ```bash
24
+ omnius serve
25
+ ```
26
+
27
+ Default daemon URL:
28
+
29
+ ```text
30
+ http://127.0.0.1:11435
31
+ ```
32
+
33
+ Open API docs:
34
+
35
+ ```text
36
+ http://127.0.0.1:11435/docs
37
+ ```
38
+
39
+ ## Workspace Development
40
+
41
+ ```bash
42
+ pnpm install
43
+ pnpm -r build
44
+ pnpm docs:check
45
+ ```
46
+
47
+ Useful focused checks:
48
+
49
+ ```bash
50
+ pnpm --filter omnius exec vitest run tests/realtime-mode.test.ts tests/command-registry.test.ts
51
+ pnpm --filter @omnius/execution exec vitest run tests/skill-discovery.test.ts
52
+ ```
53
+
54
+ ## Runtime State
55
+
56
+ Omnius stores project runtime state under `.omnius/`. This directory includes settings, jobs, context, sponsor state, scoped Telegram persona state, and generated connector artifacts.
57
+
58
+ Do not commit `.omnius/`. Omnius adds it to `.gitignore` automatically for repositories that have or later create a `.gitignore`.
@@ -0,0 +1,48 @@
1
+ # Model Providers And Endpoints
2
+
3
+ Omnius routes model requests through a provider abstraction instead of treating local Ollama as the only source of models.
4
+
5
+ ## Provider Types
6
+
7
+ Supported endpoint patterns include:
8
+
9
+ - local Ollama
10
+ - Omnius-managed Ollama pool
11
+ - vLLM
12
+ - OpenAI-compatible HTTP servers
13
+ - OpenRouter
14
+ - Groq
15
+ - Chutes
16
+ - Together, Fireworks, DeepInfra, Mistral, Cerebras, and similar providers
17
+ - sponsor endpoints discovered through Nexus
18
+ - COHERE distributed inference peers
19
+
20
+ ## Endpoint Selection
21
+
22
+ Use:
23
+
24
+ ```text
25
+ /endpoint
26
+ /endpoint sponsor
27
+ /model
28
+ ```
29
+
30
+ Endpoint history records recently used URLs and auth metadata so users can return to external routers without retyping them.
31
+
32
+ ## Model Discovery Rule
33
+
34
+ When multiple endpoints are enabled, model lists should be consolidated from all enabled endpoints. This applies to:
35
+
36
+ - setup wizard model selection
37
+ - `/model`
38
+ - sponsor wizard model exposure
39
+ - REST `GET /v1/models`
40
+ - consumer sponsor endpoint selection
41
+
42
+ ## Passthrough Rule
43
+
44
+ Sponsor and COHERE passthrough should preserve the upstream provider's model identity while hiding raw provider secrets and URLs from consumers.
45
+
46
+ ## Thinking Mode
47
+
48
+ Omnius defaults to direct-answer mode (`think: false`) for backend requests. Tool-calling turns force `think: false`; `OMNIUS_FORCE_NO_THINK=1` disables thinking globally. `/think` controls session defaults where supported.
@@ -0,0 +1,88 @@
1
+ # Media Generation
2
+
3
+ Omnius exposes media generation through TUI commands, tools, Telegram creative workflows, and sponsor media endpoints.
4
+
5
+ ## Commands And Tools
6
+
7
+ ```text
8
+ /image
9
+ /video
10
+ /sound
11
+ /music
12
+ /voice
13
+ /listen
14
+ /call
15
+ ```
16
+
17
+ Tool names include:
18
+
19
+ - `generate_image`
20
+ - `generate_video`
21
+ - `generate_audio`
22
+ - `generate_tts`
23
+
24
+ ## Video
25
+
26
+ Video generation supports setup/list/prewarm/delete flows, thumbnails, sidecars, audio muxing, broker preflight, and model presets such as SANA/Wan paths where installed.
27
+
28
+ ## Backend Matrix
29
+
30
+ | Modality | Typical Backends | Hardware Notes | Output |
31
+ | --- | --- | --- | --- |
32
+ | Image | diffusers, SD.cpp, ComfyUI, Ollama-compatible image routes where available | CPU works slowly; CUDA/ROCm/Metal preferred for larger diffusion models | PNG/JPEG plus prompt sidecar |
33
+ | Video | diffusers video pipelines, ComfyUI, SANA/Wan-style presets | high VRAM pressure; preflight broker should reject unsafe loads | MP4/WebM plus thumbnail and sidecar |
34
+ | Sound | AudioCraft, Stable Audio, TangoFlux-style pipelines | GPU preferred for longer clips; duration caps protect providers | WAV/MP3 plus prompt sidecar |
35
+ | Music | transformers, AudioCraft, Stable Audio, diffusion audio | long generations should be queued and capped | WAV/MP3 plus prompt sidecar |
36
+ | Voice | LuxTTS, Kokoro, Supertonic, ASR backends | CPU can handle small TTS/ASR; clone models need setup validation | WAV/PCM/transcript |
37
+
38
+ ## Setup And Preflight
39
+
40
+ Each modality should expose:
41
+
42
+ - `setup` for dependency guidance and install triage
43
+ - `list` for known models and hardware fit
44
+ - `prewarm` where model load time is large
45
+ - broker checks for RAM, VRAM, disk, and existing model pressure
46
+ - a sidecar file containing prompt, model, seed when available, duration/steps, and source chat/session
47
+
48
+ When a backend install needs elevation, the TUI should suspend, expose the terminal password prompt directly, then restore TUI state after the installer exits.
49
+
50
+ ## Audio And Voice
51
+
52
+ Voice surfaces include:
53
+
54
+ - TTS synthesis
55
+ - ASR transcription
56
+ - voice clone references
57
+ - realtime voicechat WebSocket
58
+ - `/v1/audio/speech`
59
+ - `/v1/audio/transcriptions`
60
+
61
+ ## Sponsor Media
62
+
63
+ Sponsors can expose selected media modalities to consumers. Provider-side controls should enforce:
64
+
65
+ - enabled modality list
66
+ - model allowlist
67
+ - max image steps
68
+ - max video/audio duration
69
+ - daily request limits
70
+ - output storage and download paths
71
+
72
+ Consumers receive generated artifacts under `.omnius/remote-media` where remote media download is enabled.
73
+
74
+ ## Sponsor Media Contract
75
+
76
+ Remote media requests should carry modality, model, prompt, safety options, max duration or steps, requested format, and caller peer ID. Provider responses should include artifact metadata and a download handle, not arbitrary provider filesystem paths.
77
+
78
+ | Provider Control | Why It Exists |
79
+ | --- | --- |
80
+ | modality allowlist | prevents accidental exposure of expensive backends |
81
+ | model allowlist | prevents hidden/private models from being advertised |
82
+ | max steps/duration | bounds GPU time |
83
+ | output retention | prevents unbounded disk growth |
84
+ | per-peer daily jobs | prevents one peer from monopolizing media capacity |
85
+
86
+ ## Telegram Media
87
+
88
+ Telegram public creative workflows use chat-scoped directories and return generated artifacts to the originating chat without exposing arbitrary paths.
@@ -0,0 +1,138 @@
1
+ # Realtime Conversations
2
+
3
+ Realtime mode is for short, natural, back-and-forth spoken conversation behind ASR and TTS.
4
+
5
+ It is not a long-form coding-task mode. It trims context, reduces scaffolding, and optimizes for speakable answers.
6
+
7
+ ## Enable In The TUI
8
+
9
+ ```text
10
+ /realtime on
11
+ /realtime off
12
+ /realtime status
13
+ ```
14
+
15
+ ## Use Through REST
16
+
17
+ ```bash
18
+ curl -s http://127.0.0.1:11435/v1/chat \
19
+ -H 'content-type: application/json' \
20
+ -d '{
21
+ "message": "Can you say that again more simply?",
22
+ "model": "qwen3:4b",
23
+ "realtime": true,
24
+ "realtime_options": {
25
+ "max_history_messages": 12,
26
+ "max_tokens": 160
27
+ }
28
+ }'
29
+ ```
30
+
31
+ OpenAI-compatible endpoint:
32
+
33
+ ```bash
34
+ curl -s http://127.0.0.1:11435/v1/chat/completions \
35
+ -H 'content-type: application/json' \
36
+ -d '{
37
+ "model": "qwen3:4b",
38
+ "realtime": true,
39
+ "messages": [
40
+ {"role": "user", "content": "Give me the short version."}
41
+ ]
42
+ }'
43
+ ```
44
+
45
+ ## Context Intake
46
+
47
+ Realtime mode builds a compact prompt from:
48
+
49
+ - `SOUL.md`
50
+ - `.aiwg/SOUL.md`
51
+ - `.aiwg/voices/default.yaml`
52
+ - `.aiwg/voices/omnius.yaml`
53
+ - the first voice profile in `.aiwg/voices/`
54
+ - caller-provided system context, at lower priority than the realtime contract
55
+
56
+ ## Behavior Contract
57
+
58
+ Realtime responses should:
59
+
60
+ - default to one or two speakable sentences
61
+ - ask one focused repair question when ASR text is ambiguous
62
+ - treat the latest user utterance as the live turn
63
+ - avoid long markdown, tables, verbose plans, or implementation narration unless requested
64
+ - avoid hidden reasoning and prompt-policy exposure
65
+
66
+ ## Client Patterns
67
+
68
+ Use `/v1/chat` with `realtime: true` for push-to-talk and transcript-driven clients. Use `/v1/voicechat/ws` for full-duplex voicechat where mic PCM and TTS PCM are exchanged over WebSocket.
69
+
70
+ ## ASR/TTS Loop
71
+
72
+ A push-to-talk client usually follows this loop:
73
+
74
+ ```ts
75
+ const transcript = await asr.captureFinalUtterance();
76
+ const response = await fetch("http://127.0.0.1:11435/v1/chat", {
77
+ method: "POST",
78
+ headers: { "content-type": "application/json" },
79
+ body: JSON.stringify({
80
+ message: transcript.text,
81
+ realtime: true,
82
+ realtime_options: {
83
+ max_history_messages: 10,
84
+ max_tokens: 140,
85
+ voice_profile: "default",
86
+ },
87
+ }),
88
+ }).then((r) => r.json());
89
+
90
+ await tts.speak(response.choices?.[0]?.message?.content ?? response.response ?? "");
91
+ ```
92
+
93
+ A full-duplex client uses `/v1/voicechat/ws`:
94
+
95
+ ```ts
96
+ const ws = new WebSocket("ws://127.0.0.1:11435/v1/voicechat/ws?user=operator");
97
+ ws.binaryType = "arraybuffer";
98
+ ws.onopen = () => ws.send(JSON.stringify({ type: "start" }));
99
+ ws.onmessage = (event) => {
100
+ if (typeof event.data === "string") {
101
+ const frame = JSON.parse(event.data);
102
+ if (frame.type === "agent_text") renderCaption(frame.text);
103
+ if (frame.type === "tts_header") pendingSampleRate = frame.sampleRate;
104
+ return;
105
+ }
106
+ playPcm(new Int16Array(event.data), pendingSampleRate);
107
+ };
108
+ ```
109
+
110
+ ## Session Handling
111
+
112
+ Realtime sessions should keep only the last few spoken turns plus compact speaker/profile context. The realtime flag does not grant higher authority to caller-provided system text; it only selects the short spoken-dialogue contract and context budget.
113
+
114
+ Recommended defaults:
115
+
116
+ | Option | Default | Reason |
117
+ | --- | ---: | --- |
118
+ | `max_history_messages` | 8-12 | enough for short repairs without dragging old turns forward |
119
+ | `max_tokens` | 120-180 | keeps TTS latency and answer length bounded |
120
+ | `tools` | false unless requested | avoids long action narration in voice mode |
121
+ | `stream` | true for captions, false for simple TTS | choose based on client playback strategy |
122
+
123
+ ## Conversation Cues
124
+
125
+ Realtime mode should handle natural dialogue signals directly:
126
+
127
+ - repair phrases like "wait", "say that again", or "shorter" refer to the previous spoken answer
128
+ - short confirmations such as "yes" or "do it" resolve against the latest live question
129
+ - ambiguous ASR should trigger one focused clarification, not a long plan
130
+ - answers should be speakable without Markdown tables unless the user asks for structured output
131
+
132
+ ## Verification
133
+
134
+ Relevant focused tests:
135
+
136
+ ```bash
137
+ pnpm --filter omnius exec vitest run tests/realtime-mode.test.ts tests/command-registry.test.ts
138
+ ```
@@ -0,0 +1,123 @@
1
+ # Sponsor And COHERE Mesh
2
+
3
+ Sponsor and COHERE let Omnius share inference capacity across peers. Sponsor mode is explicit provider/consumer sharing. COHERE is a distributed cognitive inference mesh.
4
+
5
+ ## Sponsor Provider Flow
6
+
7
+ ```text
8
+ /sponsor
9
+ ```
10
+
11
+ The wizard covers:
12
+
13
+ - endpoint selection across all enabled endpoints
14
+ - model selection across local and external endpoints
15
+ - consumer-visible sponsor label and optional link
16
+ - relay/transport settings
17
+ - request, token, and concurrency limits
18
+ - media sponsorship for image, video, sound, and music
19
+
20
+ ## Sponsor Consumer Flow
21
+
22
+ ```text
23
+ /endpoint sponsor
24
+ ```
25
+
26
+ Consumers should see sponsor models as endpoint models. They should not need the sponsor's raw upstream URL or provider secret.
27
+
28
+ ## Endpoint Passthrough
29
+
30
+ Sponsors can expose local models or forward upstream OpenAI-compatible endpoints such as OpenRouter, Groq, Chutes, or vLLM. Model discovery must consolidate models from all enabled endpoints.
31
+
32
+ ## Telemetry
33
+
34
+ Provider dashboards should report:
35
+
36
+ - active and maximum concurrency
37
+ - requests per minute
38
+ - daily tokens
39
+ - per-peer token usage
40
+ - per-model token usage
41
+ - exposed or loaded models
42
+ - media jobs by modality
43
+
44
+ Consumer status rows can alternate local and remote peer metrics so remote Ollama/GPU pressure is visible.
45
+
46
+ ## Media Sponsorship
47
+
48
+ Providers can expose:
49
+
50
+ - image generation
51
+ - video generation
52
+ - sound-effect generation
53
+ - music generation
54
+
55
+ Each modality should enforce provider-side setup, request sanitization, model allowlists, and per-modality limits.
56
+
57
+ ## COHERE
58
+
59
+ ```text
60
+ /cohere
61
+ /cohere status
62
+ /cohere models
63
+ /cohere allow <model>
64
+ /cohere deny <model>
65
+ ```
66
+
67
+ COHERE status should expose daemon health, query counts, error counts, served peers, model exposure, and usage. COHERE endpoint passthrough follows the same rule as sponsor passthrough: do not assume Ollama is the only backend.
68
+
69
+ ## Failure Handling
70
+
71
+ Directory or peer failures should be surfaced as observed failure output and usage-state signals. Avoid masking broken limits, JSON parse failures, or concurrency override conditions behind generic "unreachable" text.
72
+
73
+ ## Provider Setup Checklist
74
+
75
+ Before advertising capacity:
76
+
77
+ - confirm every enabled endpoint has a working model list
78
+ - confirm external endpoints return models during wizard step two, not only local Ollama models
79
+ - set model and modality allowlists before turning public serving on
80
+ - set concurrency, requests-per-minute, daily token, and media job limits
81
+ - run `/sponsor status` and verify daily and RPM bars start at zero
82
+ - submit one local test request through the sponsor route before sharing the peer
83
+
84
+ The consumer should only receive the sponsor label, optional sponsor link, exposed model names, supported modality metadata, and live utilization. Raw upstream URLs, provider API keys, and internal endpoint names stay provider-side.
85
+
86
+ ## Status Bars And Limits
87
+
88
+ Daily and request-rate bars show used quota over the configured limit. A full bar means the provider should reject or queue new work instead of silently accepting more requests.
89
+
90
+ | Metric | Meaning | Enforcement Point |
91
+ | --- | --- | --- |
92
+ | Concurrency | In-flight requests for this sponsor | admission before dispatch |
93
+ | RPM | Rolling requests per minute | admission before dispatch |
94
+ | Daily tokens | Input plus output tokens for the current day | admission and post-response accounting |
95
+ | Peer tokens | Per-consumer token totals | post-response accounting |
96
+ | Model tokens | Per-model totals | post-response accounting |
97
+ | Media jobs | Per-modality job counts and duration/step budgets | modality router |
98
+
99
+ If status shows impossible values such as `8/5 concurrent`, treat that as a limit-enforcement bug, not a display issue. The provider should reject the sixth request before it reaches the backend.
100
+
101
+ ## Directory Failure Cases
102
+
103
+ `Unexpected end of JSON input` usually means the directory request returned a partial body, empty body, or transport-truncated JSON. Triage it as a transport and serialization problem:
104
+
105
+ - check the nexus directory response body before parsing
106
+ - log the peer ID, endpoint, status code, content length, and first parse failure
107
+ - keep the last good directory snapshot until a complete replacement arrives
108
+ - show stale age in `/sponsor status` so consumers can distinguish stale cache from no peers
109
+ - never replace a valid peer cache with a failed parse result
110
+
111
+ ## COHERE Operations
112
+
113
+ COHERE status should be grouped rather than flattened:
114
+
115
+ | Section | Fields |
116
+ | --- | --- |
117
+ | Daemon | active flag, pid, uptime, last query, connected peers |
118
+ | Results | answered, errors, sent bytes, received bytes, average latency |
119
+ | Models | exposed, hidden, allowlist state, downloaded inventory |
120
+ | Usage | requests per peer, tokens per peer, tokens per model |
121
+ | Failures | recent raw failure events and peer/model involved |
122
+
123
+ Model exposure should be deduplicated before rendering. A status output that repeats `Allowlist` and the same model multiple times indicates the inventory merger is appending display rows instead of merging model identities.