npm - omnius - Versions diffs - 1.0.158 → 1.0.160 - Mend

omnius 1.0.158 → 1.0.160

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (51) hide show

package/.aiwg/addons/omnius-docs/README.md +5 -0
package/.aiwg/addons/omnius-docs/manifest.json +32 -0
package/.aiwg/addons/omnius-docs/skills/omnius-docs/SKILL.md +48 -0
package/.aiwg/addons/omnius-docs/skills/omnius-ops-docs/SKILL.md +32 -0
package/.aiwg/addons/omnius-docs/skills/omnius-realtime-docs/SKILL.md +30 -0
package/.aiwg/addons/omnius-docs/skills/omnius-sponsor-docs/SKILL.md +31 -0
package/.aiwg/addons/omnius-docs/skills/omnius-telegram-docs/SKILL.md +30 -0
package/.aiwg/addons/omnius-rest-docs/README.md +7 -0
package/.aiwg/addons/omnius-rest-docs/manifest.json +24 -0
package/.aiwg/addons/omnius-rest-docs/skills/omnius-rest-docs/SKILL.md +72 -0
package/README.md +115 -5011
package/dist/index.js +2173 -1427
package/docs/.vitepress/config.mts +108 -0
package/docs/agent-memory/INDEX.md +38 -0
package/docs/agent-memory/index.md +14 -0
package/docs/architecture/overview.md +30 -0
package/docs/getting-started/first-run.md +38 -0
package/docs/getting-started/install.md +58 -0
package/docs/getting-started/model-providers.md +48 -0
package/docs/guides/media-generation.md +88 -0
package/docs/guides/realtime.md +138 -0
package/docs/guides/sponsor-and-cohere.md +123 -0
package/docs/guides/telegram.md +95 -0
package/docs/guides/tui-workflows.md +48 -0
package/docs/index.md +30 -0
package/docs/operations/runtime-hygiene.md +75 -0
package/docs/operations/security-and-remote-access.md +70 -0
package/docs/reference/configuration.md +45 -0
package/docs/reference/rest-api.md +225 -0
package/docs/reference/slash-commands.md +2095 -0
package/docs/rest/INDEX.md +129 -0
package/docs/rest/QUICKREF.md +125 -0
package/docs/rest/REST-DOCS-MANIFEST.json +27 -0
package/docs/rest/auth-and-scopes.md +101 -0
package/docs/rest/endpoints/aims.md +26 -0
package/docs/rest/endpoints/aiwg.md +44 -0
package/docs/rest/endpoints/chat.md +101 -0
package/docs/rest/endpoints/config.md +53 -0
package/docs/rest/endpoints/events.md +63 -0
package/docs/rest/endpoints/files.md +18 -0
package/docs/rest/endpoints/memory.md +42 -0
package/docs/rest/endpoints/run.md +52 -0
package/docs/rest/endpoints/skills.md +41 -0
package/docs/rest/endpoints/tools.md +62 -0
package/docs/rest/endpoints/voice-vision.md +80 -0
package/docs/rest/errors-pagination-etags.md +84 -0
package/docs/rest/examples/curl.md +84 -0
package/docs/rest/examples/openai-sdk.md +59 -0
package/docs/rest/openapi-source.md +36 -0
package/npm-shrinkwrap.json +2 -2
package/package.json +5 -2

package/docs/.vitepress/config.mts ADDED Viewed

@@ -0,0 +1,108 @@
+import { defineConfig } from "vitepress";
+export default defineConfig({ // VitePress site config for the Omnius docs: metadata, excluded sources, nav, sidebar, search
+  title: "Omnius",
+  description: "Local-first agent runtime, REST daemon, sponsor mesh, and AIWG-compatible docs.",
+  cleanUrls: true, // drop the trailing .html from generated URLs
+  lastUpdated: true, // per-page last-updated timestamp (VitePress reads it from Git)
+  srcExclude: [ // markdown sources left out of the site build
+    "HANDOFF-crl-encoder-decoder-fix.md",
+    "concept-relational-language.md",
+    "context-management-medium-models-proposal.md",
+    "dedup-false-positive-meta-analysis.md",
+    "duplicate-calls-root-cause-analysis.md",
+    "duplicate-calls-root-cause-deep.md",
+    "ephemeral-skill-pack-small-context.md",
+    "flowstate.md",
+    "memory-integration-analysis.md",
+    "model-capability-awareness-and-multimodal-memory-root-fix.md",
+    "multimodal-identity-memory-implementation.md",
+    "sana-and-video-generation-integration-plan.md",
+    "session-diary-llm-training-analysis.md",
+    "telegram-dmn-curiosity-outreach-scaffold.md",
+    "telegram-mid-horizon-download-loop-handoff.md",
+    "telegram-reflection-corpus-integration-plan.md",
+    "telegram-unified-tooling-architecture.md",
+    "threat-model.md",
+    "voice-flow-architecture.md",
+    "x402-remote-inference-plan.md",
+    "agent-memory/INDEX.md", // NOTE(review): differs from agent-memory/index.md only by case; verify the pair on case-insensitive filesystems
+    "research/**", // entire directory
+    "work-orders/**", // entire directory
+  ],
+  themeConfig: {
+    nav: [ // top navigation bar
+      { text: "Guide", link: "/getting-started/install" },
+      { text: "REST", link: "/reference/rest-api" }, // same target as the "REST Inventory" sidebar entry, not /rest/INDEX
+      { text: "Commands", link: "/reference/slash-commands" },
+      { text: "Operations", link: "/operations/security-and-remote-access" },
+    ],
+    sidebar: [ // one object per sidebar section
+      {
+        text: "Getting Started",
+        items: [
+          { text: "Install", link: "/getting-started/install" },
+          { text: "First Run", link: "/getting-started/first-run" },
+          { text: "Model Providers", link: "/getting-started/model-providers" },
+        ],
+      },
+      {
+        text: "Guides",
+        items: [
+          { text: "TUI Workflows", link: "/guides/tui-workflows" },
+          { text: "Sponsor And COHERE", link: "/guides/sponsor-and-cohere" },
+          { text: "Realtime Conversation", link: "/guides/realtime" },
+          { text: "Telegram", link: "/guides/telegram" },
+          { text: "Media Generation", link: "/guides/media-generation" },
+        ],
+      },
+      {
+        text: "REST API",
+        items: [
+          { text: "Overview", link: "/rest/INDEX" }, // uppercase on purpose: the source file is docs/rest/INDEX.md
+          { text: "Quick Reference", link: "/rest/QUICKREF" }, // source file is docs/rest/QUICKREF.md
+          { text: "Auth And Scopes", link: "/rest/auth-and-scopes" },
+          { text: "Errors, Pagination, ETags", link: "/rest/errors-pagination-etags" },
+          { text: "Chat", link: "/rest/endpoints/chat" },
+          { text: "Runs", link: "/rest/endpoints/run" },
+          { text: "Config", link: "/rest/endpoints/config" },
+          { text: "Memory", link: "/rest/endpoints/memory" },
+          { text: "Skills", link: "/rest/endpoints/skills" },
+          { text: "Tools", link: "/rest/endpoints/tools" },
+          { text: "Events", link: "/rest/endpoints/events" },
+          { text: "Files", link: "/rest/endpoints/files" },
+          { text: "Voice And Vision", link: "/rest/endpoints/voice-vision" },
+          { text: "AIWG", link: "/rest/endpoints/aiwg" },
+          { text: "AIMS", link: "/rest/endpoints/aims" },
+          { text: "Curl Examples", link: "/rest/examples/curl" },
+          { text: "OpenAI SDK Examples", link: "/rest/examples/openai-sdk" },
+        ],
+      },
+      {
+        text: "Reference",
+        items: [
+          { text: "REST Inventory", link: "/reference/rest-api" },
+          { text: "Slash Commands", link: "/reference/slash-commands" },
+          { text: "Configuration", link: "/reference/configuration" },
+          { text: "Agent Memory", link: "/agent-memory/" }, // directory link; presumably served from agent-memory/index.md since INDEX.md is excluded above
+        ],
+      },
+      {
+        text: "Operations",
+        items: [
+          { text: "Security And Remote Access", link: "/operations/security-and-remote-access" },
+          { text: "Runtime Hygiene", link: "/operations/runtime-hygiene" },
+        ],
+      },
+      {
+        text: "Architecture",
+        items: [
+          { text: "Overview", link: "/architecture/overview" },
+        ],
+      },
+    ],
+    search: {
+      provider: "local", // VitePress built-in local search rather than an external search service
+    },
+  },
+});

package/docs/agent-memory/INDEX.md ADDED Viewed

@@ -0,0 +1,38 @@
+# Agent-Explorable Documentation
+Omnius documentation is exposed to agents through project-local AIWG-style bundles under `.aiwg/addons/`.
+## Bundles
+| Bundle | Purpose |
+| --- | --- |
+| `omnius-docs` | General Omnius docs entrypoint and feature guides |
+| `omnius-rest-docs` | REST API docs entrypoint and endpoint-family map |
+## Agent Use Pattern
+1. Use `skill_list` or `/skills` with a focused filter.
+2. Load the matching docs skill.
+3. Open the index named by that skill.
+4. Read only the relevant guide or reference page.
+5. Treat source files and live OpenAPI as canonical when docs conflict with code.
+## Skills
+```text
+omnius-docs
+omnius-realtime-docs
+omnius-sponsor-docs
+omnius-telegram-docs
+omnius-ops-docs
+omnius-rest-docs
+```
+## Maintenance Rule
+When adding a major feature, add:
+- a human guide or reference page
+- an entry in README if it affects first-run understanding
+- a docs skill or trigger if agents should discover it
+- validation coverage if the feature has a machine-readable surface

package/docs/agent-memory/index.md ADDED Viewed

@@ -0,0 +1,14 @@
+# Agent Memory Index
+Use the Omnius docs skills when an agent needs to explore the documentation corpus instead of loading the whole docs tree.
+| Skill | Use |
+| --- | --- |
+| `omnius-docs` | Product overview, setup, guides, operations, and architecture |
+| `omnius-rest-docs` | REST endpoint families, auth, examples, OpenAPI drift checks |
+| `omnius-sponsor-docs` | Sponsor, COHERE, peer sharing, usage, and media sharing |
+| `omnius-telegram-docs` | Telegram bridge setup, scope, preferences, and failure feedback |
+| `omnius-realtime-docs` | Short spoken dialogue mode and ASR/TTS client loops |
+| `omnius-ops-docs` | Security, runtime hygiene, update/install triage |
+The source index for installed agent discovery remains `docs/agent-memory/INDEX.md`.

package/docs/architecture/overview.md ADDED Viewed

@@ -0,0 +1,30 @@
+# Architecture Overview
+Omnius combines a terminal-first agent loop, REST daemon, model routing layer, tool runtime, persistent context, and peer mesh.
+## Main Surfaces
+- TUI: interactive control plane and task interface.
+- CLI: one-shot tasks and operational commands.
+- REST daemon: HTTP/WebSocket API for automation, GUI, CI, voice, and remote clients.
+- Tool runtime: file, shell, browser, memory, media, MCP, Telegram, and P2P tools.
+- Model routing: local Ollama, managed Ollama pool, vLLM, OpenAI-compatible endpoints, sponsors, and COHERE peers.
+- Memory and context: session context, scoped Telegram persona state, failure records, episodes, and skill indexes.
+## Agent Loop
+The agent loop assembles task context, chooses tools, executes them through policy gates, observes raw output, and iterates until task completion or interruption.
+Steering input should enter through an intake layer that interprets the new requirement relative to the active trajectory before interleaving it with the next model turn.
+## Skills And Docs
+Omnius discovers skills from AIWG roots, project `.aiwg/skills`, project-local AIWG bundles, and `.omnius/skills`. Docs that should be available to agents should be exposed as small skills that point to structured docs rather than dumping whole manuals into context.
+## P2P Mesh
+Sponsor and COHERE capabilities use the Nexus/libp2p layer to discover peers, route capacity, exchange usage signals, and expose selected models or media modalities.
+## REST Contract
+The canonical API contract is generated from `packages/cli/src/api/openapi.ts`. Human docs summarize and explain the contract, but automation should validate against the source.

package/docs/getting-started/first-run.md ADDED Viewed

@@ -0,0 +1,38 @@
+# First Run And Setup
+The setup flow is responsible for choosing a usable model path without assuming local Ollama is the only option.
+## What Setup Probes
+Setup can inspect:
+- local platform and hardware
+- existing Ollama availability
+- configured custom endpoints
+- OpenAI-compatible endpoint model lists
+- optional voice, media, and tool dependencies
+- unified-memory hardware constraints
+## Model And Endpoint Choice
+The model picker should show the union of enabled endpoints. If a sponsor endpoint, OpenRouter endpoint, or other external endpoint is selected, the next model step must list models from that endpoint as well as from every other endpoint that is currently enabled.
+The setup flow should not silently fall back to local Ollama when an external endpoint was selected.
+## Sponsor Endpoint Banner
+The setup banner is separate from consumer-visible sponsor endpoint labeling. Sponsor consumer headers should stay simple: a short sponsor-provided string and optional clickable link.
+## Optional Dependency Setup
+Optional installers can run before the TUI launches. If elevation is required in a terminal context, the TUI should temporarily clear or suspend its drawing state, present the password prompt directly, consume the password through the normal system elevation path, then restore the TUI state.
+## Expected Outcome
+After first run:
+- an endpoint is selected
+- a model is selected
+- `.omnius/settings.json` exists if settings were persisted
+- `.omnius/` is ignored by git
+- the TUI can submit a prompt without re-entering setup

package/docs/getting-started/install.md ADDED Viewed

@@ -0,0 +1,58 @@
+# Install Omnius
+This guide gets a machine ready to run Omnius from npm or from the workspace.
+## Requirements
+- Node.js 22 or newer
+- npm 10 or newer for published CLI use
+- pnpm 9 or newer for workspace development
+- A local model endpoint, remote OpenAI-compatible endpoint, or sponsor/peer endpoint
+## Install From npm
+```bash
+npm install -g omnius
+omnius
+```
+The first launch opens the setup flow, probes local capabilities, and asks which endpoint and model path to use.
+## Start The REST Daemon
+```bash
+omnius serve
+```
+Default daemon URL:
+```text
+http://127.0.0.1:11435
+```
+Open API docs:
+```text
+http://127.0.0.1:11435/docs
+```
+## Workspace Development
+```bash
+pnpm install
+pnpm -r build
+pnpm docs:check
+```
+Useful focused checks:
+```bash
+pnpm --filter omnius exec vitest run tests/realtime-mode.test.ts tests/command-registry.test.ts
+pnpm --filter @omnius/execution exec vitest run tests/skill-discovery.test.ts
+```
+## Runtime State
+Omnius stores project runtime state under `.omnius/`. This directory includes settings, jobs, context, sponsor state, scoped Telegram persona state, and generated connector artifacts.
+Do not commit `.omnius/`. Omnius adds it to `.gitignore` automatically for repositories that have or later create a `.gitignore`.

package/docs/getting-started/model-providers.md ADDED Viewed

@@ -0,0 +1,48 @@
+# Model Providers And Endpoints
+Omnius routes model requests through a provider abstraction instead of treating local Ollama as the only source of models.
+## Provider Types
+Supported endpoint patterns include:
+- local Ollama
+- Omnius-managed Ollama pool
+- vLLM
+- OpenAI-compatible HTTP servers
+- OpenRouter
+- Groq
+- Chutes
+- Together, Fireworks, DeepInfra, Mistral, Cerebras, and similar providers
+- sponsor endpoints discovered through Nexus
+- COHERE distributed inference peers
+## Endpoint Selection
+Use:
+```text
+/endpoint
+/endpoint sponsor
+/model
+```
+Endpoint history records recently used URLs and auth metadata so users can return to external routers without retyping them.
+## Model Discovery Rule
+When multiple endpoints are enabled, model lists should be consolidated from every endpoint that is currently enabled. This applies to:
+- setup wizard model selection
+- `/model`
+- sponsor wizard model exposure
+- REST `GET /v1/models`
+- consumer sponsor endpoint selection
+## Passthrough Rule
+Sponsor and COHERE passthrough should preserve the upstream provider's model identity while hiding raw provider secrets and URLs from consumers.
+## Thinking Mode
+Omnius defaults to direct-answer mode (`think: false`) for backend requests. Tool-calling turns force `think: false`; `OMNIUS_FORCE_NO_THINK=1` disables thinking globally. `/think` controls session defaults where supported.

package/docs/guides/media-generation.md ADDED Viewed

@@ -0,0 +1,88 @@
+# Media Generation
+Omnius exposes media generation through TUI commands, tools, Telegram creative workflows, and sponsor media endpoints.
+## Commands And Tools
+```text
+/image
+/video
+/sound
+/music
+/voice
+/listen
+/call
+```
+Tool names include:
+- `generate_image`
+- `generate_video`
+- `generate_audio`
+- `generate_tts`
+## Video
+Video generation supports setup/list/prewarm/delete flows, thumbnails, sidecars, audio muxing, broker preflight, and model presets such as SANA/Wan paths where installed.
+## Backend Matrix
+| Modality | Typical Backends | Hardware Notes | Output |
+| --- | --- | --- | --- |
+| Image | diffusers, SD.cpp, ComfyUI, Ollama-compatible image routes where available | CPU works slowly; CUDA/ROCm/Metal preferred for larger diffusion models | PNG/JPEG plus prompt sidecar |
+| Video | diffusers video pipelines, ComfyUI, SANA/Wan-style presets | high VRAM pressure; preflight broker should reject unsafe loads | MP4/WebM plus thumbnail and sidecar |
+| Sound | AudioCraft, Stable Audio, TangoFlux-style pipelines | GPU preferred for longer clips; duration caps protect providers | WAV/MP3 plus prompt sidecar |
+| Music | transformers, AudioCraft, Stable Audio, diffusion audio | long generations should be queued and capped | WAV/MP3 plus prompt sidecar |
+| Voice | LuxTTS, Kokoro, Supertonic, ASR backends | CPU can handle small TTS/ASR; clone models need setup validation | WAV/PCM/transcript |
+## Setup And Preflight
+Each modality should expose:
+- `setup` for dependency guidance and install triage
+- `list` for known models and hardware fit
+- `prewarm` where model load time is large
+- broker checks for RAM, VRAM, disk, and existing model pressure
+- a sidecar file containing prompt, model, seed when available, duration/steps, and source chat/session
+When a backend install needs elevation, the TUI should suspend, expose the terminal password prompt directly, then restore TUI state after the installer exits.
+## Audio And Voice
+Voice surfaces include:
+- TTS synthesis
+- ASR transcription
+- voice clone references
+- realtime voicechat WebSocket
+- `/v1/audio/speech`
+- `/v1/audio/transcriptions`
+## Sponsor Media
+Sponsors can expose selected media modalities to consumers. Provider-side controls should enforce:
+- enabled modality list
+- model allowlist
+- max image steps
+- max video/audio duration
+- daily request limits
+- output storage and download paths
+Consumers receive generated artifacts under `.omnius/remote-media` where remote media download is enabled.
+## Sponsor Media Contract
+Remote media requests should carry modality, model, prompt, safety options, max duration or steps, requested format, and caller peer ID. Provider responses should include artifact metadata and a download handle, not arbitrary provider filesystem paths.
+| Provider Control | Why It Exists |
+| --- | --- |
+| modality allowlist | prevents accidental exposure of expensive backends |
+| model allowlist | prevents hidden/private models from being advertised |
+| max steps/duration | bounds GPU time |
+| output retention | prevents unbounded disk growth |
+| per-peer daily jobs | prevents one peer from monopolizing media capacity |
+## Telegram Media
+Telegram public creative workflows use chat-scoped directories and return generated artifacts to the originating chat without exposing arbitrary paths.

package/docs/guides/realtime.md ADDED Viewed

@@ -0,0 +1,138 @@
+# Realtime Conversations
+Realtime mode is for short, natural, back-and-forth spoken conversation behind ASR and TTS.
+It is not a long-form coding-task mode. It trims context, reduces scaffolding, and optimizes for speakable answers.
+## Enable In The TUI
+```text
+/realtime on
+/realtime off
+/realtime status
+```
+## Use Through REST
+```bash
+curl -s http://127.0.0.1:11435/v1/chat \
+  -H 'content-type: application/json' \
+  -d '{
+    "message": "Can you say that again more simply?",
+    "model": "qwen3:4b",
+    "realtime": true,
+    "realtime_options": {
+      "max_history_messages": 12,
+      "max_tokens": 160
+    }
+  }'
+```
+OpenAI-compatible endpoint:
+```bash
+curl -s http://127.0.0.1:11435/v1/chat/completions \
+  -H 'content-type: application/json' \
+  -d '{
+    "model": "qwen3:4b",
+    "realtime": true,
+    "messages": [
+      {"role": "user", "content": "Give me the short version."}
+    ]
+  }'
+```
+## Context Intake
+Realtime mode builds a compact prompt from:
+- `SOUL.md`
+- `.aiwg/SOUL.md`
+- `.aiwg/voices/default.yaml`
+- `.aiwg/voices/omnius.yaml`
+- the first voice profile in `.aiwg/voices/`
+- caller-provided system context, at lower priority than the realtime contract
+## Behavior Contract
+Realtime responses should:
+- default to one or two speakable sentences
+- ask one focused repair question when ASR text is ambiguous
+- treat the latest user utterance as the live turn
+- avoid long markdown, tables, verbose plans, or implementation narration unless requested
+- avoid hidden reasoning and prompt-policy exposure
+## Client Patterns
+Use `/v1/chat` with `realtime: true` for push-to-talk and transcript-driven clients. Use `/v1/voicechat/ws` for full-duplex voicechat where mic PCM and TTS PCM are exchanged over WebSocket.
+## ASR/TTS Loop
+A push-to-talk client usually follows this loop:
+```ts
+const transcript = await asr.captureFinalUtterance();
+const response = await fetch("http://127.0.0.1:11435/v1/chat", {
+  method: "POST",
+  headers: { "content-type": "application/json" },
+  body: JSON.stringify({
+    message: transcript.text,
+    realtime: true,
+    realtime_options: {
+      max_history_messages: 10,
+      max_tokens: 140,
+      voice_profile: "default",
+    },
+  }),
+}).then((r) => r.json());
+await tts.speak(response.choices?.[0]?.message?.content ?? response.response ?? "");
+```
+A full-duplex client uses `/v1/voicechat/ws`:
+```ts
+// Sample rate announced by the most recent `tts_header` frame. Declared before
+// the handler so it never assigns to or reads an undeclared identifier
+// (a ReferenceError in module code, or when audio arrives before any header).
+let pendingSampleRate;
+const ws = new WebSocket("ws://127.0.0.1:11435/v1/voicechat/ws?user=operator");
+ws.binaryType = "arraybuffer";
+ws.onopen = () => ws.send(JSON.stringify({ type: "start" }));
+ws.onmessage = (event) => {
+  // String frames are JSON control messages; everything else is passed to playPcm as 16-bit samples.
+  if (typeof event.data === "string") {
+    const frame = JSON.parse(event.data);
+    if (frame.type === "agent_text") renderCaption(frame.text);
+    if (frame.type === "tts_header") pendingSampleRate = frame.sampleRate;
+    return;
+  }
+  playPcm(new Int16Array(event.data), pendingSampleRate);
+};
+```
+## Session Handling
+Realtime sessions should keep only the last few spoken turns plus compact speaker/profile context. The realtime flag does not grant higher authority to caller-provided system text; it only selects the short spoken-dialogue contract and context budget.
+Recommended defaults:
+| Option | Default | Reason |
+| --- | ---: | --- |
+| `max_history_messages` | 8-12 | enough for short repairs without dragging old turns forward |
+| `max_tokens` | 120-180 | keeps TTS latency and answer length bounded |
+| `tools` | false unless requested | avoids long action narration in voice mode |
+| `stream` | true for captions, false for simple TTS | choose based on client playback strategy |
+## Conversation Cues
+Realtime mode should handle natural dialogue signals directly:
+- repair phrases like "wait", "say that again", or "shorter" refer to the previous spoken answer
+- short confirmations such as "yes" or "do it" resolve against the latest live question
+- ambiguous ASR should trigger one focused clarification, not a long plan
+- answers should be speakable without Markdown tables unless the user asks for structured output
+## Verification
+Relevant focused tests:
+```bash
+pnpm --filter omnius exec vitest run tests/realtime-mode.test.ts tests/command-registry.test.ts
+```

package/docs/guides/sponsor-and-cohere.md ADDED Viewed

@@ -0,0 +1,123 @@
+# Sponsor And COHERE Mesh
+Sponsor and COHERE let Omnius share inference capacity across peers. Sponsor mode is explicit provider/consumer sharing. COHERE is a distributed cognitive inference mesh.
+## Sponsor Provider Flow
+```text
+/sponsor
+```
+The wizard covers:
+- endpoint selection across all enabled endpoints
+- model selection across local and external endpoints
+- consumer-visible sponsor label and optional link
+- relay/transport settings
+- request, token, and concurrency limits
+- media sponsorship for image, video, sound, and music
+## Sponsor Consumer Flow
+```text
+/endpoint sponsor
+```
+Consumers should see sponsor models as endpoint models. They should not need the sponsor's raw upstream URL or provider secret.
+## Endpoint Passthrough
+Sponsors can expose local models or forward upstream OpenAI-compatible endpoints such as OpenRouter, Groq, Chutes, or vLLM. Model discovery must consolidate models from every endpoint that is currently enabled.
+## Telemetry
+Provider dashboards should report:
+- active and maximum concurrency
+- requests per minute
+- daily tokens
+- per-peer token usage
+- per-model token usage
+- exposed or loaded models
+- media jobs by modality
+Consumer status rows can alternate local and remote peer metrics so remote Ollama/GPU pressure is visible.
+## Media Sponsorship
+Providers can expose:
+- image generation
+- video generation
+- sound-effect generation
+- music generation
+Each modality should enforce provider-side setup, request sanitization, model allowlists, and per-modality limits.
+## COHERE
+```text
+/cohere
+/cohere status
+/cohere models
+/cohere allow <model>
+/cohere deny <model>
+```
+COHERE status should expose daemon health, query counts, error counts, served peers, model exposure, and usage. COHERE endpoint passthrough follows the same rule as sponsor passthrough: do not assume Ollama is the only backend.
+## Failure Handling
+Directory or peer failures should be surfaced as observed failure output and usage-state signals. Avoid masking broken limits, JSON parse failures, or concurrency override conditions behind generic "unreachable" text.
+## Provider Setup Checklist
+Before advertising capacity:
+- confirm every enabled endpoint has a working model list
+- confirm external endpoints return models during wizard step two, not only local Ollama models
+- set model and modality allowlists before turning public serving on
+- set concurrency, requests-per-minute, daily token, and media job limits
+- run `/sponsor status` and verify daily and RPM bars start at zero
+- submit one local test request through the sponsor route before sharing the peer
+The consumer should only receive the sponsor label, optional sponsor link, exposed model names, supported modality metadata, and live utilization. Raw upstream URLs, provider API keys, and internal endpoint names stay provider-side.
+## Status Bars And Limits
+Daily and request-rate bars show used quota over the configured limit. A full bar means the provider should reject or queue new work instead of silently accepting more requests.
+| Metric | Meaning | Enforcement Point |
+| --- | --- | --- |
+| Concurrency | In-flight requests for this sponsor | admission before dispatch |
+| RPM | Rolling requests per minute | admission before dispatch |
+| Daily tokens | Input plus output tokens for the current day | admission and post-response accounting |
+| Peer tokens | Per-consumer token totals | post-response accounting |
+| Model tokens | Per-model totals | post-response accounting |
+| Media jobs | Per-modality job counts and duration/step budgets | modality router |
+If status shows impossible values such as `8/5 concurrent`, treat that as a limit-enforcement bug, not a display issue. The provider should reject the sixth request before it reaches the backend.
+## Directory Failure Cases
+`Unexpected end of JSON input` usually means the directory request returned a partial body, an empty body, or transport-truncated JSON. Triage it as a transport and serialization problem:
+- check the Nexus directory response body before parsing
+- log the peer ID, endpoint, status code, content length, and first parse failure
+- keep the last good directory snapshot until a complete replacement arrives
+- show stale age in `/sponsor status` so consumers can distinguish stale cache from no peers
+- never replace a valid peer cache with a failed parse result
+## COHERE Operations
+COHERE status should be grouped rather than flattened:
+| Section | Fields |
+| --- | --- |
+| Daemon | active flag, pid, uptime, last query, connected peers |
+| Results | answered, errors, sent bytes, received bytes, average latency |
+| Models | exposed, hidden, allowlist state, downloaded inventory |
+| Usage | requests per peer, tokens per peer, tokens per model |
+| Failures | recent raw failure events and peer/model involved |
+Model exposure should be deduplicated before rendering. A status output that repeats `Allowlist` and the same model multiple times indicates the inventory merger is appending display rows instead of merging model identities.