npm - bosun - Versions diffs - 0.36.3 → 0.36.4 - Mend

bosun 0.36.3 → 0.36.4

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (26)

package/agent-prompts.mjs +95 -0
package/bosun.schema.json +101 -3
package/desktop/main.mjs +871 -48
package/desktop/preload.mjs +54 -1
package/desktop-shortcut.mjs +90 -11
package/package.json +9 -1
package/setup-web-server.mjs +20 -10
package/setup.mjs +376 -83
package/startup-service.mjs +51 -6
package/ui/app.js +145 -0
package/ui/components/agent-selector.js +145 -1
package/ui/components/chat-view.js +53 -10
package/ui/components/session-list.js +1 -1
package/ui/modules/voice-client-sdk.js +733 -0
package/ui/modules/voice-overlay.js +128 -15
package/ui/setup.html +281 -81
package/ui/styles/sessions.css +38 -2
package/ui/styles.css +14 -0
package/ui/tabs/agents.js +1 -1
package/ui/tabs/chat.js +118 -13
package/ui-server.mjs +207 -3
package/voice-action-dispatcher.mjs +844 -0
package/voice-agents-sdk.mjs +664 -0
package/voice-auth-manager.mjs +164 -0
package/voice-relay.mjs +433 -55
package/voice-tools.mjs +236 -1

package/agent-prompts.mjs CHANGED Viewed

@@ -125,6 +125,18 @@ const PROMPT_DEFS = [
     description:
       "Front-end specialist agent with screenshot-based validation and visual verification.",
   },
+  {
+    key: "voiceAgent",
+    filename: "voice-agent.md",
+    description:
+      "Voice agent system prompt for real-time voice sessions with action dispatch.",
+  },
+  {
+    key: "voiceAgentCompact",
+    filename: "voice-agent-compact.md",
+    description:
+      "Compact voice agent prompt for bandwidth-constrained or low-latency sessions.",
+  },
 ];
 export const AGENT_PROMPT_DEFINITIONS = Object.freeze(
@@ -922,6 +934,89 @@ requirements before the task is marked as done.
 - Working Directory: {{WORKTREE_PATH}}
 {{COAUTHOR_INSTRUCTION}}
+`,
+  voiceAgent: `# Bosun Voice Agent
+You are **Bosun**, a voice-first assistant for the VirtEngine development platform.
+You interact with developers through real-time voice conversations and have **full access**
+to the Bosun workspace, task board, coding agents, and system operations.
+## Core Capabilities
+You can do everything Bosun can — through voice. This includes:
+- **Task management**: List, create, update, delete, search, and comment on tasks
+- **Agent delegation**: Send work to coding agents (Codex, Copilot, Claude, Gemini, OpenCode)
+- **Agent steering**: Use /ask (read-only), /agent (code changes), or /plan (architecture)
+- **System monitoring**: Check fleet status, agent health, system configuration
+- **Workspace navigation**: Read files, list directories, search code
+- **Workflow management**: List and inspect workflow templates
+- **Skills & prompts**: Browse the knowledge base and prompt library
+## How Actions Work
+When the user asks you to do something, you perform it by returning a JSON action intent.
+Bosun processes the action directly via JavaScript (no MCP bridge needed) and returns the result.
+You then speak the result to the user naturally.
+### Action Format
+\`\`\`json
+{ "action": "task.list", "params": { "status": "todo" } }
+\`\`\`
+### Multiple Actions
+\`\`\`json
+{ "action": "batch", "params": { "actions": [
+  { "action": "task.stats", "params": {} },
+  { "action": "agent.status", "params": {} }
+] } }
+\`\`\`
+{{VOICE_ACTION_MANIFEST}}
+## Agent Delegation
+When users need code written, files modified, bugs debugged, or PRs created:
+1. Use \`agent.delegate\` with a detailed message
+2. Choose the right mode: "ask" for questions, "agent" for code changes, "plan" for architecture
+3. You can specify which executor to use, or let the default handle it
+Examples:
+- "Fix the login bug" → \`{ "action": "agent.code", "params": { "message": "Fix the login bug in auth.mjs" } }\`
+- "How does the config system work?" → \`{ "action": "agent.ask", "params": { "message": "Explain the config system" } }\`
+- "Plan a refactor of the voice module" → \`{ "action": "agent.plan", "params": { "message": "Plan refactoring voice-relay.mjs" } }\`
+## Conversation Style
+- Be **concise and conversational** — this is voice, not text.
+- Lead with the answer, then add details if needed.
+- For numbers, say them naturally: "You have 12 tasks in the backlog."
+- When tasks or agents are busy, keep the user informed.
+- For long outputs (code, logs), summarize the key points vocally.
+- When delegating to an agent, let the user know: "I'm sending that to Codex now."
+## Error Handling
+If an action fails, explain what happened and suggest alternatives.
+Never show raw error objects — speak the issue naturally.
+## Security
+- Never expose API keys, tokens, or secrets in conversation.
+- Only execute safe operations via voice (reads, creates, delegates).
+- Dangerous operations (delete all tasks, force push) require explicit confirmation.
+`,
+  voiceAgentCompact: `# Bosun Voice (Compact)
+Voice assistant for VirtEngine. Access tasks, agents, workspace.
+Return JSON actions: { "action": "<name>", "params": { ... } }
+{{VOICE_ACTION_MANIFEST}}
+Key actions: task.list, task.create, task.stats, agent.delegate, agent.ask, agent.plan,
+system.status, workspace.readFile, workspace.search.
+Be concise. Lead with answers. Summarize long outputs.
 `,
 };

package/bosun.schema.json CHANGED Viewed

@@ -38,7 +38,13 @@
     "codexEnabled": { "type": "boolean" },
     "primaryAgent": {
       "type": "string",
-      "enum": ["codex-sdk", "copilot-sdk", "claude-sdk", "gemini-sdk", "opencode-sdk"]
+      "enum": [
+        "codex-sdk",
+        "copilot-sdk",
+        "claude-sdk",
+        "gemini-sdk",
+        "opencode-sdk"
+      ]
     },
     "telegramUiTunnel": {
       "type": "string",
@@ -147,6 +153,48 @@
           "default": "auto",
           "description": "Voice provider: openai/azure (Tier 1 realtime), claude/gemini (Tier 2 voice + provider vision), fallback (browser STT/TTS), auto (detect from env)"
         },
+        "providers": {
+          "type": "array",
+          "description": "Ordered provider candidates for voice routing/failover. First match with credentials is used.",
+          "items": {
+            "anyOf": [
+              {
+                "type": "string",
+                "enum": ["openai", "azure", "claude", "gemini", "fallback"]
+              },
+              {
+                "type": "object",
+                "additionalProperties": false,
+                "properties": {
+                  "provider": {
+                    "type": "string",
+                    "enum": ["openai", "azure", "claude", "gemini", "fallback"]
+                  },
+                  "model": { "type": "string" },
+                  "visionModel": { "type": "string" },
+                  "voiceId": {
+                    "type": "string",
+                    "enum": [
+                      "alloy",
+                      "ash",
+                      "ballad",
+                      "coral",
+                      "echo",
+                      "fable",
+                      "onyx",
+                      "nova",
+                      "sage",
+                      "shimmer",
+                      "verse"
+                    ]
+                  },
+                  "azureDeployment": { "type": "string" }
+                },
+                "required": ["provider"]
+              }
+            ]
+          }
+        },
         "model": {
           "type": "string",
           "default": "gpt-4o-realtime-preview-2024-12-17",
@@ -161,10 +209,18 @@
           "type": "string",
           "description": "OpenAI API key for Realtime API (overrides OPENAI_API_KEY env)"
         },
+        "openaiAccessToken": {
+          "type": "string",
+          "description": "OpenAI OAuth access token for voice (OAuth preferred over API key when present)"
+        },
         "azureApiKey": {
           "type": "string",
           "description": "Azure OpenAI API key for Realtime API"
         },
+        "azureAccessToken": {
+          "type": "string",
+          "description": "Azure OAuth/AAD access token for voice realtime"
+        },
         "azureEndpoint": {
           "type": "string",
           "description": "Azure OpenAI endpoint URL"
@@ -178,13 +234,33 @@
           "type": "string",
           "description": "Anthropic API key for Claude voice/vision provider mode"
         },
+        "claudeAccessToken": {
+          "type": "string",
+          "description": "Claude OAuth access token for voice provider mode"
+        },
         "geminiApiKey": {
           "type": "string",
           "description": "Gemini API key for Gemini voice/vision provider mode"
         },
+        "geminiAccessToken": {
+          "type": "string",
+          "description": "Gemini OAuth access token for voice provider mode"
+        },
         "voiceId": {
           "type": "string",
-          "enum": ["alloy", "ash", "ballad", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer", "verse"],
+          "enum": [
+            "alloy",
+            "ash",
+            "ballad",
+            "coral",
+            "echo",
+            "fable",
+            "onyx",
+            "nova",
+            "sage",
+            "shimmer",
+            "verse"
+          ],
           "default": "alloy",
           "description": "Voice ID for TTS output"
         },
@@ -204,9 +280,31 @@
           "default": "browser",
           "description": "Fallback when Realtime API unavailable: browser (Web Speech API) or disabled"
         },
+        "failover": {
+          "type": "object",
+          "additionalProperties": false,
+          "properties": {
+            "enabled": {
+              "type": "boolean",
+              "default": true,
+              "description": "Enable automatic realtime failover across configured voice providers"
+            },
+            "maxAttempts": {
+              "type": "number",
+              "default": 2,
+              "description": "Maximum realtime provider attempts per voice session token request"
+            }
+          }
+        },
         "delegateExecutor": {
           "type": "string",
-          "enum": ["codex-sdk", "copilot-sdk", "claude-sdk", "gemini-sdk", "opencode-sdk"],
+          "enum": [
+            "codex-sdk",
+            "copilot-sdk",
+            "claude-sdk",
+            "gemini-sdk",
+            "opencode-sdk"
+          ],
           "description": "Which executor to use for delegate_to_agent calls. Defaults to primaryAgent."
         }
       }