bosun 0.36.3 → 0.36.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/agent-prompts.mjs CHANGED
@@ -125,6 +125,18 @@ const PROMPT_DEFS = [
125
125
  description:
126
126
  "Front-end specialist agent with screenshot-based validation and visual verification.",
127
127
  },
128
+ {
129
+ key: "voiceAgent",
130
+ filename: "voice-agent.md",
131
+ description:
132
+ "Voice agent system prompt for real-time voice sessions with action dispatch.",
133
+ },
134
+ {
135
+ key: "voiceAgentCompact",
136
+ filename: "voice-agent-compact.md",
137
+ description:
138
+ "Compact voice agent prompt for bandwidth-constrained or low-latency sessions.",
139
+ },
128
140
  ];
129
141
 
130
142
  export const AGENT_PROMPT_DEFINITIONS = Object.freeze(
@@ -922,6 +934,89 @@ requirements before the task is marked as done.
922
934
  - Working Directory: {{WORKTREE_PATH}}
923
935
 
924
936
  {{COAUTHOR_INSTRUCTION}}
937
+ `,
938
+ voiceAgent: `# Bosun Voice Agent
939
+
940
+ You are **Bosun**, a voice-first assistant for the VirtEngine development platform.
941
+ You interact with developers through real-time voice conversations and have **full access**
942
+ to the Bosun workspace, task board, coding agents, and system operations.
943
+
944
+ ## Core Capabilities
945
+
946
+ You can do everything Bosun can — through voice. This includes:
947
+ - **Task management**: List, create, update, delete, search, and comment on tasks
948
+ - **Agent delegation**: Send work to coding agents (Codex, Copilot, Claude, Gemini, OpenCode)
949
+ - **Agent steering**: Use /ask (read-only), /agent (code changes), or /plan (architecture)
950
+ - **System monitoring**: Check fleet status, agent health, system configuration
951
+ - **Workspace navigation**: Read files, list directories, search code
952
+ - **Workflow management**: List and inspect workflow templates
953
+ - **Skills & prompts**: Browse the knowledge base and prompt library
954
+
955
+ ## How Actions Work
956
+
957
+ When the user asks you to do something, you perform it by returning a JSON action intent.
958
+ Bosun processes the action directly via JavaScript (no MCP bridge needed) and returns the result.
959
+ You then speak the result to the user naturally.
960
+
961
+ ### Action Format
962
+ \`\`\`json
963
+ { "action": "task.list", "params": { "status": "todo" } }
964
+ \`\`\`
965
+
966
+ ### Multiple Actions
967
+ \`\`\`json
968
+ { "action": "batch", "params": { "actions": [
969
+ { "action": "task.stats", "params": {} },
970
+ { "action": "agent.status", "params": {} }
971
+ ] } }
972
+ \`\`\`
973
+
974
+ {{VOICE_ACTION_MANIFEST}}
975
+
976
+ ## Agent Delegation
977
+
978
+ When users need code written, files modified, bugs debugged, or PRs created:
979
+ 1. Use \`agent.delegate\` with a detailed message
980
+ 2. Choose the right mode: "ask" for questions, "agent" for code changes, "plan" for architecture
981
+ 3. You can specify which executor to use, or let the default handle it
982
+
983
+ Examples (each uses the action named for its mode):
984
+ - "Fix the login bug" → \`{ "action": "agent.code", "params": { "message": "Fix the login bug in auth.mjs" } }\`
985
+ - "How does the config system work?" → \`{ "action": "agent.ask", "params": { "message": "Explain the config system" } }\`
986
+ - "Plan a refactor of the voice module" → \`{ "action": "agent.plan", "params": { "message": "Plan refactoring voice-relay.mjs" } }\`
987
+
988
+ ## Conversation Style
989
+
990
+ - Be **concise and conversational** — this is voice, not text.
991
+ - Lead with the answer, then add details if needed.
992
+ - For numbers, say them naturally: "You have 12 tasks in the backlog."
993
+ - When tasks or agents are busy, keep the user informed.
994
+ - For long outputs (code, logs), summarize the key points vocally.
995
+ - When delegating to an agent, let the user know: "I'm sending that to Codex now."
996
+
997
+ ## Error Handling
998
+
999
+ If an action fails, explain what happened and suggest alternatives.
1000
+ Never show raw error objects — speak the issue naturally.
1001
+
1002
+ ## Security
1003
+
1004
+ - Never expose API keys, tokens, or secrets in conversation.
1005
+ - Execute safe operations (reads, creates, delegates) directly via voice.
1006
+ - Dangerous operations (delete all tasks, force push) require explicit confirmation.
1007
+ `,
1008
+ voiceAgentCompact: `# Bosun Voice (Compact)
1009
+
1010
+ Voice assistant for VirtEngine. Access tasks, agents, workspace.
1011
+
1012
+ Return JSON actions: { "action": "<name>", "params": { ... } }
1013
+
1014
+ {{VOICE_ACTION_MANIFEST}}
1015
+
1016
+ Key actions: task.list, task.create, task.stats, agent.delegate, agent.ask, agent.plan,
1017
+ system.status, workspace.readFile, workspace.search.
1018
+
1019
+ Be concise. Lead with answers. Summarize long outputs.
925
1020
  `,
926
1021
  };
927
1022
 
package/bosun.schema.json CHANGED
@@ -38,7 +38,13 @@
38
38
  "codexEnabled": { "type": "boolean" },
39
39
  "primaryAgent": {
40
40
  "type": "string",
41
- "enum": ["codex-sdk", "copilot-sdk", "claude-sdk", "gemini-sdk", "opencode-sdk"]
41
+ "enum": [
42
+ "codex-sdk",
43
+ "copilot-sdk",
44
+ "claude-sdk",
45
+ "gemini-sdk",
46
+ "opencode-sdk"
47
+ ]
42
48
  },
43
49
  "telegramUiTunnel": {
44
50
  "type": "string",
@@ -147,6 +153,48 @@
147
153
  "default": "auto",
148
154
  "description": "Voice provider: openai/azure (Tier 1 realtime), claude/gemini (Tier 2 voice + provider vision), fallback (browser STT/TTS), auto (detect from env)"
149
155
  },
156
+ "providers": {
157
+ "type": "array",
158
+ "description": "Ordered provider candidates for voice routing/failover. First match with credentials is used.",
159
+ "items": {
160
+ "anyOf": [
161
+ {
162
+ "type": "string",
163
+ "enum": ["openai", "azure", "claude", "gemini", "fallback"]
164
+ },
165
+ {
166
+ "type": "object",
167
+ "additionalProperties": false,
168
+ "properties": {
169
+ "provider": {
170
+ "type": "string",
171
+ "enum": ["openai", "azure", "claude", "gemini", "fallback"]
172
+ },
173
+ "model": { "type": "string" },
174
+ "visionModel": { "type": "string" },
175
+ "voiceId": {
176
+ "type": "string",
177
+ "enum": [
178
+ "alloy",
179
+ "ash",
180
+ "ballad",
181
+ "coral",
182
+ "echo",
183
+ "fable",
184
+ "onyx",
185
+ "nova",
186
+ "sage",
187
+ "shimmer",
188
+ "verse"
189
+ ]
190
+ },
191
+ "azureDeployment": { "type": "string" }
192
+ },
193
+ "required": ["provider"]
194
+ }
195
+ ]
196
+ }
197
+ },
150
198
  "model": {
151
199
  "type": "string",
152
200
  "default": "gpt-4o-realtime-preview-2024-12-17",
@@ -161,10 +209,18 @@
161
209
  "type": "string",
162
210
  "description": "OpenAI API key for Realtime API (overrides OPENAI_API_KEY env)"
163
211
  },
212
+ "openaiAccessToken": {
213
+ "type": "string",
214
+ "description": "OpenAI OAuth access token for voice (OAuth preferred over API key when present)"
215
+ },
164
216
  "azureApiKey": {
165
217
  "type": "string",
166
218
  "description": "Azure OpenAI API key for Realtime API"
167
219
  },
220
+ "azureAccessToken": {
221
+ "type": "string",
222
+ "description": "Azure OAuth/AAD access token for voice realtime"
223
+ },
168
224
  "azureEndpoint": {
169
225
  "type": "string",
170
226
  "description": "Azure OpenAI endpoint URL"
@@ -178,13 +234,33 @@
178
234
  "type": "string",
179
235
  "description": "Anthropic API key for Claude voice/vision provider mode"
180
236
  },
237
+ "claudeAccessToken": {
238
+ "type": "string",
239
+ "description": "Claude OAuth access token for voice provider mode"
240
+ },
181
241
  "geminiApiKey": {
182
242
  "type": "string",
183
243
  "description": "Gemini API key for Gemini voice/vision provider mode"
184
244
  },
245
+ "geminiAccessToken": {
246
+ "type": "string",
247
+ "description": "Gemini OAuth access token for voice provider mode"
248
+ },
185
249
  "voiceId": {
186
250
  "type": "string",
187
- "enum": ["alloy", "ash", "ballad", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer", "verse"],
251
+ "enum": [
252
+ "alloy",
253
+ "ash",
254
+ "ballad",
255
+ "coral",
256
+ "echo",
257
+ "fable",
258
+ "onyx",
259
+ "nova",
260
+ "sage",
261
+ "shimmer",
262
+ "verse"
263
+ ],
188
264
  "default": "alloy",
189
265
  "description": "Voice ID for TTS output"
190
266
  },
@@ -204,9 +280,31 @@
204
280
  "default": "browser",
205
281
  "description": "Fallback when Realtime API unavailable: browser (Web Speech API) or disabled"
206
282
  },
283
+ "failover": {
284
+ "type": "object",
285
+ "additionalProperties": false,
286
+ "properties": {
287
+ "enabled": {
288
+ "type": "boolean",
289
+ "default": true,
290
+ "description": "Enable automatic realtime failover across configured voice providers"
291
+ },
292
+ "maxAttempts": {
293
+ "type": "number",
294
+ "default": 2,
295
+ "description": "Maximum realtime provider attempts per voice session token request"
296
+ }
297
+ }
298
+ },
207
299
  "delegateExecutor": {
208
300
  "type": "string",
209
- "enum": ["codex-sdk", "copilot-sdk", "claude-sdk", "gemini-sdk", "opencode-sdk"],
301
+ "enum": [
302
+ "codex-sdk",
303
+ "copilot-sdk",
304
+ "claude-sdk",
305
+ "gemini-sdk",
306
+ "opencode-sdk"
307
+ ],
210
308
  "description": "Which executor to use for delegate_to_agent calls. Defaults to primaryAgent."
211
309
  }
212
310
  }