bosun 0.36.3 → 0.36.4
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- package/agent-prompts.mjs +95 -0
- package/bosun.schema.json +101 -3
- package/desktop/main.mjs +871 -48
- package/desktop/preload.mjs +54 -1
- package/desktop-shortcut.mjs +90 -11
- package/package.json +9 -1
- package/setup-web-server.mjs +20 -10
- package/setup.mjs +376 -83
- package/startup-service.mjs +51 -6
- package/ui/app.js +145 -0
- package/ui/components/agent-selector.js +145 -1
- package/ui/components/chat-view.js +53 -10
- package/ui/components/session-list.js +1 -1
- package/ui/modules/voice-client-sdk.js +733 -0
- package/ui/modules/voice-overlay.js +128 -15
- package/ui/setup.html +281 -81
- package/ui/styles/sessions.css +38 -2
- package/ui/styles.css +14 -0
- package/ui/tabs/agents.js +1 -1
- package/ui/tabs/chat.js +118 -13
- package/ui-server.mjs +207 -3
- package/voice-action-dispatcher.mjs +844 -0
- package/voice-agents-sdk.mjs +664 -0
- package/voice-auth-manager.mjs +164 -0
- package/voice-relay.mjs +433 -55
- package/voice-tools.mjs +236 -1
package/agent-prompts.mjs
CHANGED
|
@@ -125,6 +125,18 @@ const PROMPT_DEFS = [
|
|
|
125
125
|
description:
|
|
126
126
|
"Front-end specialist agent with screenshot-based validation and visual verification.",
|
|
127
127
|
},
|
|
128
|
+
{
|
|
129
|
+
key: "voiceAgent",
|
|
130
|
+
filename: "voice-agent.md",
|
|
131
|
+
description:
|
|
132
|
+
"Voice agent system prompt for real-time voice sessions with action dispatch.",
|
|
133
|
+
},
|
|
134
|
+
{
|
|
135
|
+
key: "voiceAgentCompact",
|
|
136
|
+
filename: "voice-agent-compact.md",
|
|
137
|
+
description:
|
|
138
|
+
"Compact voice agent prompt for bandwidth-constrained or low-latency sessions.",
|
|
139
|
+
},
|
|
128
140
|
];
|
|
129
141
|
|
|
130
142
|
export const AGENT_PROMPT_DEFINITIONS = Object.freeze(
|
|
@@ -922,6 +934,89 @@ requirements before the task is marked as done.
|
|
|
922
934
|
- Working Directory: {{WORKTREE_PATH}}
|
|
923
935
|
|
|
924
936
|
{{COAUTHOR_INSTRUCTION}}
|
|
937
|
+
`,
|
|
938
|
+
voiceAgent: `# Bosun Voice Agent
|
|
939
|
+
|
|
940
|
+
You are **Bosun**, a voice-first assistant for the VirtEngine development platform.
|
|
941
|
+
You interact with developers through real-time voice conversations and have **full access**
|
|
942
|
+
to the Bosun workspace, task board, coding agents, and system operations.
|
|
943
|
+
|
|
944
|
+
## Core Capabilities
|
|
945
|
+
|
|
946
|
+
You can do everything Bosun can — through voice. This includes:
|
|
947
|
+
- **Task management**: List, create, update, delete, search, and comment on tasks
|
|
948
|
+
- **Agent delegation**: Send work to coding agents (Codex, Copilot, Claude, Gemini, OpenCode)
|
|
949
|
+
- **Agent steering**: Use /ask (read-only), /agent (code changes), or /plan (architecture)
|
|
950
|
+
- **System monitoring**: Check fleet status, agent health, system configuration
|
|
951
|
+
- **Workspace navigation**: Read files, list directories, search code
|
|
952
|
+
- **Workflow management**: List and inspect workflow templates
|
|
953
|
+
- **Skills & prompts**: Browse the knowledge base and prompt library
|
|
954
|
+
|
|
955
|
+
## How Actions Work
|
|
956
|
+
|
|
957
|
+
When the user asks you to do something, you perform it by returning a JSON action intent.
|
|
958
|
+
Bosun processes the action directly via JavaScript (no MCP bridge needed) and returns the result.
|
|
959
|
+
You then speak the result to the user naturally.
|
|
960
|
+
|
|
961
|
+
### Action Format
|
|
962
|
+
\`\`\`json
|
|
963
|
+
{ "action": "task.list", "params": { "status": "todo" } }
|
|
964
|
+
\`\`\`
|
|
965
|
+
|
|
966
|
+
### Multiple Actions
|
|
967
|
+
\`\`\`json
|
|
968
|
+
{ "action": "batch", "params": { "actions": [
|
|
969
|
+
{ "action": "task.stats", "params": {} },
|
|
970
|
+
{ "action": "agent.status", "params": {} }
|
|
971
|
+
] } }
|
|
972
|
+
\`\`\`
|
|
973
|
+
|
|
974
|
+
{{VOICE_ACTION_MANIFEST}}
|
|
975
|
+
|
|
976
|
+
## Agent Delegation
|
|
977
|
+
|
|
978
|
+
When users need code written, files modified, bugs debugged, or PRs created:
|
|
979
|
+
1. Use \`agent.delegate\` with a detailed message
|
|
980
|
+
2. Choose the right mode: "ask" for questions, "agent" for code changes, "plan" for architecture
|
|
981
|
+
3. You can specify which executor to use, or let the default handle it
|
|
982
|
+
|
|
983
|
+
Examples:
|
|
984
|
+
- "Fix the login bug" → \`{ "action": "agent.code", "params": { "message": "Fix the login bug in auth.mjs" } }\`
|
|
985
|
+
- "How does the config system work?" → \`{ "action": "agent.ask", "params": { "message": "Explain the config system" } }\`
|
|
986
|
+
- "Plan a refactor of the voice module" → \`{ "action": "agent.plan", "params": { "message": "Plan refactoring voice-relay.mjs" } }\`
|
|
987
|
+
|
|
988
|
+
## Conversation Style
|
|
989
|
+
|
|
990
|
+
- Be **concise and conversational** — this is voice, not text.
|
|
991
|
+
- Lead with the answer, then add details if needed.
|
|
992
|
+
- For numbers, say them naturally: "You have 12 tasks in the backlog."
|
|
993
|
+
- When tasks or agents are busy, keep the user informed.
|
|
994
|
+
- For long outputs (code, logs), summarize the key points vocally.
|
|
995
|
+
- When delegating to an agent, let the user know: "I'm sending that to Codex now."
|
|
996
|
+
|
|
997
|
+
## Error Handling
|
|
998
|
+
|
|
999
|
+
If an action fails, explain what happened and suggest alternatives.
|
|
1000
|
+
Never show raw error objects — speak the issue naturally.
|
|
1001
|
+
|
|
1002
|
+
## Security
|
|
1003
|
+
|
|
1004
|
+
- Never expose API keys, tokens, or secrets in conversation.
|
|
1005
|
+
- Only execute safe operations via voice (reads, creates, delegates).
|
|
1006
|
+
- Dangerous operations (delete all tasks, force push) require explicit confirmation.
|
|
1007
|
+
`,
|
|
1008
|
+
voiceAgentCompact: `# Bosun Voice (Compact)
|
|
1009
|
+
|
|
1010
|
+
Voice assistant for VirtEngine. Access tasks, agents, workspace.
|
|
1011
|
+
|
|
1012
|
+
Return JSON actions: { "action": "<name>", "params": { ... } }
|
|
1013
|
+
|
|
1014
|
+
{{VOICE_ACTION_MANIFEST}}
|
|
1015
|
+
|
|
1016
|
+
Key actions: task.list, task.create, task.stats, agent.delegate, agent.ask, agent.plan,
|
|
1017
|
+
system.status, workspace.readFile, workspace.search.
|
|
1018
|
+
|
|
1019
|
+
Be concise. Lead with answers. Summarize long outputs.
|
|
925
1020
|
`,
|
|
926
1021
|
};
|
|
927
1022
|
|
package/bosun.schema.json
CHANGED
|
@@ -38,7 +38,13 @@
|
|
|
38
38
|
"codexEnabled": { "type": "boolean" },
|
|
39
39
|
"primaryAgent": {
|
|
40
40
|
"type": "string",
|
|
41
|
-
"enum": [
|
|
41
|
+
"enum": [
|
|
42
|
+
"codex-sdk",
|
|
43
|
+
"copilot-sdk",
|
|
44
|
+
"claude-sdk",
|
|
45
|
+
"gemini-sdk",
|
|
46
|
+
"opencode-sdk"
|
|
47
|
+
]
|
|
42
48
|
},
|
|
43
49
|
"telegramUiTunnel": {
|
|
44
50
|
"type": "string",
|
|
@@ -147,6 +153,48 @@
|
|
|
147
153
|
"default": "auto",
|
|
148
154
|
"description": "Voice provider: openai/azure (Tier 1 realtime), claude/gemini (Tier 2 voice + provider vision), fallback (browser STT/TTS), auto (detect from env)"
|
|
149
155
|
},
|
|
156
|
+
"providers": {
|
|
157
|
+
"type": "array",
|
|
158
|
+
"description": "Ordered provider candidates for voice routing/failover. First match with credentials is used.",
|
|
159
|
+
"items": {
|
|
160
|
+
"anyOf": [
|
|
161
|
+
{
|
|
162
|
+
"type": "string",
|
|
163
|
+
"enum": ["openai", "azure", "claude", "gemini", "fallback"]
|
|
164
|
+
},
|
|
165
|
+
{
|
|
166
|
+
"type": "object",
|
|
167
|
+
"additionalProperties": false,
|
|
168
|
+
"properties": {
|
|
169
|
+
"provider": {
|
|
170
|
+
"type": "string",
|
|
171
|
+
"enum": ["openai", "azure", "claude", "gemini", "fallback"]
|
|
172
|
+
},
|
|
173
|
+
"model": { "type": "string" },
|
|
174
|
+
"visionModel": { "type": "string" },
|
|
175
|
+
"voiceId": {
|
|
176
|
+
"type": "string",
|
|
177
|
+
"enum": [
|
|
178
|
+
"alloy",
|
|
179
|
+
"ash",
|
|
180
|
+
"ballad",
|
|
181
|
+
"coral",
|
|
182
|
+
"echo",
|
|
183
|
+
"fable",
|
|
184
|
+
"onyx",
|
|
185
|
+
"nova",
|
|
186
|
+
"sage",
|
|
187
|
+
"shimmer",
|
|
188
|
+
"verse"
|
|
189
|
+
]
|
|
190
|
+
},
|
|
191
|
+
"azureDeployment": { "type": "string" }
|
|
192
|
+
},
|
|
193
|
+
"required": ["provider"]
|
|
194
|
+
}
|
|
195
|
+
]
|
|
196
|
+
}
|
|
197
|
+
},
|
|
150
198
|
"model": {
|
|
151
199
|
"type": "string",
|
|
152
200
|
"default": "gpt-4o-realtime-preview-2024-12-17",
|
|
@@ -161,10 +209,18 @@
|
|
|
161
209
|
"type": "string",
|
|
162
210
|
"description": "OpenAI API key for Realtime API (overrides OPENAI_API_KEY env)"
|
|
163
211
|
},
|
|
212
|
+
"openaiAccessToken": {
|
|
213
|
+
"type": "string",
|
|
214
|
+
"description": "OpenAI OAuth access token for voice (OAuth preferred over API key when present)"
|
|
215
|
+
},
|
|
164
216
|
"azureApiKey": {
|
|
165
217
|
"type": "string",
|
|
166
218
|
"description": "Azure OpenAI API key for Realtime API"
|
|
167
219
|
},
|
|
220
|
+
"azureAccessToken": {
|
|
221
|
+
"type": "string",
|
|
222
|
+
"description": "Azure OAuth/AAD access token for voice realtime"
|
|
223
|
+
},
|
|
168
224
|
"azureEndpoint": {
|
|
169
225
|
"type": "string",
|
|
170
226
|
"description": "Azure OpenAI endpoint URL"
|
|
@@ -178,13 +234,33 @@
|
|
|
178
234
|
"type": "string",
|
|
179
235
|
"description": "Anthropic API key for Claude voice/vision provider mode"
|
|
180
236
|
},
|
|
237
|
+
"claudeAccessToken": {
|
|
238
|
+
"type": "string",
|
|
239
|
+
"description": "Claude OAuth access token for voice provider mode"
|
|
240
|
+
},
|
|
181
241
|
"geminiApiKey": {
|
|
182
242
|
"type": "string",
|
|
183
243
|
"description": "Gemini API key for Gemini voice/vision provider mode"
|
|
184
244
|
},
|
|
245
|
+
"geminiAccessToken": {
|
|
246
|
+
"type": "string",
|
|
247
|
+
"description": "Gemini OAuth access token for voice provider mode"
|
|
248
|
+
},
|
|
185
249
|
"voiceId": {
|
|
186
250
|
"type": "string",
|
|
187
|
-
"enum": [
|
|
251
|
+
"enum": [
|
|
252
|
+
"alloy",
|
|
253
|
+
"ash",
|
|
254
|
+
"ballad",
|
|
255
|
+
"coral",
|
|
256
|
+
"echo",
|
|
257
|
+
"fable",
|
|
258
|
+
"onyx",
|
|
259
|
+
"nova",
|
|
260
|
+
"sage",
|
|
261
|
+
"shimmer",
|
|
262
|
+
"verse"
|
|
263
|
+
],
|
|
188
264
|
"default": "alloy",
|
|
189
265
|
"description": "Voice ID for TTS output"
|
|
190
266
|
},
|
|
@@ -204,9 +280,31 @@
|
|
|
204
280
|
"default": "browser",
|
|
205
281
|
"description": "Fallback when Realtime API unavailable: browser (Web Speech API) or disabled"
|
|
206
282
|
},
|
|
283
|
+
"failover": {
|
|
284
|
+
"type": "object",
|
|
285
|
+
"additionalProperties": false,
|
|
286
|
+
"properties": {
|
|
287
|
+
"enabled": {
|
|
288
|
+
"type": "boolean",
|
|
289
|
+
"default": true,
|
|
290
|
+
"description": "Enable automatic realtime failover across configured voice providers"
|
|
291
|
+
},
|
|
292
|
+
"maxAttempts": {
|
|
293
|
+
"type": "number",
|
|
294
|
+
"default": 2,
|
|
295
|
+
"description": "Maximum realtime provider attempts per voice session token request"
|
|
296
|
+
}
|
|
297
|
+
}
|
|
298
|
+
},
|
|
207
299
|
"delegateExecutor": {
|
|
208
300
|
"type": "string",
|
|
209
|
-
"enum": [
|
|
301
|
+
"enum": [
|
|
302
|
+
"codex-sdk",
|
|
303
|
+
"copilot-sdk",
|
|
304
|
+
"claude-sdk",
|
|
305
|
+
"gemini-sdk",
|
|
306
|
+
"opencode-sdk"
|
|
307
|
+
],
|
|
210
308
|
"description": "Which executor to use for delegate_to_agent calls. Defaults to primaryAgent."
|
|
211
309
|
}
|
|
212
310
|
}
|