npm - bosun - Versions diffs - 0.37.0 → 0.37.2 - Mend

bosun 0.37.0 → 0.37.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (43)

package/.env.example +4 -1
package/agent-tool-config.mjs +338 -0
package/bosun-skills.mjs +59 -4
package/bosun.schema.json +1 -1
package/desktop/launch.mjs +18 -0
package/desktop/main.mjs +52 -13
package/fleet-coordinator.mjs +34 -1
package/kanban-adapter.mjs +30 -3
package/library-manager.mjs +66 -0
package/maintenance.mjs +30 -5
package/monitor.mjs +56 -0
package/package.json +4 -1
package/setup-web-server.mjs +73 -12
package/setup.mjs +3 -3
package/ui/app.js +40 -3
package/ui/components/session-list.js +25 -7
package/ui/components/workspace-switcher.js +48 -1
package/ui/demo.html +176 -0
package/ui/modules/mic-track-registry.js +83 -0
package/ui/modules/settings-schema.js +4 -1
package/ui/modules/state.js +25 -0
package/ui/modules/streaming.js +1 -1
package/ui/modules/voice-barge-in.js +27 -0
package/ui/modules/voice-client-sdk.js +268 -42
package/ui/modules/voice-client.js +665 -61
package/ui/modules/voice-overlay.js +829 -47
package/ui/setup.html +151 -9
package/ui/styles.css +258 -0
package/ui/tabs/chat.js +11 -0
package/ui/tabs/library.js +890 -15
package/ui/tabs/settings.js +51 -11
package/ui/tabs/telemetry.js +327 -105
package/ui/tabs/workflows.js +86 -0
package/ui-server.mjs +1201 -107
package/voice-action-dispatcher.mjs +81 -0
package/voice-agents-sdk.mjs +2 -2
package/voice-relay.mjs +131 -14
package/voice-tools.mjs +475 -9
package/workflow-engine.mjs +54 -0
package/workflow-nodes.mjs +177 -28
package/workflow-templates/github.mjs +205 -94
package/workflow-templates/task-batch.mjs +247 -0
package/workflow-templates.mjs +15 -0

package/voice-action-dispatcher.mjs CHANGED Viewed

@@ -746,6 +746,83 @@ registerAction("workflow.run_get", async (params) => {
   return run;
 });
+// Voice action "workflow.save": creates or updates a workflow definition by
+// handing a normalized payload to the workflow engine's save() method.
+// params: { workflowId?, definition } — definition must be a non-array object.
+// Returns a summary { id, name, enabled, nodeCount, edgeCount } built from the
+// value engine.save() returns.
+// Throws when the engine (or its save method) is missing, or when definition
+// is absent, not an object, or an array.
+registerAction("workflow.save", async (params) => {
+  const wfEngineMod = await getWorkflowEngineModule();
+  // If the module does not export getWorkflowEngine as a function, engine is
+  // null and the guard below reports the engine as unavailable.
+  const engine = typeof wfEngineMod.getWorkflowEngine === "function"
+    ? wfEngineMod.getWorkflowEngine()
+    : null;
+  if (!engine?.save) throw new Error("Workflow engine is unavailable");
+  const def = params?.definition;
+  if (!def || typeof def !== "object" || Array.isArray(def)) {
+    throw new Error("definition object is required");
+  }
+  // An explicit params.workflowId takes precedence over the id embedded in
+  // the definition; an empty result leaves payload.id as the definition had it.
+  const workflowId = String(params.workflowId || def.id || "").trim();
+  // Shallow copy so the defaults applied below do not modify the caller's
+  // definition object (nested values are still shared).
+  const payload = { ...def };
+  if (workflowId) payload.id = workflowId;
+  // Fill in defaults for missing or malformed structural fields.
+  if (!Array.isArray(payload.nodes)) payload.nodes = [];
+  if (!Array.isArray(payload.edges)) payload.edges = [];
+  if (!payload.name) payload.name = payload.id || "Voice Workflow";
+  // NOTE(review): the result is read synchronously here, whereas the sibling
+  // actions await engine.delete and engine.execute — confirm engine.save is
+  // not async.
+  const saved = engine.save(payload);
+  return {
+    id: saved.id,
+    name: saved.name || saved.id,
+    // Treated as enabled unless the saved definition says enabled === false.
+    enabled: saved.enabled !== false,
+    nodeCount: Array.isArray(saved.nodes) ? saved.nodes.length : 0,
+    edgeCount: Array.isArray(saved.edges) ? saved.edges.length : 0,
+  };
+});
+// Voice action "workflow.delete": removes a workflow definition via the
+// workflow engine's delete() method.
+// params: { workflowId } — params.id is accepted as a fallback key.
+// Returns { ok, workflowId }, where ok is the boolean-coerced result of
+// engine.delete().
+// Throws when the engine (or its delete method) is missing, or when no
+// non-blank id is supplied.
+registerAction("workflow.delete", async (params) => {
+  const wfEngineMod = await getWorkflowEngineModule();
+  const engine = typeof wfEngineMod.getWorkflowEngine === "function"
+    ? wfEngineMod.getWorkflowEngine()
+    : null;
+  if (!engine?.delete) throw new Error("Workflow engine is unavailable");
+  // NOTE(review): params is dereferenced without optional chaining, so a
+  // missing params object raises a TypeError before the "workflowId is
+  // required" check can run.
+  const workflowId = String(params.workflowId || params.id || "").trim();
+  if (!workflowId) throw new Error("workflowId is required");
+  const deleted = await engine.delete(workflowId);
+  return { ok: Boolean(deleted), workflowId };
+});
+// Voice action "workflow.execute": runs a workflow immediately via the
+// workflow engine's execute() method and reports a compact result.
+// params: { workflowId, input?, force? } — params.id is accepted as a
+// fallback key for the workflow id.
+// Returns { runId, workflowId, status, errorCount }.
+// Throws when the engine (or its execute method) is missing, or when no
+// non-blank id is supplied.
+registerAction("workflow.execute", async (params) => {
+  const wfEngineMod = await getWorkflowEngineModule();
+  const engine = typeof wfEngineMod.getWorkflowEngine === "function"
+    ? wfEngineMod.getWorkflowEngine()
+    : null;
+  if (!engine?.execute) throw new Error("Workflow engine is unavailable");
+  // NOTE(review): params is dereferenced without optional chaining here,
+  // unlike params?.input below — a missing params object raises a TypeError
+  // before the "workflowId is required" check can run.
+  const workflowId = String(params.workflowId || params.id || "").trim();
+  if (!workflowId) throw new Error("workflowId is required");
+  // Any truthy value whose typeof is "object" is forwarded as input (this
+  // includes arrays); everything else becomes an empty object.
+  const input = params?.input && typeof params.input === "object" ? params.input : {};
+  // force is honored only when it is strictly the boolean true.
+  const force = params?.force === true;
+  const ctx = await engine.execute(workflowId, input, { force });
+  return {
+    runId: ctx?.id || null,
+    workflowId,
+    // Status is inferred solely from ctx.errors: a non-empty array means
+    // "failed"; anything else, including a missing ctx, is reported as
+    // "completed".
+    status: Array.isArray(ctx?.errors) && ctx.errors.length > 0 ? "failed" : "completed",
+    errorCount: Array.isArray(ctx?.errors) ? ctx.errors.length : 0,
+  };
+});
+// Voice action "workflow.retry": retries an existing workflow run via the
+// workflow engine's retryRun() method.
+// params: { runId, mode? } — params.id is accepted as a fallback key; mode is
+// "from_failed" (default) or "from_scratch".
+// Returns whatever engine.retryRun(runId, { mode }) returns.
+// Throws when retryRun is unavailable, when no non-blank run id is supplied,
+// when the run cannot be found, or when mode is "from_failed" but the run's
+// status is not "failed".
+registerAction("workflow.retry", async (params) => {
+  const wfEngineMod = await getWorkflowEngineModule();
+  const engine = typeof wfEngineMod.getWorkflowEngine === "function"
+    ? wfEngineMod.getWorkflowEngine()
+    : null;
+  if (!engine?.retryRun) throw new Error("Workflow retry is unavailable");
+  // NOTE(review): params is dereferenced without optional chaining, so a
+  // missing params object raises a TypeError before the "runId is required"
+  // check can run.
+  const runId = String(params.runId || params.id || "").trim();
+  if (!runId) throw new Error("runId is required");
+  // Only the exact value "from_scratch" (after trimming and lowercasing)
+  // selects a full restart; any other value falls back to "from_failed".
+  const mode = String(params.mode || "from_failed").trim().toLowerCase() === "from_scratch"
+    ? "from_scratch"
+    : "from_failed";
+  // When the engine has no getRunDetail method, currentRun is null and the
+  // run is reported as not found.
+  const currentRun = engine?.getRunDetail ? engine.getRunDetail(runId) : null;
+  if (!currentRun) throw new Error(`Workflow run "${runId}" not found`);
+  const currentStatus = String(currentRun?.status || "").trim().toLowerCase();
+  // Resuming from the failed point is rejected unless the run's status is
+  // "failed"; "from_scratch" skips this check.
+  if (mode === "from_failed" && currentStatus !== "failed") {
+    throw new Error(`retry mode "from_failed" requires a failed run (current=${currentRun?.status || "unknown"})`);
+  }
+  return engine.retryRun(runId, { mode });
+});
 // ── Skill/prompt actions ────────────────────────────────────────────────────
 registerAction("skill.list", async () => {
@@ -860,6 +937,10 @@ export function getActionManifest() {
     { action: "workflow.saved_list", description: "List installed workflow definitions. params: {}" },
     { action: "workflow.runs", description: "List workflow run history. params: { workflowId?, status?, limit? }" },
     { action: "workflow.run_get", description: "Get a workflow run detail. params: { runId }" },
+    { action: "workflow.save", description: "Create/update a workflow definition. params: { workflowId?, definition }" },
+    { action: "workflow.delete", description: "Delete a workflow definition. params: { workflowId }" },
+    { action: "workflow.execute", description: "Execute a workflow now. params: { workflowId, input?, force? }" },
+    { action: "workflow.retry", description: "Retry a workflow run. params: { runId, mode?: from_failed|from_scratch }" },
     { action: "skill.list", description: "List available skills. params: {}" },
     { action: "prompt.list", description: "List agent prompt definitions. params: {}" },
     { action: "prompt.get", description: "Get a prompt template. params: { key }" },

package/voice-agents-sdk.mjs CHANGED Viewed

@@ -275,7 +275,7 @@ export async function createRealtimeSession(agent, provider, config = {}, option
   ).trim() || "alloy";
   const turnDetection = String(
-    options.turnDetection || config.turnDetection || "server_vad",
+    options.turnDetection || config.turnDetection || "semantic_vad",
   ).trim();
   const sessionConfig = {
@@ -668,7 +668,7 @@ export async function getClientSdkConfig(voiceConfig = {}) {
     tier: availability.info?.tier || 2,
     model: configuredModel,
     voiceId: voiceConfig.voiceId || "alloy",
-    turnDetection: voiceConfig.turnDetection || "server_vad",
+    turnDetection: voiceConfig.turnDetection || "semantic_vad",
     fallbackReason,
   };
 }

package/voice-relay.mjs CHANGED Viewed

@@ -26,7 +26,7 @@ const OPENAI_REALTIME_MODEL = "gpt-realtime-1.5";
 const OPENAI_AUDIO_RESPONSES_MODEL = "gpt-audio-1.5";
 const OPENAI_RESPONSES_URL = "https://api.openai.com/v1/responses";
 const OPENAI_DEFAULT_VISION_MODEL = "gpt-4.1-nano";
-const REALTIME_TRANSCRIBE_MODEL = "gpt-4o-transcribe";
+const DEFAULT_TRANSCRIBE_MODEL = "gpt-4o-transcribe";
 const AZURE_API_VERSION = "2025-04-01-preview";
@@ -73,6 +73,8 @@ function buildOpenAIRealtimeWebRtcUrl(model, overrideBase = "") {
 // GA models (gpt-realtime, gpt-realtime-1.5, gpt-realtime-mini, etc.) use /openai/v1/ paths.
 // Preview models (for example gpt-4o-realtime-preview-*) use legacy /openai/realtimeapi/ paths.
+// NOTE: Azure AI Foundry "Global Standard" deployments may only support preview paths
+// even for GA model names.  We try GA first.  If it 404s the caller falls back to preview.
 function isAzureGaProtocol(deployment) {
   const d = String(deployment || "").toLowerCase().trim();
   return d.startsWith("gpt-realtime") && !d.startsWith("gpt-4o-realtime");
@@ -93,6 +95,13 @@ function normalizeAzureRealtimeDeployment(rawDeployment) {
   return deployment;
 }
+// Parses a tri-state boolean setting.
+// Returns null when rawValue is null/undefined or stringifies to a blank
+// string, false when the trimmed, lowercased string is one of "0", "false",
+// "no" or "off", and true for every other value.
+function parseOptionalBoolean(rawValue) {
+  // Loose == null matches both null and undefined.
+  if (rawValue == null) return null;
+  const normalized = String(rawValue).trim().toLowerCase();
+  if (!normalized) return null;
+  // Anything not on the explicit "off" list counts as enabled.
+  return !["0", "false", "no", "off"].includes(normalized);
+}
 function isOpenAIAudioResponsesModel(rawModel) {
   const model = String(rawModel || "").trim().toLowerCase();
   return /^gpt-audio/.test(model);
@@ -305,18 +314,42 @@ function sanitizeVoiceCallContext(context = {}) {
   const rawExecutor = String(context?.executor || "").trim().toLowerCase();
   const rawMode = String(context?.mode || "").trim().toLowerCase();
   const rawModel = String(context?.model || "").trim();
+  const rawVoiceAgentId = String(context?.voiceAgentId || "").trim();
+  const rawVoiceAgentName = String(context?.voiceAgentName || "").trim();
+  const rawVoiceAgentInstructions = String(context?.voiceAgentInstructions || "").trim();
+  const rawVoiceToolCapabilityPrompt = String(context?.voiceToolCapabilityPrompt || "").trim();
+  const rawVoiceAgentSkills = Array.isArray(context?.voiceAgentSkills)
+    ? context.voiceAgentSkills.map((s) => String(s || "").trim()).filter(Boolean)
+    : [];
+  const rawEnabledMcpServers = Array.isArray(context?.enabledMcpServers)
+    ? context.enabledMcpServers.map((s) => String(s || "").trim()).filter(Boolean)
+    : [];
   return {
     sessionId: rawSessionId || null,
     executor: VALID_EXECUTORS.has(rawExecutor) ? rawExecutor : null,
     mode: VALID_AGENT_MODES.has(rawMode) ? rawMode : null,
     model: rawModel || null,
+    voiceAgentId: rawVoiceAgentId || null,
+    voiceAgentName: rawVoiceAgentName || null,
+    voiceAgentInstructions: rawVoiceAgentInstructions || null,
+    voiceToolCapabilityPrompt: rawVoiceToolCapabilityPrompt || null,
+    voiceAgentSkills: rawVoiceAgentSkills,
+    enabledMcpServers: rawEnabledMcpServers,
   };
 }
 async function buildSessionScopedInstructions(baseInstructions, callContext = {}) {
   const context = sanitizeVoiceCallContext(callContext);
-  if (!context.sessionId && !context.executor && !context.mode && !context.model) {
+  if (
+    !context.sessionId
+    && !context.executor
+    && !context.mode
+    && !context.model
+    && !context.voiceAgentId
+    && !context.voiceAgentInstructions
+    && !context.voiceToolCapabilityPrompt
+  ) {
     return baseInstructions;
   }
@@ -381,6 +414,22 @@ async function buildSessionScopedInstructions(baseInstructions, callContext = {}
     "",
     "## Bosun Voice Call Context",
     `Active chat session id: ${context.sessionId || "none"}.`,
+    context.voiceAgentId
+      ? `Active voice agent id: ${context.voiceAgentId}.`
+      : "Active voice agent id: default.",
+    context.voiceAgentName
+      ? `Active voice agent name: ${context.voiceAgentName}.`
+      : "",
+    context.voiceAgentInstructions
+      ? `Voice agent instruction emphasis: ${context.voiceAgentInstructions}`
+      : "",
+    context.voiceToolCapabilityPrompt || "",
+    context.enabledMcpServers?.length
+      ? `Enabled MCP servers for this session: ${context.enabledMcpServers.join(", ")}.`
+      : "",
+    context.voiceAgentSkills?.length
+      ? `Voice agent skills: ${context.voiceAgentSkills.join(", ")}.`
+      : "",
     context.executor
       ? `Preferred executor for delegated work: ${context.executor}.`
       : "Preferred executor for delegated work: use configured default.",
@@ -783,6 +832,12 @@ export function getVoiceConfig(forceReload = false) {
       azureDeployment: String(ep.deployment || ep.azureDeployment || "").trim() || null,
       voiceId: String(ep.voiceId || "").trim() || null,
       visionModel: String(ep.visionModel || "").trim() || null,
+      transcriptionModel: String(ep.transcriptionModel || "").trim() || null,
+      // Azure defaults to transcription OFF unless explicitly enabled because
+      // item-level ASR failures can spam and destabilize long-running calls.
+      transcriptionEnabled: String(ep.provider || "").toLowerCase() === "azure"
+        ? (ep.transcriptionEnabled === true)
+        : (ep.transcriptionEnabled !== false),
       role: String(ep.role || "primary").trim() || "primary",
       weight: typeof ep.weight === "number" ? ep.weight : 100,
       name: String(ep.name || "").trim() || null,
@@ -852,7 +907,7 @@ export function getVoiceConfig(forceReload = false) {
   const model = String(voice.model || process.env.VOICE_MODEL || defaultModel).trim() || defaultModel;
   const voiceId = voice.voiceId || process.env.VOICE_ID || "alloy";
   const turnDetection =
-    voice.turnDetection || process.env.VOICE_TURN_DETECTION || "server_vad";
+    voice.turnDetection || process.env.VOICE_TURN_DETECTION || "semantic_vad";
   const defaultVisionModel =
     provider === "claude"
       ? CLAUDE_DEFAULT_VISION_MODEL
@@ -861,6 +916,19 @@ export function getVoiceConfig(forceReload = false) {
         : OPENAI_DEFAULT_VISION_MODEL;
   const visionModel =
     voice.visionModel || process.env.VOICE_VISION_MODEL || defaultVisionModel;
+  const transcriptionModel =
+    voice.transcriptionModel || process.env.VOICE_TRANSCRIPTION_MODEL || DEFAULT_TRANSCRIBE_MODEL;
+  const transcriptionEnabledRaw =
+    voice.transcriptionEnabled ?? process.env.VOICE_TRANSCRIPTION_ENABLED;
+  const transcriptionEnabled =
+    transcriptionEnabledRaw == null
+      ? true
+      : !["0", "false", "no", "off"].includes(
+          String(transcriptionEnabledRaw).trim().toLowerCase(),
+        );
+  const azureTranscriptionEnabled = parseOptionalBoolean(
+    voice.azureTranscriptionEnabled ?? process.env.VOICE_AZURE_TRANSCRIPTION_ENABLED,
+  );
   const fallbackMode =
     voice.fallbackMode || process.env.VOICE_FALLBACK_MODE || "browser";
   const delegateExecutor =
@@ -906,6 +974,9 @@ For complex operations like writing code or creating PRs, delegate to the approp
     turnDetection,
     visionModel,
     instructions,
+    transcriptionModel,
+    transcriptionEnabled,
+    azureTranscriptionEnabled,
     fallbackMode,
     delegateExecutor,
     enabled,
@@ -1120,6 +1191,13 @@ async function createOpenAIEphemeralToken(cfg, toolDefinitions = [], callContext
   const instructions = await buildSessionScopedInstructions(cfg.instructions, context);
   const model = normalizeOpenAIRealtimeModel(candidate?.model || cfg.model || OPENAI_REALTIME_MODEL);
   const voiceId = String(candidate?.voiceId || cfg.voiceId || "alloy").trim() || "alloy";
+  // Per-endpoint transcription overrides
+  const transcriptionModel = String(candidate?.transcriptionModel || "").trim() || cfg.transcriptionModel;
+  const transcriptionEnabled = candidate?.transcriptionEnabled !== undefined
+    ? candidate.transcriptionEnabled !== false
+    : cfg.azureTranscriptionEnabled != null
+      ? cfg.azureTranscriptionEnabled !== false
+      : false;
   const sessionConfig = {
     model,
@@ -1132,9 +1210,9 @@ async function createOpenAIEphemeralToken(cfg, toolDefinitions = [], callContext
     turn_detection: {
       type: cfg.turnDetection,
       ...(cfg.turnDetection === "server_vad" ? {
-        threshold: 0.35,
+        threshold: 0.7,
         prefix_padding_ms: 400,
-        silence_duration_ms: 700,
+        silence_duration_ms: 1200,
         create_response: true,
         interrupt_response: true,
       } : {}),
@@ -1144,7 +1222,7 @@ async function createOpenAIEphemeralToken(cfg, toolDefinitions = [], callContext
         interrupt_response: true,
       } : {}),
     },
-    input_audio_transcription: { model: REALTIME_TRANSCRIBE_MODEL },
+    ...(transcriptionEnabled ? { input_audio_transcription: { model: transcriptionModel } } : {}),
     tools: toolDefinitions,
   };
@@ -1197,11 +1275,17 @@ async function createAzureEphemeralToken(cfg, toolDefinitions = [], callContext
     candidate?.azureDeployment || candidate?.model || cfg.azureDeployment || OPENAI_REALTIME_MODEL,
   );
   const voiceId = String(candidate?.voiceId || cfg.voiceId || "alloy").trim() || "alloy";
+  // Per-endpoint transcription overrides
+  const transcriptionModel = String(candidate?.transcriptionModel || "").trim() || cfg.transcriptionModel;
+  const transcriptionEnabled = candidate?.transcriptionEnabled !== undefined ? candidate.transcriptionEnabled !== false : cfg.transcriptionEnabled;
   // GA protocol (gpt-realtime-1.5, gpt-realtime, etc.) uses /openai/v1/realtime/sessions?api-version=...
   // Preview protocol uses /openai/realtimeapi/sessions?api-version=...
-  const url = isAzureGaProtocol(deployment)
-    ? `${resolvedEndpoint}/openai/v1/realtime/sessions?api-version=${AZURE_API_VERSION}`
-    : `${resolvedEndpoint}/openai/realtimeapi/sessions?api-version=${AZURE_API_VERSION}&deployment=${encodeURIComponent(deployment)}`;
+  // Azure AI Foundry "Global Standard" resources may not support GA paths even for GA model names,
+  // so we build both and try GA first with automatic fallback to preview.
+  const gaUrl = `${resolvedEndpoint}/openai/v1/realtime/sessions?api-version=${AZURE_API_VERSION}`;
+  const previewUrl = `${resolvedEndpoint}/openai/realtimeapi/sessions?api-version=${AZURE_API_VERSION}&deployment=${encodeURIComponent(deployment)}`;
+  const useGa = isAzureGaProtocol(deployment);
+  const url = useGa ? gaUrl : previewUrl;
   const headers = {
     "Content-Type": "application/json",
@@ -1226,9 +1310,9 @@ async function createAzureEphemeralToken(cfg, toolDefinitions = [], callContext
     turn_detection: {
       type: cfg.turnDetection,
       ...(cfg.turnDetection === "server_vad" ? {
-        threshold: 0.35,
+        threshold: 0.7,
         prefix_padding_ms: 400,
-        silence_duration_ms: 700,
+        silence_duration_ms: 1200,
         create_response: true,
         interrupt_response: true,
       } : {}),
@@ -1238,16 +1322,28 @@ async function createAzureEphemeralToken(cfg, toolDefinitions = [], callContext
         interrupt_response: true,
       } : {}),
     },
-    input_audio_transcription: { model: REALTIME_TRANSCRIBE_MODEL },
+    ...(transcriptionEnabled ? { input_audio_transcription: { model: transcriptionModel } } : {}),
     tools: toolDefinitions,
   };
-  const response = await fetch(url, {
+  let response = await fetch(url, {
     method: "POST",
     headers,
     body: JSON.stringify(sessionConfig),
   });
+  // Azure AI Foundry "Global Standard" deployments may 404 on the GA path.
+  // Automatically fall back to the preview path before giving up.
+  if (!response.ok && response.status === 404 && useGa) {
+    const previewConfig = { ...sessionConfig };
+    delete previewConfig.type; // preview path does not accept type: "realtime"
+    response = await fetch(previewUrl, {
+      method: "POST",
+      headers,
+      body: JSON.stringify(previewConfig),
+    });
+  }
   if (!response.ok) {
     const errorText = await buildProviderErrorDetails(response, "unknown");
     throw new Error(`Azure Realtime session failed (${response.status}): ${errorText}`);
@@ -1255,9 +1351,22 @@ async function createAzureEphemeralToken(cfg, toolDefinitions = [], callContext
   const data = await response.json();
   // WebRTC URL diverges from /sessions URL: GA uses /openai/v1/realtime, preview uses /openai/realtime.
-  const webrtcUrl = isAzureGaProtocol(deployment)
+  // If the GA session was created via fallback to preview, use preview WebRTC URL too.
+  const gaSessionSucceeded = useGa && response.url?.includes("/v1/realtime");
+  const webrtcUrl = (useGa && gaSessionSucceeded)
     ? `${resolvedEndpoint}/openai/v1/realtime?api-version=${AZURE_API_VERSION}`
     : `${resolvedEndpoint}/openai/realtime?api-version=${AZURE_API_VERSION}&deployment=${encodeURIComponent(deployment)}`;
+  // WebSocket fallback URL — Azure Realtime API always supports WebSocket even
+  // when WebRTC SDP is unavailable (404).  The api-key query parameter provides
+  // authentication since browsers cannot set custom headers on WebSocket.
+  const wsAuthParam = resolvedOAuthToken
+    ? `access_token=${encodeURIComponent(resolvedOAuthToken)}`
+    : `api-key=${encodeURIComponent(resolvedApiKey)}`;
+  const wsUrl = (useGa && gaSessionSucceeded)
+    ? `wss://${new URL(resolvedEndpoint).host}/openai/v1/realtime?api-version=${AZURE_API_VERSION}&${wsAuthParam}`
+    : `wss://${new URL(resolvedEndpoint).host}/openai/realtime?api-version=${AZURE_API_VERSION}&deployment=${encodeURIComponent(deployment)}&${wsAuthParam}`;
   return {
     token: data.client_secret?.value || data.token,
     expiresAt: data.client_secret?.expires_at || (Date.now() / 1000 + 60),
@@ -1265,6 +1374,7 @@ async function createAzureEphemeralToken(cfg, toolDefinitions = [], callContext
     voiceId,
     provider: "azure",
     url: webrtcUrl,
+    wsUrl,
     sessionConfig,
     azureEndpoint: resolvedEndpoint,
     azureDeployment: deployment,
@@ -1420,9 +1530,16 @@ const VOICE_SESSION_ALLOWED_TOOLS = new Set([
   "list_directory",
   "get_recent_logs",
   "list_workflows",
+  "create_workflow",
+  "update_workflow_definition",
+  "delete_workflow",
+  "create_workflow_from_template",
+  "generate_workflow_with_agent",
   "get_workflow_definition",
+  "execute_workflow",
   "list_workflow_runs",
   "get_workflow_run",
+  "analyze_workflow",
   "retry_workflow_run",
   "list_skills",
   "list_prompts",