@getpaseo/server 0.1.26 → 0.1.28
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/server/client/daemon-client.d.ts +1 -1
- package/dist/server/client/daemon-client.d.ts.map +1 -1
- package/dist/server/client/daemon-client.js +1 -1
- package/dist/server/client/daemon-client.js.map +1 -1
- package/dist/server/server/agent/agent-response-loop.js +1 -1
- package/dist/server/server/agent/agent-response-loop.js.map +1 -1
- package/dist/server/server/agent/provider-launch-config.d.ts +14 -2
- package/dist/server/server/agent/provider-launch-config.d.ts.map +1 -1
- package/dist/server/server/agent/provider-launch-config.js +30 -8
- package/dist/server/server/agent/provider-launch-config.js.map +1 -1
- package/dist/server/server/agent/provider-manifest.d.ts.map +1 -1
- package/dist/server/server/agent/provider-manifest.js +10 -5
- package/dist/server/server/agent/provider-manifest.js.map +1 -1
- package/dist/server/server/agent/providers/claude/tool-call-detail-parser.d.ts.map +1 -1
- package/dist/server/server/agent/providers/claude/tool-call-detail-parser.js +2 -0
- package/dist/server/server/agent/providers/claude/tool-call-detail-parser.js.map +1 -1
- package/dist/server/server/agent/providers/claude/tool-call-mapper.d.ts.map +1 -1
- package/dist/server/server/agent/providers/claude/tool-call-mapper.js +2 -0
- package/dist/server/server/agent/providers/claude/tool-call-mapper.js.map +1 -1
- package/dist/server/server/agent/providers/claude-agent.d.ts +3 -1
- package/dist/server/server/agent/providers/claude-agent.d.ts.map +1 -1
- package/dist/server/server/agent/providers/claude-agent.js +5 -1
- package/dist/server/server/agent/providers/claude-agent.js.map +1 -1
- package/dist/server/server/agent/providers/codex-app-server-agent.d.ts.map +1 -1
- package/dist/server/server/agent/providers/codex-app-server-agent.js +146 -46
- package/dist/server/server/agent/providers/codex-app-server-agent.js.map +1 -1
- package/dist/server/server/agent/providers/codex-rollout-timeline.d.ts.map +1 -1
- package/dist/server/server/agent/providers/codex-rollout-timeline.js +77 -9
- package/dist/server/server/agent/providers/codex-rollout-timeline.js.map +1 -1
- package/dist/server/server/agent/providers/opencode-agent.d.ts +1 -0
- package/dist/server/server/agent/providers/opencode-agent.d.ts.map +1 -1
- package/dist/server/server/agent/providers/opencode-agent.js +115 -43
- package/dist/server/server/agent/providers/opencode-agent.js.map +1 -1
- package/dist/server/server/agent/providers/tool-call-mapper-utils.d.ts +1 -0
- package/dist/server/server/agent/providers/tool-call-mapper-utils.d.ts.map +1 -1
- package/dist/server/server/agent/providers/tool-call-mapper-utils.js +8 -0
- package/dist/server/server/agent/providers/tool-call-mapper-utils.js.map +1 -1
- package/dist/server/server/agent/tts-manager.d.ts +8 -1
- package/dist/server/server/agent/tts-manager.d.ts.map +1 -1
- package/dist/server/server/agent/tts-manager.js +215 -108
- package/dist/server/server/agent/tts-manager.js.map +1 -1
- package/dist/server/server/bootstrap.d.ts +2 -2
- package/dist/server/server/bootstrap.d.ts.map +1 -1
- package/dist/server/server/bootstrap.js +26 -5
- package/dist/server/server/bootstrap.js.map +1 -1
- package/dist/server/server/persisted-config.d.ts +25 -0
- package/dist/server/server/persisted-config.d.ts.map +1 -1
- package/dist/server/server/persisted-config.js +6 -0
- package/dist/server/server/persisted-config.js.map +1 -1
- package/dist/server/server/session.d.ts +22 -19
- package/dist/server/server/session.d.ts.map +1 -1
- package/dist/server/server/session.js +305 -294
- package/dist/server/server/session.js.map +1 -1
- package/dist/server/server/speech/providers/local/runtime.d.ts +2 -0
- package/dist/server/server/speech/providers/local/runtime.d.ts.map +1 -1
- package/dist/server/server/speech/providers/local/runtime.js +7 -0
- package/dist/server/server/speech/providers/local/runtime.js.map +1 -1
- package/dist/server/server/speech/providers/local/sherpa/assets/silero_vad.onnx +0 -0
- package/dist/server/server/speech/providers/local/sherpa/sherpa-onnx-node-loader.d.ts +2 -0
- package/dist/server/server/speech/providers/local/sherpa/sherpa-onnx-node-loader.d.ts.map +1 -1
- package/dist/server/server/speech/providers/local/sherpa/sherpa-onnx-node-loader.js.map +1 -1
- package/dist/server/server/speech/providers/local/sherpa/silero-vad-provider.d.ts +13 -0
- package/dist/server/server/speech/providers/local/sherpa/silero-vad-provider.d.ts.map +1 -0
- package/dist/server/server/speech/providers/local/sherpa/silero-vad-provider.js +23 -0
- package/dist/server/server/speech/providers/local/sherpa/silero-vad-provider.js.map +1 -0
- package/dist/server/server/speech/providers/local/sherpa/silero-vad-session.d.ts +32 -0
- package/dist/server/server/speech/providers/local/sherpa/silero-vad-session.d.ts.map +1 -0
- package/dist/server/server/speech/providers/local/sherpa/silero-vad-session.js +107 -0
- package/dist/server/server/speech/providers/local/sherpa/silero-vad-session.js.map +1 -0
- package/dist/server/server/speech/providers/openai/runtime.d.ts +2 -0
- package/dist/server/server/speech/providers/openai/runtime.d.ts.map +1 -1
- package/dist/server/server/speech/providers/openai/runtime.js +2 -0
- package/dist/server/server/speech/providers/openai/runtime.js.map +1 -1
- package/dist/server/server/speech/speech-config-resolver.d.ts.map +1 -1
- package/dist/server/server/speech/speech-config-resolver.js +35 -14
- package/dist/server/server/speech/speech-config-resolver.js.map +1 -1
- package/dist/server/server/speech/speech-runtime.d.ts +3 -1
- package/dist/server/server/speech/speech-runtime.d.ts.map +1 -1
- package/dist/server/server/speech/speech-runtime.js +39 -6
- package/dist/server/server/speech/speech-runtime.js.map +1 -1
- package/dist/server/server/speech/speech-types.d.ts +1 -0
- package/dist/server/server/speech/speech-types.d.ts.map +1 -1
- package/dist/server/server/speech/turn-detection-provider.d.ts +22 -0
- package/dist/server/server/speech/turn-detection-provider.d.ts.map +1 -0
- package/dist/server/server/speech/turn-detection-provider.js +2 -0
- package/dist/server/server/speech/turn-detection-provider.js.map +1 -0
- package/dist/server/server/voice/fixed-duration-pcm-ring-buffer.d.ts +16 -0
- package/dist/server/server/voice/fixed-duration-pcm-ring-buffer.d.ts.map +1 -0
- package/dist/server/server/voice/fixed-duration-pcm-ring-buffer.js +35 -0
- package/dist/server/server/voice/fixed-duration-pcm-ring-buffer.js.map +1 -0
- package/dist/server/server/voice/voice-turn-controller.d.ts +34 -0
- package/dist/server/server/voice/voice-turn-controller.d.ts.map +1 -0
- package/dist/server/server/voice/voice-turn-controller.js +161 -0
- package/dist/server/server/voice/voice-turn-controller.js.map +1 -0
- package/dist/server/server/websocket-server.d.ts +3 -0
- package/dist/server/server/websocket-server.d.ts.map +1 -1
- package/dist/server/server/websocket-server.js +5 -1
- package/dist/server/server/websocket-server.js.map +1 -1
- package/dist/server/server/workspace-registry.d.ts +2 -0
- package/dist/server/server/workspace-registry.d.ts.map +1 -1
- package/dist/server/server/workspace-registry.js +11 -4
- package/dist/server/server/workspace-registry.js.map +1 -1
- package/dist/server/shared/messages.d.ts +97 -0
- package/dist/server/shared/messages.d.ts.map +1 -1
- package/dist/server/shared/messages.js +7 -0
- package/dist/server/shared/messages.js.map +1 -1
- package/dist/server/shared/tool-call-display.d.ts.map +1 -1
- package/dist/server/shared/tool-call-display.js +59 -33
- package/dist/server/shared/tool-call-display.js.map +1 -1
- package/dist/src/server/agent/agent-response-loop.js +1 -1
- package/dist/src/server/agent/agent-response-loop.js.map +1 -1
- package/dist/src/server/agent/provider-launch-config.js +30 -8
- package/dist/src/server/agent/provider-launch-config.js.map +1 -1
- package/dist/src/server/agent/provider-manifest.js +10 -5
- package/dist/src/server/agent/provider-manifest.js.map +1 -1
- package/dist/src/server/agent/providers/claude/tool-call-detail-parser.js +2 -0
- package/dist/src/server/agent/providers/claude/tool-call-detail-parser.js.map +1 -1
- package/dist/src/server/agent/providers/claude/tool-call-mapper.js +2 -0
- package/dist/src/server/agent/providers/claude/tool-call-mapper.js.map +1 -1
- package/dist/src/server/agent/providers/claude-agent.js +5 -1
- package/dist/src/server/agent/providers/claude-agent.js.map +1 -1
- package/dist/src/server/agent/providers/codex-app-server-agent.js +146 -46
- package/dist/src/server/agent/providers/codex-app-server-agent.js.map +1 -1
- package/dist/src/server/agent/providers/codex-rollout-timeline.js +77 -9
- package/dist/src/server/agent/providers/codex-rollout-timeline.js.map +1 -1
- package/dist/src/server/agent/providers/opencode-agent.js +115 -43
- package/dist/src/server/agent/providers/opencode-agent.js.map +1 -1
- package/dist/src/server/agent/providers/tool-call-mapper-utils.js +8 -0
- package/dist/src/server/agent/providers/tool-call-mapper-utils.js.map +1 -1
- package/dist/src/server/agent/tts-manager.js +215 -108
- package/dist/src/server/agent/tts-manager.js.map +1 -1
- package/dist/src/server/bootstrap.js +26 -5
- package/dist/src/server/bootstrap.js.map +1 -1
- package/dist/src/server/persisted-config.js +6 -0
- package/dist/src/server/persisted-config.js.map +1 -1
- package/dist/src/server/session.js +305 -294
- package/dist/src/server/session.js.map +1 -1
- package/dist/src/server/speech/providers/local/runtime.js +7 -0
- package/dist/src/server/speech/providers/local/runtime.js.map +1 -1
- package/dist/src/server/speech/providers/local/sherpa/sherpa-onnx-node-loader.js.map +1 -1
- package/dist/src/server/speech/providers/local/sherpa/silero-vad-provider.js +23 -0
- package/dist/src/server/speech/providers/local/sherpa/silero-vad-provider.js.map +1 -0
- package/dist/src/server/speech/providers/local/sherpa/silero-vad-session.js +107 -0
- package/dist/src/server/speech/providers/local/sherpa/silero-vad-session.js.map +1 -0
- package/dist/src/server/speech/providers/openai/runtime.js +2 -0
- package/dist/src/server/speech/providers/openai/runtime.js.map +1 -1
- package/dist/src/server/speech/speech-config-resolver.js +35 -14
- package/dist/src/server/speech/speech-config-resolver.js.map +1 -1
- package/dist/src/server/speech/speech-runtime.js +39 -6
- package/dist/src/server/speech/speech-runtime.js.map +1 -1
- package/dist/src/server/speech/turn-detection-provider.js +2 -0
- package/dist/src/server/speech/turn-detection-provider.js.map +1 -0
- package/dist/src/server/voice/fixed-duration-pcm-ring-buffer.js +35 -0
- package/dist/src/server/voice/fixed-duration-pcm-ring-buffer.js.map +1 -0
- package/dist/src/server/voice/voice-turn-controller.js +161 -0
- package/dist/src/server/voice/voice-turn-controller.js.map +1 -0
- package/dist/src/server/websocket-server.js +5 -1
- package/dist/src/server/websocket-server.js.map +1 -1
- package/dist/src/server/workspace-registry.js +11 -4
- package/dist/src/server/workspace-registry.js.map +1 -1
- package/dist/src/shared/messages.js +7 -0
- package/dist/src/shared/messages.js.map +1 -1
- package/dist/src/shared/tool-call-display.js +59 -33
- package/dist/src/shared/tool-call-display.js.map +1 -1
- package/package.json +7 -7
- package/src/server/speech/providers/local/sherpa/assets/silero_vad.onnx +0 -0
|
@@ -13,6 +13,7 @@ import { STTManager } from './agent/stt-manager.js';
|
|
|
13
13
|
import { maybePersistTtsDebugAudio } from './agent/tts-debug.js';
|
|
14
14
|
import { isPaseoDictationDebugEnabled } from './agent/recordings-debug.js';
|
|
15
15
|
import { DictationStreamManager, } from './dictation/dictation-stream-manager.js';
|
|
16
|
+
import { createVoiceTurnController, } from './voice/voice-turn-controller.js';
|
|
16
17
|
import { buildConfigOverrides, buildSessionConfig, extractTimestamps } from './persistence-hooks.js';
|
|
17
18
|
import { experimental_createMCPClient } from 'ai';
|
|
18
19
|
import { buildProviderRegistry } from './agent/provider-registry.js';
|
|
@@ -35,6 +36,7 @@ import { getProjectIcon } from '../utils/project-icon.js';
|
|
|
35
36
|
import { expandTilde } from '../utils/path.js';
|
|
36
37
|
import { searchHomeDirectories, searchWorkspaceEntries } from '../utils/directory-suggestions.js';
|
|
37
38
|
import { ensureLocalSpeechModels, getLocalSpeechModelDir, listLocalSpeechModels, } from './speech/providers/local/models.js';
|
|
39
|
+
import { toResolver } from './speech/provider-resolver.js';
|
|
38
40
|
import { resolveClientMessageId } from './client-message-id.js';
|
|
39
41
|
const execAsync = promisify(exec);
|
|
40
42
|
const MAX_INITIAL_AGENT_TITLE_CHARS = Math.min(60, MAX_EXPLICIT_AGENT_TITLE_CHARS);
|
|
@@ -88,11 +90,10 @@ const PCM_BITS_PER_SAMPLE = 16;
|
|
|
88
90
|
const PCM_BYTES_PER_MS = (PCM_SAMPLE_RATE * PCM_CHANNELS * (PCM_BITS_PER_SAMPLE / 8)) / 1000;
|
|
89
91
|
const MIN_STREAMING_SEGMENT_DURATION_MS = 1000;
|
|
90
92
|
const MIN_STREAMING_SEGMENT_BYTES = Math.round(PCM_BYTES_PER_MS * MIN_STREAMING_SEGMENT_DURATION_MS);
|
|
91
|
-
const VOICE_MODE_INACTIVITY_FLUSH_MS = 4500;
|
|
92
|
-
const VOICE_INTERNAL_DICTATION_ID_PREFIX = '__voice_turn__:';
|
|
93
93
|
const SAFE_GIT_REF_PATTERN = /^[A-Za-z0-9._\/-]+$/;
|
|
94
94
|
const AgentIdSchema = z.string().uuid();
|
|
95
95
|
const VOICE_MCP_SERVER_NAME = 'paseo_voice';
|
|
96
|
+
const VOICE_INTERRUPT_CONFIRMATION_MS = 500;
|
|
96
97
|
class VoiceFeatureUnavailableError extends Error {
|
|
97
98
|
constructor(context) {
|
|
98
99
|
super(context.message);
|
|
@@ -161,19 +162,16 @@ export class Session {
|
|
|
161
162
|
// Voice mode state
|
|
162
163
|
this.isVoiceMode = false;
|
|
163
164
|
this.speechInProgress = false;
|
|
165
|
+
this.pendingVoiceSpeechStartAt = null;
|
|
166
|
+
this.pendingVoiceSpeechTimer = null;
|
|
167
|
+
this.voiceTurnController = null;
|
|
168
|
+
this.voiceInputChunkCount = 0;
|
|
169
|
+
this.voiceInputBytes = 0;
|
|
170
|
+
this.voiceInputWindowStartedAt = Date.now();
|
|
164
171
|
// Audio buffering for interruption handling
|
|
165
172
|
this.pendingAudioSegments = [];
|
|
166
173
|
this.bufferTimeout = null;
|
|
167
|
-
this.voiceModeInactivityTimeout = null;
|
|
168
174
|
this.audioBuffer = null;
|
|
169
|
-
this.activeVoiceDictationId = null;
|
|
170
|
-
this.activeVoiceDictationFormat = null;
|
|
171
|
-
this.activeVoiceDictationNextSeq = 0;
|
|
172
|
-
this.activeVoiceDictationStartPromise = null;
|
|
173
|
-
this.activeVoiceDictationFinalizePromise = null;
|
|
174
|
-
this.activeVoiceDictationResultPromise = null;
|
|
175
|
-
this.activeVoiceDictationResolve = null;
|
|
176
|
-
this.activeVoiceDictationReject = null;
|
|
177
175
|
// Optional TTS debug capture (persisted per utterance)
|
|
178
176
|
this.ttsDebugStreams = new Map();
|
|
179
177
|
// Per-session MCP client and tools
|
|
@@ -221,6 +219,7 @@ export class Session {
|
|
|
221
219
|
this.unsubscribeTerminalsChanged = this.terminalManager.subscribeTerminalsChanged((event) => this.handleTerminalsChanged(event));
|
|
222
220
|
}
|
|
223
221
|
this.voiceAgentMcpStdio = voice?.voiceAgentMcpStdio ?? null;
|
|
222
|
+
this.resolveVoiceTurnDetection = toResolver(voice?.turnDetection ?? null);
|
|
224
223
|
const configuredModelsDir = dictation?.localModels?.modelsDir?.trim();
|
|
225
224
|
this.localSpeechModelsDir =
|
|
226
225
|
configuredModelsDir && configuredModelsDir.length > 0
|
|
@@ -257,13 +256,6 @@ export class Session {
|
|
|
257
256
|
stt: dictation?.stt ?? null,
|
|
258
257
|
finalTimeoutMs: dictation?.finalTimeoutMs,
|
|
259
258
|
});
|
|
260
|
-
this.voiceStreamManager = new DictationStreamManager({
|
|
261
|
-
logger: this.sessionLogger.child({ stream: 'voice-internal' }),
|
|
262
|
-
sessionId: this.sessionId,
|
|
263
|
-
emit: (msg) => this.handleDictationManagerMessage(msg),
|
|
264
|
-
stt: stt,
|
|
265
|
-
finalTimeoutMs: dictation?.finalTimeoutMs,
|
|
266
|
-
});
|
|
267
259
|
// Initialize agent MCP client asynchronously
|
|
268
260
|
void this.initializeAgentMcp();
|
|
269
261
|
this.subscribeToAgentEvents();
|
|
@@ -699,6 +691,100 @@ export class Session {
|
|
|
699
691
|
paseoHome: this.paseoHome,
|
|
700
692
|
});
|
|
701
693
|
}
|
|
694
|
+
buildPersistedProjectRecord(input) {
|
|
695
|
+
return createPersistedProjectRecord({
|
|
696
|
+
projectId: input.placement.projectKey,
|
|
697
|
+
rootPath: deriveProjectRootPath({
|
|
698
|
+
cwd: input.workspaceId,
|
|
699
|
+
checkout: input.placement.checkout,
|
|
700
|
+
}),
|
|
701
|
+
kind: deriveProjectKind(input.placement.checkout),
|
|
702
|
+
displayName: input.placement.projectName,
|
|
703
|
+
createdAt: input.createdAt,
|
|
704
|
+
updatedAt: input.updatedAt,
|
|
705
|
+
archivedAt: null,
|
|
706
|
+
});
|
|
707
|
+
}
|
|
708
|
+
buildPersistedWorkspaceRecord(input) {
|
|
709
|
+
return createPersistedWorkspaceRecord({
|
|
710
|
+
workspaceId: input.workspaceId,
|
|
711
|
+
projectId: input.placement.projectKey,
|
|
712
|
+
cwd: input.workspaceId,
|
|
713
|
+
kind: deriveWorkspaceKind(input.placement.checkout),
|
|
714
|
+
displayName: deriveWorkspaceDisplayName({
|
|
715
|
+
cwd: input.workspaceId,
|
|
716
|
+
checkout: input.placement.checkout,
|
|
717
|
+
}),
|
|
718
|
+
createdAt: input.createdAt,
|
|
719
|
+
updatedAt: input.updatedAt,
|
|
720
|
+
archivedAt: null,
|
|
721
|
+
});
|
|
722
|
+
}
|
|
723
|
+
async archiveProjectRecordIfEmpty(projectId, archivedAt) {
|
|
724
|
+
const siblingWorkspaces = (await this.workspaceRegistry.list()).filter((workspace) => workspace.projectId === projectId && !workspace.archivedAt);
|
|
725
|
+
if (siblingWorkspaces.length === 0) {
|
|
726
|
+
await this.projectRegistry.archive(projectId, archivedAt);
|
|
727
|
+
}
|
|
728
|
+
}
|
|
729
|
+
async reconcileWorkspaceRecord(workspaceId) {
|
|
730
|
+
const normalizedWorkspaceId = normalizePersistedWorkspaceId(workspaceId);
|
|
731
|
+
const existing = await this.workspaceRegistry.get(normalizedWorkspaceId);
|
|
732
|
+
const placement = await this.buildProjectPlacement(normalizedWorkspaceId);
|
|
733
|
+
const now = new Date().toISOString();
|
|
734
|
+
const nextProjectCreatedAt = existing?.createdAt ?? now;
|
|
735
|
+
const nextWorkspaceCreatedAt = existing?.createdAt ?? now;
|
|
736
|
+
const currentProjectRecord = await this.projectRegistry.get(placement.projectKey);
|
|
737
|
+
const nextProjectRecord = this.buildPersistedProjectRecord({
|
|
738
|
+
workspaceId: normalizedWorkspaceId,
|
|
739
|
+
placement,
|
|
740
|
+
createdAt: currentProjectRecord?.createdAt ?? nextProjectCreatedAt,
|
|
741
|
+
updatedAt: now,
|
|
742
|
+
});
|
|
743
|
+
const nextWorkspaceRecord = this.buildPersistedWorkspaceRecord({
|
|
744
|
+
workspaceId: normalizedWorkspaceId,
|
|
745
|
+
placement,
|
|
746
|
+
createdAt: nextWorkspaceCreatedAt,
|
|
747
|
+
updatedAt: now,
|
|
748
|
+
});
|
|
749
|
+
const needsWorkspaceUpdate = !existing ||
|
|
750
|
+
existing.archivedAt ||
|
|
751
|
+
existing.projectId !== nextWorkspaceRecord.projectId ||
|
|
752
|
+
existing.kind !== nextWorkspaceRecord.kind ||
|
|
753
|
+
existing.displayName !== nextWorkspaceRecord.displayName;
|
|
754
|
+
const needsProjectUpdate = !currentProjectRecord ||
|
|
755
|
+
currentProjectRecord.archivedAt ||
|
|
756
|
+
currentProjectRecord.rootPath !== nextProjectRecord.rootPath ||
|
|
757
|
+
currentProjectRecord.kind !== nextProjectRecord.kind ||
|
|
758
|
+
currentProjectRecord.displayName !== nextProjectRecord.displayName;
|
|
759
|
+
if (!needsWorkspaceUpdate && !needsProjectUpdate) {
|
|
760
|
+
return {
|
|
761
|
+
workspace: existing,
|
|
762
|
+
changed: false,
|
|
763
|
+
};
|
|
764
|
+
}
|
|
765
|
+
await this.projectRegistry.upsert(nextProjectRecord);
|
|
766
|
+
await this.workspaceRegistry.upsert(nextWorkspaceRecord);
|
|
767
|
+
if (existing &&
|
|
768
|
+
!existing.archivedAt &&
|
|
769
|
+
existing.projectId !== nextWorkspaceRecord.projectId) {
|
|
770
|
+
await this.archiveProjectRecordIfEmpty(existing.projectId, now);
|
|
771
|
+
}
|
|
772
|
+
return {
|
|
773
|
+
workspace: nextWorkspaceRecord,
|
|
774
|
+
changed: true,
|
|
775
|
+
};
|
|
776
|
+
}
|
|
777
|
+
async reconcileActiveWorkspaceRecords() {
|
|
778
|
+
const changedWorkspaceIds = new Set();
|
|
779
|
+
const activeWorkspaces = (await this.workspaceRegistry.list()).filter((workspace) => !workspace.archivedAt);
|
|
780
|
+
for (const workspace of activeWorkspaces) {
|
|
781
|
+
const result = await this.reconcileWorkspaceRecord(workspace.workspaceId);
|
|
782
|
+
if (result.changed) {
|
|
783
|
+
changedWorkspaceIds.add(result.workspace.workspaceId);
|
|
784
|
+
}
|
|
785
|
+
}
|
|
786
|
+
return changedWorkspaceIds;
|
|
787
|
+
}
|
|
702
788
|
async forwardAgentUpdate(agent) {
|
|
703
789
|
try {
|
|
704
790
|
await this.ensureWorkspaceRegistered(agent.cwd);
|
|
@@ -1315,7 +1401,9 @@ export class Session {
|
|
|
1315
1401
|
* Handle voice mode toggle
|
|
1316
1402
|
*/
|
|
1317
1403
|
async handleSetVoiceMode(enabled, agentId, requestId) {
|
|
1404
|
+
const startedAt = Date.now();
|
|
1318
1405
|
try {
|
|
1406
|
+
this.sessionLogger.info({ enabled, requestedAgentId: agentId ?? null, requestId: requestId ?? null }, 'set_voice_mode started');
|
|
1319
1407
|
if (enabled) {
|
|
1320
1408
|
const unavailable = this.resolveVoiceFeatureUnavailableContext('voice_mode');
|
|
1321
1409
|
if (unavailable) {
|
|
@@ -1325,15 +1413,26 @@ export class Session {
|
|
|
1325
1413
|
if (this.isVoiceMode &&
|
|
1326
1414
|
this.voiceModeAgentId &&
|
|
1327
1415
|
this.voiceModeAgentId !== normalizedAgentId) {
|
|
1416
|
+
this.sessionLogger.info({
|
|
1417
|
+
previousAgentId: this.voiceModeAgentId,
|
|
1418
|
+
nextAgentId: normalizedAgentId,
|
|
1419
|
+
elapsedMs: Date.now() - startedAt,
|
|
1420
|
+
}, 'set_voice_mode disabling previous active voice agent');
|
|
1328
1421
|
await this.disableVoiceModeForActiveAgent(true);
|
|
1329
1422
|
}
|
|
1330
1423
|
if (!this.isVoiceMode || this.voiceModeAgentId !== normalizedAgentId) {
|
|
1424
|
+
this.sessionLogger.info({ agentId: normalizedAgentId, elapsedMs: Date.now() - startedAt }, 'set_voice_mode enabling voice for agent');
|
|
1331
1425
|
const refreshedAgentId = await this.enableVoiceModeForAgent(normalizedAgentId);
|
|
1332
1426
|
this.voiceModeAgentId = refreshedAgentId;
|
|
1427
|
+
this.sessionLogger.info({ agentId: refreshedAgentId, elapsedMs: Date.now() - startedAt }, 'set_voice_mode agent enable complete');
|
|
1333
1428
|
}
|
|
1429
|
+
this.sessionLogger.info({ agentId: this.voiceModeAgentId, elapsedMs: Date.now() - startedAt }, 'set_voice_mode starting voice turn controller');
|
|
1430
|
+
await this.startVoiceTurnController();
|
|
1431
|
+
this.sessionLogger.info({ agentId: this.voiceModeAgentId, elapsedMs: Date.now() - startedAt }, 'set_voice_mode voice turn controller started');
|
|
1334
1432
|
this.isVoiceMode = true;
|
|
1335
1433
|
this.sessionLogger.info({
|
|
1336
1434
|
agentId: this.voiceModeAgentId,
|
|
1435
|
+
elapsedMs: Date.now() - startedAt,
|
|
1337
1436
|
}, 'Voice mode enabled for existing agent');
|
|
1338
1437
|
if (requestId) {
|
|
1339
1438
|
this.emit({
|
|
@@ -1349,9 +1448,10 @@ export class Session {
|
|
|
1349
1448
|
}
|
|
1350
1449
|
return;
|
|
1351
1450
|
}
|
|
1451
|
+
this.sessionLogger.info({ agentId: this.voiceModeAgentId, elapsedMs: Date.now() - startedAt }, 'set_voice_mode disabling active voice mode');
|
|
1352
1452
|
await this.disableVoiceModeForActiveAgent(true);
|
|
1353
1453
|
this.isVoiceMode = false;
|
|
1354
|
-
this.sessionLogger.info('Voice mode disabled');
|
|
1454
|
+
this.sessionLogger.info({ elapsedMs: Date.now() - startedAt }, 'Voice mode disabled');
|
|
1355
1455
|
if (requestId) {
|
|
1356
1456
|
this.emit({
|
|
1357
1457
|
type: 'set_voice_mode_response',
|
|
@@ -1372,6 +1472,7 @@ export class Session {
|
|
|
1372
1472
|
err: error,
|
|
1373
1473
|
enabled,
|
|
1374
1474
|
requestedAgentId: agentId ?? null,
|
|
1475
|
+
elapsedMs: Date.now() - startedAt,
|
|
1375
1476
|
}, 'set_voice_mode failed');
|
|
1376
1477
|
if (requestId) {
|
|
1377
1478
|
this.emit({
|
|
@@ -1419,12 +1520,17 @@ export class Session {
|
|
|
1419
1520
|
};
|
|
1420
1521
|
}
|
|
1421
1522
|
async enableVoiceModeForAgent(agentId) {
|
|
1523
|
+
const startedAt = Date.now();
|
|
1422
1524
|
const ensureVoiceSocket = this.ensureVoiceMcpSocketForAgent;
|
|
1423
1525
|
if (!ensureVoiceSocket) {
|
|
1424
1526
|
throw new Error('Voice MCP socket bridge is not configured');
|
|
1425
1527
|
}
|
|
1528
|
+
this.sessionLogger.info({ agentId }, 'enableVoiceModeForAgent.ensureAgentLoaded.start');
|
|
1426
1529
|
const existing = await this.ensureAgentLoaded(agentId);
|
|
1530
|
+
this.sessionLogger.info({ agentId, elapsedMs: Date.now() - startedAt }, 'enableVoiceModeForAgent.ensureAgentLoaded.done');
|
|
1531
|
+
this.sessionLogger.info({ agentId }, 'enableVoiceModeForAgent.ensureVoiceSocket.start');
|
|
1427
1532
|
const socketPath = await ensureVoiceSocket(agentId);
|
|
1533
|
+
this.sessionLogger.info({ agentId, socketPath, elapsedMs: Date.now() - startedAt }, 'enableVoiceModeForAgent.ensureVoiceSocket.done');
|
|
1428
1534
|
this.registerVoiceBridgeForAgent(agentId);
|
|
1429
1535
|
const baseConfig = {
|
|
1430
1536
|
systemPrompt: stripVoiceModeSystemPrompt(existing.config.systemPrompt),
|
|
@@ -1436,7 +1542,9 @@ export class Session {
|
|
|
1436
1542
|
mcpServers: this.buildVoiceModeMcpServers(baseConfig.mcpServers, socketPath),
|
|
1437
1543
|
};
|
|
1438
1544
|
try {
|
|
1545
|
+
this.sessionLogger.info({ agentId, elapsedMs: Date.now() - startedAt }, 'enableVoiceModeForAgent.reloadAgentSession.start');
|
|
1439
1546
|
const refreshed = await this.agentManager.reloadAgentSession(agentId, refreshOverrides);
|
|
1547
|
+
this.sessionLogger.info({ agentId, refreshedAgentId: refreshed.id, elapsedMs: Date.now() - startedAt }, 'enableVoiceModeForAgent.reloadAgentSession.done');
|
|
1440
1548
|
return refreshed.id;
|
|
1441
1549
|
}
|
|
1442
1550
|
catch (error) {
|
|
@@ -1448,8 +1556,7 @@ export class Session {
|
|
|
1448
1556
|
}
|
|
1449
1557
|
}
|
|
1450
1558
|
async disableVoiceModeForActiveAgent(restoreAgentConfig) {
|
|
1451
|
-
this.
|
|
1452
|
-
this.cancelActiveVoiceDictationStream('voice mode disabled');
|
|
1559
|
+
await this.stopVoiceTurnController();
|
|
1453
1560
|
const agentId = this.voiceModeAgentId;
|
|
1454
1561
|
if (!agentId) {
|
|
1455
1562
|
this.voiceModeBaseConfig = null;
|
|
@@ -1475,197 +1582,107 @@ export class Session {
|
|
|
1475
1582
|
this.voiceModeBaseConfig = null;
|
|
1476
1583
|
this.voiceModeAgentId = null;
|
|
1477
1584
|
}
|
|
1478
|
-
isInternalVoiceDictationId(dictationId) {
|
|
1479
|
-
return dictationId.startsWith(VOICE_INTERNAL_DICTATION_ID_PREFIX);
|
|
1480
|
-
}
|
|
1481
1585
|
handleDictationManagerMessage(msg) {
|
|
1482
|
-
|
|
1483
|
-
const metadata = msg.payload.metadata;
|
|
1484
|
-
const dictationId = metadata && typeof metadata.dictationId === 'string' ? metadata.dictationId : null;
|
|
1485
|
-
if (dictationId && this.isInternalVoiceDictationId(dictationId)) {
|
|
1486
|
-
return;
|
|
1487
|
-
}
|
|
1488
|
-
this.emit(msg);
|
|
1489
|
-
return;
|
|
1490
|
-
}
|
|
1491
|
-
const payloadWithDictationId = msg.payload;
|
|
1492
|
-
const dictationId = payloadWithDictationId && typeof payloadWithDictationId.dictationId === 'string'
|
|
1493
|
-
? payloadWithDictationId.dictationId
|
|
1494
|
-
: null;
|
|
1495
|
-
if (!dictationId || !this.isInternalVoiceDictationId(dictationId)) {
|
|
1496
|
-
this.emit(msg);
|
|
1497
|
-
return;
|
|
1498
|
-
}
|
|
1499
|
-
if (msg.type === 'dictation_stream_final') {
|
|
1500
|
-
if (dictationId !== this.activeVoiceDictationId || !this.activeVoiceDictationResolve) {
|
|
1501
|
-
return;
|
|
1502
|
-
}
|
|
1503
|
-
this.activeVoiceDictationResolve({
|
|
1504
|
-
text: msg.payload.text,
|
|
1505
|
-
...(msg.payload.debugRecordingPath
|
|
1506
|
-
? { debugRecordingPath: msg.payload.debugRecordingPath }
|
|
1507
|
-
: {}),
|
|
1508
|
-
});
|
|
1509
|
-
return;
|
|
1510
|
-
}
|
|
1511
|
-
if (msg.type === 'dictation_stream_error') {
|
|
1512
|
-
if (dictationId !== this.activeVoiceDictationId || !this.activeVoiceDictationReject) {
|
|
1513
|
-
return;
|
|
1514
|
-
}
|
|
1515
|
-
this.activeVoiceDictationReject(new Error(msg.payload.error));
|
|
1516
|
-
return;
|
|
1517
|
-
}
|
|
1518
|
-
// Ack/partial messages for internal voice dictation are consumed server-side.
|
|
1519
|
-
}
|
|
1520
|
-
resetActiveVoiceDictationState() {
|
|
1521
|
-
this.activeVoiceDictationId = null;
|
|
1522
|
-
this.activeVoiceDictationFormat = null;
|
|
1523
|
-
this.activeVoiceDictationNextSeq = 0;
|
|
1524
|
-
this.activeVoiceDictationStartPromise = null;
|
|
1525
|
-
this.activeVoiceDictationFinalizePromise = null;
|
|
1526
|
-
this.activeVoiceDictationResultPromise = null;
|
|
1527
|
-
this.activeVoiceDictationResolve = null;
|
|
1528
|
-
this.activeVoiceDictationReject = null;
|
|
1529
|
-
}
|
|
1530
|
-
cancelActiveVoiceDictationStream(reason) {
|
|
1531
|
-
const dictationId = this.activeVoiceDictationId;
|
|
1532
|
-
if (!dictationId) {
|
|
1533
|
-
return;
|
|
1534
|
-
}
|
|
1535
|
-
this.sessionLogger.debug({ dictationId, reason }, 'Cancelling active internal voice dictation stream');
|
|
1536
|
-
if (this.activeVoiceDictationReject) {
|
|
1537
|
-
this.activeVoiceDictationReject(new Error(`Voice dictation cancelled: ${reason}`));
|
|
1538
|
-
}
|
|
1539
|
-
this.voiceStreamManager.handleCancel(dictationId);
|
|
1540
|
-
this.resetActiveVoiceDictationState();
|
|
1586
|
+
this.emit(msg);
|
|
1541
1587
|
}
|
|
1542
|
-
async
|
|
1543
|
-
if (this.
|
|
1544
|
-
|
|
1545
|
-
await this.activeVoiceDictationStartPromise;
|
|
1546
|
-
}
|
|
1588
|
+
async startVoiceTurnController() {
|
|
1589
|
+
if (this.voiceTurnController) {
|
|
1590
|
+
this.sessionLogger.info('startVoiceTurnController skipped: already running');
|
|
1547
1591
|
return;
|
|
1548
1592
|
}
|
|
1549
|
-
|
|
1550
|
-
|
|
1551
|
-
|
|
1552
|
-
|
|
1553
|
-
|
|
1554
|
-
|
|
1555
|
-
|
|
1556
|
-
|
|
1557
|
-
|
|
1558
|
-
|
|
1559
|
-
|
|
1560
|
-
|
|
1561
|
-
|
|
1562
|
-
|
|
1563
|
-
|
|
1564
|
-
|
|
1565
|
-
|
|
1566
|
-
|
|
1567
|
-
|
|
1568
|
-
|
|
1569
|
-
|
|
1570
|
-
|
|
1571
|
-
|
|
1572
|
-
|
|
1573
|
-
|
|
1574
|
-
|
|
1575
|
-
|
|
1593
|
+
const turnDetection = this.resolveVoiceTurnDetection();
|
|
1594
|
+
if (!turnDetection) {
|
|
1595
|
+
throw new Error('Voice turn detection is not configured');
|
|
1596
|
+
}
|
|
1597
|
+
this.sessionLogger.info({ providerId: turnDetection.id }, 'startVoiceTurnController creating controller');
|
|
1598
|
+
const controller = createVoiceTurnController({
|
|
1599
|
+
logger: this.sessionLogger.child({ component: 'voice-turn-controller' }),
|
|
1600
|
+
turnDetection,
|
|
1601
|
+
utteranceSink: {
|
|
1602
|
+
submitUtterance: async ({ pcm16, format, sampleRate, startedAt, endedAt }) => {
|
|
1603
|
+
this.sessionLogger.debug({
|
|
1604
|
+
audioBytes: pcm16.length,
|
|
1605
|
+
sampleRate,
|
|
1606
|
+
startedAt,
|
|
1607
|
+
endedAt,
|
|
1608
|
+
durationMs: Math.max(0, endedAt - startedAt),
|
|
1609
|
+
}, 'Submitting detected voice utterance');
|
|
1610
|
+
await this.processCompletedAudio(pcm16, format);
|
|
1611
|
+
},
|
|
1612
|
+
},
|
|
1613
|
+
callbacks: {
|
|
1614
|
+
onSpeechStarted: async () => {
|
|
1615
|
+
this.handleProvisionalVoiceSpeechStarted();
|
|
1616
|
+
},
|
|
1617
|
+
onSpeechStopped: async () => {
|
|
1618
|
+
this.handleVoiceSpeechStopped();
|
|
1619
|
+
},
|
|
1620
|
+
onError: (error) => {
|
|
1621
|
+
this.sessionLogger.error({ err: error }, 'Voice turn controller failed');
|
|
1622
|
+
},
|
|
1576
1623
|
},
|
|
1577
1624
|
});
|
|
1578
|
-
|
|
1579
|
-
|
|
1580
|
-
|
|
1581
|
-
|
|
1582
|
-
}
|
|
1583
|
-
catch (error) {
|
|
1584
|
-
this.resetActiveVoiceDictationState();
|
|
1585
|
-
throw error;
|
|
1586
|
-
}
|
|
1587
|
-
finally {
|
|
1588
|
-
if (this.activeVoiceDictationId === dictationId) {
|
|
1589
|
-
this.activeVoiceDictationStartPromise = null;
|
|
1590
|
-
}
|
|
1591
|
-
}
|
|
1592
|
-
}
|
|
1593
|
-
async appendToActiveVoiceDictationStream(audioBase64, format) {
|
|
1594
|
-
if (this.activeVoiceDictationFinalizePromise) {
|
|
1595
|
-
await this.activeVoiceDictationFinalizePromise.catch(() => undefined);
|
|
1596
|
-
}
|
|
1597
|
-
await this.ensureActiveVoiceDictationStream(format);
|
|
1598
|
-
const dictationId = this.activeVoiceDictationId;
|
|
1599
|
-
if (!dictationId) {
|
|
1600
|
-
throw new Error('Voice dictation stream did not initialize');
|
|
1601
|
-
}
|
|
1602
|
-
const seq = this.activeVoiceDictationNextSeq;
|
|
1603
|
-
this.activeVoiceDictationNextSeq += 1;
|
|
1604
|
-
await this.voiceStreamManager.handleChunk({
|
|
1605
|
-
dictationId,
|
|
1606
|
-
seq,
|
|
1607
|
-
audioBase64,
|
|
1608
|
-
format,
|
|
1609
|
-
});
|
|
1625
|
+
this.sessionLogger.info('startVoiceTurnController connecting controller');
|
|
1626
|
+
await controller.start();
|
|
1627
|
+
this.voiceTurnController = controller;
|
|
1628
|
+
this.sessionLogger.info('startVoiceTurnController connected');
|
|
1610
1629
|
}
|
|
1611
|
-
async
|
|
1612
|
-
|
|
1613
|
-
if (!dictationId) {
|
|
1630
|
+
async stopVoiceTurnController() {
|
|
1631
|
+
if (!this.voiceTurnController) {
|
|
1614
1632
|
return;
|
|
1615
1633
|
}
|
|
1616
|
-
this.
|
|
1617
|
-
|
|
1618
|
-
|
|
1634
|
+
this.clearPendingVoiceSpeechStart('turn-controller-stop');
|
|
1635
|
+
const controller = this.voiceTurnController;
|
|
1636
|
+
this.voiceTurnController = null;
|
|
1637
|
+
await controller.stop();
|
|
1638
|
+
}
|
|
1639
|
+
clearPendingVoiceSpeechStart(reason) {
|
|
1640
|
+
if (this.pendingVoiceSpeechTimer) {
|
|
1641
|
+
clearTimeout(this.pendingVoiceSpeechTimer);
|
|
1642
|
+
this.pendingVoiceSpeechTimer = null;
|
|
1619
1643
|
}
|
|
1620
|
-
if (this.
|
|
1621
|
-
|
|
1622
|
-
|
|
1644
|
+
if (this.pendingVoiceSpeechStartAt !== null) {
|
|
1645
|
+
this.sessionLogger.debug({ reason }, 'Clearing provisional voice speech start');
|
|
1646
|
+
this.pendingVoiceSpeechStartAt = null;
|
|
1623
1647
|
}
|
|
1624
|
-
|
|
1625
|
-
|
|
1626
|
-
if (
|
|
1627
|
-
this.resetActiveVoiceDictationState();
|
|
1648
|
+
}
|
|
1649
|
+
handleProvisionalVoiceSpeechStarted() {
|
|
1650
|
+
if (this.speechInProgress || this.pendingVoiceSpeechTimer) {
|
|
1628
1651
|
return;
|
|
1629
1652
|
}
|
|
1630
|
-
|
|
1631
|
-
|
|
1632
|
-
|
|
1633
|
-
|
|
1634
|
-
this.
|
|
1635
|
-
|
|
1636
|
-
|
|
1637
|
-
|
|
1638
|
-
|
|
1639
|
-
|
|
1640
|
-
transcriptLength: transcriptText.length,
|
|
1641
|
-
transcript: transcriptText,
|
|
1642
|
-
}, 'Transcription result');
|
|
1643
|
-
await this.handleTranscriptionResultPayload({
|
|
1644
|
-
text: result.text,
|
|
1645
|
-
requestId,
|
|
1646
|
-
...(result.debugRecordingPath
|
|
1647
|
-
? { debugRecordingPath: result.debugRecordingPath, format: 'audio/wav' }
|
|
1648
|
-
: {}),
|
|
1649
|
-
});
|
|
1650
|
-
})();
|
|
1651
|
-
try {
|
|
1652
|
-
await this.activeVoiceDictationFinalizePromise;
|
|
1653
|
-
}
|
|
1654
|
-
catch (error) {
|
|
1655
|
-
this.resetActiveVoiceDictationState();
|
|
1656
|
-
this.setPhase('idle');
|
|
1657
|
-
this.clearSpeechInProgress('transcription error');
|
|
1653
|
+
const startedAt = Date.now();
|
|
1654
|
+
this.pendingVoiceSpeechStartAt = startedAt;
|
|
1655
|
+
this.sessionLogger.info({ confirmationMs: VOICE_INTERRUPT_CONFIRMATION_MS }, 'Silero VAD provisional speech_started');
|
|
1656
|
+
this.pendingVoiceSpeechTimer = setTimeout(() => {
|
|
1657
|
+
this.pendingVoiceSpeechTimer = null;
|
|
1658
|
+
if (this.pendingVoiceSpeechStartAt !== startedAt || this.speechInProgress) {
|
|
1659
|
+
return;
|
|
1660
|
+
}
|
|
1661
|
+
this.pendingVoiceSpeechStartAt = null;
|
|
1662
|
+
this.sessionLogger.info('voice_input_state emitting isSpeaking=true');
|
|
1658
1663
|
this.emit({
|
|
1659
|
-
type: '
|
|
1664
|
+
type: 'voice_input_state',
|
|
1660
1665
|
payload: {
|
|
1661
|
-
|
|
1662
|
-
timestamp: new Date(),
|
|
1663
|
-
type: 'error',
|
|
1664
|
-
content: `Transcription error: ${error instanceof Error ? error.message : String(error)}`,
|
|
1666
|
+
isSpeaking: true,
|
|
1665
1667
|
},
|
|
1666
1668
|
});
|
|
1667
|
-
|
|
1669
|
+
void this.handleVoiceSpeechStart();
|
|
1670
|
+
}, VOICE_INTERRUPT_CONFIRMATION_MS);
|
|
1671
|
+
}
|
|
1672
|
+
handleVoiceSpeechStopped() {
|
|
1673
|
+
if (this.pendingVoiceSpeechStartAt !== null) {
|
|
1674
|
+
const durationMs = Date.now() - this.pendingVoiceSpeechStartAt;
|
|
1675
|
+
this.clearPendingVoiceSpeechStart('speech-stopped-before-confirmation');
|
|
1676
|
+
this.sessionLogger.info({ durationMs, confirmationMs: VOICE_INTERRUPT_CONFIRMATION_MS }, 'Ignoring provisional voice speech start that ended before confirmation');
|
|
1677
|
+
return;
|
|
1668
1678
|
}
|
|
1679
|
+
this.sessionLogger.info('voice_input_state emitting isSpeaking=false');
|
|
1680
|
+
this.emit({
|
|
1681
|
+
type: 'voice_input_state',
|
|
1682
|
+
payload: {
|
|
1683
|
+
isSpeaking: false,
|
|
1684
|
+
},
|
|
1685
|
+
});
|
|
1669
1686
|
}
|
|
1670
1687
|
/**
|
|
1671
1688
|
* Handle text message to agent (with optional image attachments)
|
|
@@ -4027,7 +4044,7 @@ export class Session {
|
|
|
4027
4044
|
diffStat,
|
|
4028
4045
|
};
|
|
4029
4046
|
}
|
|
4030
|
-
async
|
|
4047
|
+
async listWorkspaceDescriptorsSnapshot() {
|
|
4031
4048
|
const [agents, persistedWorkspaces, persistedProjects] = await Promise.all([
|
|
4032
4049
|
this.listAgentPayloads(),
|
|
4033
4050
|
this.workspaceRegistry.list(),
|
|
@@ -4058,6 +4075,10 @@ export class Session {
|
|
|
4058
4075
|
}
|
|
4059
4076
|
return Array.from(descriptorsByWorkspaceId.values());
|
|
4060
4077
|
}
|
|
4078
|
+
async listWorkspaceDescriptors() {
|
|
4079
|
+
await this.reconcileActiveWorkspaceRecords();
|
|
4080
|
+
return this.listWorkspaceDescriptorsSnapshot();
|
|
4081
|
+
}
|
|
4061
4082
|
normalizeFetchWorkspacesSort(sort) {
|
|
4062
4083
|
const fallback = [{ key: 'activity_at', direction: 'desc' }];
|
|
4063
4084
|
if (!sort || sort.length === 0) {
|
|
@@ -4257,41 +4278,7 @@ export class Session {
|
|
|
4257
4278
|
}
|
|
4258
4279
|
async ensureWorkspaceRegistered(cwd) {
|
|
4259
4280
|
const workspaceId = normalizePersistedWorkspaceId(cwd);
|
|
4260
|
-
|
|
4261
|
-
if (existing && !existing.archivedAt) {
|
|
4262
|
-
return existing;
|
|
4263
|
-
}
|
|
4264
|
-
const placement = await this.buildProjectPlacement(workspaceId);
|
|
4265
|
-
const now = new Date().toISOString();
|
|
4266
|
-
const projectExisting = await this.projectRegistry.get(placement.projectKey);
|
|
4267
|
-
const projectRecord = createPersistedProjectRecord({
|
|
4268
|
-
projectId: placement.projectKey,
|
|
4269
|
-
rootPath: deriveProjectRootPath({
|
|
4270
|
-
cwd: workspaceId,
|
|
4271
|
-
checkout: placement.checkout,
|
|
4272
|
-
}),
|
|
4273
|
-
kind: deriveProjectKind(placement.checkout),
|
|
4274
|
-
displayName: placement.projectName,
|
|
4275
|
-
createdAt: projectExisting?.createdAt ?? now,
|
|
4276
|
-
updatedAt: now,
|
|
4277
|
-
archivedAt: null,
|
|
4278
|
-
});
|
|
4279
|
-
await this.projectRegistry.upsert(projectRecord);
|
|
4280
|
-
const workspaceRecord = createPersistedWorkspaceRecord({
|
|
4281
|
-
workspaceId,
|
|
4282
|
-
projectId: placement.projectKey,
|
|
4283
|
-
cwd: workspaceId,
|
|
4284
|
-
kind: deriveWorkspaceKind(placement.checkout),
|
|
4285
|
-
displayName: deriveWorkspaceDisplayName({
|
|
4286
|
-
cwd: workspaceId,
|
|
4287
|
-
checkout: placement.checkout,
|
|
4288
|
-
}),
|
|
4289
|
-
createdAt: existing?.createdAt ?? now,
|
|
4290
|
-
updatedAt: now,
|
|
4291
|
-
archivedAt: null,
|
|
4292
|
-
});
|
|
4293
|
-
await this.workspaceRegistry.upsert(workspaceRecord);
|
|
4294
|
-
return workspaceRecord;
|
|
4281
|
+
return (await this.reconcileWorkspaceRecord(workspaceId)).workspace;
|
|
4295
4282
|
}
|
|
4296
4283
|
async archiveWorkspaceRecord(workspaceId, archivedAt) {
|
|
4297
4284
|
const existing = await this.workspaceRegistry.get(workspaceId);
|
|
@@ -4311,32 +4298,31 @@ export class Session {
|
|
|
4311
4298
|
return;
|
|
4312
4299
|
}
|
|
4313
4300
|
const workspaceId = normalizePersistedWorkspaceId(cwd);
|
|
4314
|
-
const
|
|
4315
|
-
const
|
|
4316
|
-
|
|
4317
|
-
|
|
4318
|
-
|
|
4319
|
-
|
|
4320
|
-
})
|
|
4321
|
-
|
|
4322
|
-
|
|
4323
|
-
|
|
4301
|
+
const changedWorkspaceIds = await this.reconcileActiveWorkspaceRecords();
|
|
4302
|
+
const all = await this.listWorkspaceDescriptorsSnapshot();
|
|
4303
|
+
const descriptorsByWorkspaceId = new Map(all.map((entry) => [entry.id, entry]));
|
|
4304
|
+
const workspaceIdsToEmit = new Set([workspaceId, ...changedWorkspaceIds]);
|
|
4305
|
+
for (const nextWorkspaceId of workspaceIdsToEmit) {
|
|
4306
|
+
const workspace = descriptorsByWorkspaceId.get(nextWorkspaceId);
|
|
4307
|
+
if (!workspace || !this.matchesWorkspaceFilter({ workspace, filter: subscription.filter })) {
|
|
4308
|
+
this.bufferOrEmitWorkspaceUpdate(subscription, {
|
|
4309
|
+
kind: 'remove',
|
|
4310
|
+
id: nextWorkspaceId,
|
|
4311
|
+
});
|
|
4312
|
+
continue;
|
|
4313
|
+
}
|
|
4324
4314
|
this.bufferOrEmitWorkspaceUpdate(subscription, {
|
|
4325
|
-
kind: '
|
|
4326
|
-
|
|
4315
|
+
kind: 'upsert',
|
|
4316
|
+
workspace,
|
|
4327
4317
|
});
|
|
4328
|
-
return;
|
|
4329
4318
|
}
|
|
4330
|
-
this.bufferOrEmitWorkspaceUpdate(subscription, {
|
|
4331
|
-
kind: 'upsert',
|
|
4332
|
-
workspace,
|
|
4333
|
-
});
|
|
4334
4319
|
}
|
|
4335
4320
|
async emitWorkspaceUpdatesForCwds(cwds) {
|
|
4336
4321
|
if (!this.workspaceUpdatesSubscription) {
|
|
4337
4322
|
return;
|
|
4338
4323
|
}
|
|
4339
|
-
const
|
|
4324
|
+
const changedWorkspaceIds = await this.reconcileActiveWorkspaceRecords();
|
|
4325
|
+
const uniqueWorkspaceCwds = new Set(changedWorkspaceIds);
|
|
4340
4326
|
for (const cwd of cwds) {
|
|
4341
4327
|
const normalized = normalizePersistedWorkspaceId(cwd);
|
|
4342
4328
|
if (!normalized) {
|
|
@@ -4344,8 +4330,22 @@ export class Session {
|
|
|
4344
4330
|
}
|
|
4345
4331
|
uniqueWorkspaceCwds.add(normalized);
|
|
4346
4332
|
}
|
|
4347
|
-
|
|
4348
|
-
|
|
4333
|
+
const subscription = this.workspaceUpdatesSubscription;
|
|
4334
|
+
const all = await this.listWorkspaceDescriptorsSnapshot();
|
|
4335
|
+
const descriptorsByWorkspaceId = new Map(all.map((entry) => [entry.id, entry]));
|
|
4336
|
+
for (const workspaceId of uniqueWorkspaceCwds) {
|
|
4337
|
+
const workspace = descriptorsByWorkspaceId.get(workspaceId);
|
|
4338
|
+
if (!workspace || !this.matchesWorkspaceFilter({ workspace, filter: subscription.filter })) {
|
|
4339
|
+
this.bufferOrEmitWorkspaceUpdate(subscription, {
|
|
4340
|
+
kind: 'remove',
|
|
4341
|
+
id: workspaceId,
|
|
4342
|
+
});
|
|
4343
|
+
continue;
|
|
4344
|
+
}
|
|
4345
|
+
this.bufferOrEmitWorkspaceUpdate(subscription, {
|
|
4346
|
+
kind: 'upsert',
|
|
4347
|
+
workspace,
|
|
4348
|
+
});
|
|
4349
4349
|
}
|
|
4350
4350
|
}
|
|
4351
4351
|
async handleFetchAgents(request) {
|
|
@@ -4882,18 +4882,37 @@ export class Session {
|
|
|
4882
4882
|
if (!this.isVoiceMode) {
|
|
4883
4883
|
this.sessionLogger.warn('Received voice_audio_chunk while voice mode is disabled; transcript will be emitted but voice assistant turn is skipped');
|
|
4884
4884
|
}
|
|
4885
|
-
await this.handleVoiceSpeechStart();
|
|
4886
4885
|
const chunkFormat = msg.format || 'audio/wav';
|
|
4887
4886
|
if (this.isVoiceMode) {
|
|
4888
|
-
|
|
4889
|
-
|
|
4890
|
-
|
|
4891
|
-
|
|
4892
|
-
|
|
4887
|
+
if (!this.voiceTurnController) {
|
|
4888
|
+
throw new Error('Voice mode is enabled but the voice turn controller is not running');
|
|
4889
|
+
}
|
|
4890
|
+
const chunkBytes = Buffer.byteLength(msg.audio, 'base64');
|
|
4891
|
+
this.voiceInputChunkCount += 1;
|
|
4892
|
+
this.voiceInputBytes += chunkBytes;
|
|
4893
|
+
if (this.voiceInputChunkCount === 1) {
|
|
4894
|
+
this.sessionLogger.info({
|
|
4895
|
+
format: chunkFormat,
|
|
4896
|
+
audioBytes: chunkBytes,
|
|
4897
|
+
}, 'Received first voice_audio_chunk for active voice mode');
|
|
4893
4898
|
}
|
|
4894
|
-
|
|
4895
|
-
this.
|
|
4896
|
-
|
|
4899
|
+
const now = Date.now();
|
|
4900
|
+
if (this.voiceInputChunkCount % 50 === 0 ||
|
|
4901
|
+
now - this.voiceInputWindowStartedAt >= 1000) {
|
|
4902
|
+
this.sessionLogger.info({
|
|
4903
|
+
chunkCount: this.voiceInputChunkCount,
|
|
4904
|
+
audioBytes: this.voiceInputBytes,
|
|
4905
|
+
windowMs: now - this.voiceInputWindowStartedAt,
|
|
4906
|
+
format: chunkFormat,
|
|
4907
|
+
}, 'Voice input chunk summary');
|
|
4908
|
+
this.voiceInputWindowStartedAt = now;
|
|
4909
|
+
this.voiceInputChunkCount = 0;
|
|
4910
|
+
this.voiceInputBytes = 0;
|
|
4911
|
+
}
|
|
4912
|
+
await this.voiceTurnController.appendClientChunk({
|
|
4913
|
+
audioBase64: msg.audio,
|
|
4914
|
+
format: chunkFormat,
|
|
4915
|
+
});
|
|
4897
4916
|
return;
|
|
4898
4917
|
}
|
|
4899
4918
|
const chunkBuffer = Buffer.from(msg.audio, 'base64');
|
|
@@ -4974,9 +4993,8 @@ export class Session {
|
|
|
4974
4993
|
};
|
|
4975
4994
|
}
|
|
4976
4995
|
async processCompletedAudio(audio, format) {
|
|
4977
|
-
|
|
4978
|
-
|
|
4979
|
-
this.sessionLogger.debug({ phase: this.processingPhase }, `Buffering audio segment (phase: ${this.processingPhase})`);
|
|
4996
|
+
if (this.processingPhase === 'transcribing') {
|
|
4997
|
+
this.sessionLogger.debug({ phase: this.processingPhase, segmentCount: this.pendingAudioSegments.length + 1 }, `Buffering audio segment (phase: ${this.processingPhase})`);
|
|
4980
4998
|
this.pendingAudioSegments.push({
|
|
4981
4999
|
audio,
|
|
4982
5000
|
format,
|
|
@@ -5000,6 +5018,18 @@ export class Session {
|
|
|
5000
5018
|
}
|
|
5001
5019
|
await this.processAudio(audio, format);
|
|
5002
5020
|
}
|
|
5021
|
+
async flushPendingAudioSegments(reason) {
|
|
5022
|
+
if (this.processingPhase === 'transcribing' || this.pendingAudioSegments.length === 0) {
|
|
5023
|
+
return;
|
|
5024
|
+
}
|
|
5025
|
+
const pendingSegments = [...this.pendingAudioSegments];
|
|
5026
|
+
this.pendingAudioSegments = [];
|
|
5027
|
+
this.clearBufferTimeout();
|
|
5028
|
+
this.sessionLogger.debug({ reason, segmentCount: pendingSegments.length }, `Flushing ${pendingSegments.length} buffered audio segment(s)`);
|
|
5029
|
+
const combinedAudio = Buffer.concat(pendingSegments.map((segment) => segment.audio));
|
|
5030
|
+
const combinedFormat = pendingSegments[pendingSegments.length - 1].format;
|
|
5031
|
+
await this.processAudio(combinedAudio, combinedFormat);
|
|
5032
|
+
}
|
|
5003
5033
|
/**
|
|
5004
5034
|
* Process audio through STT and then LLM
|
|
5005
5035
|
*/
|
|
@@ -5042,6 +5072,7 @@ export class Session {
|
|
|
5042
5072
|
catch (error) {
|
|
5043
5073
|
this.setPhase('idle');
|
|
5044
5074
|
this.clearSpeechInProgress('transcription error');
|
|
5075
|
+
await this.flushPendingAudioSegments('transcription error');
|
|
5045
5076
|
this.emit({
|
|
5046
5077
|
type: 'activity_log',
|
|
5047
5078
|
payload: {
|
|
@@ -5076,6 +5107,7 @@ export class Session {
|
|
|
5076
5107
|
this.sessionLogger.debug('Empty transcription (false positive), not aborting');
|
|
5077
5108
|
this.setPhase('idle');
|
|
5078
5109
|
this.clearSpeechInProgress('empty transcription');
|
|
5110
|
+
await this.flushPendingAudioSegments('empty transcription');
|
|
5079
5111
|
return;
|
|
5080
5112
|
}
|
|
5081
5113
|
// Has content - abort any in-progress stream now
|
|
@@ -5113,16 +5145,19 @@ export class Session {
|
|
|
5113
5145
|
this.setPhase('idle');
|
|
5114
5146
|
if (!this.isVoiceMode) {
|
|
5115
5147
|
this.sessionLogger.debug({ requestId: result.requestId }, 'Skipping voice agent processing because voice mode is disabled');
|
|
5148
|
+
await this.flushPendingAudioSegments('voice mode disabled');
|
|
5116
5149
|
return;
|
|
5117
5150
|
}
|
|
5118
5151
|
const agentId = this.voiceModeAgentId;
|
|
5119
5152
|
if (!agentId) {
|
|
5120
5153
|
this.sessionLogger.warn({ requestId: result.requestId }, 'Skipping voice agent processing because no agent is currently voice-enabled');
|
|
5154
|
+
await this.flushPendingAudioSegments('no active voice agent');
|
|
5121
5155
|
return;
|
|
5122
5156
|
}
|
|
5123
5157
|
// Route voice utterances through the same send path as regular text input:
|
|
5124
5158
|
// interrupt-if-running, record message, then start a new stream.
|
|
5125
5159
|
await this.handleSendAgentMessage(agentId, result.text);
|
|
5160
|
+
await this.flushPendingAudioSegments('transcription complete');
|
|
5126
5161
|
}
|
|
5127
5162
|
registerVoiceBridgeForAgent(agentId) {
|
|
5128
5163
|
this.registerVoiceSpeakHandler?.(agentId, async ({ text, signal }) => {
|
|
@@ -5203,8 +5238,6 @@ export class Session {
|
|
|
5203
5238
|
this.sessionLogger.debug({ chunks: this.audioBuffer.chunks.length, pcmBytes: this.audioBuffer.totalPCMBytes }, `Clearing partial audio buffer (${this.audioBuffer.chunks.length} chunk(s)${this.audioBuffer.isPCM ? `, ${this.audioBuffer.totalPCMBytes} PCM bytes` : ''})`);
|
|
5204
5239
|
this.audioBuffer = null;
|
|
5205
5240
|
}
|
|
5206
|
-
this.cancelActiveVoiceDictationStream('new speech turn started');
|
|
5207
|
-
this.clearVoiceModeInactivityTimeout();
|
|
5208
5241
|
this.clearBufferTimeout();
|
|
5209
5242
|
this.abortController.abort();
|
|
5210
5243
|
await this.handleAbort();
|
|
@@ -5215,6 +5248,7 @@ export class Session {
|
|
|
5215
5248
|
* Clear speech-in-progress flag once the user turn has completed
|
|
5216
5249
|
*/
|
|
5217
5250
|
clearSpeechInProgress(reason) {
|
|
5251
|
+
this.clearPendingVoiceSpeechStart(`clear-speech-in-progress:${reason}`);
|
|
5218
5252
|
if (!this.speechInProgress) {
|
|
5219
5253
|
return;
|
|
5220
5254
|
}
|
|
@@ -5244,6 +5278,11 @@ export class Session {
|
|
|
5244
5278
|
this.clearBufferTimeout();
|
|
5245
5279
|
this.bufferTimeout = setTimeout(async () => {
|
|
5246
5280
|
this.sessionLogger.debug('Buffer timeout reached, processing pending segments');
|
|
5281
|
+
if (this.processingPhase === 'transcribing') {
|
|
5282
|
+
this.sessionLogger.debug({ segmentCount: this.pendingAudioSegments.length }, 'Buffer timeout deferred because transcription is still in progress');
|
|
5283
|
+
this.setBufferTimeout();
|
|
5284
|
+
return;
|
|
5285
|
+
}
|
|
5247
5286
|
if (this.pendingAudioSegments.length > 0) {
|
|
5248
5287
|
const segments = [...this.pendingAudioSegments];
|
|
5249
5288
|
this.pendingAudioSegments = [];
|
|
@@ -5253,32 +5292,6 @@ export class Session {
|
|
|
5253
5292
|
}
|
|
5254
5293
|
}, 10000); // 10 second timeout
|
|
5255
5294
|
}
|
|
5256
|
-
setVoiceModeInactivityTimeout() {
|
|
5257
|
-
if (!this.isVoiceMode) {
|
|
5258
|
-
return;
|
|
5259
|
-
}
|
|
5260
|
-
this.clearVoiceModeInactivityTimeout();
|
|
5261
|
-
this.voiceModeInactivityTimeout = setTimeout(() => {
|
|
5262
|
-
this.voiceModeInactivityTimeout = null;
|
|
5263
|
-
if (!this.isVoiceMode || !this.activeVoiceDictationId) {
|
|
5264
|
-
return;
|
|
5265
|
-
}
|
|
5266
|
-
this.sessionLogger.warn({
|
|
5267
|
-
timeoutMs: VOICE_MODE_INACTIVITY_FLUSH_MS,
|
|
5268
|
-
dictationId: this.activeVoiceDictationId,
|
|
5269
|
-
nextSeq: this.activeVoiceDictationNextSeq,
|
|
5270
|
-
}, 'Voice mode inactivity timeout reached without isLast; finalizing active voice dictation stream');
|
|
5271
|
-
void this.finalizeActiveVoiceDictationStream('inactivity timeout').catch((error) => {
|
|
5272
|
-
this.sessionLogger.error({ err: error }, 'Failed to finalize voice dictation stream after inactivity timeout');
|
|
5273
|
-
});
|
|
5274
|
-
}, VOICE_MODE_INACTIVITY_FLUSH_MS);
|
|
5275
|
-
}
|
|
5276
|
-
clearVoiceModeInactivityTimeout() {
|
|
5277
|
-
if (this.voiceModeInactivityTimeout) {
|
|
5278
|
-
clearTimeout(this.voiceModeInactivityTimeout);
|
|
5279
|
-
this.voiceModeInactivityTimeout = null;
|
|
5280
|
-
}
|
|
5281
|
-
}
|
|
5282
5295
|
/**
|
|
5283
5296
|
* Clear buffer timeout
|
|
5284
5297
|
*/
|
|
@@ -5354,16 +5367,14 @@ export class Session {
|
|
|
5354
5367
|
// Abort any ongoing operations
|
|
5355
5368
|
this.abortController.abort();
|
|
5356
5369
|
// Clear timeouts
|
|
5357
|
-
this.clearVoiceModeInactivityTimeout();
|
|
5358
5370
|
this.clearBufferTimeout();
|
|
5359
5371
|
// Clear buffers
|
|
5360
|
-
this.cancelActiveVoiceDictationStream('session cleanup');
|
|
5361
5372
|
this.pendingAudioSegments = [];
|
|
5362
5373
|
this.audioBuffer = null;
|
|
5374
|
+
await this.stopVoiceTurnController();
|
|
5363
5375
|
// Cleanup managers
|
|
5364
5376
|
this.ttsManager.cleanup();
|
|
5365
5377
|
this.sttManager.cleanup();
|
|
5366
|
-
this.voiceStreamManager.cleanupAll();
|
|
5367
5378
|
this.dictationStreamManager.cleanupAll();
|
|
5368
5379
|
// Close MCP clients
|
|
5369
5380
|
if (this.agentMcpClient) {
|
|
@@ -5749,7 +5760,7 @@ export class Session {
|
|
|
5749
5760
|
const streamId = this.allocateTerminalStreamId();
|
|
5750
5761
|
const requestedResumeOffset = typeof msg.resumeOffset === 'number'
|
|
5751
5762
|
? msg.resumeOffset
|
|
5752
|
-
:
|
|
5763
|
+
: 0;
|
|
5753
5764
|
const initialOffset = Math.max(0, Math.floor(requestedResumeOffset));
|
|
5754
5765
|
const binding = {
|
|
5755
5766
|
terminalId: msg.terminalId,
|