@getpaseo/server 0.1.85 → 0.1.86
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/scripts/supervisor-entrypoint.js +1 -0
- package/dist/server/server/agent/agent-metadata-generator.d.ts +9 -0
- package/dist/server/server/agent/agent-metadata-generator.js +11 -2
- package/dist/server/server/agent/agent-response-loop.d.ts +1 -1
- package/dist/server/server/agent/agent-response-loop.js +3 -13
- package/dist/server/server/agent/create-agent/create.d.ts +2 -0
- package/dist/server/server/agent/create-agent/create.js +7 -0
- package/dist/server/server/agent/import-sessions.d.ts +3 -0
- package/dist/server/server/agent/import-sessions.js +11 -0
- package/dist/server/server/agent/providers/claude/agent.d.ts +2 -1
- package/dist/server/server/agent/providers/claude/agent.js +70 -0
- package/dist/server/server/agent/providers/claude/feature-definitions.d.ts +8 -0
- package/dist/server/server/agent/providers/claude/feature-definitions.js +36 -0
- package/dist/server/server/agent/providers/tool-call-detail-primitives.js +6 -3
- package/dist/server/server/agent/providers/tool-call-mapper-utils.d.ts +5 -0
- package/dist/server/server/agent/providers/tool-call-mapper-utils.js +62 -0
- package/dist/server/server/agent/structured-generation-providers.d.ts +29 -0
- package/dist/server/server/agent/structured-generation-providers.js +192 -0
- package/dist/server/server/bootstrap.d.ts +7 -0
- package/dist/server/server/bootstrap.js +3 -0
- package/dist/server/server/config.js +1 -0
- package/dist/server/server/daemon-config-store.js +46 -6
- package/dist/server/server/daemon-worker.js +1 -0
- package/dist/server/server/file-explorer/service.js +4 -4
- package/dist/server/server/persisted-config.d.ts +77 -22
- package/dist/server/server/persisted-config.js +13 -0
- package/dist/server/server/session.d.ts +3 -2
- package/dist/server/server/session.js +76 -24
- package/dist/server/server/speech/providers/local/runtime.js +52 -133
- package/dist/server/server/speech/providers/local/sherpa/model-catalog.d.ts +9 -2
- package/dist/server/server/speech/providers/local/sherpa/model-catalog.js +7 -0
- package/dist/server/server/speech/providers/local/worker-bytes.d.ts +4 -0
- package/dist/server/server/speech/providers/local/worker-bytes.js +9 -0
- package/dist/server/server/speech/providers/local/worker-client.d.ts +80 -0
- package/dist/server/server/speech/providers/local/worker-client.js +438 -0
- package/dist/server/server/speech/providers/local/worker-process.d.ts +2 -0
- package/dist/server/server/speech/providers/local/worker-process.js +270 -0
- package/dist/server/server/speech/providers/local/worker-protocol.d.ts +95 -0
- package/dist/server/server/speech/providers/local/worker-protocol.js +2 -0
- package/dist/server/server/websocket-server.js +2 -0
- package/dist/server/server/worktree-branch-name-generator.d.ts +9 -0
- package/dist/server/server/worktree-branch-name-generator.js +11 -2
- package/dist/src/server/persisted-config.js +13 -0
- package/package.json +5 -5
|
@@ -39,7 +39,8 @@ import { archiveAgentCommand, cancelAgentRunCommand, closeAgentCommand, setAgent
|
|
|
39
39
|
import { buildStoredAgentPayload, resolveEffectiveThinkingOptionId, resolveStoredAgentPayloadUpdatedAt, toAgentPayload, } from "./agent/agent-projections.js";
|
|
40
40
|
import { appendTimelineItemIfAgentKnown, emitLiveTimelineItemIfAgentKnown, } from "./agent/timeline-append.js";
|
|
41
41
|
import { projectTimelineRows, selectTimelineWindowByProjectedLimit, } from "./agent/timeline-projection.js";
|
|
42
|
-
import {
|
|
42
|
+
import { StructuredAgentFallbackError, StructuredAgentResponseError, generateStructuredAgentResponseWithFallback, } from "./agent/agent-response-loop.js";
|
|
43
|
+
import { resolveStructuredGenerationProviders, } from "./agent/structured-generation-providers.js";
|
|
43
44
|
import { getAgentStreamEventTurnId, } from "./agent/agent-sdk-types.js";
|
|
44
45
|
import { ImportSessionsRequestError, importProviderSession, listImportableProviderSessions, normalizeImportAgentRequest, } from "./agent/import-sessions.js";
|
|
45
46
|
import { checkoutLiteFromGitSnapshot, normalizeWorkspaceId as normalizePersistedWorkspaceId, deriveProjectGroupingName, classifyDirectoryForProjectMembership, deriveWorkspaceDisplayName, } from "./workspace-registry-model.js";
|
|
@@ -292,7 +293,6 @@ export class Session {
|
|
|
292
293
|
this.agentUpdatesSubscription = null;
|
|
293
294
|
this.workspaceUpdatesSubscription = null;
|
|
294
295
|
this.clientActivity = null;
|
|
295
|
-
this.MOBILE_BACKGROUND_STREAM_GRACE_MS = 60000;
|
|
296
296
|
this.unsubscribeProviderSnapshotEvents = null;
|
|
297
297
|
this.inflightRequests = 0;
|
|
298
298
|
this.peakInflightRequests = 0;
|
|
@@ -481,6 +481,26 @@ export class Session {
|
|
|
481
481
|
getClientActivity() {
|
|
482
482
|
return this.clientActivity;
|
|
483
483
|
}
|
|
484
|
+
getFocusedAgentSelectionForCwd(cwd) {
|
|
485
|
+
const focusedAgentId = this.clientActivity?.focusedAgentId;
|
|
486
|
+
if (!focusedAgentId) {
|
|
487
|
+
return undefined;
|
|
488
|
+
}
|
|
489
|
+
const agent = this.agentManager.getAgent(focusedAgentId);
|
|
490
|
+
if (!agent || agent.cwd !== cwd) {
|
|
491
|
+
return undefined;
|
|
492
|
+
}
|
|
493
|
+
return {
|
|
494
|
+
provider: agent.provider,
|
|
495
|
+
model: agent.runtimeInfo?.model ?? agent.config.model ?? null,
|
|
496
|
+
thinkingOptionId: agent.runtimeInfo?.thinkingOptionId ?? agent.config.thinkingOptionId ?? null,
|
|
497
|
+
};
|
|
498
|
+
}
|
|
499
|
+
readStructuredGenerationDaemonConfig() {
|
|
500
|
+
return {
|
|
501
|
+
metadataGeneration: this.daemonConfigStore.get().metadataGeneration,
|
|
502
|
+
};
|
|
503
|
+
}
|
|
484
504
|
getRuntimeMetrics() {
|
|
485
505
|
const terminalMetrics = this.terminalController.getMetrics();
|
|
486
506
|
return {
|
|
@@ -669,12 +689,6 @@ export class Session {
|
|
|
669
689
|
}, "Failed to auto-allow speak tool permission in voice mode");
|
|
670
690
|
});
|
|
671
691
|
}
|
|
672
|
-
// Reduce bandwidth/CPU on mobile: only forward high-frequency agent stream events
|
|
673
|
-
// for the focused agent, with a short grace window while backgrounded.
|
|
674
|
-
// History catch-up is handled via pull-based `fetch_agent_timeline_request`.
|
|
675
|
-
if (this.shouldSkipAgentStreamForward(event.agentId)) {
|
|
676
|
-
return;
|
|
677
|
-
}
|
|
678
692
|
const serializedEvent = serializeAgentStreamEvent(event.event);
|
|
679
693
|
if (!serializedEvent) {
|
|
680
694
|
return;
|
|
@@ -713,20 +727,6 @@ export class Session {
|
|
|
713
727
|
// Title updates may be applied asynchronously after agent creation.
|
|
714
728
|
}, { replayState: false });
|
|
715
729
|
}
|
|
716
|
-
shouldSkipAgentStreamForward(agentId) {
|
|
717
|
-
const activity = this.clientActivity;
|
|
718
|
-
if (activity?.deviceType !== "mobile") {
|
|
719
|
-
return false;
|
|
720
|
-
}
|
|
721
|
-
if (!activity.focusedAgentId || activity.focusedAgentId !== agentId) {
|
|
722
|
-
return true;
|
|
723
|
-
}
|
|
724
|
-
if (activity.appVisible) {
|
|
725
|
-
return false;
|
|
726
|
-
}
|
|
727
|
-
const hiddenForMs = Date.now() - activity.appVisibilityChangedAt.getTime();
|
|
728
|
-
return hiddenForMs >= this.MOBILE_BACKGROUND_STREAM_GRACE_MS;
|
|
729
|
-
}
|
|
730
730
|
buildAgentStreamPayload(event, serializedEvent) {
|
|
731
731
|
return {
|
|
732
732
|
agentId: event.agentId,
|
|
@@ -1288,6 +1288,8 @@ export class Session {
|
|
|
1288
1288
|
return this.handleCheckoutPullRequest(msg);
|
|
1289
1289
|
case "checkout_push_request":
|
|
1290
1290
|
return this.handleCheckoutPushRequest(msg);
|
|
1291
|
+
case "checkout.refresh.request":
|
|
1292
|
+
return this.handleCheckoutRefreshRequest(msg);
|
|
1291
1293
|
case "checkout_pr_create_request":
|
|
1292
1294
|
return this.handleCheckoutPrCreateRequest(msg);
|
|
1293
1295
|
case "checkout_pr_merge_request":
|
|
@@ -2093,6 +2095,7 @@ export class Session {
|
|
|
2093
2095
|
paseoHome: this.paseoHome,
|
|
2094
2096
|
workspaceGitService: this.workspaceGitService,
|
|
2095
2097
|
providerSnapshotManager: this.providerSnapshotManager,
|
|
2098
|
+
daemonConfig: this.readStructuredGenerationDaemonConfig(),
|
|
2096
2099
|
}, {
|
|
2097
2100
|
kind: "session",
|
|
2098
2101
|
config: createAgentConfig,
|
|
@@ -2266,6 +2269,8 @@ export class Session {
|
|
|
2266
2269
|
agentManager: this.agentManager,
|
|
2267
2270
|
agentStorage: this.agentStorage,
|
|
2268
2271
|
workspaceGitService: this.workspaceGitService,
|
|
2272
|
+
providerSnapshotManager: this.providerSnapshotManager,
|
|
2273
|
+
daemonConfig: this.readStructuredGenerationDaemonConfig(),
|
|
2269
2274
|
paseoHome: this.paseoHome,
|
|
2270
2275
|
logger: this.sessionLogger,
|
|
2271
2276
|
});
|
|
@@ -2463,6 +2468,9 @@ export class Session {
|
|
|
2463
2468
|
agentManager: this.agentManager,
|
|
2464
2469
|
cwd,
|
|
2465
2470
|
workspaceGitService: this.workspaceGitService,
|
|
2471
|
+
providerSnapshotManager: this.providerSnapshotManager,
|
|
2472
|
+
daemonConfig: this.readStructuredGenerationDaemonConfig(),
|
|
2473
|
+
currentSelection: this.getFocusedAgentSelectionForCwd(cwd),
|
|
2466
2474
|
firstAgentContext,
|
|
2467
2475
|
logger: this.sessionLogger,
|
|
2468
2476
|
});
|
|
@@ -2857,6 +2865,12 @@ export class Session {
|
|
|
2857
2865
|
patch.length > 0 ? patch : "(No diff available)",
|
|
2858
2866
|
].join("\n"),
|
|
2859
2867
|
});
|
|
2868
|
+
const providers = await resolveStructuredGenerationProviders({
|
|
2869
|
+
cwd,
|
|
2870
|
+
providerSnapshotManager: this.providerSnapshotManager,
|
|
2871
|
+
daemonConfig: this.readStructuredGenerationDaemonConfig(),
|
|
2872
|
+
currentSelection: this.getFocusedAgentSelectionForCwd(cwd),
|
|
2873
|
+
});
|
|
2860
2874
|
try {
|
|
2861
2875
|
const result = await generateStructuredAgentResponseWithFallback({
|
|
2862
2876
|
manager: this.agentManager,
|
|
@@ -2865,7 +2879,7 @@ export class Session {
|
|
|
2865
2879
|
schema,
|
|
2866
2880
|
schemaName: "CommitMessage",
|
|
2867
2881
|
maxRetries: 2,
|
|
2868
|
-
providers
|
|
2882
|
+
providers,
|
|
2869
2883
|
persistSession: false,
|
|
2870
2884
|
agentConfigOverrides: {
|
|
2871
2885
|
title: "Commit generator",
|
|
@@ -2919,6 +2933,12 @@ export class Session {
|
|
|
2919
2933
|
patch.length > 0 ? patch : "(No diff available)",
|
|
2920
2934
|
].join("\n"),
|
|
2921
2935
|
});
|
|
2936
|
+
const providers = await resolveStructuredGenerationProviders({
|
|
2937
|
+
cwd,
|
|
2938
|
+
providerSnapshotManager: this.providerSnapshotManager,
|
|
2939
|
+
daemonConfig: this.readStructuredGenerationDaemonConfig(),
|
|
2940
|
+
currentSelection: this.getFocusedAgentSelectionForCwd(cwd),
|
|
2941
|
+
});
|
|
2922
2942
|
try {
|
|
2923
2943
|
return await generateStructuredAgentResponseWithFallback({
|
|
2924
2944
|
manager: this.agentManager,
|
|
@@ -2927,7 +2947,7 @@ export class Session {
|
|
|
2927
2947
|
schema,
|
|
2928
2948
|
schemaName: "PullRequest",
|
|
2929
2949
|
maxRetries: 2,
|
|
2930
|
-
providers
|
|
2950
|
+
providers,
|
|
2931
2951
|
persistSession: false,
|
|
2932
2952
|
agentConfigOverrides: {
|
|
2933
2953
|
title: "PR generator",
|
|
@@ -3972,6 +3992,38 @@ export class Session {
|
|
|
3972
3992
|
});
|
|
3973
3993
|
}
|
|
3974
3994
|
}
|
|
3995
|
+
async handleCheckoutRefreshRequest(msg) {
|
|
3996
|
+
const { cwd, requestId } = msg;
|
|
3997
|
+
try {
|
|
3998
|
+
this.github.invalidate({ cwd });
|
|
3999
|
+
await this.workspaceGitService.getSnapshot(cwd, {
|
|
4000
|
+
force: true,
|
|
4001
|
+
includeGitHub: true,
|
|
4002
|
+
reason: "manual-refresh",
|
|
4003
|
+
});
|
|
4004
|
+
this.checkoutDiffManager.scheduleRefreshForCwd(cwd);
|
|
4005
|
+
this.emit({
|
|
4006
|
+
type: "checkout.refresh.response",
|
|
4007
|
+
payload: {
|
|
4008
|
+
cwd,
|
|
4009
|
+
success: true,
|
|
4010
|
+
error: null,
|
|
4011
|
+
requestId,
|
|
4012
|
+
},
|
|
4013
|
+
});
|
|
4014
|
+
}
|
|
4015
|
+
catch (error) {
|
|
4016
|
+
this.emit({
|
|
4017
|
+
type: "checkout.refresh.response",
|
|
4018
|
+
payload: {
|
|
4019
|
+
cwd,
|
|
4020
|
+
success: false,
|
|
4021
|
+
error: toCheckoutError(error),
|
|
4022
|
+
requestId,
|
|
4023
|
+
},
|
|
4024
|
+
});
|
|
4025
|
+
}
|
|
4026
|
+
}
|
|
3975
4027
|
async handleCheckoutPrCreateRequest(msg) {
|
|
3976
4028
|
const { cwd, requestId } = msg;
|
|
3977
4029
|
try {
|
|
@@ -1,12 +1,5 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import {
|
|
3
|
-
import { SherpaOnnxParakeetSTT } from "./sherpa/sherpa-parakeet-stt.js";
|
|
4
|
-
import { SherpaParakeetRealtimeTranscriptionSession } from "./sherpa/sherpa-parakeet-realtime-session.js";
|
|
5
|
-
import { SherpaOnnxTTS } from "./sherpa/sherpa-tts.js";
|
|
6
|
-
import { ensureSileroVadModel, SherpaSileroTurnDetectionProvider, } from "./sherpa/silero-vad-provider.js";
|
|
7
|
-
function buildModelDownloadHint(modelId) {
|
|
8
|
-
return `Use 'paseo speech download --model ${modelId}' to download this model.`;
|
|
9
|
-
}
|
|
1
|
+
import { DEFAULT_LOCAL_STT_MODEL, DEFAULT_LOCAL_TTS_MODEL, LocalSttModelIdSchema, LocalTtsModelIdSchema, } from "./models.js";
|
|
2
|
+
import { LocalSpeechWorkerClient, WorkerBackedSpeechToTextProvider, WorkerBackedTextToSpeechProvider, WorkerBackedTurnDetectionProvider, } from "./worker-client.js";
|
|
10
3
|
function resolveConfiguredLocalModels(speechConfig) {
|
|
11
4
|
return {
|
|
12
5
|
dictationLocalSttModel: LocalSttModelIdSchema.parse(speechConfig?.local?.models.dictationStt ?? DEFAULT_LOCAL_STT_MODEL),
|
|
@@ -37,84 +30,27 @@ function computeRequiredLocalModelIds(params) {
|
|
|
37
30
|
}
|
|
38
31
|
return Array.from(ids);
|
|
39
32
|
}
|
|
40
|
-
async function createLocalSttEngine(params) {
|
|
41
|
-
const { modelId, modelsDir, logger } = params;
|
|
42
|
-
const modelDir = getLocalSpeechModelDir(modelsDir, modelId);
|
|
43
|
-
return new SherpaOfflineRecognizerEngine({
|
|
44
|
-
model: {
|
|
45
|
-
kind: "nemo_transducer",
|
|
46
|
-
encoder: `${modelDir}/encoder.int8.onnx`,
|
|
47
|
-
decoder: `${modelDir}/decoder.int8.onnx`,
|
|
48
|
-
joiner: `${modelDir}/joiner.int8.onnx`,
|
|
49
|
-
tokens: `${modelDir}/tokens.txt`,
|
|
50
|
-
},
|
|
51
|
-
numThreads: 2,
|
|
52
|
-
debug: 0,
|
|
53
|
-
}, logger);
|
|
54
|
-
}
|
|
55
33
|
function isLocalProviderEnabled(provider) {
|
|
56
34
|
return provider.enabled !== false && provider.provider === "local";
|
|
57
35
|
}
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
if (localConfig) {
|
|
61
|
-
try {
|
|
62
|
-
vadModelPath = await ensureSileroVadModel(localConfig.modelsDir, logger);
|
|
63
|
-
}
|
|
64
|
-
catch (err) {
|
|
65
|
-
logger.warn({ err }, "Failed to provision Silero VAD model, falling back to bundled");
|
|
66
|
-
}
|
|
67
|
-
}
|
|
68
|
-
return new SherpaSileroTurnDetectionProvider({ modelPath: vadModelPath }, logger);
|
|
36
|
+
function warnLocalConfigMissing(logger, feature) {
|
|
37
|
+
logger.warn({ configured: false }, `Local ${feature} selected but local provider config is missing; ${feature} will be unavailable`);
|
|
69
38
|
}
|
|
70
|
-
|
|
71
|
-
const {
|
|
72
|
-
|
|
73
|
-
logger.warn({ configured: false }, "Local STT selected for voice but local provider config is missing; STT will be unavailable");
|
|
74
|
-
return null;
|
|
75
|
-
}
|
|
76
|
-
const voiceEngine = await getLocalSttEngine(modelId);
|
|
77
|
-
return voiceEngine ? new SherpaOnnxParakeetSTT({ engine: voiceEngine }, logger) : null;
|
|
39
|
+
function initializeLocalTurnDetection(params) {
|
|
40
|
+
const { client } = params;
|
|
41
|
+
return new WorkerBackedTurnDetectionProvider(client);
|
|
78
42
|
}
|
|
79
|
-
|
|
80
|
-
const {
|
|
81
|
-
|
|
82
|
-
logger.warn({ configured: false }, "Local STT selected for dictation but local provider config is missing; dictation STT will be unavailable");
|
|
83
|
-
return null;
|
|
84
|
-
}
|
|
85
|
-
const dictationEngine = await getLocalSttEngine(modelId);
|
|
86
|
-
if (dictationEngine) {
|
|
87
|
-
return {
|
|
88
|
-
id: "local",
|
|
89
|
-
createSession: () => new SherpaParakeetRealtimeTranscriptionSession({ engine: dictationEngine }),
|
|
90
|
-
};
|
|
91
|
-
}
|
|
92
|
-
return null;
|
|
43
|
+
function initializeLocalVoiceStt(params) {
|
|
44
|
+
const { client } = params;
|
|
45
|
+
return new WorkerBackedSpeechToTextProvider(client, "voiceStt");
|
|
93
46
|
}
|
|
94
|
-
|
|
95
|
-
const {
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
}
|
|
100
|
-
|
|
101
|
-
const modelDir = getLocalSpeechModelDir(localConfig.modelsDir, localModels.voiceLocalTtsModel);
|
|
102
|
-
return new SherpaOnnxTTS({
|
|
103
|
-
preset: localModels.voiceLocalTtsModel,
|
|
104
|
-
modelDir,
|
|
105
|
-
speakerId: speechConfig?.local?.models.voiceTtsSpeakerId,
|
|
106
|
-
speed: speechConfig?.local?.models.voiceTtsSpeed,
|
|
107
|
-
}, logger);
|
|
108
|
-
}
|
|
109
|
-
catch (err) {
|
|
110
|
-
logger.warn({
|
|
111
|
-
err,
|
|
112
|
-
modelsDir: localConfig.modelsDir,
|
|
113
|
-
modelId: localModels.voiceLocalTtsModel,
|
|
114
|
-
hint: buildModelDownloadHint(localModels.voiceLocalTtsModel),
|
|
115
|
-
}, "Local TTS engine unavailable");
|
|
116
|
-
return null;
|
|
117
|
-
}
|
|
47
|
+
function initializeLocalDictationStt(params) {
|
|
48
|
+
const { client } = params;
|
|
49
|
+
return new WorkerBackedSpeechToTextProvider(client, "dictationStt");
|
|
50
|
+
}
|
|
51
|
+
function initializeLocalVoiceTts(params) {
|
|
52
|
+
const { client } = params;
|
|
53
|
+
return new WorkerBackedTextToSpeechProvider(client);
|
|
118
54
|
}
|
|
119
55
|
export async function initializeLocalSpeechServices(params) {
|
|
120
56
|
const { providers, logger, speechConfig } = params;
|
|
@@ -129,72 +65,55 @@ export async function initializeLocalSpeechServices(params) {
|
|
|
129
65
|
providers,
|
|
130
66
|
models: localModels,
|
|
131
67
|
});
|
|
132
|
-
const
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
if (existing) {
|
|
136
|
-
return existing;
|
|
137
|
-
}
|
|
138
|
-
if (!localConfig) {
|
|
139
|
-
return null;
|
|
140
|
-
}
|
|
141
|
-
try {
|
|
142
|
-
const created = await createLocalSttEngine({
|
|
143
|
-
modelId,
|
|
68
|
+
const workerClient = localConfig
|
|
69
|
+
? new LocalSpeechWorkerClient({
|
|
70
|
+
config: {
|
|
144
71
|
modelsDir: localConfig.modelsDir,
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
72
|
+
voiceSttModel: localModels.voiceLocalSttModel,
|
|
73
|
+
dictationSttModel: localModels.dictationLocalSttModel,
|
|
74
|
+
voiceTtsModel: localModels.voiceLocalTtsModel,
|
|
75
|
+
voiceTtsSpeakerId: speechConfig?.local?.models.voiceTtsSpeakerId,
|
|
76
|
+
voiceTtsSpeed: speechConfig?.local?.models.voiceTtsSpeed,
|
|
77
|
+
},
|
|
78
|
+
})
|
|
79
|
+
: null;
|
|
80
|
+
if (isLocalProviderEnabled(providers.voiceTurnDetection)) {
|
|
81
|
+
if (workerClient) {
|
|
82
|
+
turnDetectionService = initializeLocalTurnDetection({ client: workerClient });
|
|
149
83
|
}
|
|
150
|
-
|
|
151
|
-
logger
|
|
152
|
-
err,
|
|
153
|
-
modelsDir: localConfig.modelsDir,
|
|
154
|
-
modelId,
|
|
155
|
-
hint: buildModelDownloadHint(modelId),
|
|
156
|
-
}, "Local STT engine unavailable");
|
|
157
|
-
return null;
|
|
84
|
+
else {
|
|
85
|
+
warnLocalConfigMissing(logger, "turn detection");
|
|
158
86
|
}
|
|
159
|
-
};
|
|
160
|
-
if (isLocalProviderEnabled(providers.voiceTurnDetection)) {
|
|
161
|
-
turnDetectionService = await initializeLocalTurnDetection(localConfig, logger);
|
|
162
87
|
}
|
|
163
88
|
if (isLocalProviderEnabled(providers.voiceStt)) {
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
}
|
|
89
|
+
if (workerClient) {
|
|
90
|
+
sttService = initializeLocalVoiceStt({ client: workerClient });
|
|
91
|
+
}
|
|
92
|
+
else {
|
|
93
|
+
warnLocalConfigMissing(logger, "voice STT");
|
|
94
|
+
}
|
|
170
95
|
}
|
|
171
96
|
if (isLocalProviderEnabled(providers.dictationStt)) {
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
}
|
|
97
|
+
if (workerClient) {
|
|
98
|
+
dictationSttService = initializeLocalDictationStt({ client: workerClient });
|
|
99
|
+
}
|
|
100
|
+
else {
|
|
101
|
+
warnLocalConfigMissing(logger, "dictation STT");
|
|
102
|
+
}
|
|
178
103
|
}
|
|
179
104
|
if (isLocalProviderEnabled(providers.voiceTts)) {
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
logger,
|
|
185
|
-
}
|
|
105
|
+
if (workerClient) {
|
|
106
|
+
localVoiceTtsProvider = initializeLocalVoiceTts({ client: workerClient });
|
|
107
|
+
}
|
|
108
|
+
else {
|
|
109
|
+
warnLocalConfigMissing(logger, "voice TTS");
|
|
110
|
+
}
|
|
186
111
|
if (localVoiceTtsProvider) {
|
|
187
112
|
ttsService = localVoiceTtsProvider;
|
|
188
113
|
}
|
|
189
114
|
}
|
|
190
115
|
const cleanup = () => {
|
|
191
|
-
|
|
192
|
-
if (typeof maybeFreeable?.free === "function") {
|
|
193
|
-
maybeFreeable.free();
|
|
194
|
-
}
|
|
195
|
-
for (const engine of localSttEngines.values()) {
|
|
196
|
-
engine.free();
|
|
197
|
-
}
|
|
116
|
+
workerClient?.shutdown();
|
|
198
117
|
};
|
|
199
118
|
return {
|
|
200
119
|
turnDetectionService,
|
|
@@ -18,6 +18,13 @@ export declare const SHERPA_ONNX_MODEL_CATALOG: {
|
|
|
18
18
|
readonly description: "NVIDIA Parakeet TDT v2 (offline NeMo transducer, English).";
|
|
19
19
|
readonly defaultFor: "stt";
|
|
20
20
|
};
|
|
21
|
+
readonly "parakeet-tdt-0.6b-v3-int8": {
|
|
22
|
+
readonly kind: "stt-offline";
|
|
23
|
+
readonly archiveUrl: "https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-parakeet-tdt-0.6b-v3-int8.tar.bz2";
|
|
24
|
+
readonly extractedDir: "sherpa-onnx-nemo-parakeet-tdt-0.6b-v3-int8";
|
|
25
|
+
readonly requiredFiles: ["encoder.int8.onnx", "decoder.int8.onnx", "joiner.int8.onnx", "tokens.txt"];
|
|
26
|
+
readonly description: "NVIDIA Parakeet TDT v3 (offline NeMo transducer, 25 European languages, auto-detected).";
|
|
27
|
+
};
|
|
21
28
|
readonly "kokoro-en-v0_19": {
|
|
22
29
|
readonly kind: "tts";
|
|
23
30
|
readonly archiveUrl: "https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2";
|
|
@@ -36,9 +43,9 @@ export type LocalSttModelId = ModelIdByKind<"stt-offline">;
|
|
|
36
43
|
export type LocalTtsModelId = ModelIdByKind<"tts">;
|
|
37
44
|
export declare const LOCAL_STT_MODEL_IDS: LocalSttModelId[];
|
|
38
45
|
export declare const LOCAL_TTS_MODEL_IDS: LocalTtsModelId[];
|
|
39
|
-
export declare const DEFAULT_LOCAL_STT_MODEL:
|
|
46
|
+
export declare const DEFAULT_LOCAL_STT_MODEL: LocalSttModelId;
|
|
40
47
|
export declare const DEFAULT_LOCAL_TTS_MODEL: "kokoro-en-v0_19";
|
|
41
|
-
export declare const LocalSttModelIdSchema: z.ZodType<
|
|
48
|
+
export declare const LocalSttModelIdSchema: z.ZodType<LocalSttModelId, z.ZodTypeDef, string>;
|
|
42
49
|
export declare const LocalTtsModelIdSchema: z.ZodType<"kokoro-en-v0_19", z.ZodTypeDef, string>;
|
|
43
50
|
export type SherpaOnnxModelSpec = SherpaOnnxCatalogEntry & {
|
|
44
51
|
id: SherpaOnnxModelId;
|
|
@@ -8,6 +8,13 @@ export const SHERPA_ONNX_MODEL_CATALOG = {
|
|
|
8
8
|
description: "NVIDIA Parakeet TDT v2 (offline NeMo transducer, English).",
|
|
9
9
|
defaultFor: "stt",
|
|
10
10
|
},
|
|
11
|
+
"parakeet-tdt-0.6b-v3-int8": {
|
|
12
|
+
kind: "stt-offline",
|
|
13
|
+
archiveUrl: "https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-parakeet-tdt-0.6b-v3-int8.tar.bz2",
|
|
14
|
+
extractedDir: "sherpa-onnx-nemo-parakeet-tdt-0.6b-v3-int8",
|
|
15
|
+
requiredFiles: ["encoder.int8.onnx", "decoder.int8.onnx", "joiner.int8.onnx", "tokens.txt"],
|
|
16
|
+
description: "NVIDIA Parakeet TDT v3 (offline NeMo transducer, 25 European languages, auto-detected).",
|
|
17
|
+
},
|
|
11
18
|
"kokoro-en-v0_19": {
|
|
12
19
|
kind: "tts",
|
|
13
20
|
archiveUrl: "https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2",
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
export function bufferToWorkerBytes(buffer) {
|
|
2
|
+
const bytes = new Uint8Array(buffer.byteLength);
|
|
3
|
+
bytes.set(buffer);
|
|
4
|
+
return bytes.buffer;
|
|
5
|
+
}
|
|
6
|
+
export function workerBytesToBuffer(bytes) {
|
|
7
|
+
return Buffer.from(bytes);
|
|
8
|
+
}
|
|
9
|
+
//# sourceMappingURL=worker-bytes.js.map
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
import { EventEmitter } from "node:events";
|
|
2
|
+
import type pino from "pino";
|
|
3
|
+
import type { SpeechStreamResult, SpeechToTextProvider, StreamingTranscriptionSession, TextToSpeechProvider } from "../../speech-provider.js";
|
|
4
|
+
import type { TurnDetectionProvider, TurnDetectionSession } from "../../turn-detection-provider.js";
|
|
5
|
+
import type { LocalSpeechSessionKind, LocalSpeechTranscriptionResult, LocalSpeechWorkerConfig, LocalSpeechWorkerRequest, LocalSpeechWorkerToParentMessage } from "./worker-protocol.js";
|
|
6
|
+
interface LocalSpeechWorkerProcess {
|
|
7
|
+
connected: boolean;
|
|
8
|
+
killed: boolean;
|
|
9
|
+
send(message: LocalSpeechWorkerRequest, callback: (error: Error | null) => void): boolean;
|
|
10
|
+
disconnect(): void;
|
|
11
|
+
kill(): boolean;
|
|
12
|
+
on(event: "message", listener: (message: LocalSpeechWorkerToParentMessage) => void): this;
|
|
13
|
+
on(event: "exit", listener: (code: number | null, signal: NodeJS.Signals | null) => void): this;
|
|
14
|
+
}
|
|
15
|
+
interface LocalSpeechWorkerClientOptions {
|
|
16
|
+
config: LocalSpeechWorkerConfig;
|
|
17
|
+
requestTimeoutMs?: number;
|
|
18
|
+
idleTtlMs?: number;
|
|
19
|
+
forkWorker?: () => LocalSpeechWorkerProcess;
|
|
20
|
+
}
|
|
21
|
+
export declare class LocalSpeechWorkerClient {
|
|
22
|
+
private readonly config;
|
|
23
|
+
private readonly requestTimeoutMs;
|
|
24
|
+
private readonly idleTtlMs;
|
|
25
|
+
private readonly forkWorker;
|
|
26
|
+
private readonly pendingRequests;
|
|
27
|
+
private readonly activeSessionIds;
|
|
28
|
+
private readonly sessionEmitters;
|
|
29
|
+
private worker;
|
|
30
|
+
private inFlightRequests;
|
|
31
|
+
private idleTimer;
|
|
32
|
+
constructor(options: LocalSpeechWorkerClientOptions);
|
|
33
|
+
synthesizeSpeech(text: string): Promise<SpeechStreamResult>;
|
|
34
|
+
transcribeVoice(audio: Buffer, format: string): Promise<LocalSpeechTranscriptionResult>;
|
|
35
|
+
createSession(kind: LocalSpeechSessionKind, emitter: EventEmitter): Promise<{
|
|
36
|
+
sessionId: string;
|
|
37
|
+
requiredSampleRate: number;
|
|
38
|
+
}>;
|
|
39
|
+
appendSessionAudio(sessionId: string, audio: Buffer): void;
|
|
40
|
+
commitSession(sessionId: string): void;
|
|
41
|
+
clearSession(sessionId: string): void;
|
|
42
|
+
flushSession(sessionId: string): void;
|
|
43
|
+
resetSession(sessionId: string): void;
|
|
44
|
+
closeSession(sessionId: string): void;
|
|
45
|
+
shutdown(): void;
|
|
46
|
+
private sendRequest;
|
|
47
|
+
private ensureWorker;
|
|
48
|
+
private handleWorkerMessage;
|
|
49
|
+
private handleWorkerExit;
|
|
50
|
+
private rejectAllPending;
|
|
51
|
+
private emitSessionError;
|
|
52
|
+
private scheduleIdleShutdownIfReady;
|
|
53
|
+
private clearIdleTimer;
|
|
54
|
+
}
|
|
55
|
+
export declare class WorkerBackedTextToSpeechProvider implements TextToSpeechProvider {
|
|
56
|
+
private readonly client;
|
|
57
|
+
constructor(client: LocalSpeechWorkerClient);
|
|
58
|
+
synthesizeSpeech(text: string): Promise<SpeechStreamResult>;
|
|
59
|
+
}
|
|
60
|
+
export declare class WorkerBackedSpeechToTextProvider implements SpeechToTextProvider {
|
|
61
|
+
private readonly client;
|
|
62
|
+
private readonly kind;
|
|
63
|
+
readonly id: "local";
|
|
64
|
+
constructor(client: LocalSpeechWorkerClient, kind: Extract<LocalSpeechSessionKind, "voiceStt" | "dictationStt">);
|
|
65
|
+
createSession(_params: {
|
|
66
|
+
logger: pino.Logger;
|
|
67
|
+
language?: string;
|
|
68
|
+
prompt?: string;
|
|
69
|
+
}): StreamingTranscriptionSession;
|
|
70
|
+
}
|
|
71
|
+
export declare class WorkerBackedTurnDetectionProvider implements TurnDetectionProvider {
|
|
72
|
+
private readonly client;
|
|
73
|
+
readonly id: "local";
|
|
74
|
+
constructor(client: LocalSpeechWorkerClient);
|
|
75
|
+
createSession(_params: {
|
|
76
|
+
logger: pino.Logger;
|
|
77
|
+
}): TurnDetectionSession;
|
|
78
|
+
}
|
|
79
|
+
export {};
|
|
80
|
+
//# sourceMappingURL=worker-client.d.ts.map
|