neoagent 2.2.0 → 2.2.1-beta.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/server/db/database.js +35 -0
- package/server/http/routes.js +1 -0
- package/server/public/assets/fonts/MaterialIcons-Regular.otf +0 -0
- package/server/public/flutter_bootstrap.js +1 -1
- package/server/public/main.dart.js +71727 -70915
- package/server/routes/widgets.js +101 -0
- package/server/services/ai/engine.js +7 -2
- package/server/services/ai/toolResult.js +25 -0
- package/server/services/ai/tools.js +182 -0
- package/server/services/manager.js +31 -0
- package/server/services/scheduler/cron.js +85 -32
- package/server/services/scheduler/cron_utils.js +216 -0
- package/server/services/voice/bufferedLiveRelayAdapter.js +85 -17
- package/server/services/voice/liveSession.js +109 -9
- package/server/services/voice/providers.js +44 -18
- package/server/services/voice/runtimeManager.js +75 -25
- package/server/services/voice/turnRunner.js +53 -25
- package/server/services/websocket.js +26 -1
- package/server/services/widgets/service.js +550 -0
|
@@ -0,0 +1,216 @@
|
|
|
1
|
+
/** Number of milliseconds in one minute. */
const MINUTE_MS = 1000 * 60;

/** Lower-case three-letter month abbreviations mapped to cron month numbers (jan = 1 … dec = 12). */
const MONTH_NAMES = new Map(
  ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec']
    .map((name, index) => [name, index + 1]),
);

/** Lower-case three-letter weekday abbreviations mapped to cron weekday numbers (sun = 0 … sat = 6). */
const WEEKDAY_NAMES = new Map(
  ['sun', 'mon', 'tue', 'wed', 'thu', 'fri', 'sat'].map((name, index) => [name, index]),
);
|
|
25
|
+
|
|
26
|
+
/**
 * Convert a single cron token into its integer value.
 *
 * The token is trimmed and lower-cased. If `names` contains it, the mapped
 * number is returned; otherwise the token must consist solely of decimal
 * digits.
 *
 * @param {*} raw - Token to convert (normally a string fragment of a cron field).
 * @param {?Map<string, number>} [names=null] - Optional alias table (e.g. month or weekday names).
 * @returns {number} The integer the token stands for.
 * @throws {Error} When the token is neither a known alias nor a plain decimal integer.
 */
function normalizeCronValue(raw, names = null) {
  // `??` instead of `||` so that a numeric 0 is kept as "0" rather than erased.
  const value = String(raw ?? '').trim().toLowerCase();
  if (names?.has(value)) {
    return names.get(value);
  }
  // Number.parseInt alone would accept trailing junk ("5x" -> 5, "1.5" -> 1),
  // silently turning a typo into a different schedule, so require digits only.
  if (!/^\d+$/.test(value)) {
    throw new Error(`Invalid cron value "${raw}"`);
  }
  return Number.parseInt(value, 10);
}
|
|
37
|
+
|
|
38
|
+
/**
 * Add every `step`-th integer from `start` to `end` (inclusive) to `values`.
 *
 * @param {Set<number>} values - Set that receives the generated numbers.
 * @param {number} start - First value of the range.
 * @param {number} end - Last value of the range (inclusive).
 * @param {number} step - Positive integer increment.
 * @param {number} min - Smallest value allowed for the field.
 * @param {number} max - Largest value allowed for the field.
 * @param {string} fieldName - Field label used in error messages.
 * @throws {Error} When the bounds are not integers, the step is not a positive
 *   integer, the range is reversed, or the range leaves `[min, max]`.
 */
function addRange(values, start, end, step, min, max, fieldName) {
  if (!Number.isInteger(start) || !Number.isInteger(end)) {
    throw new Error(`Invalid ${fieldName} range`);
  }
  // A zero, negative or NaN step would make the loop below never terminate
  // (or never advance), so reject it here instead of trusting every caller.
  if (!Number.isInteger(step) || step <= 0) {
    throw new Error(`Invalid ${fieldName} step "${step}"`);
  }
  if (start > end) {
    throw new Error(`Invalid ${fieldName} range "${start}-${end}"`);
  }
  if (start < min || end > max) {
    throw new Error(`${fieldName} range "${start}-${end}" is out of bounds`);
  }
  for (let current = start; current <= end; current += step) {
    values.add(current);
  }
}
|
|
52
|
+
|
|
53
|
+
/**
 * Parse one field of a cron expression into the set of values it selects.
 *
 * Supported syntax per comma-separated segment: `*`, `*​/step`, `a`, `a-b`,
 * `a-b/step`, where `a`/`b` are integers or aliases from `names`.
 * A step attached to a single value (`5/15`) is validated but otherwise has
 * no effect, and empty segments between commas are ignored.
 *
 * @param {string} field - Raw field text.
 * @param {object} spec
 * @param {number} spec.min - Smallest allowed value.
 * @param {number} spec.max - Largest allowed value.
 * @param {string} spec.fieldName - Label used in error messages.
 * @param {?Map<string, number>} [spec.names=null] - Optional alias table.
 * @param {?function(number): number} [spec.normalize=null] - Optional mapping
 *   applied to parsed values before bounds checking (e.g. folding 7 to 0).
 * @returns {{wildcard: boolean, values: Set<number>}} `wildcard` is true only
 *   when the whole field is exactly `*`.
 * @throws {Error} On a missing field, malformed step or range, out-of-bounds
 *   value, or a field that selects no value at all.
 */
function parseCronField(field, { min, max, fieldName, names = null, normalize = null }) {
  const raw = String(field || '').trim();
  if (!raw) {
    throw new Error(`Missing ${fieldName} field`);
  }

  const canonicalize = typeof normalize === 'function' ? normalize : null;
  const inBounds = (value) => value >= min && value <= max;
  const values = new Set();
  const wildcard = raw === '*';

  for (const part of raw.split(',')) {
    const segment = part.trim();
    if (!segment) continue;

    const pieces = segment.split('/');
    const [rangePart, stepPart] = pieces;
    // Reject "a/b/c" and steps with trailing junk ("*/5x"); parseInt alone
    // would quietly accept both and schedule something the user did not write.
    if (pieces.length > 2 || (stepPart != null && !/^\d+$/.test(stepPart))) {
      throw new Error(`Invalid ${fieldName} step "${pieces.slice(1).join('/')}"`);
    }
    const step = stepPart == null ? 1 : Number.parseInt(stepPart, 10);
    if (step <= 0) {
      throw new Error(`Invalid ${fieldName} step "${stepPart}"`);
    }

    if (rangePart === '*') {
      addRange(values, min, max, step, min, max, fieldName);
      continue;
    }

    if (rangePart.includes('-')) {
      const bounds = rangePart.split('-');
      if (bounds.length !== 2) {
        throw new Error(`Invalid ${fieldName} range "${rangePart}"`);
      }
      const start = normalizeCronValue(bounds[0], names);
      const end = normalizeCronValue(bounds[1], names);

      if (!canonicalize) {
        addRange(values, start, end, step, min, max, fieldName);
        continue;
      }

      // Expand the range on the values as written and canonicalize each member
      // afterwards. Canonicalizing the endpoints first turns a day-of-week
      // range such as "5-7" into "5-0" (rejected) and "0-7" into "0-0".
      if (!inBounds(canonicalize(start)) || !inBounds(canonicalize(end))) {
        throw new Error(`${fieldName} range "${start}-${end}" is out of bounds`);
      }
      const expanded = new Set();
      addRange(expanded, start, end, step, start, end, fieldName);
      for (const member of expanded) {
        const canonical = canonicalize(member);
        if (!inBounds(canonical)) {
          throw new Error(`${fieldName} range "${start}-${end}" is out of bounds`);
        }
        values.add(canonical);
      }
      continue;
    }

    let value = normalizeCronValue(rangePart, names);
    if (canonicalize) {
      value = canonicalize(value);
    }
    if (!inBounds(value)) {
      throw new Error(`${fieldName} value "${rangePart}" is out of bounds`);
    }
    values.add(value);
  }

  // A field made only of separators (",") would otherwise produce an empty
  // set, i.e. a schedule that can never match.
  if (values.size === 0) {
    throw new Error(`Invalid ${fieldName} field "${raw}"`);
  }

  return { wildcard, values };
}
|
|
102
|
+
|
|
103
|
+
/**
 * Parse a five-field cron expression (minute, hour, day-of-month, month,
 * day-of-week) into per-field value sets.
 *
 * Month and day-of-week accept the aliases in MONTH_NAMES / WEEKDAY_NAMES,
 * and a day-of-week value of 7 is folded to 0.
 *
 * @param {string} expression - Whitespace-separated cron expression.
 * @returns {{minute: object, hour: object, dayOfMonth: object, month: object, dayOfWeek: object}}
 *   One `parseCronField` result per field.
 * @throws {Error} When the expression does not have exactly five fields, or a
 *   field fails to parse.
 */
function parseCronExpression(expression) {
  const tokens = String(expression || '').trim().split(/\s+/);
  if (tokens.length !== 5) {
    throw new Error(`Invalid cron expression "${expression}"`);
  }

  const [minuteToken, hourToken, dayOfMonthToken, monthToken, dayOfWeekToken] = tokens;

  // Fields are parsed in positional order so the first malformed field is the
  // one reported.
  const minute = parseCronField(minuteToken, { min: 0, max: 59, fieldName: 'minute' });
  const hour = parseCronField(hourToken, { min: 0, max: 23, fieldName: 'hour' });
  const dayOfMonth = parseCronField(dayOfMonthToken, { min: 1, max: 31, fieldName: 'day-of-month' });
  const month = parseCronField(monthToken, {
    min: 1,
    max: 12,
    fieldName: 'month',
    names: MONTH_NAMES,
  });
  const dayOfWeek = parseCronField(dayOfWeekToken, {
    min: 0,
    max: 6,
    fieldName: 'day-of-week',
    names: WEEKDAY_NAMES,
    normalize: (value) => (value === 7 ? 0 : value),
  });

  return { minute, hour, dayOfMonth, month, dayOfWeek };
}
|
|
140
|
+
|
|
141
|
+
/**
 * Test whether a point in time satisfies a parsed cron schedule.
 *
 * All comparisons use the UTC components of `date`. Minute, hour and month
 * must all match. For the day fields: if both are wildcards any day matches;
 * if exactly one is a wildcard only the other is consulted; if neither is a
 * wildcard, a match on either one is sufficient.
 *
 * @param {Date} date - Instant to test.
 * @param {object} schedule - Object with `minute`, `hour`, `dayOfMonth`,
 *   `month` and `dayOfWeek` entries, each `{ wildcard, values }`.
 * @returns {boolean} True when `date` matches the schedule.
 */
function matchesCron(date, schedule) {
  const timeOfDayMatches = schedule.minute.values.has(date.getUTCMinutes())
    && schedule.hour.values.has(date.getUTCHours());
  if (!timeOfDayMatches || !schedule.month.values.has(date.getUTCMonth() + 1)) {
    return false;
  }

  const { dayOfMonth, dayOfWeek } = schedule;
  const domMatch = dayOfMonth.values.has(date.getUTCDate());
  const dowMatch = dayOfWeek.values.has(date.getUTCDay());

  if (dayOfMonth.wildcard) {
    return dayOfWeek.wildcard ? true : dowMatch;
  }
  return dayOfWeek.wildcard ? domMatch : (domMatch || dowMatch);
}
|
|
166
|
+
|
|
167
|
+
/**
 * Return a new Date rounded down to the start of the minute containing `date`.
 *
 * @param {Date} date - Instant to round; it is not modified.
 * @returns {Date} A fresh Date at the minute boundary at or before `date`.
 */
function floorToMinute(date) {
  const wholeMinutes = Math.floor(date.getTime() / MINUTE_MS);
  return new Date(wholeMinutes * MINUTE_MS);
}
|
|
170
|
+
|
|
171
|
+
/**
 * Find the first minute strictly after `fromDate` that matches a cron
 * expression, scanning minute by minute.
 *
 * @param {string} expression - Five-field cron expression.
 * @param {Date|number|string} [fromDate=new Date()] - Search origin. A Date is
 *   used as-is; anything else is passed to the Date constructor.
 * @param {number} [maxLookaheadMinutes=527040] - Number of candidate minutes
 *   to test (default 366 days). Schedules whose next occurrence lies further
 *   away than this yield `null`.
 * @returns {?Date} The next matching minute, or `null` when none is found
 *   within the lookahead window or `fromDate` is not a valid date.
 * @throws {Error} When the expression cannot be parsed.
 */
function findNextRun(expression, fromDate = new Date(), maxLookaheadMinutes = 366 * 24 * 60) {
  const schedule = parseCronExpression(expression);

  const origin = fromDate instanceof Date ? fromDate : new Date(fromDate ?? Date.now());
  // An invalid origin can never produce a match; bail out instead of testing
  // every candidate minute in the window against NaN dates.
  if (Number.isNaN(origin.getTime())) {
    return null;
  }

  // floorToMinute already clears seconds and milliseconds.
  const baseMs = floorToMinute(origin).getTime();

  for (let offset = 1; offset <= maxLookaheadMinutes; offset += 1) {
    const candidate = new Date(baseMs + (offset * MINUTE_MS));
    if (matchesCron(candidate, schedule)) {
      return candidate;
    }
  }
  return null;
}
|
|
184
|
+
|
|
185
|
+
/**
 * Estimate the shortest gap, in minutes, between consecutive runs of a cron
 * expression by sampling its next few occurrences.
 *
 * Only the first `occurrenceCount` occurrences after `fromDate` are examined,
 * so the result is the minimum over that sample, not a proven global minimum;
 * irregular schedules may need a larger `occurrenceCount`.
 *
 * @param {string} expression - Five-field cron expression.
 * @param {number} [occurrenceCount=3] - How many upcoming occurrences to sample.
 * @param {Date} [fromDate=new Date()] - Instant to start sampling from. Passing
 *   a fixed value makes the result reproducible.
 * @returns {?number} Smallest sampled gap in whole minutes, or `null` when
 *   fewer than two occurrences were found.
 * @throws {Error} When the expression cannot be parsed.
 */
function getMinimumIntervalMinutes(expression, occurrenceCount = 3, fromDate = new Date()) {
  const occurrences = [];
  let cursor = fromDate;
  for (let index = 0; index < occurrenceCount; index += 1) {
    const next = findNextRun(expression, cursor);
    if (!next) {
      break;
    }
    occurrences.push(next);
    cursor = next;
  }
  if (occurrences.length < 2) {
    return null;
  }

  let minInterval = Number.POSITIVE_INFINITY;
  for (let index = 1; index < occurrences.length; index += 1) {
    const gapMs = occurrences[index].getTime() - occurrences[index - 1].getTime();
    minInterval = Math.min(minInterval, Math.round(gapMs / MINUTE_MS));
  }

  return Number.isFinite(minInterval) ? minInterval : null;
}
|
|
210
|
+
|
|
211
|
+
module.exports = {
|
|
212
|
+
findNextRun,
|
|
213
|
+
getMinimumIntervalMinutes,
|
|
214
|
+
matchesCron,
|
|
215
|
+
parseCronExpression,
|
|
216
|
+
};
|
|
@@ -1,10 +1,11 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
|
+
const { getProviderRuntimeConfig } = require('../ai/models');
|
|
3
4
|
const { resolveSttModel, transcribeVoiceInput } = require('./providers');
|
|
4
5
|
const { writeTempAudioFile, removeTempFile } = require('./liveAudio');
|
|
5
6
|
|
|
6
|
-
const DEFAULT_PARTIAL_DEBOUNCE_MS =
|
|
7
|
-
const DEFAULT_MIN_PARTIAL_BYTES =
|
|
7
|
+
const DEFAULT_PARTIAL_DEBOUNCE_MS = 700;
|
|
8
|
+
const DEFAULT_MIN_PARTIAL_BYTES = 8000;
|
|
8
9
|
|
|
9
10
|
class BufferedLiveRelayAdapter {
|
|
10
11
|
constructor({
|
|
@@ -32,20 +33,32 @@ class BufferedLiveRelayAdapter {
|
|
|
32
33
|
}
|
|
33
34
|
}
|
|
34
35
|
|
|
35
|
-
async onInputStart(session) {
|
|
36
|
-
session.
|
|
36
|
+
async onInputStart(session, options = {}) {
|
|
37
|
+
session.startTurn(options.turnId, options.mimeType);
|
|
37
38
|
this._clearPartialTimer(session.id);
|
|
38
39
|
this._partialInFlight.delete(session.id);
|
|
39
40
|
}
|
|
40
41
|
|
|
41
42
|
async appendAudioChunk(session, audioBytes, options = {}) {
|
|
42
|
-
session.appendInputChunk(audioBytes, options.mimeType
|
|
43
|
+
const appendResult = session.appendInputChunk(audioBytes, options.mimeType, {
|
|
44
|
+
turnId: options.turnId,
|
|
45
|
+
sequence: options.sequence,
|
|
46
|
+
});
|
|
43
47
|
this._schedulePartialTranscript(session);
|
|
48
|
+
return appendResult;
|
|
44
49
|
}
|
|
45
50
|
|
|
46
|
-
async commitInput(session) {
|
|
51
|
+
async commitInput(session, options = {}) {
|
|
47
52
|
this._clearPartialTimer(session.id);
|
|
48
|
-
const
|
|
53
|
+
const commitState = session.markCommitPending(options.turnId, options.finalSequence);
|
|
54
|
+
if (!commitState.ready) {
|
|
55
|
+
throw new Error(
|
|
56
|
+
`Voice input is incomplete for commit (${commitState.receivedThrough}/${commitState.finalSequence}).`,
|
|
57
|
+
);
|
|
58
|
+
}
|
|
59
|
+
const audioBytes = session.getInputAudioBuffer({
|
|
60
|
+
throughSequence: commitState.finalSequence,
|
|
61
|
+
});
|
|
49
62
|
if (!audioBytes.length) {
|
|
50
63
|
return '';
|
|
51
64
|
}
|
|
@@ -54,6 +67,7 @@ class BufferedLiveRelayAdapter {
|
|
|
54
67
|
model: session.voiceSettings?.liveSttModel,
|
|
55
68
|
userId: session.userId,
|
|
56
69
|
agentId: session.agentId,
|
|
70
|
+
timeoutMs: 20000,
|
|
57
71
|
});
|
|
58
72
|
} finally {
|
|
59
73
|
// Release buffered audio immediately after commit so completed turns do
|
|
@@ -74,11 +88,17 @@ class BufferedLiveRelayAdapter {
|
|
|
74
88
|
}
|
|
75
89
|
this._partialInFlight.add(session.id);
|
|
76
90
|
try {
|
|
77
|
-
const snapshot = session.getInputAudioBuffer(
|
|
91
|
+
const snapshot = session.getInputAudioBuffer({
|
|
92
|
+
contiguousOnly: true,
|
|
93
|
+
});
|
|
94
|
+
if (!snapshot.length) {
|
|
95
|
+
return;
|
|
96
|
+
}
|
|
78
97
|
const transcript = await this._transcribeAudioSnapshot(snapshot, session.inputMimeType, {
|
|
79
98
|
model: session.voiceSettings?.liveSttModel,
|
|
80
99
|
userId: session.userId,
|
|
81
100
|
agentId: session.agentId,
|
|
101
|
+
timeoutMs: 6000,
|
|
82
102
|
});
|
|
83
103
|
if (transcript) {
|
|
84
104
|
await session.publishTranscriptPartial(transcript);
|
|
@@ -104,19 +124,67 @@ class BufferedLiveRelayAdapter {
|
|
|
104
124
|
async _transcribeAudioSnapshot(audioBytes, mimeType, options = {}) {
|
|
105
125
|
const { filePath, mimeType: fileMimeType } = await writeTempAudioFile(audioBytes, mimeType);
|
|
106
126
|
try {
|
|
107
|
-
|
|
108
|
-
const
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
127
|
+
let lastError = null;
|
|
128
|
+
for (const attempt of this._buildSttAttempts(options)) {
|
|
129
|
+
try {
|
|
130
|
+
const transcript = await transcribeVoiceInput(filePath, {
|
|
131
|
+
provider: attempt.provider,
|
|
132
|
+
model: attempt.model,
|
|
133
|
+
mimeType: fileMimeType,
|
|
134
|
+
userId: options.userId,
|
|
135
|
+
agentId: options.agentId,
|
|
136
|
+
apiKey: attempt.apiKey,
|
|
137
|
+
baseUrl: attempt.baseUrl,
|
|
138
|
+
timeoutMs: options.timeoutMs,
|
|
139
|
+
});
|
|
140
|
+
return String(transcript || '').trim();
|
|
141
|
+
} catch (error) {
|
|
142
|
+
lastError = error;
|
|
143
|
+
}
|
|
144
|
+
}
|
|
145
|
+
throw lastError || new Error('Voice transcription failed.');
|
|
116
146
|
} finally {
|
|
117
147
|
await removeTempFile(filePath);
|
|
118
148
|
}
|
|
119
149
|
}
|
|
150
|
+
|
|
151
|
+
_buildSttAttempts(options = {}) {
|
|
152
|
+
const attempts = [];
|
|
153
|
+
const providers = [
|
|
154
|
+
this.provider,
|
|
155
|
+
...['openai', 'deepgram', 'gemini'].filter((provider) => provider !== this.provider),
|
|
156
|
+
];
|
|
157
|
+
for (const provider of providers) {
|
|
158
|
+
const runtime = this._resolveProviderRuntime(provider, options.userId, options.agentId);
|
|
159
|
+
attempts.push({
|
|
160
|
+
provider,
|
|
161
|
+
model: resolveSttModel(provider, provider === this.provider ? options.model : ''),
|
|
162
|
+
apiKey: runtime.apiKey,
|
|
163
|
+
baseUrl: runtime.baseUrl,
|
|
164
|
+
});
|
|
165
|
+
}
|
|
166
|
+
return attempts;
|
|
167
|
+
}
|
|
168
|
+
|
|
169
|
+
_resolveProviderRuntime(provider, userId, agentId) {
|
|
170
|
+
const normalizedProvider = String(provider || '').trim().toLowerCase();
|
|
171
|
+
if (!normalizedProvider || normalizedProvider === 'deepgram') {
|
|
172
|
+
return { apiKey: '', baseUrl: '' };
|
|
173
|
+
}
|
|
174
|
+
try {
|
|
175
|
+
const runtime = getProviderRuntimeConfig(
|
|
176
|
+
userId,
|
|
177
|
+
normalizedProvider === 'gemini' ? 'google' : normalizedProvider,
|
|
178
|
+
agentId,
|
|
179
|
+
);
|
|
180
|
+
return {
|
|
181
|
+
apiKey: typeof runtime.apiKey === 'string' ? runtime.apiKey.trim() : '',
|
|
182
|
+
baseUrl: typeof runtime.baseUrl === 'string' ? runtime.baseUrl.trim() : '',
|
|
183
|
+
};
|
|
184
|
+
} catch {
|
|
185
|
+
return { apiKey: '', baseUrl: '' };
|
|
186
|
+
}
|
|
187
|
+
}
|
|
120
188
|
}
|
|
121
189
|
|
|
122
190
|
module.exports = {
|
|
@@ -21,8 +21,12 @@ class VoiceLiveSession {
|
|
|
21
21
|
this.currentRunId = null;
|
|
22
22
|
this.interrupted = false;
|
|
23
23
|
this.inputMimeType = 'audio/pcm;rate=16000;channels=1';
|
|
24
|
-
this.inputChunks =
|
|
24
|
+
this.inputChunks = new Map();
|
|
25
25
|
this.inputBytes = 0;
|
|
26
|
+
this.activeTurnId = '';
|
|
27
|
+
this.highestContiguousSequence = -1;
|
|
28
|
+
this.highestReceivedSequence = -1;
|
|
29
|
+
this.finalSequence = null;
|
|
26
30
|
this.lastPartialTranscript = '';
|
|
27
31
|
this.lastFinalTranscript = '';
|
|
28
32
|
this.lastAssistantText = '';
|
|
@@ -32,8 +36,12 @@ class VoiceLiveSession {
|
|
|
32
36
|
|
|
33
37
|
resetInput(mimeType = 'audio/pcm;rate=16000;channels=1') {
|
|
34
38
|
this.inputMimeType = String(mimeType || this.inputMimeType).trim() || 'audio/pcm;rate=16000;channels=1';
|
|
35
|
-
this.inputChunks =
|
|
39
|
+
this.inputChunks = new Map();
|
|
36
40
|
this.inputBytes = 0;
|
|
41
|
+
this.activeTurnId = '';
|
|
42
|
+
this.highestContiguousSequence = -1;
|
|
43
|
+
this.highestReceivedSequence = -1;
|
|
44
|
+
this.finalSequence = null;
|
|
37
45
|
this.lastPartialTranscript = '';
|
|
38
46
|
}
|
|
39
47
|
|
|
@@ -45,20 +53,112 @@ class VoiceLiveSession {
|
|
|
45
53
|
this.interrupted = false;
|
|
46
54
|
}
|
|
47
55
|
|
|
48
|
-
|
|
56
|
+
startTurn(turnId, mimeType = null) {
|
|
57
|
+
this.resetInput(mimeType || this.inputMimeType);
|
|
58
|
+
this.activeTurnId = String(turnId || '').trim();
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
appendInputChunk(chunk, mimeType = null, options = {}) {
|
|
49
62
|
if (mimeType) {
|
|
50
63
|
this.inputMimeType = String(mimeType).trim() || this.inputMimeType;
|
|
51
64
|
}
|
|
65
|
+
const turnId = String(options.turnId || '').trim();
|
|
66
|
+
if (turnId && this.activeTurnId && turnId !== this.activeTurnId) {
|
|
67
|
+
throw new Error('Audio chunk turn does not match the active voice turn.');
|
|
68
|
+
}
|
|
69
|
+
if (turnId && !this.activeTurnId) {
|
|
70
|
+
this.activeTurnId = turnId;
|
|
71
|
+
}
|
|
72
|
+
const sequence = Number(options.sequence);
|
|
73
|
+
if (!Number.isInteger(sequence) || sequence < 0) {
|
|
74
|
+
throw new Error('Audio chunk sequence must be a non-negative integer.');
|
|
75
|
+
}
|
|
52
76
|
const payload = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk || []);
|
|
53
|
-
if (payload.length === 0)
|
|
54
|
-
|
|
77
|
+
if (payload.length === 0) {
|
|
78
|
+
return {
|
|
79
|
+
duplicate: false,
|
|
80
|
+
receivedThrough: this.highestContiguousSequence,
|
|
81
|
+
highestReceived: this.highestReceivedSequence,
|
|
82
|
+
};
|
|
83
|
+
}
|
|
84
|
+
if (this.inputChunks.has(sequence)) {
|
|
85
|
+
return {
|
|
86
|
+
duplicate: true,
|
|
87
|
+
receivedThrough: this.highestContiguousSequence,
|
|
88
|
+
highestReceived: this.highestReceivedSequence,
|
|
89
|
+
};
|
|
90
|
+
}
|
|
91
|
+
this.inputChunks.set(sequence, payload);
|
|
55
92
|
this.inputBytes += payload.length;
|
|
93
|
+
if (sequence > this.highestReceivedSequence) {
|
|
94
|
+
this.highestReceivedSequence = sequence;
|
|
95
|
+
}
|
|
96
|
+
while (this.inputChunks.has(this.highestContiguousSequence + 1)) {
|
|
97
|
+
this.highestContiguousSequence += 1;
|
|
98
|
+
}
|
|
99
|
+
return {
|
|
100
|
+
duplicate: false,
|
|
101
|
+
receivedThrough: this.highestContiguousSequence,
|
|
102
|
+
highestReceived: this.highestReceivedSequence,
|
|
103
|
+
};
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
markCommitPending(turnId, finalSequence) {
|
|
107
|
+
const normalizedTurnId = String(turnId || '').trim();
|
|
108
|
+
if (normalizedTurnId && this.activeTurnId && normalizedTurnId !== this.activeTurnId) {
|
|
109
|
+
throw new Error('Voice commit turn does not match the active voice turn.');
|
|
110
|
+
}
|
|
111
|
+
if (normalizedTurnId && !this.activeTurnId) {
|
|
112
|
+
this.activeTurnId = normalizedTurnId;
|
|
113
|
+
}
|
|
114
|
+
const normalizedFinalSequence = Number(finalSequence);
|
|
115
|
+
if (!Number.isInteger(normalizedFinalSequence) || normalizedFinalSequence < 0) {
|
|
116
|
+
throw new Error('Voice commit finalSequence must be a non-negative integer.');
|
|
117
|
+
}
|
|
118
|
+
this.finalSequence = normalizedFinalSequence;
|
|
119
|
+
return {
|
|
120
|
+
finalSequence: this.finalSequence,
|
|
121
|
+
receivedThrough: this.highestContiguousSequence,
|
|
122
|
+
ready: this.hasInputThrough(normalizedFinalSequence),
|
|
123
|
+
};
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
hasInputThrough(sequence) {
|
|
127
|
+
const normalizedSequence = Number(sequence);
|
|
128
|
+
if (!Number.isInteger(normalizedSequence) || normalizedSequence < 0) {
|
|
129
|
+
return false;
|
|
130
|
+
}
|
|
131
|
+
return this.highestContiguousSequence >= normalizedSequence;
|
|
56
132
|
}
|
|
57
133
|
|
|
58
|
-
getInputAudioBuffer() {
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
134
|
+
getInputAudioBuffer(options = {}) {
|
|
135
|
+
const contiguousOnly = options.contiguousOnly !== false;
|
|
136
|
+
const throughSequence = Number.isInteger(options.throughSequence)
|
|
137
|
+
? Number(options.throughSequence)
|
|
138
|
+
: null;
|
|
139
|
+
const maxSequence = throughSequence != null
|
|
140
|
+
? throughSequence
|
|
141
|
+
: (contiguousOnly ? this.highestContiguousSequence : this.highestReceivedSequence);
|
|
142
|
+
if (!Number.isInteger(maxSequence) || maxSequence < 0) {
|
|
143
|
+
return Buffer.alloc(0);
|
|
144
|
+
}
|
|
145
|
+
const ordered = [];
|
|
146
|
+
for (let sequence = 0; sequence <= maxSequence; sequence += 1) {
|
|
147
|
+
const chunk = this.inputChunks.get(sequence);
|
|
148
|
+
if (!chunk) {
|
|
149
|
+
if (contiguousOnly || throughSequence != null) {
|
|
150
|
+
break;
|
|
151
|
+
}
|
|
152
|
+
continue;
|
|
153
|
+
}
|
|
154
|
+
ordered.push(chunk);
|
|
155
|
+
}
|
|
156
|
+
if (ordered.length === 0) {
|
|
157
|
+
return Buffer.alloc(0);
|
|
158
|
+
}
|
|
159
|
+
return ordered.length === 1
|
|
160
|
+
? Buffer.from(ordered[0])
|
|
161
|
+
: Buffer.concat(ordered);
|
|
62
162
|
}
|
|
63
163
|
|
|
64
164
|
async setState(state, extra = {}) {
|
|
@@ -35,6 +35,25 @@ const GEMINI_API_BASE_URL = 'https://generativelanguage.googleapis.com/v1beta/mo
|
|
|
35
35
|
const DEFAULT_GEMINI_TRANSCRIPTION_PROMPT =
|
|
36
36
|
'Transcribe this audio verbatim. Return only the transcript text.';
|
|
37
37
|
|
|
38
|
+
/**
 * Race a promise against a timer.
 *
 * @param {Promise<*>} promise - Work to wait for.
 * @param {*} timeoutMs - Limit in milliseconds. When it does not convert to a
 *   finite number greater than zero, `promise` is returned untouched.
 * @param {string} label - Prefix for the timeout error message.
 * @returns {Promise<*>} Settles like `promise`, or rejects with
 *   "<label> timed out after <n>ms." if the timer fires first. The timer is
 *   unref'd and is cleared once the race settles.
 */
function withTimeout(promise, timeoutMs, label) {
  const limitMs = Number(timeoutMs);
  const hasLimit = Number.isFinite(limitMs) && limitMs > 0;
  if (!hasLimit) {
    return promise;
  }

  let handle = null;
  const deadline = new Promise((_resolve, reject) => {
    const expire = () => {
      reject(new Error(`${label} timed out after ${limitMs}ms.`));
    };
    handle = setTimeout(expire, limitMs);
    handle.unref?.();
  });

  const stopTimer = () => {
    if (handle) {
      clearTimeout(handle);
    }
  };
  return Promise.race([promise, deadline]).finally(stopTimer);
}
|
|
56
|
+
|
|
38
57
|
function readSharedApiKeys() {
|
|
39
58
|
try {
|
|
40
59
|
const keysPath = path.join(AGENT_DATA_DIR, 'API_KEYS.json');
|
|
@@ -287,14 +306,16 @@ async function transcribeWithGemini(filePath, model, mimeType, options = {}) {
|
|
|
287
306
|
/**
 * Transcribe an audio file with the selected STT provider, bounded by an
 * optional timeout.
 *
 * @param {string} filePath - Path of the audio file to transcribe.
 * @param {object} [options={}] - `provider` and `model` select the backend
 *   (after `normalizeSttProvider` / `resolveSttModel`), `mimeType` is passed to
 *   the deepgram and gemini backends, and `timeoutMs` is handed to
 *   `withTimeout`. The whole object is also forwarded to the openai and gemini
 *   backends.
 * @returns {Promise<*>} The backend's result; any provider other than
 *   `openai` or `deepgram` is routed to the gemini backend.
 */
async function transcribeVoiceInput(filePath, options = {}) {
  const provider = normalizeSttProvider(options.provider);
  const model = resolveSttModel(provider, options.model);

  const startRequest = () => {
    switch (provider) {
      case 'openai':
        return transcribeWithOpenAi(filePath, model, options);
      case 'deepgram':
        return transcribeWithDeepgram(filePath, options.mimeType);
      default:
        return transcribeWithGemini(filePath, model, options.mimeType, options);
    }
  };

  return withTimeout(startRequest(), options.timeoutMs, `${provider} STT`);
}
|
|
299
320
|
|
|
300
321
|
async function synthesizeWithOpenAi(text, model, voice, options = {}) {
|
|
@@ -545,14 +566,16 @@ async function synthesizeVoiceReply(text, options = {}) {
|
|
|
545
566
|
}
|
|
546
567
|
|
|
547
568
|
const { provider, model, voice } = normalizeVoiceSynthesisOptions(options);
|
|
569
|
+
let request = null;
|
|
548
570
|
|
|
549
571
|
if (provider === 'openai') {
|
|
550
|
-
|
|
551
|
-
}
|
|
552
|
-
|
|
553
|
-
|
|
572
|
+
request = synthesizeWithOpenAi(content, model, voice, options);
|
|
573
|
+
} else if (provider === 'deepgram') {
|
|
574
|
+
request = synthesizeWithDeepgram(content, model);
|
|
575
|
+
} else {
|
|
576
|
+
request = synthesizeWithGemini(content, model, voice, options);
|
|
554
577
|
}
|
|
555
|
-
return
|
|
578
|
+
return withTimeout(request, options.timeoutMs, `${provider} TTS`);
|
|
556
579
|
}
|
|
557
580
|
|
|
558
581
|
// Minimum characters before flushing a sentence chunk to TTS to avoid tiny requests.
|
|
@@ -590,15 +613,18 @@ async function synthesizeVoiceReplyStream(text, options = {}, onChunk) {
|
|
|
590
613
|
const { provider, model, voice } = normalizeVoiceSynthesisOptions(options);
|
|
591
614
|
const chunks = splitIntoSentenceChunks(content);
|
|
592
615
|
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
616
|
+
const run = (async () => {
|
|
617
|
+
for (const chunk of chunks) {
|
|
618
|
+
if (provider === 'openai') {
|
|
619
|
+
await streamWithOpenAi(chunk, model, voice, options, onChunk);
|
|
620
|
+
} else if (provider === 'deepgram') {
|
|
621
|
+
await streamWithDeepgram(chunk, model, onChunk);
|
|
622
|
+
} else {
|
|
623
|
+
await streamWithGemini(chunk, model, voice, options, onChunk);
|
|
624
|
+
}
|
|
600
625
|
}
|
|
601
|
-
}
|
|
626
|
+
})();
|
|
627
|
+
await withTimeout(run, options.timeoutMs, `${provider} TTS stream`);
|
|
602
628
|
}
|
|
603
629
|
|
|
604
630
|
module.exports = {
|