neoagent 2.2.1-beta.2 → 2.2.1-beta.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/server/public/assets/fonts/MaterialIcons-Regular.otf +0 -0
- package/server/public/flutter_bootstrap.js +1 -1
- package/server/public/main.dart.js +55586 -55226
- package/server/services/ai/tools.js +1 -1
- package/server/services/voice/providers.js +16 -0
- package/server/services/voice/runtimeManager.js +13 -5
- package/server/services/voice/turnRunner.js +15 -2
- package/server/services/widgets/service.js +51 -1
|
@@ -1160,7 +1160,7 @@ function getAvailableTools(app, options = {}) {
|
|
|
1160
1160
|
properties: {
|
|
1161
1161
|
snapshot: {
|
|
1162
1162
|
type: 'object',
|
|
1163
|
-
description: 'Structured widget snapshot payload containing title, optional subtitle/body
|
|
1163
|
+
description: 'Structured widget snapshot payload containing a strong title, optional kicker/subtitle/body, primary and supporting metrics, optional progress, rows, chips, icon/accent/background tokens, optional surfaceColor, updatedAt, and deepLink.'
|
|
1164
1164
|
}
|
|
1165
1165
|
},
|
|
1166
1166
|
required: ['snapshot']
|
|
@@ -34,6 +34,8 @@ const DEFAULT_TTS_VOICES = Object.freeze({
|
|
|
34
34
|
const GEMINI_API_BASE_URL = 'https://generativelanguage.googleapis.com/v1beta/models';
|
|
35
35
|
const DEFAULT_GEMINI_TRANSCRIPTION_PROMPT =
|
|
36
36
|
'Transcribe this audio verbatim. Return only the transcript text.';
|
|
37
|
+
const EMOJI_SPEECH_REGEX =
|
|
38
|
+
/[\p{Extended_Pictographic}\p{Emoji_Presentation}\p{Regional_Indicator}\u200D\uFE0F\u20E3]/gu;
|
|
37
39
|
|
|
38
40
|
function withTimeout(promise, timeoutMs, label) {
|
|
39
41
|
const normalizedTimeout = Number(timeoutMs);
|
|
@@ -54,6 +56,19 @@ function withTimeout(promise, timeoutMs, label) {
|
|
|
54
56
|
});
|
|
55
57
|
}
|
|
56
58
|
|
|
59
|
+
/**
 * Prepare text for speech synthesis by removing emoji and tidying whitespace.
 *
 * Every character matched by EMOJI_SPEECH_REGEX is replaced with a space, runs
 * of spaces/tabs are collapsed to one space, trailing horizontal whitespace on
 * each line is dropped, and runs of three or more newlines are reduced to a
 * single blank line. The result is trimmed.
 *
 * @param {*} value - Text to sanitize; falsy values are treated as empty.
 * @returns {string} The sanitized text, or '' when nothing speakable remains.
 */
function sanitizeSpeechText(value) {
  const text = String(value || '');
  if (!text) {
    return '';
  }
  return text
    .replace(EMOJI_SPEECH_REGEX, ' ')
    .replace(/[ \t]{2,}/g, ' ')
    // Only strip horizontal whitespace (and CR) before a newline. Using `\s+`
    // here would also swallow preceding newlines, collapsing every blank line
    // and making the paragraph rule below unreachable.
    .replace(/[ \t\r]+\n/g, '\n')
    .replace(/\n{3,}/g, '\n\n')
    .trim();
}
|
|
71
|
+
|
|
57
72
|
function readSharedApiKeys() {
|
|
58
73
|
try {
|
|
59
74
|
const keysPath = path.join(AGENT_DATA_DIR, 'API_KEYS.json');
|
|
@@ -641,6 +656,7 @@ module.exports = {
|
|
|
641
656
|
resolveTtsModel,
|
|
642
657
|
resolveTtsVoice,
|
|
643
658
|
normalizeVoiceSynthesisOptions,
|
|
659
|
+
sanitizeSpeechText,
|
|
644
660
|
guessExtFromMimeType,
|
|
645
661
|
splitIntoSentenceChunks,
|
|
646
662
|
transcribeVoiceInput,
|
|
@@ -7,7 +7,12 @@ const { getVoiceRuntimeSettings } = require('./liveSettings');
|
|
|
7
7
|
const { VoiceLiveSession } = require('./liveSession');
|
|
8
8
|
const { OpenAiLiveRelayAdapter } = require('./openaiLiveRelayAdapter');
|
|
9
9
|
const { GeminiLiveRelayAdapter } = require('./geminiLiveRelayAdapter');
|
|
10
|
-
const {
|
|
10
|
+
const {
|
|
11
|
+
synthesizeVoiceReply,
|
|
12
|
+
normalizeVoiceSynthesisOptions,
|
|
13
|
+
synthesizeVoiceReplyStream,
|
|
14
|
+
sanitizeSpeechText,
|
|
15
|
+
} = require('./providers');
|
|
11
16
|
const { VoiceAgentBridge } = require('./agentBridge');
|
|
12
17
|
|
|
13
18
|
class VoiceRuntimeManager {
|
|
@@ -296,17 +301,19 @@ class VoiceRuntimeManager {
|
|
|
296
301
|
model: session.voiceSettings?.liveTtsModel,
|
|
297
302
|
voice: session.voiceSettings?.liveVoice,
|
|
298
303
|
});
|
|
304
|
+
const spokenContent = sanitizeSpeechText(content);
|
|
299
305
|
|
|
300
306
|
let index = 0;
|
|
301
307
|
let streamError = null;
|
|
302
308
|
const ttsAttempts = this.#buildTtsAttemptOrder(session, voiceOptions);
|
|
303
|
-
|
|
309
|
+
if (spokenContent) {
|
|
310
|
+
try {
|
|
304
311
|
for (const attempt of ttsAttempts) {
|
|
305
312
|
index = 0;
|
|
306
313
|
streamError = null;
|
|
307
314
|
try {
|
|
308
315
|
await synthesizeVoiceReplyStream(
|
|
309
|
-
|
|
316
|
+
spokenContent,
|
|
310
317
|
attempt,
|
|
311
318
|
async ({ audioBytes, mimeType }) => {
|
|
312
319
|
if (session.closed || session.interrupted) return;
|
|
@@ -326,8 +333,9 @@ class VoiceRuntimeManager {
|
|
|
326
333
|
streamError = String(error?.message || error || 'Voice playback failed.');
|
|
327
334
|
}
|
|
328
335
|
}
|
|
329
|
-
|
|
330
|
-
|
|
336
|
+
} catch (error) {
|
|
337
|
+
streamError = String(error?.message || error || 'Voice playback failed.');
|
|
338
|
+
}
|
|
331
339
|
}
|
|
332
340
|
|
|
333
341
|
if (!streamError && !session.closed && !session.interrupted) {
|
|
@@ -5,7 +5,11 @@ const { getProviderRuntimeConfig } = require('../ai/models');
|
|
|
5
5
|
const { buildAgentRunContext } = require('../ai/runContext');
|
|
6
6
|
const { buildDirectVoiceContext } = require('./message');
|
|
7
7
|
const { analyzeVoiceAssistantScreenshot } = require('./screenshotContext');
|
|
8
|
-
const {
|
|
8
|
+
const {
|
|
9
|
+
synthesizeVoiceReply,
|
|
10
|
+
normalizeVoiceSynthesisOptions,
|
|
11
|
+
sanitizeSpeechText,
|
|
12
|
+
} = require('./providers');
|
|
9
13
|
const {
|
|
10
14
|
VOICE_HISTORY_WINDOW,
|
|
11
15
|
buildDirectVoiceRunOptions,
|
|
@@ -132,6 +136,14 @@ async function runVoiceTranscriptTurn({
|
|
|
132
136
|
let modelUsed = voiceOptions.model;
|
|
133
137
|
let voiceUsed = voiceOptions.voice;
|
|
134
138
|
if (synthesize !== false) {
|
|
139
|
+
const spokenReplyText = sanitizeSpeechText(replyText);
|
|
140
|
+
if (!spokenReplyText) {
|
|
141
|
+
synthesized = {
|
|
142
|
+
mimeType: 'audio/mpeg',
|
|
143
|
+
audioBytes: Buffer.alloc(0),
|
|
144
|
+
};
|
|
145
|
+
ttsError = null;
|
|
146
|
+
} else {
|
|
135
147
|
const attemptProviders = [
|
|
136
148
|
voiceOptions.provider,
|
|
137
149
|
...['openai', 'deepgram', 'gemini'].filter((provider) => provider !== voiceOptions.provider),
|
|
@@ -145,7 +157,7 @@ async function runVoiceTranscriptTurn({
|
|
|
145
157
|
});
|
|
146
158
|
const runtime = resolveProviderRuntime(userId, agentId, provider);
|
|
147
159
|
try {
|
|
148
|
-
synthesized = await synthesizeVoiceReply(
|
|
160
|
+
synthesized = await synthesizeVoiceReply(spokenReplyText, {
|
|
149
161
|
...normalized,
|
|
150
162
|
apiKey: runtime.apiKey,
|
|
151
163
|
baseUrl: runtime.baseUrl,
|
|
@@ -167,6 +179,7 @@ async function runVoiceTranscriptTurn({
|
|
|
167
179
|
audioBytes: Buffer.alloc(0),
|
|
168
180
|
};
|
|
169
181
|
}
|
|
182
|
+
}
|
|
170
183
|
} else {
|
|
171
184
|
synthesized = {
|
|
172
185
|
mimeType: 'audio/mpeg',
|
|
@@ -33,6 +33,15 @@ function normalizeOptionalText(value, maxLength = 4000) {
|
|
|
33
33
|
return normalized || null;
|
|
34
34
|
}
|
|
35
35
|
|
|
36
|
+
/**
 * Normalize a caller-supplied surface colour into an upper-case `#RRGGBB` or
 * `#RRGGBBAA` string.
 *
 * The raw value is first passed through normalizeOptionalText with a maximum
 * length of 16. A leading `#` is added when missing. Anything that is not
 * exactly six or eight hexadecimal digits after the `#` is rejected.
 *
 * @param {*} value - Candidate colour, with or without a leading `#`.
 * @returns {string|null} Upper-cased hex colour, or null when absent/invalid.
 */
function normalizeSurfaceColor(value) {
  const text = normalizeOptionalText(value, 16);
  if (!text) {
    return null;
  }

  const candidate = text.startsWith('#') ? text : `#${text}`;
  const isHexColor = /^#(?:[0-9A-Fa-f]{6}|[0-9A-Fa-f]{8})$/.test(candidate);
  if (!isHexColor) {
    return null;
  }
  return candidate.toUpperCase();
}
|
|
44
|
+
|
|
36
45
|
function buildWidgetRefreshTaskName(name) {
|
|
37
46
|
return `Refresh widget: ${normalizeText(name, 120)}`;
|
|
38
47
|
}
|
|
@@ -116,6 +125,28 @@ function normalizeTrend(input) {
|
|
|
116
125
|
return { label, direction };
|
|
117
126
|
}
|
|
118
127
|
|
|
128
|
+
/**
 * Convert an optional numeric input into a finite number clamped to a range.
 *
 * Only numbers, bigints and non-blank strings are considered. Other types
 * (booleans, arrays, objects) are rejected instead of being coerced, because
 * `Number(true)` is 1 and `Number([])` is 0, which would turn malformed
 * payloads into plausible-looking values.
 *
 * @param {*} input - Raw value; null, undefined and blank strings mean "absent".
 * @param {{min?: number|null, max?: number|null}} [bounds] - Optional inclusive
 *   limits; a null bound is not applied.
 * @returns {number|null} The clamped finite number, or null when the input is
 *   absent, of an unsupported type, or not a finite number.
 */
function normalizeOptionalNumber(input, { min = null, max = null } = {}) {
  if (input == null) return null;

  if (typeof input === 'string') {
    // `Number('   ')` is 0, so whitespace-only strings must be treated like ''.
    if (input.trim() === '') return null;
  } else if (typeof input !== 'number' && typeof input !== 'bigint') {
    return null;
  }

  const value = Number(input);
  if (!Number.isFinite(value)) return null;
  if (min != null && value < min) return min;
  if (max != null && value > max) return max;
  return value;
}
|
|
136
|
+
|
|
137
|
+
/**
 * Normalize a snapshot `progress` entry into `{ value, max, label }`.
 *
 * Only non-array objects are accepted. `value` and `max` are read through
 * normalizeOptionalNumber with a lower bound of 0; the entry is discarded when
 * either is missing or when `max` is not positive. The reported `value` never
 * exceeds `max`. `label` is passed through normalizeOptionalText with a
 * maximum length of 60.
 *
 * @param {*} input - Raw progress value from the snapshot payload.
 * @returns {{value: number, max: number, label: *}|null} Normalized progress,
 *   or null when the input cannot describe a progress state.
 */
function normalizeProgress(input) {
  const isObjectLike = Boolean(input) && typeof input === 'object' && !Array.isArray(input);
  if (!isObjectLike) {
    return null;
  }

  const source = parseJsonObject(input, {});
  const current = normalizeOptionalNumber(source.value, { min: 0 });
  const total = normalizeOptionalNumber(source.max, { min: 0 });

  const hasUsableRange = current != null && total != null && total > 0;
  if (!hasUsableRange) {
    return null;
  }

  const label = normalizeOptionalText(source.label, 60);
  return {
    value: Math.min(current, total),
    max: total,
    label,
  };
}
|
|
149
|
+
|
|
119
150
|
function normalizeRows(input) {
|
|
120
151
|
if (!Array.isArray(input)) return [];
|
|
121
152
|
return input
|
|
@@ -170,14 +201,23 @@ function validateSnapshotPayload(widget, snapshot = {}) {
|
|
|
170
201
|
template: widget.template,
|
|
171
202
|
layoutVariant: widget.layoutVariant,
|
|
172
203
|
title,
|
|
204
|
+
kicker: normalizeOptionalText(payload.kicker, 80),
|
|
173
205
|
subtitle: normalizeOptionalText(payload.subtitle, 160),
|
|
174
206
|
body: normalizeOptionalText(payload.body, 600),
|
|
175
207
|
metric: normalizeOptionalText(payload.metric, 64),
|
|
208
|
+
metricLabel: normalizeOptionalText(payload.metricLabel, 80),
|
|
209
|
+
secondaryMetric: normalizeOptionalText(payload.secondaryMetric, 64),
|
|
210
|
+
secondaryLabel: normalizeOptionalText(payload.secondaryLabel, 80),
|
|
211
|
+
tertiaryMetric: normalizeOptionalText(payload.tertiaryMetric, 64),
|
|
212
|
+
tertiaryLabel: normalizeOptionalText(payload.tertiaryLabel, 80),
|
|
176
213
|
trend: normalizeTrend(payload.trend),
|
|
214
|
+
progress: normalizeProgress(payload.progress),
|
|
177
215
|
rows: normalizeRows(payload.rows),
|
|
178
216
|
chips: normalizeChips(payload.chips),
|
|
179
217
|
iconToken: normalizeOptionalText(payload.iconToken, 40),
|
|
180
218
|
accentToken: normalizeOptionalText(payload.accentToken, 40),
|
|
219
|
+
backgroundToken: normalizeOptionalText(payload.backgroundToken, 40),
|
|
220
|
+
surfaceColor: normalizeSurfaceColor(payload.surfaceColor),
|
|
181
221
|
updatedAt: normalizeOptionalText(payload.updatedAt, 80) || new Date().toISOString(),
|
|
182
222
|
deepLink: normalizeOptionalText(payload.deepLink, 200) || `widget:${widget.id}`,
|
|
183
223
|
};
|
|
@@ -484,10 +524,20 @@ class WidgetService {
|
|
|
484
524
|
'You are updating a structured product widget. Keep the layout fixed. Refresh only the content snapshot.',
|
|
485
525
|
'Use fresh tools for time-sensitive claims. Do not rely on stale memory for live data such as weather, markets, incidents, or schedules.',
|
|
486
526
|
'After gathering the latest information, call save_widget_snapshot exactly once with a payload matching this schema:',
|
|
487
|
-
'{"title":"","subtitle":"","body":"","metric":"","trend":{"label":"","direction":"flat"},"rows":[{"label":"","value":""}],"chips":[""],"iconToken":"","accentToken":"","updatedAt":"","deepLink":""}',
|
|
527
|
+
'{"title":"","kicker":"","subtitle":"","body":"","metric":"","metricLabel":"","secondaryMetric":"","secondaryLabel":"","tertiaryMetric":"","tertiaryLabel":"","trend":{"label":"","direction":"flat"},"progress":{"value":0,"max":100,"label":""},"rows":[{"label":"","value":""}],"chips":[""],"iconToken":"","accentToken":"","backgroundToken":"","surfaceColor":"","updatedAt":"","deepLink":""}',
|
|
488
528
|
'Rules:',
|
|
489
529
|
'- Do not change the template or layout variant.',
|
|
490
530
|
'- Keep rows to at most 3 and chips to at most 3.',
|
|
531
|
+
'- Prefer concrete data over generic prose. Use metric + supporting fields whenever live data exists.',
|
|
532
|
+
'- Make the widget immediately useful at a glance. Avoid filler copy, duplicated labels, or repeating the widget name unless it helps identify the subject.',
|
|
533
|
+
'- For stat widgets, use title to identify the subject, metric for the main live value, and secondary or tertiary metrics for the next most useful facts.',
|
|
534
|
+
'- For summary widgets, keep body concise and information-dense. Use kicker or subtitle for the context, not for repeated metadata.',
|
|
535
|
+
'- For list widgets, rows should be concrete current items with short labels and values. Do not use rows for vague prose.',
|
|
536
|
+
'- For weather-style widgets, include real temperature/condition/wind/precipitation when available and choose a fitting accent/background token such as sunny, rain, storm, night, or cloud.',
|
|
537
|
+
'- For vehicle-style widgets, include battery or fuel state, range, odometer or distance, and choose a color token or surfaceColor when the vehicle color is known.',
|
|
538
|
+
'- Use backgroundToken and accentToken to reflect the actual state of the data, not a default theme.',
|
|
539
|
+
'- If the subject exposes a progress-like state such as battery charge, tank level, or completion, populate progress with truthful values.',
|
|
540
|
+
'- Never output placeholders such as "null", "n/a", "---", or invented values.',
|
|
491
541
|
'- If the data source fails, explain the problem briefly in body and still save a truthful degraded snapshot if possible.',
|
|
492
542
|
'- If nothing useful can be produced safely, say so clearly instead of inventing content.',
|
|
493
543
|
'',
|