neoagent 2.2.1-beta.2 → 2.2.1-beta.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1160,7 +1160,7 @@ function getAvailableTools(app, options = {}) {
1160
1160
  properties: {
1161
1161
  snapshot: {
1162
1162
  type: 'object',
1163
- description: 'Structured widget snapshot payload containing title, optional subtitle/body/metric/trend/rows/chips/iconToken/accentToken/updatedAt/deepLink.'
1163
+ description: 'Structured widget snapshot payload containing a strong title, optional kicker/subtitle/body, primary and supporting metrics, optional progress, rows, chips, icon/accent/background tokens, optional surfaceColor, updatedAt, and deepLink.'
1164
1164
  }
1165
1165
  },
1166
1166
  required: ['snapshot']
@@ -34,6 +34,8 @@ const DEFAULT_TTS_VOICES = Object.freeze({
34
34
  const GEMINI_API_BASE_URL = 'https://generativelanguage.googleapis.com/v1beta/models';
35
35
  const DEFAULT_GEMINI_TRANSCRIPTION_PROMPT =
36
36
  'Transcribe this audio verbatim. Return only the transcript text.';
37
// Matches characters that should never be sent to a text-to-speech engine:
// pictographic emoji, emoji-presentation characters (including skin-tone
// modifiers), regional indicators (flag letters), and the invisible glue that
// emoji sequences are built from: zero-width joiner, both variation selectors
// (U+FE0E text / U+FE0F emoji), the combining keycap, and the tag characters
// U+E0020-U+E007F used by subdivision flag sequences. Without the tag range
// and U+FE0E, stripping the visible base character would leave invisible
// residue behind in the spoken text.
// The `g` flag makes this stateful for .test()/.exec(); use it with
// String.prototype.replace only.
const EMOJI_SPEECH_REGEX =
  /[\p{Extended_Pictographic}\p{Emoji_Presentation}\p{Regional_Indicator}\u200D\uFE0E\uFE0F\u20E3\u{E0020}-\u{E007F}]/gu;
37
39
 
38
40
  function withTimeout(promise, timeoutMs, label) {
39
41
  const normalizedTimeout = Number(timeoutMs);
@@ -54,6 +56,19 @@ function withTimeout(promise, timeoutMs, label) {
54
56
  });
55
57
  }
56
58
 
59
/**
 * Prepares text for speech synthesis by removing emoji and tidying whitespace.
 *
 * Every character matched by EMOJI_SPEECH_REGEX is replaced with a space, runs
 * of spaces/tabs are collapsed to one space, whitespace sitting directly
 * before a line break is dropped, and runs of three or more line breaks are
 * capped at two (one blank line). The result is trimmed.
 *
 * @param {*} value - Text to sanitize. Falsy values (null, undefined, '', 0)
 *   are treated as empty input.
 * @returns {string} The sanitized text, or '' when nothing remains.
 */
function sanitizeSpeechText(value) {
  const text = String(value || '');
  if (!text) {
    return '';
  }
  return text
    .replace(EMOJI_SPEECH_REGEX, ' ')
    .replace(/[ \t]{2,}/g, ' ')
    // Strip only non-newline whitespace before a line break. Matching /\s+\n/
    // here would also swallow the preceding newlines, flattening every
    // paragraph break to a single "\n" and making the next step unreachable.
    .replace(/[^\S\n]+\n/g, '\n')
    .replace(/\n{3,}/g, '\n\n')
    .trim();
}
71
+
57
72
  function readSharedApiKeys() {
58
73
  try {
59
74
  const keysPath = path.join(AGENT_DATA_DIR, 'API_KEYS.json');
@@ -641,6 +656,7 @@ module.exports = {
641
656
  resolveTtsModel,
642
657
  resolveTtsVoice,
643
658
  normalizeVoiceSynthesisOptions,
659
+ sanitizeSpeechText,
644
660
  guessExtFromMimeType,
645
661
  splitIntoSentenceChunks,
646
662
  transcribeVoiceInput,
@@ -7,7 +7,12 @@ const { getVoiceRuntimeSettings } = require('./liveSettings');
7
7
  const { VoiceLiveSession } = require('./liveSession');
8
8
  const { OpenAiLiveRelayAdapter } = require('./openaiLiveRelayAdapter');
9
9
  const { GeminiLiveRelayAdapter } = require('./geminiLiveRelayAdapter');
10
- const { synthesizeVoiceReply, normalizeVoiceSynthesisOptions, synthesizeVoiceReplyStream } = require('./providers');
10
+ const {
11
+ synthesizeVoiceReply,
12
+ normalizeVoiceSynthesisOptions,
13
+ synthesizeVoiceReplyStream,
14
+ sanitizeSpeechText,
15
+ } = require('./providers');
11
16
  const { VoiceAgentBridge } = require('./agentBridge');
12
17
 
13
18
  class VoiceRuntimeManager {
@@ -296,17 +301,19 @@ class VoiceRuntimeManager {
296
301
  model: session.voiceSettings?.liveTtsModel,
297
302
  voice: session.voiceSettings?.liveVoice,
298
303
  });
304
+ const spokenContent = sanitizeSpeechText(content);
299
305
 
300
306
  let index = 0;
301
307
  let streamError = null;
302
308
  const ttsAttempts = this.#buildTtsAttemptOrder(session, voiceOptions);
303
- try {
309
+ if (spokenContent) {
310
+ try {
304
311
  for (const attempt of ttsAttempts) {
305
312
  index = 0;
306
313
  streamError = null;
307
314
  try {
308
315
  await synthesizeVoiceReplyStream(
309
- content,
316
+ spokenContent,
310
317
  attempt,
311
318
  async ({ audioBytes, mimeType }) => {
312
319
  if (session.closed || session.interrupted) return;
@@ -326,8 +333,9 @@ class VoiceRuntimeManager {
326
333
  streamError = String(error?.message || error || 'Voice playback failed.');
327
334
  }
328
335
  }
329
- } catch (error) {
330
- streamError = String(error?.message || error || 'Voice playback failed.');
336
+ } catch (error) {
337
+ streamError = String(error?.message || error || 'Voice playback failed.');
338
+ }
331
339
  }
332
340
 
333
341
  if (!streamError && !session.closed && !session.interrupted) {
@@ -5,7 +5,11 @@ const { getProviderRuntimeConfig } = require('../ai/models');
5
5
  const { buildAgentRunContext } = require('../ai/runContext');
6
6
  const { buildDirectVoiceContext } = require('./message');
7
7
  const { analyzeVoiceAssistantScreenshot } = require('./screenshotContext');
8
- const { synthesizeVoiceReply, normalizeVoiceSynthesisOptions } = require('./providers');
8
+ const {
9
+ synthesizeVoiceReply,
10
+ normalizeVoiceSynthesisOptions,
11
+ sanitizeSpeechText,
12
+ } = require('./providers');
9
13
  const {
10
14
  VOICE_HISTORY_WINDOW,
11
15
  buildDirectVoiceRunOptions,
@@ -132,6 +136,14 @@ async function runVoiceTranscriptTurn({
132
136
  let modelUsed = voiceOptions.model;
133
137
  let voiceUsed = voiceOptions.voice;
134
138
  if (synthesize !== false) {
139
+ const spokenReplyText = sanitizeSpeechText(replyText);
140
+ if (!spokenReplyText) {
141
+ synthesized = {
142
+ mimeType: 'audio/mpeg',
143
+ audioBytes: Buffer.alloc(0),
144
+ };
145
+ ttsError = null;
146
+ } else {
135
147
  const attemptProviders = [
136
148
  voiceOptions.provider,
137
149
  ...['openai', 'deepgram', 'gemini'].filter((provider) => provider !== voiceOptions.provider),
@@ -145,7 +157,7 @@ async function runVoiceTranscriptTurn({
145
157
  });
146
158
  const runtime = resolveProviderRuntime(userId, agentId, provider);
147
159
  try {
148
- synthesized = await synthesizeVoiceReply(replyText, {
160
+ synthesized = await synthesizeVoiceReply(spokenReplyText, {
149
161
  ...normalized,
150
162
  apiKey: runtime.apiKey,
151
163
  baseUrl: runtime.baseUrl,
@@ -167,6 +179,7 @@ async function runVoiceTranscriptTurn({
167
179
  audioBytes: Buffer.alloc(0),
168
180
  };
169
181
  }
182
+ }
170
183
  } else {
171
184
  synthesized = {
172
185
  mimeType: 'audio/mpeg',
@@ -33,6 +33,15 @@ function normalizeOptionalText(value, maxLength = 4000) {
33
33
  return normalized || null;
34
34
  }
35
35
 
36
/**
 * Normalizes a surface color into an upper-case `#RRGGBB` or `#RRGGBBAA` string.
 *
 * The raw value is first passed through normalizeOptionalText with a limit of
 * 16; a falsy result yields null. A leading `#` is added when missing, and the
 * candidate must then be exactly six or eight hexadecimal digits after the `#`.
 *
 * @param {*} value - Raw color value from the snapshot payload.
 * @returns {string|null} Upper-cased hex color, or null when absent or invalid.
 */
function normalizeSurfaceColor(value) {
  const text = normalizeOptionalText(value, 16);
  if (!text) {
    return null;
  }

  const candidate = text.startsWith('#') ? text : `#${text}`;
  const isHexColor = /^#(?:[0-9A-Fa-f]{6}|[0-9A-Fa-f]{8})$/.test(candidate);
  if (!isHexColor) {
    return null;
  }
  return candidate.toUpperCase();
}
44
+
36
45
  function buildWidgetRefreshTaskName(name) {
37
46
  return `Refresh widget: ${normalizeText(name, 120)}`;
38
47
  }
@@ -116,6 +125,28 @@ function normalizeTrend(input) {
116
125
  return { label, direction };
117
126
  }
118
127
 
128
/**
 * Converts an optional numeric input into a finite number clamped to a range.
 *
 * Only numbers, bigints, and non-blank strings are treated as numeric input.
 * Missing values, blank or whitespace-only strings, and any other type
 * (booleans, arrays, objects) yield null instead of being coerced, because
 * Number() would otherwise turn '   ', [] and false into 0 and true into 1.
 *
 * @param {*} input - Raw value to normalize.
 * @param {{min?: number|null, max?: number|null}} [bounds] - Optional
 *   inclusive bounds; a value outside them is clamped to the nearest bound.
 * @returns {number|null} The finite (possibly clamped) number, or null when
 *   the input is absent or not numeric.
 */
function normalizeOptionalNumber(input, { min = null, max = null } = {}) {
  if (input == null) return null;

  let candidate = input;
  if (typeof candidate === 'string') {
    candidate = candidate.trim();
    // Blank text means "not provided", not zero.
    if (candidate === '') return null;
  } else if (typeof candidate !== 'number' && typeof candidate !== 'bigint') {
    return null;
  }

  const value = Number(candidate);
  if (!Number.isFinite(value)) return null;
  if (min != null && value < min) return min;
  if (max != null && value > max) return max;
  return value;
}
136
+
137
/**
 * Normalizes a snapshot `progress` entry into `{ value, max, label }`.
 *
 * Anything that is not a non-array object yields null. `value` and `max` are
 * normalized with a lower bound of 0; if either is missing or `max` is not
 * greater than zero the result is null. The returned `value` never exceeds
 * `max`, and `label` is passed through normalizeOptionalText with a limit of 60.
 *
 * @param {*} input - Raw progress value from the snapshot payload.
 * @returns {{value: number, max: number, label: *}|null} Normalized progress,
 *   or null when the input is unusable.
 */
function normalizeProgress(input) {
  const isObjectLike = Boolean(input) && typeof input === 'object' && !Array.isArray(input);
  if (!isObjectLike) {
    return null;
  }

  const source = parseJsonObject(input, {});
  const current = normalizeOptionalNumber(source.value, { min: 0 });
  const limit = normalizeOptionalNumber(source.max, { min: 0 });

  const hasUsableRange = current != null && limit != null && limit > 0;
  if (!hasUsableRange) {
    return null;
  }

  return {
    value: Math.min(current, limit),
    max: limit,
    label: normalizeOptionalText(source.label, 60),
  };
}
149
+
119
150
  function normalizeRows(input) {
120
151
  if (!Array.isArray(input)) return [];
121
152
  return input
@@ -170,14 +201,23 @@ function validateSnapshotPayload(widget, snapshot = {}) {
170
201
  template: widget.template,
171
202
  layoutVariant: widget.layoutVariant,
172
203
  title,
204
+ kicker: normalizeOptionalText(payload.kicker, 80),
173
205
  subtitle: normalizeOptionalText(payload.subtitle, 160),
174
206
  body: normalizeOptionalText(payload.body, 600),
175
207
  metric: normalizeOptionalText(payload.metric, 64),
208
+ metricLabel: normalizeOptionalText(payload.metricLabel, 80),
209
+ secondaryMetric: normalizeOptionalText(payload.secondaryMetric, 64),
210
+ secondaryLabel: normalizeOptionalText(payload.secondaryLabel, 80),
211
+ tertiaryMetric: normalizeOptionalText(payload.tertiaryMetric, 64),
212
+ tertiaryLabel: normalizeOptionalText(payload.tertiaryLabel, 80),
176
213
  trend: normalizeTrend(payload.trend),
214
+ progress: normalizeProgress(payload.progress),
177
215
  rows: normalizeRows(payload.rows),
178
216
  chips: normalizeChips(payload.chips),
179
217
  iconToken: normalizeOptionalText(payload.iconToken, 40),
180
218
  accentToken: normalizeOptionalText(payload.accentToken, 40),
219
+ backgroundToken: normalizeOptionalText(payload.backgroundToken, 40),
220
+ surfaceColor: normalizeSurfaceColor(payload.surfaceColor),
181
221
  updatedAt: normalizeOptionalText(payload.updatedAt, 80) || new Date().toISOString(),
182
222
  deepLink: normalizeOptionalText(payload.deepLink, 200) || `widget:${widget.id}`,
183
223
  };
@@ -484,10 +524,20 @@ class WidgetService {
484
524
  'You are updating a structured product widget. Keep the layout fixed. Refresh only the content snapshot.',
485
525
  'Use fresh tools for time-sensitive claims. Do not rely on stale memory for live data such as weather, markets, incidents, or schedules.',
486
526
  'After gathering the latest information, call save_widget_snapshot exactly once with a payload matching this schema:',
487
- '{"title":"","subtitle":"","body":"","metric":"","trend":{"label":"","direction":"flat"},"rows":[{"label":"","value":""}],"chips":[""],"iconToken":"","accentToken":"","updatedAt":"","deepLink":""}',
527
+ '{"title":"","kicker":"","subtitle":"","body":"","metric":"","metricLabel":"","secondaryMetric":"","secondaryLabel":"","tertiaryMetric":"","tertiaryLabel":"","trend":{"label":"","direction":"flat"},"progress":{"value":0,"max":100,"label":""},"rows":[{"label":"","value":""}],"chips":[""],"iconToken":"","accentToken":"","backgroundToken":"","surfaceColor":"","updatedAt":"","deepLink":""}',
488
528
  'Rules:',
489
529
  '- Do not change the template or layout variant.',
490
530
  '- Keep rows to at most 3 and chips to at most 3.',
531
+ '- Prefer concrete data over generic prose. Use metric + supporting fields whenever live data exists.',
532
+ '- Make the widget immediately useful at a glance. Avoid filler copy, duplicated labels, or repeating the widget name unless it helps identify the subject.',
533
+ '- For stat widgets, use title to identify the subject, metric for the main live value, and secondary or tertiary metrics for the next most useful facts.',
534
+ '- For summary widgets, keep body concise and information-dense. Use kicker or subtitle for the context, not for repeated metadata.',
535
+ '- For list widgets, rows should be concrete current items with short labels and values. Do not use rows for vague prose.',
536
+ '- For weather-style widgets, include real temperature/condition/wind/precipitation when available and choose a fitting accent/background token such as sunny, rain, storm, night, or cloud.',
537
+ '- For vehicle-style widgets, include battery or fuel state, range, odometer or distance, and choose a color token or surfaceColor when the vehicle color is known.',
538
+ '- Use backgroundToken and accentToken to reflect the actual state of the data, not a default theme.',
539
+ '- If the subject exposes a progress-like state such as battery charge, tank level, or completion, populate progress with truthful values.',
540
+ '- Never output placeholders such as "null", "n/a", "---", or invented values.',
491
541
  '- If the data source fails, explain the problem briefly in body and still save a truthful degraded snapshot if possible.',
492
542
  '- If nothing useful can be produced safely, say so clearly instead of inventing content.',
493
543
  '',