oomi-ai 0.2.15 → 0.2.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -132,6 +132,9 @@ That bridge:
132
132
 
133
133
  This is the part of the package most likely to matter when debugging voice turn failures.
134
134
 
135
+ For managed voice replies, the extension also preserves an explicit hidden `metadata.spoken` sidecar when upstream provides one.
136
+ If upstream does not provide one, the extension now synthesizes a conservative hidden fallback from the visible assistant text so backend TTS can speak a cleaner version without changing user-visible chat.
137
+
135
138
  ## Bridge Health States
136
139
 
137
140
  The bridge status file is written locally and should roughly be interpreted as:
@@ -198,6 +201,7 @@ If you are inspecting this package on npm, the main architectural points are:
198
201
  - `idempotencyKey` handling
199
202
  - bridge status that does not report `connected` before managed subscription is ready
200
203
  - runtime fault isolation so local session failures are less likely to crash the whole provider
204
+ - hidden managed-voice speech metadata forwarding, with a synthesized fallback when upstream does not provide `metadata.spoken`
201
205
 
202
206
  If you are developing the plugin, test the packaged surface with:
203
207
 
@@ -168,6 +168,11 @@ Rules:
168
168
  - `metadata.spoken.style` is optional metadata for debugging/future mapping
169
169
  - if no hidden speech sidecar exists, Oomi falls back to speaking the visible assistant text
170
170
 
171
+ Current plugin behavior:
172
+ - if you provide `metadata.spoken`, the plugin preserves it unchanged
173
+ - if you do not provide `metadata.spoken`, the plugin now synthesizes a conservative hidden fallback from visible assistant text for backend TTS
174
+ - visible chat text is still never rewritten by the plugin
175
+
171
176
  ## Avatar Commands
172
177
 
173
178
  Before using avatar commands, call `get_avatar_capabilities` and prefer canonical values.
@@ -194,13 +194,78 @@ function normalizeSpokenMetadata(spoken) {
194
194
  return normalized;
195
195
  }
196
196
 
197
- function normalizeOutgoingMetadata(payloadMetadata, { accountId, correlationId }) {
197
/**
 * Remove emoji from text destined for speech synthesis.
 *
 * Emoji are frequently multi-code-point sequences. Deleting only the
 * pictographic code points leaves invisible "glue" behind (zero-width joiners,
 * the keycap combiner, flag tag characters), and `String.prototype.trim` does
 * not remove those, so an emoji-only message would otherwise normalize to a
 * non-empty string of invisible characters.
 *
 * @param {string} text - Visible assistant text.
 * @returns {string} The text with emoji sequences removed. Whitespace is left
 *   untouched; callers are expected to collapse it afterwards.
 */
function stripEmoji(text) {
  return (
    text
      // Variation selectors and the enclosing-keycap combiner: "1️⃣" becomes "1".
      .replace(/[\uFE0E\uFE0F\u20E3]/g, '')
      // Tag characters used by subdivision flags (e.g. England, Scotland).
      .replace(/[\u{E0020}-\u{E007F}]/gu, '')
      // A pictograph plus any further pictographs joined to it by ZWJ. The ZWJ
      // is only consumed as part of an emoji sequence so that joiners used for
      // shaping in ordinary text (e.g. Persian, Indic scripts) are preserved.
      .replace(
        /[\p{Extended_Pictographic}\p{Emoji_Presentation}](?:\u200D[\p{Extended_Pictographic}\p{Emoji_Presentation}])*/gu,
        '',
      )
  );
}
200
+
201
/**
 * Produce a TTS-friendly version of visible assistant text.
 *
 * Strips emoji and lightweight markdown emphasis (`**bold**`, `__bold__`,
 * inline code), turns en/em dashes into comma pauses, expands the single
 * ellipsis character, and tidies spacing around punctuation.
 *
 * @param {string} text - Visible assistant text.
 * @returns {string} Cleaned, single-spaced, trimmed text (possibly empty).
 */
function normalizeSpeechText(text) {
  // Characters after which a space must NOT be forced when they directly
  // follow a punctuation mark: further punctuation ("...", "?!") and closing
  // quotes/brackets ('"stop."', "(really!)").
  const noSpaceBefore = /[,.;!?"'”’)\]}]/;

  return stripEmoji(text)
    .replace(/\*\*(.*?)\*\*/g, '$1')
    .replace(/__(.*?)__/g, '$1')
    .replace(/`([^`]+)`/g, '$1')
    .replace(/[–—]/g, ', ')
    .replace(/…/g, '...')
    .replace(/\s+/g, ' ')
    .replace(/\s+([,.;!?])/g, '$1')
    // Insert the missing space in "Great!Now" / "ok,then" without breaking
    // punctuation runs, decimals/thousands ("3.14", "1,000"), or closers.
    .replace(/([,.;!?])(?=\S)/g, (match, mark, offset, whole) => {
      const next = whole.charAt(offset + 1);
      if (noSpaceBefore.test(next)) {
        return mark;
      }
      const isNumericSeparator =
        (mark === '.' || mark === ',') &&
        /\d/.test(whole.charAt(offset - 1)) &&
        /\d/.test(next);
      return isNumericSeparator ? mark : `${mark} `;
    })
    .replace(/,\s*,+/g, ', ')
    .replace(/\s+/g, ' ')
    .trim();
}
215
+
216
/**
 * Build a fallback speech-metadata object from visible message content.
 *
 * The content is stringified and normalized for speech; when nothing is left
 * the function returns `null`. Otherwise the tone is chosen by the first
 * matching rule, in this order: upbeat (an exclamation mark or an enthusiastic
 * keyword), gentle (an apologetic/soothing keyword), curious (a question
 * mark), and finally neutral.
 *
 * @param {*} content - Visible assistant content; passed through `toString`.
 * @returns {{text: string, instructions: string, style: {emotion: string, energy: string}}|null}
 *   Fresh metadata object on every call, or `null` for empty speech text.
 */
function inferSpokenMetadataFromContent(content) {
  const text = normalizeSpeechText(toString(content));
  if (!text) {
    return null;
  }

  // Keyword checks are case-insensitive via a lower-cased copy.
  const lowered = text.toLowerCase();
  const buildSpoken = (instructions, emotion, energy) => ({
    text,
    instructions,
    style: { emotion, energy },
  });

  const soundsUpbeat =
    text.includes('!') ||
    /\b(hell yeah|awesome|amazing|great|stoked|love|glad|perfect|nice|cool)\b/.test(lowered);
  if (soundsUpbeat) {
    return buildSpoken(
      'Speak with warm, upbeat conversational energy and natural pacing.',
      'upbeat',
      'medium',
    );
  }

  const soundsGentle =
    /\b(sorry|gentle|softly|careful|reassuring|calm|okay|it'?s okay|i know)\b/.test(lowered);
  if (soundsGentle) {
    return buildSpoken(
      'Speak gently and reassuringly, with a calm pace and soft emphasis.',
      'gentle',
      'low',
    );
  }

  const soundsCurious = text.includes('?');
  if (soundsCurious) {
    return buildSpoken(
      'Speak naturally with curious, engaged intonation and a conversational pace.',
      'curious',
      'medium',
    );
  }

  return buildSpoken(
    'Speak naturally with light warmth and conversational pacing.',
    'neutral',
    'medium',
  );
}
258
+
259
+ function normalizeOutgoingMetadata(payloadMetadata, { accountId, correlationId, content }) {
198
260
  const metadata =
199
261
  payloadMetadata && typeof payloadMetadata === 'object' && !Array.isArray(payloadMetadata)
200
262
  ? { ...payloadMetadata }
201
263
  : {};
202
264
 
203
- const spoken = normalizeSpokenMetadata(metadata.spoken);
265
+ const explicitSpokenPresent = Object.prototype.hasOwnProperty.call(metadata, 'spoken');
266
+ const spoken =
267
+ normalizeSpokenMetadata(metadata.spoken) ||
268
+ (!explicitSpokenPresent ? inferSpokenMetadataFromContent(content) : null);
204
269
  if (spoken) {
205
270
  metadata.spoken = spoken;
206
271
  } else {
@@ -331,6 +396,7 @@ const oomiChannelPlugin = {
331
396
  metadata: normalizeOutgoingMetadata(payload?.metadata, {
332
397
  accountId: resolvedAccountId,
333
398
  correlationId,
399
+ content,
334
400
  }),
335
401
  },
336
402
  });
@@ -2,7 +2,7 @@
2
2
  "id": "oomi-ai",
3
3
  "name": "Oomi Channel Plugin",
4
4
  "description": "Managed Oomi channel integration for OpenClaw.",
5
- "version": "0.2.15",
5
+ "version": "0.2.16",
6
6
  "author": "Oomi",
7
7
  "license": "MIT",
8
8
  "openclawVersion": ">=0.5.0",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "oomi-ai",
3
- "version": "0.2.15",
3
+ "version": "0.2.16",
4
4
  "description": "Oomi OpenClaw channel plugin and bridge tooling",
5
5
  "bin": {
6
6
  "oomi": "bin/oomi-ai.js"
@@ -156,6 +156,7 @@ Rules:
156
156
  - `metadata.spoken.text` is backend TTS input only
157
157
  - `metadata.spoken.instructions` should use natural-language speaking guidance
158
158
  - if the speech sidecar is absent, Oomi speaks the visible assistant text
159
+ - if you omit `metadata.spoken`, the plugin synthesizes a conservative hidden fallback from visible assistant text
159
160
 
160
161
  ## Avatar Control
161
162
 
@@ -74,3 +74,5 @@ Rules:
74
74
  - `metadata.spoken.instructions` should be natural-language guidance, not raw bracket tags
75
75
  - `metadata.spoken.style` is optional metadata for debugging or future mapping
76
76
  - if no hidden speech sidecar exists, Oomi falls back to speaking the visible assistant text
77
+ - if you omit `metadata.spoken`, the plugin now synthesizes a conservative hidden fallback from visible assistant text
78
+ - visible chat text is never rewritten by the plugin