npm - oomi-ai - Versions diffs - 0.2.15 → 0.2.16 - Mend

oomi-ai 0.2.15 → 0.2.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

package/README.md +4 -0
package/agent_instructions.md +5 -0
package/openclaw.extension.js +68 -2
package/openclaw.plugin.json +1 -1
package/package.json +1 -1
package/skills/oomi/SKILL.md +1 -0
package/skills/oomi/agent_instructions.md +2 -0

package/README.md CHANGED Viewed

@@ -132,6 +132,9 @@ That bridge:
 This is the part of the package most likely to matter when debugging voice turn failures.
+For managed voice replies, the extension also preserves an explicit hidden `metadata.spoken` sidecar when upstream provides one.
+If upstream does not provide one, the extension now synthesizes a conservative hidden fallback from the visible assistant text so backend TTS can speak a cleaner version without changing user-visible chat.
 ## Bridge Health States
 The bridge status file is written locally and should roughly be interpreted as:
@@ -198,6 +201,7 @@ If you are inspecting this package on npm, the main architectural points are:
   - `idempotencyKey` handling
   - bridge status that does not report `connected` before managed subscription is ready
   - runtime fault isolation so local session failures are less likely to crash the whole provider
+  - hidden managed-voice speech metadata forwarding, with a synthesized fallback when upstream does not provide `metadata.spoken`
 If you are developing the plugin, test the packaged surface with:

package/agent_instructions.md CHANGED Viewed

@@ -168,6 +168,11 @@ Rules:
 - `metadata.spoken.style` is optional metadata for debugging/future mapping
 - if no hidden speech sidecar exists, Oomi falls back to speaking the visible assistant text
+Current plugin behavior:
+- if you provide `metadata.spoken`, the plugin preserves it unchanged
+- if you do not provide `metadata.spoken`, the plugin now synthesizes a conservative hidden fallback from visible assistant text for backend TTS
+- visible chat text is still never rewritten by the plugin
 ## Avatar Commands
 Before using avatar commands, call `get_avatar_capabilities` and prefer canonical values.

package/openclaw.extension.js CHANGED Viewed

@@ -194,13 +194,78 @@ function normalizeSpokenMetadata(spoken) {
   return normalized;
 }
-function normalizeOutgoingMetadata(payloadMetadata, { accountId, correlationId }) {
+function stripEmoji(text) {
+  return text.replace(/[\uFE0E\uFE0F]/g, '').replace(/\p{Extended_Pictographic}|\p{Emoji_Presentation}/gu, '');
+}
+function normalizeSpeechText(text) {
+  return stripEmoji(text)
+    .replace(/\*\*(.*?)\*\*/g, '$1')
+    .replace(/__(.*?)__/g, '$1')
+    .replace(/`([^`]+)`/g, '$1')
+    .replace(/[–—]/g, ', ')
+    .replace(/…/g, '...')
+    .replace(/\s+/g, ' ')
+    .replace(/\s+([,.;!?])/g, '$1')
+    .replace(/([,.;!?])(?=[^\s])/g, '$1 ')
+    .replace(/,\s*,+/g, ', ')
+    .replace(/\s+/g, ' ')
+    .trim();
+}
+function inferSpokenMetadataFromContent(content) {
+  const text = normalizeSpeechText(toString(content));
+  if (!text) return null;
+  const normalized = text.toLowerCase();
+  const upbeat =
+    /!/.test(text) ||
+    /\b(hell yeah|awesome|amazing|great|stoked|love|glad|perfect|nice|cool)\b/.test(normalized);
+  const gentle =
+    /\b(sorry|gentle|softly|careful|reassuring|calm|okay|it'?s okay|i know)\b/.test(normalized);
+  const curious = /\?/.test(text);
+  if (upbeat) {
+    return {
+      text,
+      instructions: 'Speak with warm, upbeat conversational energy and natural pacing.',
+      style: { emotion: 'upbeat', energy: 'medium' },
+    };
+  }
+  if (gentle) {
+    return {
+      text,
+      instructions: 'Speak gently and reassuringly, with a calm pace and soft emphasis.',
+      style: { emotion: 'gentle', energy: 'low' },
+    };
+  }
+  if (curious) {
+    return {
+      text,
+      instructions: 'Speak naturally with curious, engaged intonation and a conversational pace.',
+      style: { emotion: 'curious', energy: 'medium' },
+    };
+  }
+  return {
+    text,
+    instructions: 'Speak naturally with light warmth and conversational pacing.',
+    style: { emotion: 'neutral', energy: 'medium' },
+  };
+}
+function normalizeOutgoingMetadata(payloadMetadata, { accountId, correlationId, content }) {
   const metadata =
     payloadMetadata && typeof payloadMetadata === 'object' && !Array.isArray(payloadMetadata)
       ? { ...payloadMetadata }
       : {};
-  const spoken = normalizeSpokenMetadata(metadata.spoken);
+  const explicitSpokenPresent = Object.prototype.hasOwnProperty.call(metadata, 'spoken');
+  const spoken =
+    normalizeSpokenMetadata(metadata.spoken) ||
+    (!explicitSpokenPresent ? inferSpokenMetadataFromContent(content) : null);
   if (spoken) {
     metadata.spoken = spoken;
   } else {
@@ -331,6 +396,7 @@ const oomiChannelPlugin = {
           metadata: normalizeOutgoingMetadata(payload?.metadata, {
             accountId: resolvedAccountId,
             correlationId,
+            content,
           }),
         },
       });

package/openclaw.plugin.json CHANGED Viewed

@@ -2,7 +2,7 @@
   "id": "oomi-ai",
   "name": "Oomi Channel Plugin",
   "description": "Managed Oomi channel integration for OpenClaw.",
-  "version": "0.2.15",
+  "version": "0.2.16",
   "author": "Oomi",
   "license": "MIT",
   "openclawVersion": ">=0.5.0",

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "oomi-ai",
-  "version": "0.2.15",
+  "version": "0.2.16",
   "description": "Oomi OpenClaw channel plugin and bridge tooling",
   "bin": {
     "oomi": "bin/oomi-ai.js"

package/skills/oomi/SKILL.md CHANGED Viewed

@@ -156,6 +156,7 @@ Rules:
 - `metadata.spoken.text` is backend TTS input only
 - `metadata.spoken.instructions` should use natural-language speaking guidance
 - if the speech sidecar is absent, Oomi speaks the visible assistant text
+- if you omit `metadata.spoken`, the plugin synthesizes a conservative hidden fallback from visible assistant text
 ## Avatar Control

package/skills/oomi/agent_instructions.md CHANGED Viewed

@@ -74,3 +74,5 @@ Rules:
 - `metadata.spoken.instructions` should be natural-language guidance, not raw bracket tags
 - `metadata.spoken.style` is optional metadata for debugging or future mapping
 - if no hidden speech sidecar exists, Oomi falls back to speaking the visible assistant text
+- if you omit `metadata.spoken`, the plugin now synthesizes a conservative hidden fallback from visible assistant text
+- visible chat text is never rewritten by the plugin