oomi-ai 0.2.21 → 0.2.23

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
package/bin/oomi-ai.js CHANGED
@@ -1673,6 +1673,31 @@ function extractTextFromGatewayMessage(message) {
1673
1673
  .join(' ');
1674
1674
  }
1675
1675
 
1676
+ function summarizeVoiceFrameContract(frameText) {
1677
+ const frame = parseJsonPayload(frameText);
1678
+ if (!frame || typeof frame !== 'object') {
1679
+ return { parseable: false };
1680
+ }
1681
+ const payload = frame.payload && typeof frame.payload === 'object' ? frame.payload : {};
1682
+ const message = payload.message && typeof payload.message === 'object' ? payload.message : {};
1683
+ const metadata = message.metadata && typeof message.metadata === 'object' ? message.metadata : {};
1684
+ const spokenRaw = Object.prototype.hasOwnProperty.call(metadata, 'spoken') ? metadata.spoken : undefined;
1685
+ const spokenNormalized = normalizeSpokenMetadata(spokenRaw);
1686
+ const text = extractTextFromGatewayMessage(message);
1687
+ return {
1688
+ parseable: true,
1689
+ event: typeof frame.event === 'string' ? frame.event : '',
1690
+ state: typeof payload.state === 'string' ? payload.state : '',
1691
+ role: typeof message.role === 'string' ? message.role : '',
1692
+ contentLength: text.length,
1693
+ hasMetadata: Object.keys(metadata).length > 0,
1694
+ hasSpokenKey: Object.prototype.hasOwnProperty.call(metadata, 'spoken'),
1695
+ spokenRawType: spokenRaw === undefined ? 'missing' : Array.isArray(spokenRaw) ? 'array' : typeof spokenRaw,
1696
+ spokenNormalized: Boolean(spokenNormalized),
1697
+ spokenSegmentCount: Array.isArray(spokenNormalized?.segments) ? spokenNormalized.segments.length : 0,
1698
+ };
1699
+ }
1700
+
1676
1701
  function ensureVoiceAssistantSpokenMetadata(frameText) {
1677
1702
  const frame = parseJsonPayload(frameText);
1678
1703
  if (!frame || typeof frame !== 'object') {
@@ -1702,10 +1727,10 @@ function ensureVoiceAssistantSpokenMetadata(frameText) {
1702
1727
  ? message.metadata
1703
1728
  : {};
1704
1729
  const metadata = { ...originalMetadata };
1705
- const explicitSpokenPresent = Object.prototype.hasOwnProperty.call(originalMetadata, 'spoken');
1730
+ const normalizedExplicitSpoken = normalizeSpokenMetadata(originalMetadata.spoken);
1706
1731
  const spoken =
1707
- normalizeSpokenMetadata(originalMetadata.spoken) ||
1708
- (!explicitSpokenPresent ? inferSpokenMetadataFromContent(extractTextFromGatewayMessage(message)) : null);
1732
+ normalizedExplicitSpoken ||
1733
+ inferSpokenMetadataFromContent(extractTextFromGatewayMessage(message));
1709
1734
  if (!spoken) {
1710
1735
  return { frameText, changed: false, reason: '' };
1711
1736
  }
@@ -1725,7 +1750,7 @@ function ensureVoiceAssistantSpokenMetadata(frameText) {
1725
1750
  return {
1726
1751
  frameText: nextFrame,
1727
1752
  changed: nextFrame !== frameText,
1728
- reason: explicitSpokenPresent ? 'normalized' : (messageRole ? 'synthesized' : 'synthesized_missing_role'),
1753
+ reason: normalizedExplicitSpoken ? 'normalized' : (messageRole ? 'synthesized' : 'synthesized_missing_role'),
1729
1754
  };
1730
1755
  }
1731
1756
 
@@ -2958,10 +2983,16 @@ async function startOpenclawBridge(flags) {
2958
2983
  gatewaySocket.on('message', runBridgeCallbackSafely((gatewayRaw) => {
2959
2984
  let frame = typeof gatewayRaw === 'string' ? gatewayRaw : gatewayRaw.toString();
2960
2985
  if (classifyBridgeSessionScope(sessionId) === 'voice') {
2986
+ const beforeSummary = summarizeVoiceFrameContract(frame);
2961
2987
  const spokenNormalized = ensureVoiceAssistantSpokenMetadata(frame);
2962
2988
  if (spokenNormalized.changed) {
2963
2989
  frame = spokenNormalized.frameText;
2964
- console.log(`[bridge] voice.spoken_metadata.${spokenNormalized.reason} ${sessionId}`);
2990
+ console.log(`[bridge] voice.spoken_metadata.${spokenNormalized.reason} ${sessionId} ${JSON.stringify({
2991
+ before: beforeSummary,
2992
+ after: summarizeVoiceFrameContract(frame),
2993
+ })}`);
2994
+ } else if (beforeSummary.event === 'chat' && beforeSummary.state === 'final') {
2995
+ console.log(`[bridge] voice.chat.final ${sessionId} ${JSON.stringify(beforeSummary)}`);
2965
2996
  }
2966
2997
  }
2967
2998
  const gatewayPayload = parseJsonPayload(frame);
@@ -3318,13 +3349,17 @@ async function startOpenclawBridge(flags) {
3318
3349
  return;
3319
3350
  }
3320
3351
 
3321
- if (payload.type === 'client.frame') {
3322
- const sessionId = String(payload.sessionId || '').trim();
3323
- const frame = typeof payload.frame === 'string' ? payload.frame : '';
3324
- if (!sessionId || !frame) return;
3325
- console.log(`[bridge] client.frame ${sessionId}`);
3326
- const sessionBridge = getOrCreateGatewaySession(sessionId);
3327
- if (!sessionBridge) return;
3352
+ if (payload.type === 'client.frame') {
3353
+ const sessionId = String(payload.sessionId || '').trim();
3354
+ const frame = typeof payload.frame === 'string' ? payload.frame : '';
3355
+ if (!sessionId || !frame) return;
3356
+ if (classifyBridgeSessionScope(sessionId) === 'voice') {
3357
+ console.log(`[bridge] client.frame ${sessionId} ${JSON.stringify(summarizeVoiceFrameContract(frame))}`);
3358
+ } else {
3359
+ console.log(`[bridge] client.frame ${sessionId}`);
3360
+ }
3361
+ const sessionBridge = getOrCreateGatewaySession(sessionId);
3362
+ if (!sessionBridge) return;
3328
3363
  const requestMeta = extractGatewayRequestMeta(frame);
3329
3364
  if (requestMeta) {
3330
3365
  if (!(sessionBridge.pendingRequests instanceof Map)) {
@@ -186,10 +186,9 @@ function normalizeOutgoingMetadata(payloadMetadata, { accountId, correlationId,
186
186
  ? { ...payloadMetadata }
187
187
  : {};
188
188
 
189
- const explicitSpokenPresent = Object.prototype.hasOwnProperty.call(metadata, 'spoken');
190
- const spoken =
191
- normalizeSpokenMetadata(metadata.spoken) ||
192
- (!explicitSpokenPresent ? inferSpokenMetadataFromContent(content) : null);
189
+ const spoken =
190
+ normalizeSpokenMetadata(metadata.spoken) ||
191
+ inferSpokenMetadataFromContent(content);
193
192
  if (spoken) {
194
193
  metadata.spoken = spoken;
195
194
  } else {
@@ -2,7 +2,7 @@
2
2
  "id": "oomi-ai",
3
3
  "name": "Oomi Channel Plugin",
4
4
  "description": "Managed Oomi channel integration for OpenClaw.",
5
- "version": "0.2.21",
5
+ "version": "0.2.23",
6
6
  "author": "Oomi",
7
7
  "license": "MIT",
8
8
  "openclawVersion": ">=0.5.0",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "oomi-ai",
3
- "version": "0.2.21",
3
+ "version": "0.2.23",
4
4
  "description": "Oomi OpenClaw channel plugin and bridge tooling",
5
5
  "bin": {
6
6
  "oomi": "bin/oomi-ai.js"