oomi-ai 0.2.21 → 0.2.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/oomi-ai.js +47 -12
- package/openclaw.extension.js +3 -4
- package/openclaw.plugin.json +1 -1
- package/package.json +1 -1
package/bin/oomi-ai.js
CHANGED
|
@@ -1673,6 +1673,31 @@ function extractTextFromGatewayMessage(message) {
|
|
|
1673
1673
|
.join(' ');
|
|
1674
1674
|
}
|
|
1675
1675
|
|
|
1676
|
+
function summarizeVoiceFrameContract(frameText) {
|
|
1677
|
+
const frame = parseJsonPayload(frameText);
|
|
1678
|
+
if (!frame || typeof frame !== 'object') {
|
|
1679
|
+
return { parseable: false };
|
|
1680
|
+
}
|
|
1681
|
+
const payload = frame.payload && typeof frame.payload === 'object' ? frame.payload : {};
|
|
1682
|
+
const message = payload.message && typeof payload.message === 'object' ? payload.message : {};
|
|
1683
|
+
const metadata = message.metadata && typeof message.metadata === 'object' ? message.metadata : {};
|
|
1684
|
+
const spokenRaw = Object.prototype.hasOwnProperty.call(metadata, 'spoken') ? metadata.spoken : undefined;
|
|
1685
|
+
const spokenNormalized = normalizeSpokenMetadata(spokenRaw);
|
|
1686
|
+
const text = extractTextFromGatewayMessage(message);
|
|
1687
|
+
return {
|
|
1688
|
+
parseable: true,
|
|
1689
|
+
event: typeof frame.event === 'string' ? frame.event : '',
|
|
1690
|
+
state: typeof payload.state === 'string' ? payload.state : '',
|
|
1691
|
+
role: typeof message.role === 'string' ? message.role : '',
|
|
1692
|
+
contentLength: text.length,
|
|
1693
|
+
hasMetadata: Object.keys(metadata).length > 0,
|
|
1694
|
+
hasSpokenKey: Object.prototype.hasOwnProperty.call(metadata, 'spoken'),
|
|
1695
|
+
spokenRawType: spokenRaw === undefined ? 'missing' : Array.isArray(spokenRaw) ? 'array' : typeof spokenRaw,
|
|
1696
|
+
spokenNormalized: Boolean(spokenNormalized),
|
|
1697
|
+
spokenSegmentCount: Array.isArray(spokenNormalized?.segments) ? spokenNormalized.segments.length : 0,
|
|
1698
|
+
};
|
|
1699
|
+
}
|
|
1700
|
+
|
|
1676
1701
|
function ensureVoiceAssistantSpokenMetadata(frameText) {
|
|
1677
1702
|
const frame = parseJsonPayload(frameText);
|
|
1678
1703
|
if (!frame || typeof frame !== 'object') {
|
|
@@ -1702,10 +1727,10 @@ function ensureVoiceAssistantSpokenMetadata(frameText) {
|
|
|
1702
1727
|
? message.metadata
|
|
1703
1728
|
: {};
|
|
1704
1729
|
const metadata = { ...originalMetadata };
|
|
1705
|
-
const
|
|
1730
|
+
const normalizedExplicitSpoken = normalizeSpokenMetadata(originalMetadata.spoken);
|
|
1706
1731
|
const spoken =
|
|
1707
|
-
|
|
1708
|
-
|
|
1732
|
+
normalizedExplicitSpoken ||
|
|
1733
|
+
inferSpokenMetadataFromContent(extractTextFromGatewayMessage(message));
|
|
1709
1734
|
if (!spoken) {
|
|
1710
1735
|
return { frameText, changed: false, reason: '' };
|
|
1711
1736
|
}
|
|
@@ -1725,7 +1750,7 @@ function ensureVoiceAssistantSpokenMetadata(frameText) {
|
|
|
1725
1750
|
return {
|
|
1726
1751
|
frameText: nextFrame,
|
|
1727
1752
|
changed: nextFrame !== frameText,
|
|
1728
|
-
reason:
|
|
1753
|
+
reason: normalizedExplicitSpoken ? 'normalized' : (messageRole ? 'synthesized' : 'synthesized_missing_role'),
|
|
1729
1754
|
};
|
|
1730
1755
|
}
|
|
1731
1756
|
|
|
@@ -2958,10 +2983,16 @@ async function startOpenclawBridge(flags) {
|
|
|
2958
2983
|
gatewaySocket.on('message', runBridgeCallbackSafely((gatewayRaw) => {
|
|
2959
2984
|
let frame = typeof gatewayRaw === 'string' ? gatewayRaw : gatewayRaw.toString();
|
|
2960
2985
|
if (classifyBridgeSessionScope(sessionId) === 'voice') {
|
|
2986
|
+
const beforeSummary = summarizeVoiceFrameContract(frame);
|
|
2961
2987
|
const spokenNormalized = ensureVoiceAssistantSpokenMetadata(frame);
|
|
2962
2988
|
if (spokenNormalized.changed) {
|
|
2963
2989
|
frame = spokenNormalized.frameText;
|
|
2964
|
-
console.log(`[bridge] voice.spoken_metadata.${spokenNormalized.reason} ${sessionId}
|
|
2990
|
+
console.log(`[bridge] voice.spoken_metadata.${spokenNormalized.reason} ${sessionId} ${JSON.stringify({
|
|
2991
|
+
before: beforeSummary,
|
|
2992
|
+
after: summarizeVoiceFrameContract(frame),
|
|
2993
|
+
})}`);
|
|
2994
|
+
} else if (beforeSummary.event === 'chat' && beforeSummary.state === 'final') {
|
|
2995
|
+
console.log(`[bridge] voice.chat.final ${sessionId} ${JSON.stringify(beforeSummary)}`);
|
|
2965
2996
|
}
|
|
2966
2997
|
}
|
|
2967
2998
|
const gatewayPayload = parseJsonPayload(frame);
|
|
@@ -3318,13 +3349,17 @@ async function startOpenclawBridge(flags) {
|
|
|
3318
3349
|
return;
|
|
3319
3350
|
}
|
|
3320
3351
|
|
|
3321
|
-
if (payload.type === 'client.frame') {
|
|
3322
|
-
const sessionId = String(payload.sessionId || '').trim();
|
|
3323
|
-
const frame = typeof payload.frame === 'string' ? payload.frame : '';
|
|
3324
|
-
if (!sessionId || !frame) return;
|
|
3325
|
-
|
|
3326
|
-
|
|
3327
|
-
|
|
3352
|
+
if (payload.type === 'client.frame') {
|
|
3353
|
+
const sessionId = String(payload.sessionId || '').trim();
|
|
3354
|
+
const frame = typeof payload.frame === 'string' ? payload.frame : '';
|
|
3355
|
+
if (!sessionId || !frame) return;
|
|
3356
|
+
if (classifyBridgeSessionScope(sessionId) === 'voice') {
|
|
3357
|
+
console.log(`[bridge] client.frame ${sessionId} ${JSON.stringify(summarizeVoiceFrameContract(frame))}`);
|
|
3358
|
+
} else {
|
|
3359
|
+
console.log(`[bridge] client.frame ${sessionId}`);
|
|
3360
|
+
}
|
|
3361
|
+
const sessionBridge = getOrCreateGatewaySession(sessionId);
|
|
3362
|
+
if (!sessionBridge) return;
|
|
3328
3363
|
const requestMeta = extractGatewayRequestMeta(frame);
|
|
3329
3364
|
if (requestMeta) {
|
|
3330
3365
|
if (!(sessionBridge.pendingRequests instanceof Map)) {
|
package/openclaw.extension.js
CHANGED
|
@@ -186,10 +186,9 @@ function normalizeOutgoingMetadata(payloadMetadata, { accountId, correlationId,
|
|
|
186
186
|
? { ...payloadMetadata }
|
|
187
187
|
: {};
|
|
188
188
|
|
|
189
|
-
const
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
(!explicitSpokenPresent ? inferSpokenMetadataFromContent(content) : null);
|
|
189
|
+
const spoken =
|
|
190
|
+
normalizeSpokenMetadata(metadata.spoken) ||
|
|
191
|
+
inferSpokenMetadataFromContent(content);
|
|
193
192
|
if (spoken) {
|
|
194
193
|
metadata.spoken = spoken;
|
|
195
194
|
} else {
|
package/openclaw.plugin.json
CHANGED