oomi-ai 0.2.18 → 0.2.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -2
- package/agent_instructions.md +7 -0
- package/bin/oomi-ai.js +153 -23
- package/lib/personaApiClient.js +32 -0
- package/lib/spokenMetadata.js +273 -0
- package/openclaw.plugin.json +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -4,8 +4,9 @@ OpenClaw channel plugin and bridge tooling for Oomi managed chat and voice.
|
|
|
4
4
|
|
|
5
5
|
## Current Focus
|
|
6
6
|
|
|
7
|
-
`0.2.
|
|
7
|
+
`0.2.19` adds the first live persona automation lane:
|
|
8
8
|
- WebSpatial-based persona scaffolding for generated Oomi apps
|
|
9
|
+
- a high-level `oomi personas create-managed` command for agent-driven persona creation
|
|
9
10
|
- device-authenticated persona runtime registration and job callbacks
|
|
10
11
|
- automatic bridge-side polling for queued `persona_job` control messages
|
|
11
12
|
- end-to-end local persona startup from a structured orchestration payload
|
|
@@ -153,6 +154,7 @@ oomi personas scaffold market-analyst --name "Market Analyst" --description "Pri
|
|
|
153
154
|
|
|
154
155
|
Use:
|
|
155
156
|
- `oomi personas create <id>` for repo-local manifest work
|
|
157
|
+
- `oomi personas create-managed --name "Cooking Persona" --description "Private cooking workspace"` for the end-to-end Oomi-managed persona flow
|
|
156
158
|
- `oomi personas scaffold <slug>` for a WebSpatial-based Oomi app shell with runtime metadata and health documents
|
|
157
159
|
- `oomi persona-jobs execute --message-file <job.json>` when OpenClaw receives a structured persona orchestration job from Oomi
|
|
158
160
|
|
|
@@ -166,7 +168,13 @@ oomi persona-jobs succeed pj_123 --workspace-path ~/.openclaw/personas/market-an
|
|
|
166
168
|
oomi persona-jobs fail pj_123 --code JOB_FAILED --message "Scaffold generation failed."
|
|
167
169
|
```
|
|
168
170
|
|
|
169
|
-
|
|
171
|
+
Recommended agent flow:
|
|
172
|
+
|
|
173
|
+
```bash
|
|
174
|
+
oomi personas create-managed --name "Cooking Persona" --description "Private cooking workspace for recipes, meal planning, and kitchen notes."
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
That command creates the managed persona record in Oomi using the linked device identity. The backend then enqueues the `persona_job`, and the running bridge consumes that job automatically. The poll path is filtered to `metadata.type = persona_job`, so it does not consume normal queued chat traffic.
|
|
170
178
|
|
|
171
179
|
## Bridge Health States
|
|
172
180
|
|
package/agent_instructions.md
CHANGED
|
@@ -194,6 +194,13 @@ Aliases allowed if needed:
|
|
|
194
194
|
|
|
195
195
|
## Persona App Generation
|
|
196
196
|
|
|
197
|
+
When a user asks you in chat to create a new persona for Oomi:
|
|
198
|
+
|
|
199
|
+
1. Prefer `oomi personas create-managed --name "<name>" --description "<description>"`.
|
|
200
|
+
2. That is the primary end-to-end command for agent-driven persona creation.
|
|
201
|
+
3. It creates the managed persona record in Oomi and relies on the running bridge to consume the queued `persona_job` automatically.
|
|
202
|
+
4. Do not manually scaffold first unless you are recovering a failed persona job or working outside the queued Oomi flow.
|
|
203
|
+
|
|
197
204
|
When generating a managed persona app for Oomi:
|
|
198
205
|
|
|
199
206
|
1. Do not build the app shell from scratch.
|
package/bin/oomi-ai.js
CHANGED
|
@@ -12,6 +12,7 @@ import { scaffoldPersonaApp } from '../lib/scaffold.js';
|
|
|
12
12
|
import { createPersonaApiClient } from '../lib/personaApiClient.js';
|
|
13
13
|
import { startPersonaJobPoller } from '../lib/personaJobPoller.js';
|
|
14
14
|
import { executePersonaJob } from '../lib/personaJobExecutor.js';
|
|
15
|
+
import { inferSpokenMetadataFromContent, normalizeSpokenMetadata } from '../lib/spokenMetadata.js';
|
|
15
16
|
import {
|
|
16
17
|
buildLocalPersonaRuntime,
|
|
17
18
|
defaultPersonaWorkspaceRoot,
|
|
@@ -190,6 +191,8 @@ Commands:
|
|
|
190
191
|
|
|
191
192
|
personas create <id>
|
|
192
193
|
Create a new persona manifest and optionally sync it to the backend.
|
|
194
|
+
personas create-managed [slug]
|
|
195
|
+
Create a managed persona in Oomi and enqueue its build job for the linked device.
|
|
193
196
|
personas scaffold <slug>
|
|
194
197
|
Create an Oomi-managed persona app scaffold for agent customization.
|
|
195
198
|
personas runtime-register <slug>
|
|
@@ -225,9 +228,10 @@ Common flags:
|
|
|
225
228
|
--json Print pairing result as JSON (for automation)
|
|
226
229
|
--backend-url URL Override Oomi backend URL
|
|
227
230
|
--root PATH Override repo root path for persona discovery
|
|
228
|
-
--name NAME Persona display name (for create)
|
|
229
|
-
--description TEXT Persona description (for scaffold)
|
|
230
|
-
--
|
|
231
|
+
--name NAME Persona display name (for create)
|
|
232
|
+
--description TEXT Persona description (for scaffold)
|
|
233
|
+
--slug SLUG Explicit slug override (for create-managed)
|
|
234
|
+
--summary TEXT Persona summary (for create)
|
|
231
235
|
--status STATUS Persona status (for create)
|
|
232
236
|
--type TYPE Persona type (for create)
|
|
233
237
|
--tags a,b,c Persona tags (for create)
|
|
@@ -595,6 +599,32 @@ function printPersonaScaffoldResult(result, asJson = false) {
|
|
|
595
599
|
}
|
|
596
600
|
}
|
|
597
601
|
|
|
602
|
+
/**
 * Print the outcome of a managed-persona creation request.
 *
 * @param {object} result - Response object; `result.persona` and
 *   `result.personaJob` are read when they are objects.
 * @param {boolean} [asJson=false] - When true, dump `result` as pretty JSON
 *   and print nothing else.
 * @returns {void}
 */
function printManagedPersonaCreateResult(result, asJson = false) {
  if (asJson) {
    console.log(JSON.stringify(result, null, 2));
    return;
  }

  // Missing or non-object sections are treated as empty records.
  const asRecord = (value) => (value && typeof value === 'object' ? value : {});
  const persona = asRecord(result?.persona);
  const personaJob = asRecord(result?.personaJob);

  console.log(`Managed persona created: ${String(persona.name || persona.slug || 'unknown')}`);

  // Each detail line is printed only when its value is truthy.
  const detailLines = [
    ['Slug', persona.slug],
    ['Lifecycle', persona.lifecycle],
    ['Persona job', personaJob.jobId],
    ['Job status', personaJob.status],
    ['Assigned device', personaJob.deviceId],
  ];
  for (const [label, value] of detailLines) {
    if (value) {
      console.log(`${label}: ${value}`);
    }
  }
}
|
|
627
|
+
|
|
598
628
|
function parseOptionalPositiveInteger(value) {
|
|
599
629
|
if (value === undefined || value === null || value === '') return null;
|
|
600
630
|
const parsed = Number(value);
|
|
@@ -932,6 +962,26 @@ async function handlePersonaJobExecuteCommand(flags = {}) {
|
|
|
932
962
|
|
|
933
963
|
printStructuredResult(result, isTruthyFlag(flags.json));
|
|
934
964
|
}
|
|
965
|
+
|
|
966
|
+
/**
 * CLI handler for `oomi personas create-managed [slug]`.
 *
 * Builds the request from CLI flags, sends it through the persona API client
 * and prints the result.
 *
 * @param {object} [flags={}] - Parsed CLI flags (`name`, `description`,
 *   `slug`, `template-type`, `template-version`, `json`).
 * @param {string} [positionalSlug=''] - Slug given positionally; `--slug`
 *   takes precedence when both are present.
 * @returns {Promise<void>}
 * @throws {Error} When `--name` is missing or blank.
 */
async function handlePersonaCreateManagedCommand(flags = {}, positionalSlug = '') {
  const readFlag = (value) => String(value || '').trim();

  const name = readFlag(flags.name);
  if (!name) {
    throw new Error('Persona name is required. Usage: oomi personas create-managed [slug] --name "<name>" --description "<description>"');
  }

  const request = {
    slug: readFlag(flags.slug || positionalSlug),
    name,
    // The description falls back to the name when not supplied.
    description: readFlag(flags.description) || name,
    templateType: String(flags['template-type'] || 'persona-app').trim() || 'persona-app',
    promptTemplateVersion: String(flags['template-version'] || 'v1').trim() || 'v1',
  };

  const client = createCliPersonaApiClient(flags);
  const result = await client.createManagedPersona(request);

  printManagedPersonaCreateResult(result, isTruthyFlag(flags.json));
}
|
|
935
985
|
|
|
936
986
|
function resolveOpenclawConfigPath() {
|
|
937
987
|
const candidates = [
|
|
@@ -1599,13 +1649,80 @@ function prepareGatewayFrameForLocalGateway(frameText, gatewayAuth, options = {}
|
|
|
1599
1649
|
}
|
|
1600
1650
|
}
|
|
1601
1651
|
|
|
1602
|
-
function parseJsonPayload(raw) {
|
|
1603
|
-
try {
|
|
1604
|
-
return JSON.parse(raw);
|
|
1605
|
-
} catch {
|
|
1606
|
-
return null;
|
|
1607
|
-
}
|
|
1608
|
-
}
|
|
1652
|
+
/**
 * Parse a JSON string without throwing.
 *
 * @param {string} raw - Text to parse.
 * @returns {*} The parsed value, or `null` when `raw` is not valid JSON.
 */
function parseJsonPayload(raw) {
  let parsed = null;
  try {
    parsed = JSON.parse(raw);
  } catch {
    // Deliberate best-effort: malformed input is reported as null.
    parsed = null;
  }
  return parsed;
}
|
|
1659
|
+
|
|
1660
|
+
/**
 * Pull the plain text out of a gateway chat message.
 *
 * `message.content` may be a string (returned trimmed) or an array of blocks,
 * in which case the trimmed `text` of every `{ type: 'text' }` block is joined
 * with single spaces.
 *
 * @param {object} message - Gateway message object.
 * @returns {string} Extracted text, or `''` when none is available.
 */
function extractTextFromGatewayMessage(message) {
  if (!message || typeof message !== 'object') return '';

  const { content } = message;
  if (typeof content === 'string' && content.trim()) {
    return content.trim();
  }
  if (!Array.isArray(content)) return '';

  const pieces = [];
  for (const block of content) {
    const isTextBlock =
      block && typeof block === 'object' && block.type === 'text' && typeof block.text === 'string';
    if (!isTextBlock) continue;

    const trimmed = block.text.trim();
    if (trimmed) pieces.push(trimmed);
  }
  return pieces.join(' ');
}
|
|
1675
|
+
|
|
1676
|
+
/**
 * Ensure a final assistant chat frame carries `message.metadata.spoken`.
 *
 * Only frames shaped like `{ type: 'event', event: 'chat', payload: { state:
 * 'final', message: { role: 'assistant' } } }` are touched. An existing
 * `spoken` value is normalized; when the key is absent, spoken metadata is
 * inferred from the message text. If the key is present but cannot be
 * normalized, the frame is returned unchanged (nothing is inferred).
 *
 * @param {string} frameText - Raw JSON frame text.
 * @returns {{frameText: string, changed: boolean, reason: string}}
 *   `reason` is `'normalized'` or `'synthesized'` when a frame was rebuilt and
 *   `''` otherwise. `changed` compares the re-serialized JSON with the input
 *   text.
 */
function ensureVoiceAssistantSpokenMetadata(frameText) {
  const untouched = () => ({ frameText, changed: false, reason: '' });
  const isObjectLike = (value) => Boolean(value) && typeof value === 'object';

  const frame = parseJsonPayload(frameText);
  if (!isObjectLike(frame)) return untouched();
  if (!(frame.type === 'event' && frame.event === 'chat')) return untouched();

  const payload = isObjectLike(frame.payload) ? frame.payload : null;
  if (payload === null || payload.state !== 'final') return untouched();

  const message = isObjectLike(payload.message) ? payload.message : null;
  if (message === null || message.role !== 'assistant') return untouched();

  const existingMetadata =
    isObjectLike(message.metadata) && !Array.isArray(message.metadata) ? message.metadata : {};
  const hadSpokenKey = Object.prototype.hasOwnProperty.call(existingMetadata, 'spoken');

  let spoken = normalizeSpokenMetadata(existingMetadata.spoken);
  if (!spoken && !hadSpokenKey) {
    // Inference is only attempted when the sender supplied no `spoken` key.
    spoken = inferSpokenMetadataFromContent(extractTextFromGatewayMessage(message));
  }
  if (!spoken) return untouched();

  const rebuiltMessage = { ...message, metadata: { ...existingMetadata, spoken } };
  const rebuiltPayload = { ...payload, message: rebuiltMessage };
  const serialized = JSON.stringify({ ...frame, payload: rebuiltPayload });

  return {
    frameText: serialized,
    changed: serialized !== frameText,
    reason: hadSpokenKey ? 'normalized' : 'synthesized',
  };
}
|
|
1609
1726
|
|
|
1610
1727
|
function extractCorrelationId(params) {
|
|
1611
1728
|
if (!params || typeof params !== 'object') return '';
|
|
@@ -2833,11 +2950,18 @@ async function startOpenclawBridge(flags) {
|
|
|
2833
2950
|
flushSessionQueue(sessionBridge);
|
|
2834
2951
|
});
|
|
2835
2952
|
|
|
2836
|
-
gatewaySocket.on('message', runBridgeCallbackSafely((gatewayRaw) => {
|
|
2837
|
-
|
|
2838
|
-
|
|
2839
|
-
|
|
2840
|
-
|
|
2953
|
+
gatewaySocket.on('message', runBridgeCallbackSafely((gatewayRaw) => {
|
|
2954
|
+
let frame = typeof gatewayRaw === 'string' ? gatewayRaw : gatewayRaw.toString();
|
|
2955
|
+
if (classifyBridgeSessionScope(sessionId) === 'voice') {
|
|
2956
|
+
const spokenNormalized = ensureVoiceAssistantSpokenMetadata(frame);
|
|
2957
|
+
if (spokenNormalized.changed) {
|
|
2958
|
+
frame = spokenNormalized.frameText;
|
|
2959
|
+
console.log(`[bridge] voice.spoken_metadata.${spokenNormalized.reason} ${sessionId}`);
|
|
2960
|
+
}
|
|
2961
|
+
}
|
|
2962
|
+
const gatewayPayload = parseJsonPayload(frame);
|
|
2963
|
+
if (gatewayPayload?.event === 'connect.challenge') {
|
|
2964
|
+
console.log(`[bridge] gateway.connect.challenge ${sessionId}`);
|
|
2841
2965
|
const nonce =
|
|
2842
2966
|
gatewayPayload.payload && typeof gatewayPayload.payload.nonce === 'string'
|
|
2843
2967
|
? gatewayPayload.payload.nonce.trim()
|
|
@@ -3964,15 +4088,20 @@ async function main() {
|
|
|
3964
4088
|
return;
|
|
3965
4089
|
}
|
|
3966
4090
|
|
|
3967
|
-
if (command === 'personas' && subcommand === 'create') {
|
|
4091
|
+
if (command === 'personas' && subcommand === 'create') {
|
|
3968
4092
|
const id = args.positionals[0];
|
|
3969
4093
|
if (!id) {
|
|
3970
4094
|
throw new Error('Persona id is required. Usage: oomi personas create <id>');
|
|
3971
4095
|
}
|
|
3972
4096
|
await createPersona({ id, root: args.flags.root, flags: args.flags });
|
|
3973
4097
|
return;
|
|
3974
|
-
}
|
|
3975
|
-
|
|
4098
|
+
}
|
|
4099
|
+
|
|
4100
|
+
if (command === 'personas' && subcommand === 'create-managed') {
|
|
4101
|
+
await handlePersonaCreateManagedCommand(args.flags, args.positionals[0]);
|
|
4102
|
+
return;
|
|
4103
|
+
}
|
|
4104
|
+
|
|
3976
4105
|
if (command === 'personas' && subcommand === 'scaffold') {
|
|
3977
4106
|
const slug = args.positionals[0];
|
|
3978
4107
|
if (!slug) {
|
|
@@ -4065,11 +4194,12 @@ if (__isDirectExecution) {
|
|
|
4065
4194
|
});
|
|
4066
4195
|
}
|
|
4067
4196
|
|
|
4068
|
-
export {
|
|
4069
|
-
prepareGatewayFrameForLocalGateway,
|
|
4070
|
-
|
|
4071
|
-
|
|
4072
|
-
|
|
4197
|
+
export {
|
|
4198
|
+
prepareGatewayFrameForLocalGateway,
|
|
4199
|
+
ensureVoiceAssistantSpokenMetadata,
|
|
4200
|
+
classifyBridgeFailure,
|
|
4201
|
+
classifyBridgeSessionScope,
|
|
4202
|
+
createBridgeProcessFaultHandler,
|
|
4073
4203
|
computeReconnectDelayMs,
|
|
4074
4204
|
resolveBridgeStatusForBrokerOpen,
|
|
4075
4205
|
resolveBridgeStatusForRuntimeFault,
|
package/lib/personaApiClient.js
CHANGED
|
@@ -66,6 +66,38 @@ export function createPersonaApiClient({
|
|
|
66
66
|
}
|
|
67
67
|
|
|
68
68
|
return {
|
|
69
|
+
createManagedPersona({
|
|
70
|
+
slug,
|
|
71
|
+
name,
|
|
72
|
+
description,
|
|
73
|
+
templateType = 'persona-app',
|
|
74
|
+
promptTemplateVersion = 'v1',
|
|
75
|
+
}) {
|
|
76
|
+
const safeName = trimString(name);
|
|
77
|
+
if (!safeName) {
|
|
78
|
+
throw new Error('Persona name is required.');
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
const body = withDevice({
|
|
82
|
+
name: safeName,
|
|
83
|
+
description: trimString(description) || safeName,
|
|
84
|
+
templateType: trimString(templateType) || 'persona-app',
|
|
85
|
+
promptTemplateVersion: trimString(promptTemplateVersion) || 'v1',
|
|
86
|
+
});
|
|
87
|
+
const safeSlug = trimString(slug);
|
|
88
|
+
if (safeSlug) {
|
|
89
|
+
body.slug = safeSlug;
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
return postJson({
|
|
93
|
+
fetchImpl,
|
|
94
|
+
backendUrl: resolvedBackendUrl,
|
|
95
|
+
deviceToken: resolvedDeviceToken,
|
|
96
|
+
path: '/v1/personas/managed_create',
|
|
97
|
+
body,
|
|
98
|
+
});
|
|
99
|
+
},
|
|
100
|
+
|
|
69
101
|
registerRuntime({
|
|
70
102
|
slug,
|
|
71
103
|
endpoint,
|
|
@@ -0,0 +1,273 @@
|
|
|
1
|
+
/**
 * Return `value` trimmed when it is a non-blank string, otherwise `fallback`.
 *
 * @param {*} value - Candidate value.
 * @param {string} [fallback=''] - Returned for non-strings and blank strings.
 * @returns {string}
 */
function trimString(value, fallback = '') {
  if (typeof value !== 'string') return fallback;
  const trimmed = value.trim();
  return trimmed ? trimmed : fallback;
}
|
|
4
|
+
|
|
5
|
+
/**
 * Floor a finite number and bound it.
 *
 * Values below `min` (after flooring) yield `fallback`, not `min`; values
 * above `max` are capped at `max`.
 *
 * @param {*} value - Candidate; anything but a finite number yields `fallback`.
 * @param {*} fallback - Returned for invalid or too-small input.
 * @param {{min?: number, max?: number}} [bounds]
 * @returns {*} The floored, capped integer or `fallback`.
 */
function clampInteger(value, fallback, { min = 1, max = Number.MAX_SAFE_INTEGER } = {}) {
  const isUsableNumber = typeof value === 'number' && Number.isFinite(value);
  if (!isUsableNumber) return fallback;

  const floored = Math.floor(value);
  if (floored < min) return fallback;
  return floored > max ? max : floored;
}
|
|
12
|
+
|
|
13
|
+
// Allowed values for the spoken-metadata fields. Anything outside these sets
// is dropped by the normalizers in this module.
const BOUNDED_LANGUAGE_TYPES = new Set([
  'Auto', 'Chinese', 'English', 'German', 'Italian', 'Portuguese',
  'Spanish', 'Japanese', 'Korean', 'French', 'Russian',
]);

const BOUNDED_PACE_VALUES = new Set([
  'very_slow',
  'slow',
  'medium',
  'medium_fast',
  'fast',
]);
const BOUNDED_PITCH_VALUES = new Set([
  'low',
  'slightly_low',
  'neutral',
  'slightly_high',
  'high',
]);
const BOUNDED_ENERGY_VALUES = new Set([
  'soft',
  'calm',
  'warm',
  'bright',
  'intense',
]);
const BOUNDED_VOLUME_VALUES = new Set([
  'soft',
  'normal',
  'projected',
]);
|
|
31
|
+
|
|
32
|
+
/**
 * Choose the spoken language label for a piece of text.
 *
 * No language detection is implemented: every input maps to `'English'`.
 * The previous body trimmed the text and branched on emptiness, but both
 * branches returned the same value, so that dead work has been removed.
 *
 * @param {*} text - Text to be spoken (currently unused; kept so callers and
 *   a future detector share one signature).
 * @returns {string} Always `'English'`.
 */
// TODO(review): non-English replies are labelled 'English' as well; add real
// detection here if downstream speech output depends on the label.
function inferSpokenLanguage(text) {
  return 'English';
}
|
|
37
|
+
|
|
38
|
+
/**
 * Validate one explicit spoken segment.
 *
 * Keeps the trimmed `text`, keeps `pace`/`pitch`/`energy`/`volume` only when
 * they are members of the matching BOUNDED_* set, and always emits
 * `pause_after_ms` as an integer in 0..1200 (0 when missing or invalid).
 *
 * @param {*} segment - Candidate segment.
 * @returns {object|null} The normalized segment, or `null` when `segment` is
 *   not a plain object or has no text.
 */
function normalizeSpokenSegment(segment) {
  const isRecord = Boolean(segment) && typeof segment === 'object' && !Array.isArray(segment);
  if (!isRecord) return null;

  const text = trimString(segment.text);
  if (!text) return null;

  const result = { text };
  const boundedFields = [
    ['pace', BOUNDED_PACE_VALUES],
    ['pitch', BOUNDED_PITCH_VALUES],
    ['energy', BOUNDED_ENERGY_VALUES],
    ['volume', BOUNDED_VOLUME_VALUES],
  ];
  for (const [field, allowedValues] of boundedFields) {
    const candidate = trimString(segment[field]);
    if (allowedValues.has(candidate)) {
      result[field] = candidate;
    }
  }
  result.pause_after_ms = clampInteger(segment.pause_after_ms, 0, { min: 0, max: 1200 });

  return result;
}
|
|
59
|
+
|
|
60
|
+
/**
 * Remove emoji from text, including the invisible glue around them.
 *
 * Variation selectors (U+FE0E/U+FE0F) and the combining keycap mark (U+20E3)
 * are dropped first. Then whole emoji runs are removed together with the
 * zero-width joiners (U+200D) that link them, so a ZWJ sequence such as a
 * family emoji does not leave stray joiners in the text. A joiner that is not
 * preceded by an emoji is left alone.
 *
 * @param {string} text - Input text.
 * @returns {string} Text without emoji.
 */
function stripEmoji(text) {
  return text
    .replace(/[\uFE0E\uFE0F\u20E3]/g, '')
    .replace(
      /[\p{Extended_Pictographic}\p{Emoji_Presentation}]+(?:\u200D[\p{Extended_Pictographic}\p{Emoji_Presentation}]+)*/gu,
      '',
    );
}

/**
 * Clean chat text so it reads well as speech.
 *
 * Strips emoji, unwraps `**bold**`, `__bold__` and inline code, converts
 * en/em dashes to commas and the ellipsis character to three dots, collapses
 * whitespace, and fixes spacing around `, . ; ! ?`.
 *
 * @param {string} text - Raw message text.
 * @returns {string} Normalized text (possibly empty).
 */
function normalizeSpeechText(text) {
  return stripEmoji(text)
    .replace(/\*\*(.*?)\*\*/g, '$1')
    .replace(/__(.*?)__/g, '$1')
    .replace(/`([^`]+)`/g, '$1')
    .replace(/[–—]/g, ', ')
    .replace(/…/g, '...')
    .replace(/\s+/g, ' ')
    .replace(/\s+([,.;!?])/g, '$1')
    // Add the missing space after punctuation ("Hello,world" -> "Hello, world"),
    // but never inside a punctuation run ("...", "?!") or a number ("3.14",
    // "1,000"): splitting those produced junk "." segments downstream.
    .replace(/([,.;!?])(?=[^\s,.;!?])/g, (match, mark, offset, whole) => {
      const insideNumber =
        (mark === '.' || mark === ',') &&
        /\d/.test(whole.charAt(offset - 1)) &&
        /\d/.test(whole.charAt(offset + 1));
      return insideNumber ? mark : `${mark} `;
    })
    .replace(/,\s*,+/g, ', ')
    .replace(/\s+/g, ' ')
    .trim();
}
|
|
78
|
+
|
|
79
|
+
/**
 * Break text into at most five speakable segments.
 *
 * The text is normalized, split into sentences after `.`, `!` or `?`, and any
 * sentence longer than 96 characters is further split at its commas (each
 * piece except the last gets its comma back). When more than five pieces
 * result, the first four are kept and the rest are joined into a fifth.
 *
 * @param {string} text - Raw text.
 * @returns {string[]} Segments in speaking order; empty when there is no text.
 */
function splitSpeechSegments(text) {
  const cleaned = normalizeSpeechText(text);
  if (!cleaned) return [];

  const sentences = cleaned
    .split(/(?<=[.!?])\s+/)
    .map((sentence) => sentence.trim())
    .filter(Boolean);

  const pieces = sentences.flatMap((sentence) => {
    if (sentence.length <= 96) return [sentence];

    const clauses = sentence
      .split(/,\s+/)
      .map((clause) => clause.trim())
      .filter(Boolean);
    // A long sentence without commas is kept whole.
    if (clauses.length <= 1) return [sentence];

    const lastPosition = clauses.length - 1;
    return clauses.map((clause, position) => {
      const restoreComma = position < lastPosition && !/[.!?]$/.test(clause);
      return restoreComma ? `${clause},` : clause;
    });
  });

  if (pieces.length <= 5) return pieces;

  const head = pieces.slice(0, 4);
  const tail = pieces.slice(4).join(' ').trim();
  return [...head, tail];
}
|
|
116
|
+
|
|
117
|
+
/**
 * Pick delivery hints for one segment from simple text cues.
 *
 * Precedence is question > exclamation/enthusiastic word > reflective phrase
 * or long segment (> 60 chars) > default. Every style except the question
 * style adds a pause after the segment unless it is the last one.
 *
 * @param {string} segmentText - Segment text.
 * @param {number} index - Zero-based position of the segment.
 * @param {number} totalSegments - Number of segments in the utterance.
 * @returns {{pace: string, pitch: string, energy: string, volume: string, pause_after_ms: number}}
 */
function inferSegmentStyle(segmentText, index, totalSegments) {
  const lowered = segmentText.toLowerCase();
  const hasFollowingSegment = index < totalSegments - 1;
  const pauseUnlessLast = (milliseconds) => (hasFollowingSegment ? milliseconds : 0);

  const isQuestion = /\?/.test(segmentText);
  if (isQuestion) {
    return { pace: 'medium', pitch: 'slightly_high', energy: 'warm', volume: 'normal', pause_after_ms: 0 };
  }

  const isExclamatory =
    /!/.test(segmentText) || /\b(hell yeah|awesome|amazing|stoked|love|perfect|great)\b/.test(lowered);
  if (isExclamatory) {
    return {
      pace: 'medium_fast',
      pitch: 'slightly_high',
      energy: 'bright',
      volume: 'normal',
      pause_after_ms: pauseUnlessLast(220),
    };
  }

  const isReflective =
    /\b(i think|i'm|i am|i've|i have|lately|right now|before this|each time|understand|it feels like)\b/.test(lowered) ||
    segmentText.length > 60;

  // Reflective and default styles differ only in the pause length.
  return {
    pace: 'medium',
    pitch: 'neutral',
    energy: 'warm',
    volume: 'normal',
    pause_after_ms: pauseUnlessLast(isReflective ? 260 : 180),
  };
}
|
|
163
|
+
|
|
164
|
+
/**
 * Build styled spoken segments for a piece of text.
 *
 * @param {string} text - Text to speak.
 * @returns {{language: string, segments: object[]}|null} The language label
 *   plus one styled entry per segment, or `null` when the text yields no
 *   segments.
 */
function synthesizeSpokenSegments(text) {
  const language = inferSpokenLanguage(text);
  const pieces = splitSpeechSegments(text);
  const total = pieces.length;
  if (total === 0) return null;

  const segments = [];
  pieces.forEach((piece, position) => {
    segments.push({ text: piece, ...inferSegmentStyle(piece, position, total) });
  });

  return { language, segments };
}
|
|
179
|
+
|
|
180
|
+
/**
 * Validate caller-supplied `spoken` metadata and fill in what is missing.
 *
 * Explicit `language` (when in BOUNDED_LANGUAGE_TYPES), valid `segments`,
 * `instructions` and an object-valued `style` are kept. A missing language or
 * missing segments are filled from segments synthesized out of `spoken.text`.
 *
 * @param {*} spoken - Candidate metadata.
 * @returns {object|null} Normalized metadata, or `null` when `spoken` is not a
 *   plain object or has no text.
 */
function normalizeSpokenMetadata(spoken) {
  const isPlainObject = (value) => Boolean(value) && typeof value === 'object' && !Array.isArray(value);
  if (!isPlainObject(spoken)) return null;

  const text = trimString(spoken.text);
  if (!text) return null;

  const result = { text };

  const requestedLanguage = trimString(spoken.language);
  if (BOUNDED_LANGUAGE_TYPES.has(requestedLanguage)) {
    result.language = requestedLanguage;
  }

  let validSegments = [];
  if (Array.isArray(spoken.segments)) {
    validSegments = spoken.segments
      .map((segment) => normalizeSpokenSegment(segment))
      .filter(Boolean);
  }
  if (validSegments.length > 0) {
    result.segments = validSegments;
  }

  const instructions = trimString(spoken.instructions);
  if (instructions) {
    result.instructions = instructions;
  }
  if (isPlainObject(spoken.style)) {
    // The style object is passed through as-is (same reference, no validation).
    result.style = spoken.style;
  }

  // Fill any gaps from segments synthesized out of the text itself.
  const synthesized = synthesizeSpokenSegments(text);
  if (!result.language && synthesized?.language) {
    result.language = synthesized.language;
  }
  if (!result.segments && synthesized?.segments?.length) {
    result.segments = synthesized.segments;
  }

  return result;
}
|
|
216
|
+
|
|
217
|
+
/**
 * Derive spoken metadata from plain message content.
 *
 * The text is normalized for speech, segmented, and given one overall tone
 * chosen by keyword cues with precedence upbeat > gentle > curious > neutral.
 *
 * @param {*} content - Message text; non-strings and blank text yield `null`.
 * @returns {{text: string, language: string, segments: (object[]|undefined), instructions: string, style: {emotion: string, energy: string}}|null}
 */
function inferSpokenMetadataFromContent(content) {
  const text = normalizeSpeechText(trimString(content));
  if (!text) return null;

  const synthesized = synthesizeSpokenSegments(text);
  const lowered = text.toLowerCase();

  // Ordered by precedence: the first matching tone wins.
  const tones = [
    {
      matches:
        /!/.test(text) ||
        /\b(hell yeah|awesome|amazing|great|stoked|love|glad|perfect|nice|cool)\b/.test(lowered),
      instructions: 'Speak with warm, upbeat conversational energy and natural pacing.',
      style: { emotion: 'upbeat', energy: 'medium' },
    },
    {
      matches: /\b(sorry|gentle|softly|careful|reassuring|calm|okay|it'?s okay|i know)\b/.test(lowered),
      instructions: 'Speak gently and reassuringly, with a calm pace and soft emphasis.',
      style: { emotion: 'gentle', energy: 'low' },
    },
    {
      matches: /\?/.test(text),
      instructions: 'Speak naturally with curious, engaged intonation and a conversational pace.',
      style: { emotion: 'curious', energy: 'medium' },
    },
    {
      matches: true,
      instructions: 'Speak naturally with light warmth and conversational pacing.',
      style: { emotion: 'neutral', energy: 'medium' },
    },
  ];
  const tone = tones.find((candidate) => candidate.matches);

  return {
    text,
    language: synthesized?.language || 'English',
    segments: synthesized?.segments,
    instructions: tone.instructions,
    style: tone.style,
  };
}
|
|
268
|
+
|
|
269
|
+
export {
|
|
270
|
+
inferSpokenMetadataFromContent,
|
|
271
|
+
normalizeSpokenMetadata,
|
|
272
|
+
normalizeSpeechText,
|
|
273
|
+
};
|
package/openclaw.plugin.json
CHANGED