oomi-ai 0.2.24 → 0.2.25
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- package/lib/spokenMetadata.js +93 -76
- package/openclaw.plugin.json +1 -1
- package/package.json +1 -1
package/lib/spokenMetadata.js
CHANGED
|
@@ -1,6 +1,10 @@
|
|
|
1
|
-
function trimString(value, fallback = '') {
|
|
2
|
-
return typeof value === 'string' && value.trim() ? value.trim() : fallback;
|
|
3
|
-
}
|
|
1
|
+
function trimString(value, fallback = '') {
|
|
2
|
+
return typeof value === 'string' && value.trim() ? value.trim() : fallback;
|
|
3
|
+
}
|
|
4
|
+
|
|
5
|
+
function stripAvatarCommandTags(text) {
|
|
6
|
+
return text.replace(/\[(anim|animation|face|expression|emotion|gesture|look|gaze):[^\]]+\]/gi, ' ');
|
|
7
|
+
}
|
|
4
8
|
|
|
5
9
|
function clampInteger(value, fallback, { min = 1, max = Number.MAX_SAFE_INTEGER } = {}) {
|
|
6
10
|
if (typeof value !== 'number' || !Number.isFinite(value)) return fallback;
|
|
@@ -35,11 +39,11 @@ function inferSpokenLanguage(text) {
|
|
|
35
39
|
return 'English';
|
|
36
40
|
}
|
|
37
41
|
|
|
38
|
-
function normalizeSpokenSegment(segment) {
|
|
39
|
-
if (!segment || typeof segment !== 'object' || Array.isArray(segment)) return null;
|
|
40
|
-
|
|
41
|
-
const text = trimString(segment.text);
|
|
42
|
-
if (!text) return null;
|
|
42
|
+
function normalizeSpokenSegment(segment) {
|
|
43
|
+
if (!segment || typeof segment !== 'object' || Array.isArray(segment)) return null;
|
|
44
|
+
|
|
45
|
+
const text = normalizeSpeechText(trimString(segment.text));
|
|
46
|
+
if (!text) return null;
|
|
43
47
|
|
|
44
48
|
const normalized = { text };
|
|
45
49
|
const pace = trimString(segment.pace);
|
|
@@ -61,11 +65,11 @@ function stripEmoji(text) {
|
|
|
61
65
|
return text.replace(/[\uFE0E\uFE0F]/g, '').replace(/\p{Extended_Pictographic}|\p{Emoji_Presentation}/gu, '');
|
|
62
66
|
}
|
|
63
67
|
|
|
64
|
-
function normalizeSpeechText(text) {
|
|
65
|
-
return stripEmoji(text)
|
|
66
|
-
.replace(/\*\*(.*?)\*\*/g, '$1')
|
|
67
|
-
.replace(/__(.*?)__/g, '$1')
|
|
68
|
-
.replace(/`([^`]+)`/g, '$1')
|
|
68
|
+
function normalizeSpeechText(text) {
|
|
69
|
+
return stripEmoji(stripAvatarCommandTags(text))
|
|
70
|
+
.replace(/\*\*(.*?)\*\*/g, '$1')
|
|
71
|
+
.replace(/__(.*?)__/g, '$1')
|
|
72
|
+
.replace(/`([^`]+)`/g, '$1')
|
|
69
73
|
.replace(/[\u2013\u2014]/g, ', ')
|
|
70
74
|
.replace(/\u2026/g, '...')
|
|
71
75
|
.replace(/\s+/g, ' ')
|
|
@@ -76,14 +80,14 @@ function normalizeSpeechText(text) {
|
|
|
76
80
|
.trim();
|
|
77
81
|
}
|
|
78
82
|
|
|
79
|
-
function splitSpeechSegments(text) {
|
|
80
|
-
const normalized = normalizeSpeechText(text);
|
|
81
|
-
if (!normalized) return [];
|
|
82
|
-
|
|
83
|
-
const baseSegments = normalized
|
|
84
|
-
.split(/(?<=[.!?])\s+/)
|
|
85
|
-
.map((segment) => segment.trim())
|
|
86
|
-
.filter(Boolean);
|
|
83
|
+
function splitSpeechSegments(text) {
|
|
84
|
+
const normalized = normalizeSpeechText(text);
|
|
85
|
+
if (!normalized) return [];
|
|
86
|
+
|
|
87
|
+
const baseSegments = normalized
|
|
88
|
+
.split(/(?<=[.!?])\s+|\n+/)
|
|
89
|
+
.map((segment) => segment.trim())
|
|
90
|
+
.filter(Boolean);
|
|
87
91
|
|
|
88
92
|
const segments = [];
|
|
89
93
|
for (const segment of baseSegments) {
|
|
@@ -92,10 +96,10 @@ function splitSpeechSegments(text) {
|
|
|
92
96
|
continue;
|
|
93
97
|
}
|
|
94
98
|
|
|
95
|
-
const clauseParts = segment
|
|
96
|
-
.split(
|
|
97
|
-
.map((part) => part.trim())
|
|
98
|
-
.filter(Boolean);
|
|
99
|
+
const clauseParts = segment
|
|
100
|
+
.split(/(?<=[,;:])\s+/)
|
|
101
|
+
.map((part) => part.trim())
|
|
102
|
+
.filter(Boolean);
|
|
99
103
|
|
|
100
104
|
if (clauseParts.length > 1) {
|
|
101
105
|
for (let index = 0; index < clauseParts.length; index += 1) {
|
|
@@ -114,50 +118,62 @@ function splitSpeechSegments(text) {
|
|
|
114
118
|
return [...segments.slice(0, 4), segments.slice(4).join(' ').trim()];
|
|
115
119
|
}
|
|
116
120
|
|
|
117
|
-
function inferSegmentStyle(segmentText, index, totalSegments) {
|
|
118
|
-
const normalized = segmentText.toLowerCase();
|
|
119
|
-
const
|
|
120
|
-
const
|
|
121
|
-
const
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
121
|
+
function inferSegmentStyle(segmentText, index, totalSegments) {
|
|
122
|
+
const normalized = segmentText.toLowerCase();
|
|
123
|
+
const greeting = /^(hey|hi|hello|yo)\b/.test(normalized);
|
|
124
|
+
const exclamatory = /!/.test(segmentText) || /\b(hell yeah|awesome|amazing|stoked|love|perfect|great)\b/.test(normalized);
|
|
125
|
+
const curious = /\?/.test(segmentText);
|
|
126
|
+
const reassuring = /\b(got it|no worries|all good|you'?re good|sounds good|totally|absolutely)\b/.test(normalized);
|
|
127
|
+
const reflective =
|
|
128
|
+
/\b(i think|i'm|i am|i've|i have|lately|right now|before this|each time|understand|it feels like)\b/.test(normalized) ||
|
|
129
|
+
segmentText.length > 60;
|
|
130
|
+
|
|
131
|
+
if (greeting || reassuring) {
|
|
132
|
+
return {
|
|
133
|
+
pace: 'medium_fast',
|
|
134
|
+
pitch: 'slightly_high',
|
|
135
|
+
energy: 'bright',
|
|
136
|
+
volume: 'projected',
|
|
137
|
+
pause_after_ms: index < totalSegments - 1 ? 180 : 0,
|
|
138
|
+
};
|
|
139
|
+
}
|
|
140
|
+
|
|
141
|
+
if (curious) {
|
|
142
|
+
return {
|
|
143
|
+
pace: 'medium',
|
|
144
|
+
pitch: 'slightly_high',
|
|
145
|
+
energy: 'warm',
|
|
146
|
+
volume: 'projected',
|
|
147
|
+
pause_after_ms: 0,
|
|
148
|
+
};
|
|
149
|
+
}
|
|
134
150
|
|
|
135
151
|
if (exclamatory) {
|
|
136
152
|
return {
|
|
137
|
-
pace: 'medium_fast',
|
|
138
|
-
pitch: 'slightly_high',
|
|
139
|
-
energy: 'bright',
|
|
140
|
-
volume: '
|
|
141
|
-
pause_after_ms: index < totalSegments - 1 ? 220 : 0,
|
|
142
|
-
};
|
|
143
|
-
}
|
|
144
|
-
|
|
145
|
-
if (reflective) {
|
|
146
|
-
return {
|
|
147
|
-
pace: '
|
|
148
|
-
pitch: '
|
|
149
|
-
energy: 'warm',
|
|
150
|
-
volume: '
|
|
151
|
-
pause_after_ms: index < totalSegments - 1 ?
|
|
152
|
-
};
|
|
153
|
-
}
|
|
154
|
-
|
|
155
|
-
return {
|
|
156
|
-
pace: 'medium',
|
|
157
|
-
pitch: '
|
|
158
|
-
energy: 'warm',
|
|
159
|
-
volume: 'normal',
|
|
160
|
-
pause_after_ms: index < totalSegments - 1 ? 180 : 0,
|
|
153
|
+
pace: 'medium_fast',
|
|
154
|
+
pitch: 'slightly_high',
|
|
155
|
+
energy: 'bright',
|
|
156
|
+
volume: 'projected',
|
|
157
|
+
pause_after_ms: index < totalSegments - 1 ? 220 : 0,
|
|
158
|
+
};
|
|
159
|
+
}
|
|
160
|
+
|
|
161
|
+
if (reflective) {
|
|
162
|
+
return {
|
|
163
|
+
pace: 'slow',
|
|
164
|
+
pitch: 'slightly_low',
|
|
165
|
+
energy: 'warm',
|
|
166
|
+
volume: 'soft',
|
|
167
|
+
pause_after_ms: index < totalSegments - 1 ? 280 : 0,
|
|
168
|
+
};
|
|
169
|
+
}
|
|
170
|
+
|
|
171
|
+
return {
|
|
172
|
+
pace: 'medium',
|
|
173
|
+
pitch: 'slightly_high',
|
|
174
|
+
energy: 'warm',
|
|
175
|
+
volume: 'normal',
|
|
176
|
+
pause_after_ms: index < totalSegments - 1 ? 180 : 0,
|
|
161
177
|
};
|
|
162
178
|
}
|
|
163
179
|
|
|
@@ -177,11 +193,11 @@ function synthesizeSpokenSegments(text) {
|
|
|
177
193
|
};
|
|
178
194
|
}
|
|
179
195
|
|
|
180
|
-
function normalizeSpokenMetadata(spoken) {
|
|
181
|
-
if (!spoken || typeof spoken !== 'object' || Array.isArray(spoken)) return null;
|
|
182
|
-
|
|
183
|
-
const text = trimString(spoken.text);
|
|
184
|
-
if (!text) return null;
|
|
196
|
+
function normalizeSpokenMetadata(spoken) {
|
|
197
|
+
if (!spoken || typeof spoken !== 'object' || Array.isArray(spoken)) return null;
|
|
198
|
+
|
|
199
|
+
const text = normalizeSpeechText(trimString(spoken.text));
|
|
200
|
+
if (!text) return null;
|
|
185
201
|
|
|
186
202
|
const normalized = { text };
|
|
187
203
|
const language = trimString(spoken.language);
|
|
@@ -266,8 +282,9 @@ function inferSpokenMetadataFromContent(content) {
|
|
|
266
282
|
};
|
|
267
283
|
}
|
|
268
284
|
|
|
269
|
-
export {
|
|
270
|
-
inferSpokenMetadataFromContent,
|
|
271
|
-
normalizeSpokenMetadata,
|
|
272
|
-
normalizeSpeechText,
|
|
273
|
-
|
|
285
|
+
export {
|
|
286
|
+
inferSpokenMetadataFromContent,
|
|
287
|
+
normalizeSpokenMetadata,
|
|
288
|
+
normalizeSpeechText,
|
|
289
|
+
stripAvatarCommandTags,
|
|
290
|
+
};
|
package/openclaw.plugin.json
CHANGED