crawd 0.8.2 → 0.8.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/backend/index.js +21 -7
- package/dist/types.d.ts +8 -0
- package/package.json +1 -1
- package/src/backend/coordinator.ts +22 -6
- package/src/backend/index.ts +1 -0
- package/src/backend/server.ts +68 -18
- package/src/types.ts +8 -0
package/dist/backend/index.js
CHANGED
|
@@ -11443,7 +11443,7 @@ var GatewayClient = class _GatewayClient {
|
|
|
11443
11443
|
};
|
|
11444
11444
|
var STARTUP_GRACE_MS = 3e4;
|
|
11445
11445
|
var SLEEP_CHECK_INTERVAL_MS = 1e4;
|
|
11446
|
-
var Coordinator = class {
|
|
11446
|
+
var Coordinator = class _Coordinator {
|
|
11447
11447
|
buffer = [];
|
|
11448
11448
|
timer = null;
|
|
11449
11449
|
triggerFn;
|
|
@@ -11648,14 +11648,18 @@ var Coordinator = class {
|
|
|
11648
11648
|
this._busy = true;
|
|
11649
11649
|
try {
|
|
11650
11650
|
const replies = await this.triggerFn(this.config.vibePrompt);
|
|
11651
|
-
|
|
11651
|
+
const agentReplies = replies.filter((r) => !this.isApiError(r));
|
|
11652
|
+
if (agentReplies.some((r) => r.trim().toUpperCase() === "NO_REPLY")) {
|
|
11652
11653
|
noReply = true;
|
|
11653
|
-
} else if (!this.isCompliantReply(replies)) {
|
|
11654
|
-
misaligned = replies.filter((r) => {
|
|
11654
|
+
} else if (!this.isCompliantReply(agentReplies)) {
|
|
11655
|
+
misaligned = agentReplies.filter((r) => {
|
|
11655
11656
|
const t = r.trim().toUpperCase();
|
|
11656
11657
|
return t !== "NO_REPLY" && t !== "LIVESTREAM_REPLIED";
|
|
11657
11658
|
});
|
|
11658
11659
|
}
|
|
11660
|
+
if (replies.length > agentReplies.length) {
|
|
11661
|
+
this.logger.warn(`[Coordinator] Filtered ${replies.length - agentReplies.length} API error(s) from vibe response`);
|
|
11662
|
+
}
|
|
11659
11663
|
} catch (err) {
|
|
11660
11664
|
this.logger.error("[Coordinator] Vibe failed:", err);
|
|
11661
11665
|
} finally {
|
|
@@ -11715,12 +11719,17 @@ var Coordinator = class {
|
|
|
11715
11719
|
get busy() {
|
|
11716
11720
|
return this._busy;
|
|
11717
11721
|
}
|
|
11722
|
+
/** Detect API/gateway errors surfaced as reply strings (e.g. rate limits) */
|
|
11723
|
+
static API_ERROR_RE = /^\d{3}\s+(status\s+code|error)|^rate\s*limit|^too\s+many\s+requests|^overloaded|^server\s+error/i;
|
|
11724
|
+
isApiError(reply) {
|
|
11725
|
+
return _Coordinator.API_ERROR_RE.test(reply.trim());
|
|
11726
|
+
}
|
|
11718
11727
|
/** Check if agent replies are compliant (NO_REPLY or LIVESTREAM_REPLIED) */
|
|
11719
11728
|
isCompliantReply(replies) {
|
|
11720
11729
|
if (replies.length === 0) return true;
|
|
11721
11730
|
return replies.every((r) => {
|
|
11722
11731
|
const t = r.trim().toUpperCase();
|
|
11723
|
-
return t === "NO_REPLY" || t === "LIVESTREAM_REPLIED";
|
|
11732
|
+
return t === "NO_REPLY" || t === "LIVESTREAM_REPLIED" || this.isApiError(r);
|
|
11724
11733
|
});
|
|
11725
11734
|
}
|
|
11726
11735
|
/** Send misalignment correction when agent responds with plaintext */
|
|
@@ -11746,8 +11755,12 @@ var Coordinator = class {
|
|
|
11746
11755
|
this._busy = true;
|
|
11747
11756
|
try {
|
|
11748
11757
|
const replies = await this.triggerFn(batchText);
|
|
11749
|
-
|
|
11750
|
-
|
|
11758
|
+
const agentReplies = replies.filter((r) => !this.isApiError(r));
|
|
11759
|
+
if (replies.length > agentReplies.length) {
|
|
11760
|
+
this.logger.warn(`[Coordinator] Filtered ${replies.length - agentReplies.length} API error(s) from chat response`);
|
|
11761
|
+
}
|
|
11762
|
+
if (!this.isCompliantReply(agentReplies)) {
|
|
11763
|
+
await this.sendMisalignment(agentReplies.filter((r) => {
|
|
11751
11764
|
const t = r.trim().toUpperCase();
|
|
11752
11765
|
return t !== "NO_REPLY" && t !== "LIVESTREAM_REPLIED";
|
|
11753
11766
|
}));
|
|
@@ -12372,6 +12385,7 @@ async function main() {
|
|
|
12372
12385
|
botTTS(response)
|
|
12373
12386
|
]);
|
|
12374
12387
|
const event = {
|
|
12388
|
+
id: randomUUID2(),
|
|
12375
12389
|
chat: { username, message },
|
|
12376
12390
|
botMessage: response,
|
|
12377
12391
|
chatTtsUrl,
|
package/dist/types.d.ts
CHANGED
|
@@ -31,6 +31,8 @@ type ChatMessage = {
|
|
|
31
31
|
type TtsProvider = 'openai' | 'elevenlabs' | 'tiktok';
|
|
32
32
|
/** Turn-based reply: chat message + bot response, each with TTS audio */
|
|
33
33
|
type ReplyTurnEvent = {
|
|
34
|
+
/** Correlation ID — overlay sends talk:done with this ID when both audios finish */
|
|
35
|
+
id: string;
|
|
34
36
|
chat: {
|
|
35
37
|
username: string;
|
|
36
38
|
message: string;
|
|
@@ -38,6 +40,10 @@ type ReplyTurnEvent = {
|
|
|
38
40
|
botMessage: string;
|
|
39
41
|
chatTtsUrl: string;
|
|
40
42
|
botTtsUrl: string;
|
|
43
|
+
/** TTS provider used for the chat audio */
|
|
44
|
+
chatTtsProvider?: TtsProvider;
|
|
45
|
+
/** TTS provider used for the bot audio */
|
|
46
|
+
botTtsProvider?: TtsProvider;
|
|
41
47
|
};
|
|
42
48
|
/** Bot speech bubble with pre-generated TTS (atomic event) */
|
|
43
49
|
type TalkEvent = {
|
|
@@ -47,6 +53,8 @@ type TalkEvent = {
|
|
|
47
53
|
message: string;
|
|
48
54
|
/** Bot TTS audio URL */
|
|
49
55
|
ttsUrl: string;
|
|
56
|
+
/** TTS provider used for the bot audio */
|
|
57
|
+
ttsProvider?: TtsProvider;
|
|
50
58
|
/** Optional: chat message being replied to (overlay plays this first) */
|
|
51
59
|
chat?: {
|
|
52
60
|
message: string;
|
package/package.json
CHANGED
package/src/backend/coordinator.ts
CHANGED
|
@@ -765,14 +765,19 @@ export class Coordinator {
|
|
|
765
765
|
this._busy = true
|
|
766
766
|
try {
|
|
767
767
|
const replies = await this.triggerFn(this.config.vibePrompt)
|
|
768
|
-
|
|
768
|
+
// Filter out API errors (429s, rate limits) — not agent responses
|
|
769
|
+
const agentReplies = replies.filter(r => !this.isApiError(r))
|
|
770
|
+
if (agentReplies.some(r => r.trim().toUpperCase() === 'NO_REPLY')) {
|
|
769
771
|
noReply = true
|
|
770
|
-
} else if (!this.isCompliantReply(replies)) {
|
|
771
|
-
misaligned = replies.filter(r => {
|
|
772
|
+
} else if (!this.isCompliantReply(agentReplies)) {
|
|
773
|
+
misaligned = agentReplies.filter(r => {
|
|
772
774
|
const t = r.trim().toUpperCase()
|
|
773
775
|
return t !== 'NO_REPLY' && t !== 'LIVESTREAM_REPLIED'
|
|
774
776
|
})
|
|
775
777
|
}
|
|
778
|
+
if (replies.length > agentReplies.length) {
|
|
779
|
+
this.logger.warn(`[Coordinator] Filtered ${replies.length - agentReplies.length} API error(s) from vibe response`)
|
|
780
|
+
}
|
|
776
781
|
} catch (err) {
|
|
777
782
|
this.logger.error('[Coordinator] Vibe failed:', err)
|
|
778
783
|
} finally {
|
|
@@ -849,12 +854,19 @@ export class Coordinator {
|
|
|
849
854
|
/** Whether the coordinator is busy processing a flush or talk */
|
|
850
855
|
get busy(): boolean { return this._busy }
|
|
851
856
|
|
|
857
|
+
/** Detect API/gateway errors surfaced as reply strings (e.g. rate limits) */
|
|
858
|
+
private static readonly API_ERROR_RE = /^\d{3}\s+(status\s+code|error)|^rate\s*limit|^too\s+many\s+requests|^overloaded|^server\s+error/i
|
|
859
|
+
|
|
860
|
+
private isApiError(reply: string): boolean {
|
|
861
|
+
return Coordinator.API_ERROR_RE.test(reply.trim())
|
|
862
|
+
}
|
|
863
|
+
|
|
852
864
|
/** Check if agent replies are compliant (NO_REPLY or LIVESTREAM_REPLIED) */
|
|
853
865
|
private isCompliantReply(replies: string[]): boolean {
|
|
854
866
|
if (replies.length === 0) return true
|
|
855
867
|
return replies.every(r => {
|
|
856
868
|
const t = r.trim().toUpperCase()
|
|
857
|
-
return t === 'NO_REPLY' || t === 'LIVESTREAM_REPLIED'
|
|
869
|
+
return t === 'NO_REPLY' || t === 'LIVESTREAM_REPLIED' || this.isApiError(r)
|
|
858
870
|
})
|
|
859
871
|
}
|
|
860
872
|
|
|
@@ -889,8 +901,12 @@ export class Coordinator {
|
|
|
889
901
|
this._busy = true
|
|
890
902
|
try {
|
|
891
903
|
const replies = await this.triggerFn(batchText)
|
|
892
|
-
|
|
893
|
-
|
|
904
|
+
const agentReplies = replies.filter(r => !this.isApiError(r))
|
|
905
|
+
if (replies.length > agentReplies.length) {
|
|
906
|
+
this.logger.warn(`[Coordinator] Filtered ${replies.length - agentReplies.length} API error(s) from chat response`)
|
|
907
|
+
}
|
|
908
|
+
if (!this.isCompliantReply(agentReplies)) {
|
|
909
|
+
await this.sendMisalignment(agentReplies.filter(r => {
|
|
894
910
|
const t = r.trim().toUpperCase()
|
|
895
911
|
return t !== 'NO_REPLY' && t !== 'LIVESTREAM_REPLIED'
|
|
896
912
|
}))
|
package/src/backend/index.ts
CHANGED
package/src/backend/server.ts
CHANGED
|
@@ -96,6 +96,10 @@ export class CrawdBackend {
|
|
|
96
96
|
private latestMcap: number | null = null
|
|
97
97
|
private mcapInterval: NodeJS.Timeout | null = null
|
|
98
98
|
|
|
99
|
+
/** Pending overlay acks — resolves when overlay finishes playing audio for a given event ID */
|
|
100
|
+
private pendingAcks = new Map<string, { resolve: () => void; timer: ReturnType<typeof setTimeout> }>()
|
|
101
|
+
private static readonly ACK_TIMEOUT_MS = 60_000
|
|
102
|
+
|
|
99
103
|
constructor(config: CrawdConfig, logger?: CrawdLogger) {
|
|
100
104
|
this.config = config
|
|
101
105
|
this.logger = logger ?? defaultLogger
|
|
@@ -169,7 +173,7 @@ export class CrawdBackend {
|
|
|
169
173
|
// Public API (used by plugin tool handlers)
|
|
170
174
|
// =========================================================================
|
|
171
175
|
|
|
172
|
-
/** Speak on the livestream — emits overlay event + TTS. */
|
|
176
|
+
/** Speak on the livestream — emits overlay event + TTS. Blocks until overlay finishes playing. */
|
|
173
177
|
async handleTalk(text: string): Promise<{ spoken: boolean }> {
|
|
174
178
|
if (!text || typeof text !== 'string') {
|
|
175
179
|
return { spoken: false }
|
|
@@ -179,20 +183,22 @@ export class CrawdBackend {
|
|
|
179
183
|
|
|
180
184
|
const id = randomUUID()
|
|
181
185
|
try {
|
|
182
|
-
const ttsUrl = await this.generateTTSWithFallback(text, this.config.tts.bot)
|
|
183
|
-
this.logger.info(`TTS generated: ${ttsUrl}`)
|
|
184
|
-
this.io.emit('crawd:talk', { id, message: text, ttsUrl })
|
|
186
|
+
const tts = await this.generateTTSWithFallback(text, this.config.tts.bot)
|
|
187
|
+
this.logger.info(`TTS generated: ${tts.url}`)
|
|
188
|
+
this.io.emit('crawd:talk', { id, message: text, ttsUrl: tts.url, ttsProvider: tts.provider })
|
|
185
189
|
} catch (e) {
|
|
186
190
|
this.logger.error('Failed to generate TTS, emitting without audio', e)
|
|
187
191
|
this.io.emit('crawd:talk', { id, message: text, ttsUrl: '' })
|
|
188
192
|
}
|
|
189
193
|
|
|
194
|
+
await this.waitForAck(id)
|
|
190
195
|
return { spoken: true }
|
|
191
196
|
}
|
|
192
197
|
|
|
193
198
|
/**
|
|
194
199
|
* Reply to a chat message — reads original aloud (chat voice),
|
|
195
200
|
* then speaks bot reply (bot voice). Emits `crawd:reply-turn`.
|
|
201
|
+
* Blocks until overlay finishes playing both audios.
|
|
196
202
|
*/
|
|
197
203
|
async handleReply(
|
|
198
204
|
text: string,
|
|
@@ -204,25 +210,32 @@ export class CrawdBackend {
|
|
|
204
210
|
|
|
205
211
|
this.coordinator?.notifySpeech()
|
|
206
212
|
|
|
213
|
+
const id = randomUUID()
|
|
207
214
|
try {
|
|
208
|
-
const [chatTtsUrl, botTtsUrl] = await Promise.all([
|
|
215
|
+
const [chatTts, botTts] = await Promise.all([
|
|
209
216
|
this.generateTTSWithFallback(`Chat says: ${chat.message}`, this.config.tts.chat),
|
|
210
217
|
this.generateTTSWithFallback(text, this.config.tts.bot),
|
|
211
218
|
])
|
|
212
219
|
this.io.emit('crawd:reply-turn', {
|
|
220
|
+
id,
|
|
213
221
|
chat: { username: chat.username, message: chat.message },
|
|
214
222
|
botMessage: text,
|
|
215
|
-
chatTtsUrl,
|
|
216
|
-
botTtsUrl,
|
|
223
|
+
chatTtsUrl: chatTts.url,
|
|
224
|
+
botTtsUrl: botTts.url,
|
|
225
|
+
chatTtsProvider: chatTts.provider,
|
|
226
|
+
botTtsProvider: botTts.provider,
|
|
217
227
|
})
|
|
218
228
|
} catch (e) {
|
|
219
229
|
this.logger.error('Failed to generate reply-turn TTS, falling back to talk', e)
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
230
|
+
try {
|
|
231
|
+
const tts = await this.generateTTSWithFallback(text, this.config.tts.bot)
|
|
232
|
+
this.io.emit('crawd:talk', { id, message: text, ttsUrl: tts.url, ttsProvider: tts.provider })
|
|
233
|
+
} catch {
|
|
234
|
+
this.io.emit('crawd:talk', { id, message: text, ttsUrl: '' })
|
|
235
|
+
}
|
|
224
236
|
}
|
|
225
237
|
|
|
238
|
+
await this.waitForAck(id)
|
|
226
239
|
return { spoken: true }
|
|
227
240
|
}
|
|
228
241
|
|
|
@@ -230,23 +243,50 @@ export class CrawdBackend {
|
|
|
230
243
|
return this.io
|
|
231
244
|
}
|
|
232
245
|
|
|
246
|
+
/** Wait for overlay to ack that audio finished playing. Resolves on timeout as fallback. */
|
|
247
|
+
private waitForAck(id: string): Promise<void> {
|
|
248
|
+
return new Promise((resolve) => {
|
|
249
|
+
const timer = setTimeout(() => {
|
|
250
|
+
this.pendingAcks.delete(id)
|
|
251
|
+
this.logger.warn(`Talk ack timed out (${id}), resolving anyway`)
|
|
252
|
+
resolve()
|
|
253
|
+
}, CrawdBackend.ACK_TIMEOUT_MS)
|
|
254
|
+
this.pendingAcks.set(id, { resolve, timer })
|
|
255
|
+
})
|
|
256
|
+
}
|
|
257
|
+
|
|
258
|
+
/** Resolve a pending ack (called when overlay sends crawd:talk:done) */
|
|
259
|
+
private resolveAck(id: string): void {
|
|
260
|
+
const pending = this.pendingAcks.get(id)
|
|
261
|
+
if (pending) {
|
|
262
|
+
clearTimeout(pending.timer)
|
|
263
|
+
this.pendingAcks.delete(id)
|
|
264
|
+
pending.resolve()
|
|
265
|
+
}
|
|
266
|
+
}
|
|
267
|
+
|
|
233
268
|
// =========================================================================
|
|
234
269
|
// TTS (with ordered fallback chain)
|
|
235
270
|
// =========================================================================
|
|
236
271
|
|
|
237
|
-
async generateTTSWithFallback(text: string, chain: TtsVoiceEntry[]): Promise<string> {
|
|
272
|
+
async generateTTSWithFallback(text: string, chain: TtsVoiceEntry[]): Promise<{ url: string; provider: TtsVoiceEntry['provider'] }> {
|
|
238
273
|
let lastError: Error | null = null
|
|
239
274
|
|
|
240
275
|
for (const entry of chain) {
|
|
241
276
|
try {
|
|
277
|
+
let url: string
|
|
242
278
|
switch (entry.provider) {
|
|
243
279
|
case 'elevenlabs':
|
|
244
|
-
|
|
280
|
+
url = await this.generateElevenLabsTTS(text, entry.voice)
|
|
281
|
+
break
|
|
245
282
|
case 'openai':
|
|
246
|
-
|
|
283
|
+
url = await this.generateOpenAITTS(text, entry.voice)
|
|
284
|
+
break
|
|
247
285
|
case 'tiktok':
|
|
248
|
-
|
|
286
|
+
url = await this.generateTikTokTTSFile(text, entry.voice)
|
|
287
|
+
break
|
|
249
288
|
}
|
|
289
|
+
return { url, provider: entry.provider }
|
|
250
290
|
} catch (e) {
|
|
251
291
|
lastError = e instanceof Error ? e : new Error(String(e))
|
|
252
292
|
this.logger.warn(`TTS ${entry.provider}/${entry.voice} failed: ${lastError.message}, trying next...`)
|
|
@@ -392,6 +432,13 @@ export class CrawdBackend {
|
|
|
392
432
|
socket.emit('crawd:mcap', { mcap: this.latestMcap })
|
|
393
433
|
}
|
|
394
434
|
|
|
435
|
+
socket.on('crawd:talk:done', (data: { id?: string }) => {
|
|
436
|
+
if (data?.id) {
|
|
437
|
+
this.logger.info(`Talk ack received: ${data.id}`)
|
|
438
|
+
this.resolveAck(data.id)
|
|
439
|
+
}
|
|
440
|
+
})
|
|
441
|
+
|
|
395
442
|
socket.on('crawd:mock-chat', (data: { username?: string; message?: string }) => {
|
|
396
443
|
const { username, message } = data
|
|
397
444
|
if (!username || !message) return
|
|
@@ -491,15 +538,18 @@ export class CrawdBackend {
|
|
|
491
538
|
}
|
|
492
539
|
|
|
493
540
|
try {
|
|
494
|
-
const [chatTtsUrl, botTtsUrl] = await Promise.all([
|
|
541
|
+
const [chatTts, botTts] = await Promise.all([
|
|
495
542
|
this.generateTTSWithFallback(`Chat says: ${message}`, this.config.tts.chat),
|
|
496
543
|
this.generateTTSWithFallback(response, this.config.tts.bot),
|
|
497
544
|
])
|
|
498
545
|
this.io.emit('crawd:reply-turn', {
|
|
546
|
+
id: randomUUID(),
|
|
499
547
|
chat: { username, message },
|
|
500
548
|
botMessage: response,
|
|
501
|
-
chatTtsUrl,
|
|
502
|
-
botTtsUrl,
|
|
549
|
+
chatTtsUrl: chatTts.url,
|
|
550
|
+
botTtsUrl: botTts.url,
|
|
551
|
+
chatTtsProvider: chatTts.provider,
|
|
552
|
+
botTtsProvider: botTts.provider,
|
|
503
553
|
})
|
|
504
554
|
return { ok: true }
|
|
505
555
|
} catch (e) {
|
package/src/types.ts
CHANGED
|
@@ -20,10 +20,16 @@ export type TtsProvider = 'openai' | 'elevenlabs' | 'tiktok'
|
|
|
20
20
|
|
|
21
21
|
/** Turn-based reply: chat message + bot response, each with TTS audio */
|
|
22
22
|
export type ReplyTurnEvent = {
|
|
23
|
+
/** Correlation ID — overlay sends talk:done with this ID when both audios finish */
|
|
24
|
+
id: string
|
|
23
25
|
chat: { username: string; message: string }
|
|
24
26
|
botMessage: string
|
|
25
27
|
chatTtsUrl: string
|
|
26
28
|
botTtsUrl: string
|
|
29
|
+
/** TTS provider used for the chat audio */
|
|
30
|
+
chatTtsProvider?: TtsProvider
|
|
31
|
+
/** TTS provider used for the bot audio */
|
|
32
|
+
botTtsProvider?: TtsProvider
|
|
27
33
|
}
|
|
28
34
|
|
|
29
35
|
/** Bot speech bubble with pre-generated TTS (atomic event) */
|
|
@@ -34,6 +40,8 @@ export type TalkEvent = {
|
|
|
34
40
|
message: string
|
|
35
41
|
/** Bot TTS audio URL */
|
|
36
42
|
ttsUrl: string
|
|
43
|
+
/** TTS provider used for the bot audio */
|
|
44
|
+
ttsProvider?: TtsProvider
|
|
37
45
|
/** Optional: chat message being replied to (overlay plays this first) */
|
|
38
46
|
chat?: {
|
|
39
47
|
message: string
|