crawd 0.8.2 → 0.8.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11443,7 +11443,7 @@ var GatewayClient = class _GatewayClient {
11443
11443
  };
11444
11444
  var STARTUP_GRACE_MS = 3e4;
11445
11445
  var SLEEP_CHECK_INTERVAL_MS = 1e4;
11446
- var Coordinator = class {
11446
+ var Coordinator = class _Coordinator {
11447
11447
  buffer = [];
11448
11448
  timer = null;
11449
11449
  triggerFn;
@@ -11648,14 +11648,18 @@ var Coordinator = class {
11648
11648
  this._busy = true;
11649
11649
  try {
11650
11650
  const replies = await this.triggerFn(this.config.vibePrompt);
11651
- if (replies.some((r) => r.trim().toUpperCase() === "NO_REPLY")) {
11651
+ const agentReplies = replies.filter((r) => !this.isApiError(r));
11652
+ if (agentReplies.some((r) => r.trim().toUpperCase() === "NO_REPLY")) {
11652
11653
  noReply = true;
11653
- } else if (!this.isCompliantReply(replies)) {
11654
- misaligned = replies.filter((r) => {
11654
+ } else if (!this.isCompliantReply(agentReplies)) {
11655
+ misaligned = agentReplies.filter((r) => {
11655
11656
  const t = r.trim().toUpperCase();
11656
11657
  return t !== "NO_REPLY" && t !== "LIVESTREAM_REPLIED";
11657
11658
  });
11658
11659
  }
11660
+ if (replies.length > agentReplies.length) {
11661
+ this.logger.warn(`[Coordinator] Filtered ${replies.length - agentReplies.length} API error(s) from vibe response`);
11662
+ }
11659
11663
  } catch (err) {
11660
11664
  this.logger.error("[Coordinator] Vibe failed:", err);
11661
11665
  } finally {
@@ -11715,12 +11719,17 @@ var Coordinator = class {
11715
11719
  get busy() {
11716
11720
  return this._busy;
11717
11721
  }
11722
+ /** Detect API/gateway errors surfaced as reply strings (e.g. rate limits) */
11723
+ static API_ERROR_RE = /^\d{3}\s+(status\s+code|error)|^rate\s*limit|^too\s+many\s+requests|^overloaded|^server\s+error/i;
11724
+ isApiError(reply) {
11725
+ return _Coordinator.API_ERROR_RE.test(reply.trim());
11726
+ }
11718
11727
  /** Check if agent replies are compliant (NO_REPLY or LIVESTREAM_REPLIED) */
11719
11728
  isCompliantReply(replies) {
11720
11729
  if (replies.length === 0) return true;
11721
11730
  return replies.every((r) => {
11722
11731
  const t = r.trim().toUpperCase();
11723
- return t === "NO_REPLY" || t === "LIVESTREAM_REPLIED";
11732
+ return t === "NO_REPLY" || t === "LIVESTREAM_REPLIED" || this.isApiError(r);
11724
11733
  });
11725
11734
  }
11726
11735
  /** Send misalignment correction when agent responds with plaintext */
@@ -11746,8 +11755,12 @@ var Coordinator = class {
11746
11755
  this._busy = true;
11747
11756
  try {
11748
11757
  const replies = await this.triggerFn(batchText);
11749
- if (!this.isCompliantReply(replies)) {
11750
- await this.sendMisalignment(replies.filter((r) => {
11758
+ const agentReplies = replies.filter((r) => !this.isApiError(r));
11759
+ if (replies.length > agentReplies.length) {
11760
+ this.logger.warn(`[Coordinator] Filtered ${replies.length - agentReplies.length} API error(s) from chat response`);
11761
+ }
11762
+ if (!this.isCompliantReply(agentReplies)) {
11763
+ await this.sendMisalignment(agentReplies.filter((r) => {
11751
11764
  const t = r.trim().toUpperCase();
11752
11765
  return t !== "NO_REPLY" && t !== "LIVESTREAM_REPLIED";
11753
11766
  }));
@@ -12372,6 +12385,7 @@ async function main() {
12372
12385
  botTTS(response)
12373
12386
  ]);
12374
12387
  const event = {
12388
+ id: randomUUID2(),
12375
12389
  chat: { username, message },
12376
12390
  botMessage: response,
12377
12391
  chatTtsUrl,
package/dist/types.d.ts CHANGED
@@ -31,6 +31,8 @@ type ChatMessage = {
31
31
  type TtsProvider = 'openai' | 'elevenlabs' | 'tiktok';
32
32
  /** Turn-based reply: chat message + bot response, each with TTS audio */
33
33
  type ReplyTurnEvent = {
34
+ /** Correlation ID — overlay sends talk:done with this ID when both audios finish */
35
+ id: string;
34
36
  chat: {
35
37
  username: string;
36
38
  message: string;
@@ -38,6 +40,10 @@ type ReplyTurnEvent = {
38
40
  botMessage: string;
39
41
  chatTtsUrl: string;
40
42
  botTtsUrl: string;
43
+ /** TTS provider used for the chat audio */
44
+ chatTtsProvider?: TtsProvider;
45
+ /** TTS provider used for the bot audio */
46
+ botTtsProvider?: TtsProvider;
41
47
  };
42
48
  /** Bot speech bubble with pre-generated TTS (atomic event) */
43
49
  type TalkEvent = {
@@ -47,6 +53,8 @@ type TalkEvent = {
47
53
  message: string;
48
54
  /** Bot TTS audio URL */
49
55
  ttsUrl: string;
56
+ /** TTS provider used for the bot audio */
57
+ ttsProvider?: TtsProvider;
50
58
  /** Optional: chat message being replied to (overlay plays this first) */
51
59
  chat?: {
52
60
  message: string;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "crawd",
3
- "version": "0.8.2",
3
+ "version": "0.8.4",
4
4
  "description": "CLI for crawd.bot - AI agent livestreaming platform",
5
5
  "type": "module",
6
6
  "types": "./dist/types.d.ts",
@@ -765,14 +765,19 @@ export class Coordinator {
765
765
  this._busy = true
766
766
  try {
767
767
  const replies = await this.triggerFn(this.config.vibePrompt)
768
- if (replies.some(r => r.trim().toUpperCase() === 'NO_REPLY')) {
768
+ // Filter out API errors (429s, rate limits) — not agent responses
769
+ const agentReplies = replies.filter(r => !this.isApiError(r))
770
+ if (agentReplies.some(r => r.trim().toUpperCase() === 'NO_REPLY')) {
769
771
  noReply = true
770
- } else if (!this.isCompliantReply(replies)) {
771
- misaligned = replies.filter(r => {
772
+ } else if (!this.isCompliantReply(agentReplies)) {
773
+ misaligned = agentReplies.filter(r => {
772
774
  const t = r.trim().toUpperCase()
773
775
  return t !== 'NO_REPLY' && t !== 'LIVESTREAM_REPLIED'
774
776
  })
775
777
  }
778
+ if (replies.length > agentReplies.length) {
779
+ this.logger.warn(`[Coordinator] Filtered ${replies.length - agentReplies.length} API error(s) from vibe response`)
780
+ }
776
781
  } catch (err) {
777
782
  this.logger.error('[Coordinator] Vibe failed:', err)
778
783
  } finally {
@@ -849,12 +854,19 @@ export class Coordinator {
849
854
  /** Whether the coordinator is busy processing a flush or talk */
850
855
  get busy(): boolean { return this._busy }
851
856
 
857
+ /** Detect API/gateway errors surfaced as reply strings (e.g. rate limits) */
858
+ private static readonly API_ERROR_RE = /^\d{3}\s+(status\s+code|error)|^rate\s*limit|^too\s+many\s+requests|^overloaded|^server\s+error/i
859
+
860
+ private isApiError(reply: string): boolean {
861
+ return Coordinator.API_ERROR_RE.test(reply.trim())
862
+ }
863
+
852
864
  /** Check if agent replies are compliant (NO_REPLY or LIVESTREAM_REPLIED) */
853
865
  private isCompliantReply(replies: string[]): boolean {
854
866
  if (replies.length === 0) return true
855
867
  return replies.every(r => {
856
868
  const t = r.trim().toUpperCase()
857
- return t === 'NO_REPLY' || t === 'LIVESTREAM_REPLIED'
869
+ return t === 'NO_REPLY' || t === 'LIVESTREAM_REPLIED' || this.isApiError(r)
858
870
  })
859
871
  }
860
872
 
@@ -889,8 +901,12 @@ export class Coordinator {
889
901
  this._busy = true
890
902
  try {
891
903
  const replies = await this.triggerFn(batchText)
892
- if (!this.isCompliantReply(replies)) {
893
- await this.sendMisalignment(replies.filter(r => {
904
+ const agentReplies = replies.filter(r => !this.isApiError(r))
905
+ if (replies.length > agentReplies.length) {
906
+ this.logger.warn(`[Coordinator] Filtered ${replies.length - agentReplies.length} API error(s) from chat response`)
907
+ }
908
+ if (!this.isCompliantReply(agentReplies)) {
909
+ await this.sendMisalignment(agentReplies.filter(r => {
894
910
  const t = r.trim().toUpperCase()
895
911
  return t !== 'NO_REPLY' && t !== 'LIVESTREAM_REPLIED'
896
912
  }))
@@ -551,6 +551,7 @@ async function main() {
551
551
  ]);
552
552
 
553
553
  const event: ReplyTurnEvent = {
554
+ id: randomUUID(),
554
555
  chat: { username, message },
555
556
  botMessage: response,
556
557
  chatTtsUrl,
@@ -96,6 +96,10 @@ export class CrawdBackend {
96
96
  private latestMcap: number | null = null
97
97
  private mcapInterval: NodeJS.Timeout | null = null
98
98
 
99
+ /** Pending overlay acks — resolves when overlay finishes playing audio for a given event ID */
100
+ private pendingAcks = new Map<string, { resolve: () => void; timer: ReturnType<typeof setTimeout> }>()
101
+ private static readonly ACK_TIMEOUT_MS = 60_000
102
+
99
103
  constructor(config: CrawdConfig, logger?: CrawdLogger) {
100
104
  this.config = config
101
105
  this.logger = logger ?? defaultLogger
@@ -169,7 +173,7 @@ export class CrawdBackend {
169
173
  // Public API (used by plugin tool handlers)
170
174
  // =========================================================================
171
175
 
172
- /** Speak on the livestream — emits overlay event + TTS. */
176
+ /** Speak on the livestream — emits overlay event + TTS. Blocks until overlay finishes playing. */
173
177
  async handleTalk(text: string): Promise<{ spoken: boolean }> {
174
178
  if (!text || typeof text !== 'string') {
175
179
  return { spoken: false }
@@ -179,20 +183,22 @@ export class CrawdBackend {
179
183
 
180
184
  const id = randomUUID()
181
185
  try {
182
- const ttsUrl = await this.generateTTSWithFallback(text, this.config.tts.bot)
183
- this.logger.info(`TTS generated: ${ttsUrl}`)
184
- this.io.emit('crawd:talk', { id, message: text, ttsUrl })
186
+ const tts = await this.generateTTSWithFallback(text, this.config.tts.bot)
187
+ this.logger.info(`TTS generated: ${tts.url}`)
188
+ this.io.emit('crawd:talk', { id, message: text, ttsUrl: tts.url, ttsProvider: tts.provider })
185
189
  } catch (e) {
186
190
  this.logger.error('Failed to generate TTS, emitting without audio', e)
187
191
  this.io.emit('crawd:talk', { id, message: text, ttsUrl: '' })
188
192
  }
189
193
 
194
+ await this.waitForAck(id)
190
195
  return { spoken: true }
191
196
  }
192
197
 
193
198
  /**
194
199
  * Reply to a chat message — reads original aloud (chat voice),
195
200
  * then speaks bot reply (bot voice). Emits `crawd:reply-turn`.
201
+ * Blocks until overlay finishes playing both audios.
196
202
  */
197
203
  async handleReply(
198
204
  text: string,
@@ -204,25 +210,32 @@ export class CrawdBackend {
204
210
 
205
211
  this.coordinator?.notifySpeech()
206
212
 
213
+ const id = randomUUID()
207
214
  try {
208
- const [chatTtsUrl, botTtsUrl] = await Promise.all([
215
+ const [chatTts, botTts] = await Promise.all([
209
216
  this.generateTTSWithFallback(`Chat says: ${chat.message}`, this.config.tts.chat),
210
217
  this.generateTTSWithFallback(text, this.config.tts.bot),
211
218
  ])
212
219
  this.io.emit('crawd:reply-turn', {
220
+ id,
213
221
  chat: { username: chat.username, message: chat.message },
214
222
  botMessage: text,
215
- chatTtsUrl,
216
- botTtsUrl,
223
+ chatTtsUrl: chatTts.url,
224
+ botTtsUrl: botTts.url,
225
+ chatTtsProvider: chatTts.provider,
226
+ botTtsProvider: botTts.provider,
217
227
  })
218
228
  } catch (e) {
219
229
  this.logger.error('Failed to generate reply-turn TTS, falling back to talk', e)
220
- const id = randomUUID()
221
- this.generateTTSWithFallback(text, this.config.tts.bot)
222
- .then((ttsUrl) => this.io.emit('crawd:talk', { id, message: text, ttsUrl }))
223
- .catch(() => this.io.emit('crawd:talk', { id, message: text, ttsUrl: '' }))
230
+ try {
231
+ const tts = await this.generateTTSWithFallback(text, this.config.tts.bot)
232
+ this.io.emit('crawd:talk', { id, message: text, ttsUrl: tts.url, ttsProvider: tts.provider })
233
+ } catch {
234
+ this.io.emit('crawd:talk', { id, message: text, ttsUrl: '' })
235
+ }
224
236
  }
225
237
 
238
+ await this.waitForAck(id)
226
239
  return { spoken: true }
227
240
  }
228
241
 
@@ -230,23 +243,50 @@ export class CrawdBackend {
230
243
  return this.io
231
244
  }
232
245
 
246
+ /** Wait for overlay to ack that audio finished playing. Resolves on timeout as fallback. */
247
+ private waitForAck(id: string): Promise<void> {
248
+ return new Promise((resolve) => {
249
+ const timer = setTimeout(() => {
250
+ this.pendingAcks.delete(id)
251
+ this.logger.warn(`Talk ack timed out (${id}), resolving anyway`)
252
+ resolve()
253
+ }, CrawdBackend.ACK_TIMEOUT_MS)
254
+ this.pendingAcks.set(id, { resolve, timer })
255
+ })
256
+ }
257
+
258
+ /** Resolve a pending ack (called when overlay sends crawd:talk:done) */
259
+ private resolveAck(id: string): void {
260
+ const pending = this.pendingAcks.get(id)
261
+ if (pending) {
262
+ clearTimeout(pending.timer)
263
+ this.pendingAcks.delete(id)
264
+ pending.resolve()
265
+ }
266
+ }
267
+
233
268
  // =========================================================================
234
269
  // TTS (with ordered fallback chain)
235
270
  // =========================================================================
236
271
 
237
- async generateTTSWithFallback(text: string, chain: TtsVoiceEntry[]): Promise<string> {
272
+ async generateTTSWithFallback(text: string, chain: TtsVoiceEntry[]): Promise<{ url: string; provider: TtsVoiceEntry['provider'] }> {
238
273
  let lastError: Error | null = null
239
274
 
240
275
  for (const entry of chain) {
241
276
  try {
277
+ let url: string
242
278
  switch (entry.provider) {
243
279
  case 'elevenlabs':
244
- return await this.generateElevenLabsTTS(text, entry.voice)
280
+ url = await this.generateElevenLabsTTS(text, entry.voice)
281
+ break
245
282
  case 'openai':
246
- return await this.generateOpenAITTS(text, entry.voice)
283
+ url = await this.generateOpenAITTS(text, entry.voice)
284
+ break
247
285
  case 'tiktok':
248
- return await this.generateTikTokTTSFile(text, entry.voice)
286
+ url = await this.generateTikTokTTSFile(text, entry.voice)
287
+ break
249
288
  }
289
+ return { url, provider: entry.provider }
250
290
  } catch (e) {
251
291
  lastError = e instanceof Error ? e : new Error(String(e))
252
292
  this.logger.warn(`TTS ${entry.provider}/${entry.voice} failed: ${lastError.message}, trying next...`)
@@ -392,6 +432,13 @@ export class CrawdBackend {
392
432
  socket.emit('crawd:mcap', { mcap: this.latestMcap })
393
433
  }
394
434
 
435
+ socket.on('crawd:talk:done', (data: { id?: string }) => {
436
+ if (data?.id) {
437
+ this.logger.info(`Talk ack received: ${data.id}`)
438
+ this.resolveAck(data.id)
439
+ }
440
+ })
441
+
395
442
  socket.on('crawd:mock-chat', (data: { username?: string; message?: string }) => {
396
443
  const { username, message } = data
397
444
  if (!username || !message) return
@@ -491,15 +538,18 @@ export class CrawdBackend {
491
538
  }
492
539
 
493
540
  try {
494
- const [chatTtsUrl, botTtsUrl] = await Promise.all([
541
+ const [chatTts, botTts] = await Promise.all([
495
542
  this.generateTTSWithFallback(`Chat says: ${message}`, this.config.tts.chat),
496
543
  this.generateTTSWithFallback(response, this.config.tts.bot),
497
544
  ])
498
545
  this.io.emit('crawd:reply-turn', {
546
+ id: randomUUID(),
499
547
  chat: { username, message },
500
548
  botMessage: response,
501
- chatTtsUrl,
502
- botTtsUrl,
549
+ chatTtsUrl: chatTts.url,
550
+ botTtsUrl: botTts.url,
551
+ chatTtsProvider: chatTts.provider,
552
+ botTtsProvider: botTts.provider,
503
553
  })
504
554
  return { ok: true }
505
555
  } catch (e) {
package/src/types.ts CHANGED
@@ -20,10 +20,16 @@ export type TtsProvider = 'openai' | 'elevenlabs' | 'tiktok'
20
20
 
21
21
  /** Turn-based reply: chat message + bot response, each with TTS audio */
22
22
  export type ReplyTurnEvent = {
23
+ /** Correlation ID — overlay sends talk:done with this ID when both audios finish */
24
+ id: string
23
25
  chat: { username: string; message: string }
24
26
  botMessage: string
25
27
  chatTtsUrl: string
26
28
  botTtsUrl: string
29
+ /** TTS provider used for the chat audio */
30
+ chatTtsProvider?: TtsProvider
31
+ /** TTS provider used for the bot audio */
32
+ botTtsProvider?: TtsProvider
27
33
  }
28
34
 
29
35
  /** Bot speech bubble with pre-generated TTS (atomic event) */
@@ -34,6 +40,8 @@ export type TalkEvent = {
34
40
  message: string
35
41
  /** Bot TTS audio URL */
36
42
  ttsUrl: string
43
+ /** TTS provider used for the bot audio */
44
+ ttsProvider?: TtsProvider
37
45
  /** Optional: chat message being replied to (overlay plays this first) */
38
46
  chat?: {
39
47
  message: string