crawd 0.8.6 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,51 +1,38 @@
1
1
  /**
2
- * CrawdBackend — encapsulates the Fastify+Socket.IO server, TTS, coordinator,
2
+ * CrawdBackend — encapsulates the Fastify+Socket.IO server, coordinator,
3
3
  * and chat system. Used by both standalone mode (backend/index.ts) and the
4
4
  * OpenClaw plugin (plugin.ts).
5
+ *
6
+ * TTS generation has been moved to the overlay (Next.js server actions).
7
+ * This backend now sends text-only events.
5
8
  */
6
9
  import { randomUUID } from 'crypto'
7
- import { writeFile, mkdir } from 'fs/promises'
8
- import { join } from 'path'
9
10
  import Fastify, { type FastifyInstance } from 'fastify'
10
- import fastifyStatic from '@fastify/static'
11
11
  import cors from '@fastify/cors'
12
12
  import { Server } from 'socket.io'
13
- import OpenAI from 'openai'
14
13
  import { pumpfun } from '../lib/pumpfun/v2/index.js'
15
14
  import { ChatManager } from '../lib/chat/manager.js'
16
15
  import { PumpFunChatClient } from '../lib/chat/pumpfun/client.js'
17
16
  import { YouTubeChatClient } from '../lib/chat/youtube/client.js'
18
- import { Coordinator, OneShotGateway, type CoordinatorConfig, type CoordinatorEvent } from './coordinator.js'
17
+ import { Coordinator, OneShotGateway, type CoordinatorConfig, type CoordinatorEvent, type Plan, type AutonomyMode } from './coordinator.js'
19
18
  import { generateShortId } from '../lib/chat/types.js'
20
- import { configureTikTokTTS, generateTikTokTTS } from '../lib/tts/tiktok.js'
21
19
  import type { ChatMessage } from '../lib/chat/types.js'
22
20
 
23
21
  // ---------------------------------------------------------------------------
24
22
  // Config types
25
23
  // ---------------------------------------------------------------------------
26
24
 
27
- export type TtsVoiceEntry = {
28
- provider: 'openai' | 'elevenlabs' | 'tiktok'
29
- voice: string
30
- }
31
-
32
25
  export type CrawdConfig = {
33
26
  enabled: boolean
34
27
  port: number
35
28
  bindHost: string
36
- backendUrl?: string
37
- tts: {
38
- chat: TtsVoiceEntry[]
39
- bot: TtsVoiceEntry[]
40
- openaiApiKey?: string
41
- elevenlabsApiKey?: string
42
- tiktokSessionId?: string
43
- }
29
+ autonomyMode?: AutonomyMode
44
30
  vibe: {
45
31
  enabled: boolean
46
32
  intervalMs: number
47
33
  idleAfterMs: number
48
34
  sleepAfterIdleMs: number
35
+ batchWindowMs: number
49
36
  prompt?: string
50
37
  }
51
38
  chat: {
@@ -85,10 +72,6 @@ export class CrawdBackend {
85
72
  private config: CrawdConfig
86
73
  private logger: CrawdLogger
87
74
 
88
- private openai: OpenAI | null = null
89
- private elevenlabs: any = null
90
- private ttsDir: string
91
- private backendUrl: string
92
75
  private buildVersion: string
93
76
 
94
77
  private chatManager: ChatManager | null = null
@@ -104,17 +87,7 @@ export class CrawdBackend {
104
87
  this.config = config
105
88
  this.logger = logger ?? defaultLogger
106
89
  this.fastify = Fastify({ logger: true })
107
- this.ttsDir = join(process.cwd(), 'tmp', 'tts')
108
- this.backendUrl = config.backendUrl ?? `http://localhost:${config.port}`
109
90
  this.buildVersion = randomUUID()
110
-
111
- // Initialize TTS providers based on config
112
- if (config.tts.openaiApiKey) {
113
- this.openai = new OpenAI({ apiKey: config.tts.openaiApiKey })
114
- }
115
- if (config.tts.tiktokSessionId) {
116
- configureTikTokTTS(config.tts.tiktokSessionId)
117
- }
118
91
  }
119
92
 
120
93
  // =========================================================================
@@ -122,23 +95,7 @@ export class CrawdBackend {
122
95
  // =========================================================================
123
96
 
124
97
  async start(): Promise<void> {
125
- // Lazy-init ElevenLabs (optional dep)
126
- if (this.config.tts.elevenlabsApiKey && !this.elevenlabs) {
127
- try {
128
- const { ElevenLabsClient } = await import('@elevenlabs/elevenlabs-js')
129
- this.elevenlabs = new ElevenLabsClient({ apiKey: this.config.tts.elevenlabsApiKey })
130
- } catch {
131
- this.logger.warn('ElevenLabs SDK not installed, ElevenLabs TTS disabled')
132
- }
133
- }
134
-
135
98
  await this.fastify.register(cors, { origin: true })
136
- await mkdir(this.ttsDir, { recursive: true })
137
- await this.fastify.register(fastifyStatic, {
138
- root: this.ttsDir,
139
- prefix: '/tts/',
140
- decorateReply: false,
141
- })
142
99
 
143
100
  this.io = new Server(this.fastify.server, {
144
101
  cors: { origin: '*' },
@@ -173,7 +130,7 @@ export class CrawdBackend {
173
130
  // Public API (used by plugin tool handlers)
174
131
  // =========================================================================
175
132
 
176
- /** Speak on the livestream — emits overlay event + TTS. Blocks until overlay finishes playing. */
133
+ /** Speak on the livestream — emits text-only overlay event. Blocks until overlay finishes. */
177
134
  async handleTalk(text: string): Promise<{ spoken: boolean }> {
178
135
  if (!text || typeof text !== 'string') {
179
136
  return { spoken: false }
@@ -182,23 +139,15 @@ export class CrawdBackend {
182
139
  this.coordinator?.notifySpeech()
183
140
 
184
141
  const id = randomUUID()
185
- try {
186
- const tts = await this.generateTTSWithFallback(text, this.config.tts.bot)
187
- this.logger.info(`TTS generated: ${tts.url}`)
188
- this.io.emit('crawd:talk', { id, message: text, ttsUrl: tts.url, ttsProvider: tts.provider })
189
- } catch (e) {
190
- this.logger.error('Failed to generate TTS, emitting without audio', e)
191
- this.io.emit('crawd:talk', { id, message: text, ttsUrl: '' })
192
- }
142
+ this.io.emit('crawd:talk', { id, message: text })
193
143
 
194
144
  await this.waitForAck(id)
195
145
  return { spoken: true }
196
146
  }
197
147
 
198
148
  /**
199
- * Reply to a chat message — reads original aloud (chat voice),
200
- * then speaks bot reply (bot voice). Emits `crawd:reply-turn`.
201
- * Blocks until overlay finishes playing both audios.
149
+ * Reply to a chat message — emits text-only overlay event with chat + bot message.
150
+ * Blocks until overlay finishes.
202
151
  */
203
152
  async handleReply(
204
153
  text: string,
@@ -211,34 +160,44 @@ export class CrawdBackend {
211
160
  this.coordinator?.notifySpeech()
212
161
 
213
162
  const id = randomUUID()
214
- try {
215
- const [chatTts, botTts] = await Promise.all([
216
- this.generateTTSWithFallback(`Chat says: ${chat.message}`, this.config.tts.chat),
217
- this.generateTTSWithFallback(text, this.config.tts.bot),
218
- ])
219
- this.io.emit('crawd:reply-turn', {
220
- id,
221
- chat: { username: chat.username, message: chat.message },
222
- botMessage: text,
223
- chatTtsUrl: chatTts.url,
224
- botTtsUrl: botTts.url,
225
- chatTtsProvider: chatTts.provider,
226
- botTtsProvider: botTts.provider,
227
- })
228
- } catch (e) {
229
- this.logger.error('Failed to generate reply-turn TTS, falling back to talk', e)
230
- try {
231
- const tts = await this.generateTTSWithFallback(text, this.config.tts.bot)
232
- this.io.emit('crawd:talk', { id, message: text, ttsUrl: tts.url, ttsProvider: tts.provider })
233
- } catch {
234
- this.io.emit('crawd:talk', { id, message: text, ttsUrl: '' })
235
- }
236
- }
163
+ this.io.emit('crawd:reply-turn', {
164
+ id,
165
+ chat: { username: chat.username, message: chat.message },
166
+ botMessage: text,
167
+ })
237
168
 
238
169
  await this.waitForAck(id)
239
170
  return { spoken: true }
240
171
  }
241
172
 
173
+ // =========================================================================
174
+ // Plan API (used by plugin tool handlers)
175
+ // =========================================================================
176
+
177
+ setPlan(goal: string, steps: string[]): { plan: Plan } | { error: string } {
178
+ if (!this.coordinator) return { error: 'Coordinator not enabled' }
179
+ const plan = this.coordinator.setPlan(goal, steps)
180
+ return { plan }
181
+ }
182
+
183
+ markPlanStepDone(step: number): { plan: Plan } | { error: string } {
184
+ if (!this.coordinator) return { error: 'Coordinator not enabled' }
185
+ const plan = this.coordinator.markStepDone(step)
186
+ if (!plan) return { error: 'No active plan or invalid step index' }
187
+ return { plan }
188
+ }
189
+
190
+ abandonPlan(): { plan: Plan } | { error: string } {
191
+ if (!this.coordinator) return { error: 'Coordinator not enabled' }
192
+ const plan = this.coordinator.abandonPlan()
193
+ if (!plan) return { error: 'No active plan to abandon' }
194
+ return { plan }
195
+ }
196
+
197
+ getPlan(): { plan: Plan | null } {
198
+ return { plan: this.coordinator?.getPlan() ?? null }
199
+ }
200
+
242
201
  getIO(): Server {
243
202
  return this.io
244
203
  }
@@ -265,95 +224,6 @@ export class CrawdBackend {
265
224
  }
266
225
  }
267
226
 
268
- // =========================================================================
269
- // TTS (with ordered fallback chain)
270
- // =========================================================================
271
-
272
- async generateTTSWithFallback(text: string, chain: TtsVoiceEntry[]): Promise<{ url: string; provider: TtsVoiceEntry['provider'] }> {
273
- let lastError: Error | null = null
274
-
275
- for (const entry of chain) {
276
- try {
277
- let url: string
278
- switch (entry.provider) {
279
- case 'elevenlabs':
280
- url = await this.generateElevenLabsTTS(text, entry.voice)
281
- break
282
- case 'openai':
283
- url = await this.generateOpenAITTS(text, entry.voice)
284
- break
285
- case 'tiktok':
286
- url = await this.generateTikTokTTSFile(text, entry.voice)
287
- break
288
- }
289
- return { url, provider: entry.provider }
290
- } catch (e) {
291
- lastError = e instanceof Error ? e : new Error(String(e))
292
- this.logger.warn(`TTS ${entry.provider}/${entry.voice} failed: ${lastError.message}, trying next...`)
293
- }
294
- }
295
-
296
- throw lastError ?? new Error('No TTS providers configured')
297
- }
298
-
299
- private async generateOpenAITTS(text: string, voice: string): Promise<string> {
300
- if (!this.openai) throw new Error('OpenAI not configured (missing apiKey)')
301
-
302
- const response = await this.openai.audio.speech.create({
303
- model: 'gpt-4o-mini-tts',
304
- voice: voice as 'onyx',
305
- input: text,
306
- })
307
-
308
- const buffer = Buffer.from(await response.arrayBuffer())
309
- return await this.saveTTSFile(buffer)
310
- }
311
-
312
- private async generateElevenLabsTTS(text: string, voiceId: string): Promise<string> {
313
- if (!this.elevenlabs) throw new Error('ElevenLabs not configured (missing apiKey)')
314
-
315
- const audio = await this.elevenlabs.textToSpeech.convert(voiceId, {
316
- modelId: 'eleven_multilingual_v2',
317
- text,
318
- outputFormat: 'mp3_44100_128',
319
- voiceSettings: {
320
- stability: 0,
321
- similarityBoost: 1.0,
322
- useSpeakerBoost: true,
323
- speed: 1.0,
324
- },
325
- })
326
-
327
- const response = new Response(audio as any)
328
- const arrayBuffer = await response.arrayBuffer()
329
- const buffer = Buffer.from(arrayBuffer)
330
-
331
- // Check if response is valid MP3
332
- const isMP3 =
333
- (buffer[0] === 0x49 && buffer[1] === 0x44 && buffer[2] === 0x33) ||
334
- (buffer[0] === 0xff && (buffer[1] & 0xe0) === 0xe0)
335
-
336
- if (!isMP3) {
337
- const preview = buffer.subarray(0, 200).toString('utf-8')
338
- throw new Error(`ElevenLabs returned non-audio response: ${preview.slice(0, 100)}`)
339
- }
340
-
341
- return await this.saveTTSFile(buffer)
342
- }
343
-
344
- private async generateTikTokTTSFile(text: string, voice?: string): Promise<string> {
345
- const buffer = await generateTikTokTTS(text, voice)
346
- return await this.saveTTSFile(buffer)
347
- }
348
-
349
- private async saveTTSFile(buffer: Buffer): Promise<string> {
350
- const filename = `${randomUUID()}.mp3`
351
- await mkdir(this.ttsDir, { recursive: true })
352
- await writeFile(join(this.ttsDir, filename), buffer)
353
- this.logger.info(`TTS file written: ${filename}, size: ${buffer.length} bytes`)
354
- return `${this.backendUrl}/tts/${filename}`
355
- }
356
-
357
227
  // =========================================================================
358
228
  // Chat system + Coordinator
359
229
  // =========================================================================
@@ -387,10 +257,11 @@ export class CrawdBackend {
387
257
  )
388
258
 
389
259
  const coordConfig: Partial<CoordinatorConfig> = {
390
- vibeEnabled: this.config.vibe.enabled,
260
+ autonomyMode: this.config.autonomyMode ?? 'vibe',
391
261
  vibeIntervalMs: this.config.vibe.intervalMs,
392
262
  idleAfterMs: this.config.vibe.idleAfterMs,
393
263
  sleepAfterIdleMs: this.config.vibe.sleepAfterIdleMs,
264
+ batchWindowMs: this.config.vibe.batchWindowMs,
394
265
  }
395
266
  if (this.config.vibe.prompt) {
396
267
  coordConfig.vibePrompt = this.config.vibe.prompt
@@ -408,6 +279,14 @@ export class CrawdBackend {
408
279
  this.io.emit('crawd:status', { status: 'vibing' })
409
280
  } else if (event.type === 'chatProcessed') {
410
281
  this.io.emit('crawd:status', { status: 'chatting' })
282
+ } else if (event.type === 'planNudgeExecuted' && !event.skipped) {
283
+ this.io.emit('crawd:status', { status: 'planning' })
284
+ } else if (event.type === 'planCreated') {
285
+ this.io.emit('crawd:plan', { type: 'created', planId: event.planId, goal: event.goal })
286
+ } else if (event.type === 'planCompleted') {
287
+ this.io.emit('crawd:plan', { type: 'completed', planId: event.planId })
288
+ } else if (event.type === 'planAbandoned') {
289
+ this.io.emit('crawd:plan', { type: 'abandoned', planId: event.planId })
411
290
  }
412
291
  })
413
292
 
@@ -432,6 +311,11 @@ export class CrawdBackend {
432
311
  socket.emit('crawd:mcap', { mcap: this.latestMcap })
433
312
  }
434
313
 
314
+ // Sync current coordinator state so the overlay knows the initial animation
315
+ if (this.coordinator) {
316
+ socket.emit('crawd:status', { status: this.coordinator.state })
317
+ }
318
+
435
319
  socket.on('crawd:talk:done', (data: { id?: string }) => {
436
320
  if (data?.id) {
437
321
  this.logger.info(`Talk ack received: ${data.id}`)
@@ -495,6 +379,10 @@ export class CrawdBackend {
495
379
  return { enabled: true, ...this.coordinator.getState() }
496
380
  })
497
381
 
382
+ this.fastify.get('/plan', async () => {
383
+ return this.getPlan()
384
+ })
385
+
498
386
  this.fastify.post<{ Body: Partial<CoordinatorConfig> }>(
499
387
  '/coordinator/config',
500
388
  async (request, reply) => {
@@ -537,25 +425,14 @@ export class CrawdBackend {
537
425
  return reply.status(400).send({ error: 'username, message, and response are required' })
538
426
  }
539
427
 
540
- try {
541
- const [chatTts, botTts] = await Promise.all([
542
- this.generateTTSWithFallback(`Chat says: ${message}`, this.config.tts.chat),
543
- this.generateTTSWithFallback(response, this.config.tts.bot),
544
- ])
545
- this.io.emit('crawd:reply-turn', {
546
- id: randomUUID(),
547
- chat: { username, message },
548
- botMessage: response,
549
- chatTtsUrl: chatTts.url,
550
- botTtsUrl: botTts.url,
551
- chatTtsProvider: chatTts.provider,
552
- botTtsProvider: botTts.provider,
553
- })
554
- return { ok: true }
555
- } catch (e) {
556
- this.fastify.log.error(e, 'failed to generate mock turn TTS')
557
- return reply.status(500).send({ error: 'Failed to generate TTS' })
558
- }
428
+ const id = randomUUID()
429
+ this.io.emit('crawd:reply-turn', {
430
+ id,
431
+ chat: { username, message },
432
+ botMessage: response,
433
+ })
434
+
435
+ return { ok: true, id }
559
436
  },
560
437
  )
561
438
  }
@@ -585,41 +462,20 @@ export class CrawdBackend {
585
462
 
586
463
  export function configFromEnv(): CrawdConfig {
587
464
  const port = Number(process.env.PORT || 4000)
588
-
589
- const botChain: TtsVoiceEntry[] = []
590
- const chatChain: TtsVoiceEntry[] = []
591
-
592
- if (process.env.ELEVENLABS_API_KEY) {
593
- botChain.push({ provider: 'elevenlabs', voice: process.env.TTS_BOT_VOICE || 'TX3LPaxmHKxFdv7VOQHJ' })
594
- }
595
- if (process.env.OPENAI_API_KEY) {
596
- botChain.push({ provider: 'openai', voice: process.env.TTS_BOT_VOICE || 'onyx' })
597
- }
598
-
599
- if (process.env.TIKTOK_SESSION_ID) {
600
- chatChain.push({ provider: 'tiktok', voice: process.env.TTS_CHAT_VOICE || 'en_us_002' })
601
- }
602
- if (process.env.OPENAI_API_KEY) {
603
- chatChain.push({ provider: 'openai', voice: process.env.TTS_CHAT_VOICE || 'onyx' })
604
- }
465
+ const rawMode = process.env.AUTONOMY_MODE
466
+ const autonomyMode = (rawMode === 'vibe' || rawMode === 'plan' || rawMode === 'none') ? rawMode : undefined
605
467
 
606
468
  return {
607
469
  enabled: true,
608
470
  port,
609
471
  bindHost: process.env.BIND_HOST || '0.0.0.0',
610
- backendUrl: process.env.BACKEND_URL || `http://localhost:${port}`,
611
- tts: {
612
- chat: chatChain,
613
- bot: botChain,
614
- openaiApiKey: process.env.OPENAI_API_KEY,
615
- elevenlabsApiKey: process.env.ELEVENLABS_API_KEY,
616
- tiktokSessionId: process.env.TIKTOK_SESSION_ID,
617
- },
472
+ autonomyMode,
618
473
  vibe: {
619
474
  enabled: process.env.VIBE_ENABLED !== 'false',
620
475
  intervalMs: Number(process.env.VIBE_INTERVAL_MS || 30_000),
621
476
  idleAfterMs: Number(process.env.IDLE_AFTER_MS || 180_000),
622
477
  sleepAfterIdleMs: Number(process.env.SLEEP_AFTER_IDLE_MS || 180_000),
478
+ batchWindowMs: Number(process.env.CHAT_BATCH_WINDOW_MS || 20_000),
623
479
  prompt: process.env.VIBE_PROMPT,
624
480
  },
625
481
  chat: {
@@ -110,6 +110,9 @@ crawd config set vibe.idleAfter 180
110
110
  # Seconds of inactivity before going to sleep (default: 360)
111
111
  crawd config set vibe.sleepAfter 360
112
112
 
113
+ # Chat batch throttle window in seconds (default: 20)
114
+ crawd config set vibe.chatBatchWindow 20
115
+
113
116
  # Disable vibing entirely
114
117
  crawd config set vibe.enabled false
115
118
  \`\`\`
@@ -54,6 +54,7 @@ function buildEnv(config: Config): NodeJS.ProcessEnv {
54
54
  env.VIBE_INTERVAL_MS = String(config.vibe.interval * 1000)
55
55
  env.IDLE_AFTER_MS = String(config.vibe.idleAfter * 1000)
56
56
  env.SLEEP_AFTER_IDLE_MS = String((config.vibe.sleepAfter - config.vibe.idleAfter) * 1000)
57
+ env.CHAT_BATCH_WINDOW_MS = String(config.vibe.chatBatchWindow * 1000)
57
58
 
58
59
  env.YOUTUBE_ENABLED = String(config.chat.youtube.enabled)
59
60
  if (config.chat.youtube.videoId) {
@@ -50,6 +50,8 @@ export const ConfigSchema = z.object({
50
50
  idleAfter: z.number().default(180),
51
51
  /** Seconds of inactivity before going to sleep (must be > idleAfter) */
52
52
  sleepAfter: z.number().default(360),
53
+ /** Seconds for chat batch throttle window (leading-edge) */
54
+ chatBatchWindow: z.number().default(20),
53
55
  }).default({}),
54
56
 
55
57
  /** Stream configuration */