crawd 0.8.7 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,46 +1,32 @@
1
1
  /**
2
- * CrawdBackend — encapsulates the Fastify+Socket.IO server, TTS, coordinator,
2
+ * CrawdBackend — encapsulates the Fastify+Socket.IO server, coordinator,
3
3
  * and chat system. Used by both standalone mode (backend/index.ts) and the
4
4
  * OpenClaw plugin (plugin.ts).
5
+ *
6
+ * TTS generation has been moved to the overlay (Next.js server actions).
7
+ * This backend now sends text-only events.
5
8
  */
6
9
  import { randomUUID } from 'crypto'
7
- import { writeFile, mkdir } from 'fs/promises'
8
- import { join } from 'path'
9
10
  import Fastify, { type FastifyInstance } from 'fastify'
10
- import fastifyStatic from '@fastify/static'
11
11
  import cors from '@fastify/cors'
12
12
  import { Server } from 'socket.io'
13
- import OpenAI from 'openai'
14
13
  import { pumpfun } from '../lib/pumpfun/v2/index.js'
15
14
  import { ChatManager } from '../lib/chat/manager.js'
16
15
  import { PumpFunChatClient } from '../lib/chat/pumpfun/client.js'
17
16
  import { YouTubeChatClient } from '../lib/chat/youtube/client.js'
18
- import { Coordinator, OneShotGateway, type CoordinatorConfig, type CoordinatorEvent } from './coordinator.js'
17
+ import { Coordinator, OneShotGateway, type CoordinatorConfig, type CoordinatorEvent, type Plan, type AutonomyMode } from './coordinator.js'
19
18
  import { generateShortId } from '../lib/chat/types.js'
20
- import { configureTikTokTTS, generateTikTokTTS } from '../lib/tts/tiktok.js'
21
19
  import type { ChatMessage } from '../lib/chat/types.js'
22
20
 
23
21
  // ---------------------------------------------------------------------------
24
22
  // Config types
25
23
  // ---------------------------------------------------------------------------
26
24
 
27
- export type TtsVoiceEntry = {
28
- provider: 'openai' | 'elevenlabs' | 'tiktok'
29
- voice: string
30
- }
31
-
32
25
  export type CrawdConfig = {
33
26
  enabled: boolean
34
27
  port: number
35
28
  bindHost: string
36
- backendUrl?: string
37
- tts: {
38
- chat: TtsVoiceEntry[]
39
- bot: TtsVoiceEntry[]
40
- openaiApiKey?: string
41
- elevenlabsApiKey?: string
42
- tiktokSessionId?: string
43
- }
29
+ autonomyMode?: AutonomyMode
44
30
  vibe: {
45
31
  enabled: boolean
46
32
  intervalMs: number
@@ -86,10 +72,6 @@ export class CrawdBackend {
86
72
  private config: CrawdConfig
87
73
  private logger: CrawdLogger
88
74
 
89
- private openai: OpenAI | null = null
90
- private elevenlabs: any = null
91
- private ttsDir: string
92
- private backendUrl: string
93
75
  private buildVersion: string
94
76
 
95
77
  private chatManager: ChatManager | null = null
@@ -105,17 +87,7 @@ export class CrawdBackend {
105
87
  this.config = config
106
88
  this.logger = logger ?? defaultLogger
107
89
  this.fastify = Fastify({ logger: true })
108
- this.ttsDir = join(process.cwd(), 'tmp', 'tts')
109
- this.backendUrl = config.backendUrl ?? `http://localhost:${config.port}`
110
90
  this.buildVersion = randomUUID()
111
-
112
- // Initialize TTS providers based on config
113
- if (config.tts.openaiApiKey) {
114
- this.openai = new OpenAI({ apiKey: config.tts.openaiApiKey })
115
- }
116
- if (config.tts.tiktokSessionId) {
117
- configureTikTokTTS(config.tts.tiktokSessionId)
118
- }
119
91
  }
120
92
 
121
93
  // =========================================================================
@@ -123,23 +95,7 @@ export class CrawdBackend {
123
95
  // =========================================================================
124
96
 
125
97
  async start(): Promise<void> {
126
- // Lazy-init ElevenLabs (optional dep)
127
- if (this.config.tts.elevenlabsApiKey && !this.elevenlabs) {
128
- try {
129
- const { ElevenLabsClient } = await import('@elevenlabs/elevenlabs-js')
130
- this.elevenlabs = new ElevenLabsClient({ apiKey: this.config.tts.elevenlabsApiKey })
131
- } catch {
132
- this.logger.warn('ElevenLabs SDK not installed, ElevenLabs TTS disabled')
133
- }
134
- }
135
-
136
98
  await this.fastify.register(cors, { origin: true })
137
- await mkdir(this.ttsDir, { recursive: true })
138
- await this.fastify.register(fastifyStatic, {
139
- root: this.ttsDir,
140
- prefix: '/tts/',
141
- decorateReply: false,
142
- })
143
99
 
144
100
  this.io = new Server(this.fastify.server, {
145
101
  cors: { origin: '*' },
@@ -174,7 +130,7 @@ export class CrawdBackend {
174
130
  // Public API (used by plugin tool handlers)
175
131
  // =========================================================================
176
132
 
177
- /** Speak on the livestream — emits overlay event + TTS. Blocks until overlay finishes playing. */
133
+ /** Speak on the livestream — emits text-only overlay event. Blocks until overlay finishes. */
178
134
  async handleTalk(text: string): Promise<{ spoken: boolean }> {
179
135
  if (!text || typeof text !== 'string') {
180
136
  return { spoken: false }
@@ -183,23 +139,15 @@ export class CrawdBackend {
183
139
  this.coordinator?.notifySpeech()
184
140
 
185
141
  const id = randomUUID()
186
- try {
187
- const tts = await this.generateTTSWithFallback(text, this.config.tts.bot)
188
- this.logger.info(`TTS generated: ${tts.url}`)
189
- this.io.emit('crawd:talk', { id, message: text, ttsUrl: tts.url, ttsProvider: tts.provider })
190
- } catch (e) {
191
- this.logger.error('Failed to generate TTS, emitting without audio', e)
192
- this.io.emit('crawd:talk', { id, message: text, ttsUrl: '' })
193
- }
142
+ this.io.emit('crawd:talk', { id, message: text })
194
143
 
195
144
  await this.waitForAck(id)
196
145
  return { spoken: true }
197
146
  }
198
147
 
199
148
  /**
200
- * Reply to a chat message — reads original aloud (chat voice),
201
- * then speaks bot reply (bot voice). Emits `crawd:reply-turn`.
202
- * Blocks until overlay finishes playing both audios.
149
+ * Reply to a chat message — emits text-only overlay event with chat + bot message.
150
+ * Blocks until overlay finishes.
203
151
  */
204
152
  async handleReply(
205
153
  text: string,
@@ -212,34 +160,44 @@ export class CrawdBackend {
212
160
  this.coordinator?.notifySpeech()
213
161
 
214
162
  const id = randomUUID()
215
- try {
216
- const [chatTts, botTts] = await Promise.all([
217
- this.generateTTSWithFallback(`Chat says: ${chat.message}`, this.config.tts.chat),
218
- this.generateTTSWithFallback(text, this.config.tts.bot),
219
- ])
220
- this.io.emit('crawd:reply-turn', {
221
- id,
222
- chat: { username: chat.username, message: chat.message },
223
- botMessage: text,
224
- chatTtsUrl: chatTts.url,
225
- botTtsUrl: botTts.url,
226
- chatTtsProvider: chatTts.provider,
227
- botTtsProvider: botTts.provider,
228
- })
229
- } catch (e) {
230
- this.logger.error('Failed to generate reply-turn TTS, falling back to talk', e)
231
- try {
232
- const tts = await this.generateTTSWithFallback(text, this.config.tts.bot)
233
- this.io.emit('crawd:talk', { id, message: text, ttsUrl: tts.url, ttsProvider: tts.provider })
234
- } catch {
235
- this.io.emit('crawd:talk', { id, message: text, ttsUrl: '' })
236
- }
237
- }
163
+ this.io.emit('crawd:reply-turn', {
164
+ id,
165
+ chat: { username: chat.username, message: chat.message },
166
+ botMessage: text,
167
+ })
238
168
 
239
169
  await this.waitForAck(id)
240
170
  return { spoken: true }
241
171
  }
242
172
 
173
+ // =========================================================================
174
+ // Plan API (used by plugin tool handlers)
175
+ // =========================================================================
176
+
177
+ setPlan(goal: string, steps: string[]): { plan: Plan } | { error: string } {
178
+ if (!this.coordinator) return { error: 'Coordinator not enabled' }
179
+ const plan = this.coordinator.setPlan(goal, steps)
180
+ return { plan }
181
+ }
182
+
183
+ markPlanStepDone(step: number): { plan: Plan } | { error: string } {
184
+ if (!this.coordinator) return { error: 'Coordinator not enabled' }
185
+ const plan = this.coordinator.markStepDone(step)
186
+ if (!plan) return { error: 'No active plan or invalid step index' }
187
+ return { plan }
188
+ }
189
+
190
+ abandonPlan(): { plan: Plan } | { error: string } {
191
+ if (!this.coordinator) return { error: 'Coordinator not enabled' }
192
+ const plan = this.coordinator.abandonPlan()
193
+ if (!plan) return { error: 'No active plan to abandon' }
194
+ return { plan }
195
+ }
196
+
197
+ getPlan(): { plan: Plan | null } {
198
+ return { plan: this.coordinator?.getPlan() ?? null }
199
+ }
200
+
243
201
  getIO(): Server {
244
202
  return this.io
245
203
  }
@@ -266,95 +224,6 @@ export class CrawdBackend {
266
224
  }
267
225
  }
268
226
 
269
- // =========================================================================
270
- // TTS (with ordered fallback chain)
271
- // =========================================================================
272
-
273
- async generateTTSWithFallback(text: string, chain: TtsVoiceEntry[]): Promise<{ url: string; provider: TtsVoiceEntry['provider'] }> {
274
- let lastError: Error | null = null
275
-
276
- for (const entry of chain) {
277
- try {
278
- let url: string
279
- switch (entry.provider) {
280
- case 'elevenlabs':
281
- url = await this.generateElevenLabsTTS(text, entry.voice)
282
- break
283
- case 'openai':
284
- url = await this.generateOpenAITTS(text, entry.voice)
285
- break
286
- case 'tiktok':
287
- url = await this.generateTikTokTTSFile(text, entry.voice)
288
- break
289
- }
290
- return { url, provider: entry.provider }
291
- } catch (e) {
292
- lastError = e instanceof Error ? e : new Error(String(e))
293
- this.logger.warn(`TTS ${entry.provider}/${entry.voice} failed: ${lastError.message}, trying next...`)
294
- }
295
- }
296
-
297
- throw lastError ?? new Error('No TTS providers configured')
298
- }
299
-
300
- private async generateOpenAITTS(text: string, voice: string): Promise<string> {
301
- if (!this.openai) throw new Error('OpenAI not configured (missing apiKey)')
302
-
303
- const response = await this.openai.audio.speech.create({
304
- model: 'gpt-4o-mini-tts',
305
- voice: voice as 'onyx',
306
- input: text,
307
- })
308
-
309
- const buffer = Buffer.from(await response.arrayBuffer())
310
- return await this.saveTTSFile(buffer)
311
- }
312
-
313
- private async generateElevenLabsTTS(text: string, voiceId: string): Promise<string> {
314
- if (!this.elevenlabs) throw new Error('ElevenLabs not configured (missing apiKey)')
315
-
316
- const audio = await this.elevenlabs.textToSpeech.convert(voiceId, {
317
- modelId: 'eleven_multilingual_v2',
318
- text,
319
- outputFormat: 'mp3_44100_128',
320
- voiceSettings: {
321
- stability: 0,
322
- similarityBoost: 1.0,
323
- useSpeakerBoost: true,
324
- speed: 1.0,
325
- },
326
- })
327
-
328
- const response = new Response(audio as any)
329
- const arrayBuffer = await response.arrayBuffer()
330
- const buffer = Buffer.from(arrayBuffer)
331
-
332
- // Check if response is valid MP3
333
- const isMP3 =
334
- (buffer[0] === 0x49 && buffer[1] === 0x44 && buffer[2] === 0x33) ||
335
- (buffer[0] === 0xff && (buffer[1] & 0xe0) === 0xe0)
336
-
337
- if (!isMP3) {
338
- const preview = buffer.subarray(0, 200).toString('utf-8')
339
- throw new Error(`ElevenLabs returned non-audio response: ${preview.slice(0, 100)}`)
340
- }
341
-
342
- return await this.saveTTSFile(buffer)
343
- }
344
-
345
- private async generateTikTokTTSFile(text: string, voice?: string): Promise<string> {
346
- const buffer = await generateTikTokTTS(text, voice)
347
- return await this.saveTTSFile(buffer)
348
- }
349
-
350
- private async saveTTSFile(buffer: Buffer): Promise<string> {
351
- const filename = `${randomUUID()}.mp3`
352
- await mkdir(this.ttsDir, { recursive: true })
353
- await writeFile(join(this.ttsDir, filename), buffer)
354
- this.logger.info(`TTS file written: ${filename}, size: ${buffer.length} bytes`)
355
- return `${this.backendUrl}/tts/${filename}`
356
- }
357
-
358
227
  // =========================================================================
359
228
  // Chat system + Coordinator
360
229
  // =========================================================================
@@ -388,7 +257,7 @@ export class CrawdBackend {
388
257
  )
389
258
 
390
259
  const coordConfig: Partial<CoordinatorConfig> = {
391
- vibeEnabled: this.config.vibe.enabled,
260
+ autonomyMode: this.config.autonomyMode ?? 'vibe',
392
261
  vibeIntervalMs: this.config.vibe.intervalMs,
393
262
  idleAfterMs: this.config.vibe.idleAfterMs,
394
263
  sleepAfterIdleMs: this.config.vibe.sleepAfterIdleMs,
@@ -410,6 +279,14 @@ export class CrawdBackend {
410
279
  this.io.emit('crawd:status', { status: 'vibing' })
411
280
  } else if (event.type === 'chatProcessed') {
412
281
  this.io.emit('crawd:status', { status: 'chatting' })
282
+ } else if (event.type === 'planNudgeExecuted' && !event.skipped) {
283
+ this.io.emit('crawd:status', { status: 'planning' })
284
+ } else if (event.type === 'planCreated') {
285
+ this.io.emit('crawd:plan', { type: 'created', planId: event.planId, goal: event.goal })
286
+ } else if (event.type === 'planCompleted') {
287
+ this.io.emit('crawd:plan', { type: 'completed', planId: event.planId })
288
+ } else if (event.type === 'planAbandoned') {
289
+ this.io.emit('crawd:plan', { type: 'abandoned', planId: event.planId })
413
290
  }
414
291
  })
415
292
 
@@ -502,6 +379,10 @@ export class CrawdBackend {
502
379
  return { enabled: true, ...this.coordinator.getState() }
503
380
  })
504
381
 
382
+ this.fastify.get('/plan', async () => {
383
+ return this.getPlan()
384
+ })
385
+
505
386
  this.fastify.post<{ Body: Partial<CoordinatorConfig> }>(
506
387
  '/coordinator/config',
507
388
  async (request, reply) => {
@@ -544,25 +425,14 @@ export class CrawdBackend {
544
425
  return reply.status(400).send({ error: 'username, message, and response are required' })
545
426
  }
546
427
 
547
- try {
548
- const [chatTts, botTts] = await Promise.all([
549
- this.generateTTSWithFallback(`Chat says: ${message}`, this.config.tts.chat),
550
- this.generateTTSWithFallback(response, this.config.tts.bot),
551
- ])
552
- this.io.emit('crawd:reply-turn', {
553
- id: randomUUID(),
554
- chat: { username, message },
555
- botMessage: response,
556
- chatTtsUrl: chatTts.url,
557
- botTtsUrl: botTts.url,
558
- chatTtsProvider: chatTts.provider,
559
- botTtsProvider: botTts.provider,
560
- })
561
- return { ok: true }
562
- } catch (e) {
563
- this.fastify.log.error(e, 'failed to generate mock turn TTS')
564
- return reply.status(500).send({ error: 'Failed to generate TTS' })
565
- }
428
+ const id = randomUUID()
429
+ this.io.emit('crawd:reply-turn', {
430
+ id,
431
+ chat: { username, message },
432
+ botMessage: response,
433
+ })
434
+
435
+ return { ok: true, id }
566
436
  },
567
437
  )
568
438
  }
@@ -592,36 +462,14 @@ export class CrawdBackend {
592
462
 
593
463
  export function configFromEnv(): CrawdConfig {
594
464
  const port = Number(process.env.PORT || 4000)
595
-
596
- const botChain: TtsVoiceEntry[] = []
597
- const chatChain: TtsVoiceEntry[] = []
598
-
599
- if (process.env.ELEVENLABS_API_KEY) {
600
- botChain.push({ provider: 'elevenlabs', voice: process.env.TTS_BOT_VOICE || 'TX3LPaxmHKxFdv7VOQHJ' })
601
- }
602
- if (process.env.OPENAI_API_KEY) {
603
- botChain.push({ provider: 'openai', voice: process.env.TTS_BOT_VOICE || 'onyx' })
604
- }
605
-
606
- if (process.env.TIKTOK_SESSION_ID) {
607
- chatChain.push({ provider: 'tiktok', voice: process.env.TTS_CHAT_VOICE || 'en_us_002' })
608
- }
609
- if (process.env.OPENAI_API_KEY) {
610
- chatChain.push({ provider: 'openai', voice: process.env.TTS_CHAT_VOICE || 'onyx' })
611
- }
465
+ const rawMode = process.env.AUTONOMY_MODE
466
+ const autonomyMode = (rawMode === 'vibe' || rawMode === 'plan' || rawMode === 'none') ? rawMode : undefined
612
467
 
613
468
  return {
614
469
  enabled: true,
615
470
  port,
616
471
  bindHost: process.env.BIND_HOST || '0.0.0.0',
617
- backendUrl: process.env.BACKEND_URL || `http://localhost:${port}`,
618
- tts: {
619
- chat: chatChain,
620
- bot: botChain,
621
- openaiApiKey: process.env.OPENAI_API_KEY,
622
- elevenlabsApiKey: process.env.ELEVENLABS_API_KEY,
623
- tiktokSessionId: process.env.TIKTOK_SESSION_ID,
624
- },
472
+ autonomyMode,
625
473
  vibe: {
626
474
  enabled: process.env.VIBE_ENABLED !== 'false',
627
475
  intervalMs: Number(process.env.VIBE_INTERVAL_MS || 30_000),
package/src/plugin.ts CHANGED
@@ -7,7 +7,7 @@
7
7
  * - `crawd` service (Fastify + Socket.IO backend)
8
8
  */
9
9
  import { Type } from '@sinclair/typebox'
10
- import { CrawdBackend, type CrawdConfig, type TtsVoiceEntry } from './backend/server.js'
10
+ import { CrawdBackend, type CrawdConfig } from './backend/server.js'
11
11
 
12
12
  // Minimal plugin types — the real types come from openclaw/plugin-sdk at runtime.
13
13
  // Defined inline so this package builds without the openclaw peerDep installed.
@@ -36,18 +36,6 @@ type PluginDefinition = {
36
36
  // Config parsing — transform pluginConfig → CrawdConfig
37
37
  // ---------------------------------------------------------------------------
38
38
 
39
- function parseTtsChain(raw: unknown): TtsVoiceEntry[] {
40
- if (!Array.isArray(raw)) return []
41
- return raw
42
- .filter((e): e is { provider: string; voice: string } =>
43
- e && typeof e === 'object' && typeof e.provider === 'string' && typeof e.voice === 'string',
44
- )
45
- .map((e) => ({
46
- provider: e.provider as TtsVoiceEntry['provider'],
47
- voice: e.voice,
48
- }))
49
- }
50
-
51
39
  /** Resolve gateway WebSocket URL from env/defaults (same logic as OpenClaw's callGateway) */
52
40
  function resolveGatewayUrl(port?: number): string {
53
41
  if (port) return `ws://127.0.0.1:${port}`
@@ -74,7 +62,6 @@ function resolveGatewayFromHost(api: PluginApi): { token?: string; port?: number
74
62
 
75
63
  function parsePluginConfig(raw: Record<string, unknown> | undefined): CrawdConfig {
76
64
  const cfg = raw ?? {}
77
- const tts = (cfg.tts ?? {}) as Record<string, unknown>
78
65
  const vibe = (cfg.vibe ?? {}) as Record<string, unknown>
79
66
  const chat = (cfg.chat ?? {}) as Record<string, unknown>
80
67
  const youtube = (chat.youtube ?? {}) as Record<string, unknown>
@@ -86,14 +73,6 @@ function parsePluginConfig(raw: Record<string, unknown> | undefined): CrawdConfi
86
73
  enabled: cfg.enabled !== false,
87
74
  port,
88
75
  bindHost: typeof cfg.bindHost === 'string' ? cfg.bindHost : '0.0.0.0',
89
- backendUrl: typeof cfg.backendUrl === 'string' ? cfg.backendUrl : `http://localhost:${port}`,
90
- tts: {
91
- chat: parseTtsChain(tts.chat),
92
- bot: parseTtsChain(tts.bot),
93
- openaiApiKey: typeof tts.openaiApiKey === 'string' ? tts.openaiApiKey : undefined,
94
- elevenlabsApiKey: typeof tts.elevenlabsApiKey === 'string' ? tts.elevenlabsApiKey : undefined,
95
- tiktokSessionId: typeof tts.tiktokSessionId === 'string' ? tts.tiktokSessionId : undefined,
96
- },
97
76
  vibe: {
98
77
  enabled: vibe.enabled !== false,
99
78
  intervalMs: typeof vibe.intervalMs === 'number' ? vibe.intervalMs : 10_000,
@@ -113,6 +92,10 @@ function parsePluginConfig(raw: Record<string, unknown> | undefined): CrawdConfi
113
92
  authToken: typeof pumpfun.authToken === 'string' ? pumpfun.authToken : undefined,
114
93
  },
115
94
  },
95
+ // Autonomy mode: vibe (periodic prompts), plan (goal-driven), none (disabled)
96
+ autonomyMode: cfg.autonomyMode === 'vibe' || cfg.autonomyMode === 'plan' || cfg.autonomyMode === 'none'
97
+ ? cfg.autonomyMode
98
+ : undefined,
116
99
  // Gateway: plugin config overrides, then env vars, then OpenClaw defaults
117
100
  gatewayUrl: typeof cfg.gatewayUrl === 'string' ? cfg.gatewayUrl
118
101
  : process.env.OPENCLAW_GATEWAY_URL ?? resolveGatewayUrl(),
@@ -136,12 +119,7 @@ const crawdConfigSchema = {
136
119
  enabled: { label: 'Enabled' },
137
120
  port: { label: 'Backend Port', placeholder: '4000' },
138
121
  bindHost: { label: 'Bind Host', placeholder: '0.0.0.0', advanced: true },
139
- backendUrl: { label: 'Backend URL', advanced: true, help: 'Public URL for TTS file serving' },
140
- 'tts.chat': { label: 'Chat TTS Voices', help: 'Ordered fallback chain [{provider, voice}]' },
141
- 'tts.bot': { label: 'Bot TTS Voices', help: 'Ordered fallback chain [{provider, voice}]' },
142
- 'tts.openaiApiKey': { label: 'OpenAI API Key', sensitive: true },
143
- 'tts.elevenlabsApiKey': { label: 'ElevenLabs API Key', sensitive: true },
144
- 'tts.tiktokSessionId': { label: 'TikTok Session ID', sensitive: true },
122
+ 'autonomyMode': { label: 'Autonomy Mode', help: 'vibe = timed prompts, plan = goal-driven loop, none = disabled' },
145
123
  'vibe.enabled': { label: 'Vibe Mode' },
146
124
  'vibe.intervalMs': { label: 'Vibe Interval (ms)', advanced: true },
147
125
  'vibe.idleAfterMs': { label: 'Idle After (ms)', advanced: true },
@@ -153,15 +131,15 @@ const crawdConfigSchema = {
153
131
  'chat.pumpfun.enabled': { label: 'PumpFun Chat' },
154
132
  'chat.pumpfun.tokenMint': { label: 'PumpFun Token Mint' },
155
133
  'chat.pumpfun.authToken': { label: 'PumpFun Auth Token', sensitive: true },
156
- gatewayUrl: { label: 'Gateway URL', advanced: true, help: 'Override auto-detected gateway URL (usually not needed)' },
157
- gatewayToken: { label: 'Gateway Token', advanced: true, sensitive: true, help: 'Override OPENCLAW_GATEWAY_TOKEN env var' },
134
+ gatewayUrl: { label: 'Gateway URL', help: 'WebSocket URL for agent triggering', advanced: true },
135
+ gatewayToken: { label: 'Gateway Token', sensitive: true },
158
136
  },
159
137
  }
160
138
 
161
139
  const plugin: PluginDefinition = {
162
140
  id: 'crawd',
163
141
  name: 'Crawd Livestream',
164
- description: 'crawd.bot plugin — AI agent livestreaming with TTS, chat integration, and OBS overlay',
142
+ description: 'crawd.bot plugin — AI agent livestreaming with chat integration and OBS overlay',
165
143
  configSchema: crawdConfigSchema,
166
144
 
167
145
  register(api: PluginApi) {
@@ -209,7 +187,7 @@ const plugin: PluginDefinition = {
209
187
  name: 'livestream_talk',
210
188
  label: 'Livestream Talk',
211
189
  description:
212
- 'Speak on the livestream unprompted. Shows a speech bubble on the overlay and generates TTS audio. Use for narration, vibes, and commentary — NOT for replying to chat (use livestream_reply for that).',
190
+ 'Speak on the livestream unprompted. Shows a speech bubble on the overlay. Use for narration, vibes, and commentary — NOT for replying to chat (use livestream_reply for that).',
213
191
  parameters: Type.Object({
214
192
  text: Type.String({ description: 'Message to speak on stream' }),
215
193
  }),
@@ -232,7 +210,7 @@ const plugin: PluginDefinition = {
232
210
  name: 'livestream_reply',
233
211
  label: 'Livestream Reply',
234
212
  description:
235
- 'Reply to a chat message on the livestream. Reads the original message aloud with the chat voice, then speaks your reply with the bot voice. Use this ONLY when responding to a specific viewer message.',
213
+ 'Reply to a chat message on the livestream. Shows the original message and your reply on the overlay. Use this ONLY when responding to a specific viewer message.',
236
214
  parameters: Type.Object({
237
215
  text: Type.String({ description: 'Your reply to the chat message' }),
238
216
  username: Type.String({ description: 'Username of the person you are replying to' }),
@@ -251,6 +229,117 @@ const plugin: PluginDefinition = {
251
229
  { name: 'livestream_reply' },
252
230
  )
253
231
 
232
+ // plan_set — create or replace current plan
233
+ api.registerTool(
234
+ {
235
+ name: 'plan_set',
236
+ label: 'Set Plan',
237
+ description:
238
+ 'Create or replace the current plan. Provide a goal and ordered steps. Any existing active plan is abandoned.',
239
+ parameters: Type.Object({
240
+ goal: Type.String({ description: 'The overall goal of the plan' }),
241
+ steps: Type.Array(Type.String(), { description: 'Ordered list of steps to accomplish the goal', minItems: 1, maxItems: 20 }),
242
+ }),
243
+ async execute(_toolCallId: string, params: unknown) {
244
+ const b = await ensureBackend()
245
+ const { goal, steps } = params as { goal: string; steps: string[] }
246
+ const result = b.setPlan(goal, steps)
247
+ if ('error' in result) {
248
+ return { content: [{ type: 'text', text: `Failed: ${result.error}` }] }
249
+ }
250
+ const stepList = result.plan.steps.map((s, i) => ` ${i}. ${s.description}`).join('\n')
251
+ return {
252
+ content: [{ type: 'text', text: `Plan created: ${goal}\n${stepList}` }],
253
+ details: result,
254
+ }
255
+ },
256
+ },
257
+ { name: 'plan_set' },
258
+ )
259
+
260
+ // plan_step_done — mark a step as complete
261
+ api.registerTool(
262
+ {
263
+ name: 'plan_step_done',
264
+ label: 'Plan Step Done',
265
+ description:
266
+ 'Mark a plan step as done by its 0-based index. The coordinator will nudge you for the next step.',
267
+ parameters: Type.Object({
268
+ step: Type.Number({ description: 'Zero-based index of the step to mark as done' }),
269
+ }),
270
+ async execute(_toolCallId: string, params: unknown) {
271
+ const b = await ensureBackend()
272
+ const { step } = params as { step: number }
273
+ const result = b.markPlanStepDone(step)
274
+ if ('error' in result) {
275
+ return { content: [{ type: 'text', text: `Failed: ${result.error}` }] }
276
+ }
277
+ const done = result.plan.steps.filter(s => s.status === 'done').length
278
+ const total = result.plan.steps.length
279
+ const isComplete = result.plan.status === 'completed'
280
+ return {
281
+ content: [{ type: 'text', text: isComplete
282
+ ? `Plan completed! All ${total} steps done.`
283
+ : `Step ${step} done (${done}/${total} complete).`
284
+ }],
285
+ details: result,
286
+ }
287
+ },
288
+ },
289
+ { name: 'plan_step_done' },
290
+ )
291
+
292
+ // plan_abandon — abandon current plan
293
+ api.registerTool(
294
+ {
295
+ name: 'plan_abandon',
296
+ label: 'Abandon Plan',
297
+ description:
298
+ 'Abandon the current plan. The coordinator will stop sending plan nudges.',
299
+ parameters: Type.Object({}),
300
+ async execute(_toolCallId: string, _params: unknown) {
301
+ const b = await ensureBackend()
302
+ const result = b.abandonPlan()
303
+ if ('error' in result) {
304
+ return { content: [{ type: 'text', text: `Failed: ${result.error}` }] }
305
+ }
306
+ return {
307
+ content: [{ type: 'text', text: `Plan abandoned: ${result.plan.goal}` }],
308
+ details: result,
309
+ }
310
+ },
311
+ },
312
+ { name: 'plan_abandon' },
313
+ )
314
+
315
+ // plan_get — view current plan state
316
+ api.registerTool(
317
+ {
318
+ name: 'plan_get',
319
+ label: 'Get Plan',
320
+ description:
321
+ 'View the current plan state including goal, steps, and progress.',
322
+ parameters: Type.Object({}),
323
+ async execute(_toolCallId: string, _params: unknown) {
324
+ const b = await ensureBackend()
325
+ const result = b.getPlan()
326
+ if (!result.plan) {
327
+ return { content: [{ type: 'text', text: 'No active plan.' }] }
328
+ }
329
+ const p = result.plan
330
+ const stepList = p.steps.map((s, i) => {
331
+ const marker = s.status === 'done' ? '[x]' : '[ ]'
332
+ return ` ${marker} ${i}. ${s.description}`
333
+ }).join('\n')
334
+ return {
335
+ content: [{ type: 'text', text: `Plan (${p.status}): ${p.goal}\n${stepList}` }],
336
+ details: result,
337
+ }
338
+ },
339
+ },
340
+ { name: 'plan_get' },
341
+ )
342
+
254
343
  // Service lifecycle
255
344
  api.registerService({
256
345
  id: 'crawd',
package/src/types.ts CHANGED
@@ -13,41 +13,22 @@ export type {
13
13
  SuperChatInfo,
14
14
  } from './lib/chat/types'
15
15
 
16
- /** TTS provider identifier */
17
- export type TtsProvider = 'openai' | 'elevenlabs' | 'tiktok'
18
-
19
16
  // --- Socket.IO event payloads ---
20
17
 
21
- /** Turn-based reply: chat message + bot response, each with TTS audio */
18
+ /** Turn-based reply: chat message + bot response (text only, TTS handled by overlay) */
22
19
  export type ReplyTurnEvent = {
23
- /** Correlation ID — overlay sends talk:done with this ID when both audios finish */
20
+ /** Correlation ID — overlay sends talk:done with this ID when finished */
24
21
  id: string
25
22
  chat: { username: string; message: string }
26
23
  botMessage: string
27
- chatTtsUrl: string
28
- botTtsUrl: string
29
- /** TTS provider used for the chat audio */
30
- chatTtsProvider?: TtsProvider
31
- /** TTS provider used for the bot audio */
32
- botTtsProvider?: TtsProvider
33
24
  }
34
25
 
35
- /** Bot speech bubble with pre-generated TTS (atomic event) */
26
+ /** Bot speech bubble (text only, TTS handled by overlay) */
36
27
  export type TalkEvent = {
37
- /** Correlation ID — overlay sends talk:done with this ID when audio finishes */
28
+ /** Correlation ID — overlay sends talk:done with this ID when finished */
38
29
  id: string
39
30
  /** Bot reply text */
40
31
  message: string
41
- /** Bot TTS audio URL */
42
- ttsUrl: string
43
- /** TTS provider used for the bot audio */
44
- ttsProvider?: TtsProvider
45
- /** Optional: chat message being replied to (overlay plays this first) */
46
- chat?: {
47
- message: string
48
- username: string
49
- ttsUrl: string
50
- }
51
32
  }
52
33
 
53
34
  /** Overlay → backend acknowledgement that a talk finished playing */