@bprp/flockcode 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +45 -0
- package/src/app.ts +153 -0
- package/src/diagnose-stream.ts +305 -0
- package/src/env.ts +35 -0
- package/src/event-discovery.ts +355 -0
- package/src/event-driven-test.ts +72 -0
- package/src/index.ts +223 -0
- package/src/opencode.ts +278 -0
- package/src/prompt.ts +127 -0
- package/src/router/agents.ts +57 -0
- package/src/router/base.ts +10 -0
- package/src/router/commands.ts +57 -0
- package/src/router/context.ts +22 -0
- package/src/router/diffs.ts +46 -0
- package/src/router/index.ts +24 -0
- package/src/router/models.ts +55 -0
- package/src/router/permissions.ts +28 -0
- package/src/router/projects.ts +175 -0
- package/src/router/sessions.ts +316 -0
- package/src/router/snapshot.ts +9 -0
- package/src/server.ts +15 -0
- package/src/spawn-opencode.ts +166 -0
- package/src/sprite-configure-services.ts +302 -0
- package/src/sprite-sync.ts +413 -0
- package/src/sprites.ts +328 -0
- package/src/start-server.ts +49 -0
- package/src/state-stream.ts +711 -0
- package/src/transcribe.ts +100 -0
- package/src/types.ts +430 -0
- package/src/voice-prompt.ts +222 -0
- package/src/worktree-name.ts +62 -0
- package/src/worktree.ts +549 -0
|
@@ -0,0 +1,222 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Walking-mode voice prompt handler.
|
|
3
|
+
*
|
|
4
|
+
* When the user is in "walking" hands-free mode and sends audio:
|
|
5
|
+
* 1. Transcribe the audio (existing Gemini transcription).
|
|
6
|
+
* 2. Use Gemini Flash to decide whether to forward to the agent or respond directly.
|
|
7
|
+
* 3. If responding directly, generate TTS audio via Gemini TTS and return it.
|
|
8
|
+
* 4. If forwarding, call sendPrompt() in the background.
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import { chat } from "@tanstack/ai"
|
|
12
|
+
import { geminiText } from "@tanstack/ai-gemini"
|
|
13
|
+
import { generateSpeech } from "@tanstack/ai"
|
|
14
|
+
import { geminiSpeech } from "@tanstack/ai-gemini"
|
|
15
|
+
import type { OpencodeClient } from "./opencode"
|
|
16
|
+
import { mapMessage } from "./opencode"
|
|
17
|
+
import { transcribeAudio } from "./transcribe"
|
|
18
|
+
import { sendPrompt } from "./prompt"
|
|
19
|
+
import type { Message } from "./types"
|
|
20
|
+
|
|
21
|
+
/**
 * Outcome of handling a walking-mode voice prompt:
 * - `"forwarded"`: the transcription was handed to the coding agent in the
 *   background (or the audio was inaudible and nothing was sent).
 * - `"responded"`: the proxy answered directly; carries the reply text plus
 *   base64-encoded TTS audio and its MIME type.
 */
export type VoicePromptResult =
  | { action: "forwarded" }
  | { action: "responded"; text: string; audioData: string; mimeType: string }
|
|
24
|
+
|
|
25
|
+
/**
|
|
26
|
+
* Handles a voice prompt in walking mode.
|
|
27
|
+
*
|
|
28
|
+
* Transcribes the audio, then uses a single Gemini Flash call to decide
|
|
29
|
+
* whether to forward to the agent or respond directly. If responding,
|
|
30
|
+
* generates TTS audio and returns it.
|
|
31
|
+
*/
|
|
32
|
+
export async function handleVoicePrompt(
|
|
33
|
+
client: OpencodeClient,
|
|
34
|
+
sessionId: string,
|
|
35
|
+
audioData: string,
|
|
36
|
+
mimeType: string,
|
|
37
|
+
directory?: string,
|
|
38
|
+
model?: { providerID: string; modelID: string },
|
|
39
|
+
): Promise<VoicePromptResult> {
|
|
40
|
+
// 1. Fetch conversation context
|
|
41
|
+
let conversationContext: Message[] | undefined
|
|
42
|
+
try {
|
|
43
|
+
const res = await client.session.messages({ sessionID: sessionId, directory })
|
|
44
|
+
if (!res.error && res.data) {
|
|
45
|
+
conversationContext = res.data.map(mapMessage)
|
|
46
|
+
}
|
|
47
|
+
} catch {}
|
|
48
|
+
|
|
49
|
+
// 2. Transcribe the audio
|
|
50
|
+
const transcription = await transcribeAudio(audioData, mimeType, conversationContext)
|
|
51
|
+
if (!transcription?.trim()) {
|
|
52
|
+
// Nothing audible — don't do anything
|
|
53
|
+
return { action: "forwarded" }
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
console.log(`[voice-prompt] session=${sessionId} transcription: "${transcription.slice(0, 200)}"`)
|
|
57
|
+
|
|
58
|
+
// 3. Build context summary for the routing call
|
|
59
|
+
const contextSummary = buildContextSummary(conversationContext)
|
|
60
|
+
|
|
61
|
+
// 4. Single Gemini Flash call to route AND optionally respond
|
|
62
|
+
const routingPrompt = `You are a voice assistant proxy for an AI coding agent. The user has sent a voice message (transcribed below).
|
|
63
|
+
Decide whether to handle the message yourself or forward it to the coding agent.
|
|
64
|
+
|
|
65
|
+
FORWARD to the agent if the user is asking it to do something, giving a new instruction, or continuing/modifying a task.
|
|
66
|
+
RESPOND yourself if the user is asking a question about what the agent just did, asking for a status update, or asking what changed.
|
|
67
|
+
|
|
68
|
+
If forwarding, respond with ONLY:
|
|
69
|
+
<forward/>
|
|
70
|
+
|
|
71
|
+
If responding, reply with:
|
|
72
|
+
<respond>
|
|
73
|
+
[your terse response here, suitable for text-to-speech — 1-3 sentences max]
|
|
74
|
+
</respond>
|
|
75
|
+
|
|
76
|
+
Conversation context:
|
|
77
|
+
${contextSummary}
|
|
78
|
+
|
|
79
|
+
User message: "${transcription}"`
|
|
80
|
+
|
|
81
|
+
const routingResult = await chat({
|
|
82
|
+
adapter: geminiText("gemini-2.5-flash"),
|
|
83
|
+
messages: [{ role: "user", content: routingPrompt }],
|
|
84
|
+
stream: false,
|
|
85
|
+
})
|
|
86
|
+
|
|
87
|
+
const routingText = routingResult.trim()
|
|
88
|
+
console.log(`[voice-prompt] session=${sessionId} routing result: "${routingText.slice(0, 300)}"`)
|
|
89
|
+
|
|
90
|
+
// 5. Parse the response
|
|
91
|
+
if (routingText.includes("<forward")) {
|
|
92
|
+
// Forward to agent
|
|
93
|
+
console.log(`[voice-prompt] session=${sessionId} forwarding to agent`)
|
|
94
|
+
sendPrompt(
|
|
95
|
+
client,
|
|
96
|
+
sessionId,
|
|
97
|
+
[{ type: "text", text: transcription }],
|
|
98
|
+
directory,
|
|
99
|
+
model,
|
|
100
|
+
).catch((err) => {
|
|
101
|
+
console.error(`[voice-prompt] session=${sessionId} sendPrompt failed:`, err)
|
|
102
|
+
})
|
|
103
|
+
return { action: "forwarded" }
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
// Extract the response text from <respond>...</respond>
|
|
107
|
+
const respondMatch = routingText.match(/<respond>([\s\S]*?)<\/respond>/)
|
|
108
|
+
const responseText = respondMatch?.[1]?.trim() ?? routingText.replace(/<\/?respond>/g, "").trim()
|
|
109
|
+
|
|
110
|
+
if (!responseText) {
|
|
111
|
+
// No response text — just forward
|
|
112
|
+
sendPrompt(
|
|
113
|
+
client,
|
|
114
|
+
sessionId,
|
|
115
|
+
[{ type: "text", text: transcription }],
|
|
116
|
+
directory,
|
|
117
|
+
model,
|
|
118
|
+
).catch((err) => {
|
|
119
|
+
console.error(`[voice-prompt] session=${sessionId} sendPrompt failed:`, err)
|
|
120
|
+
})
|
|
121
|
+
return { action: "forwarded" }
|
|
122
|
+
}
|
|
123
|
+
|
|
124
|
+
console.log(`[voice-prompt] session=${sessionId} responding directly: "${responseText.slice(0, 200)}"`)
|
|
125
|
+
|
|
126
|
+
// 6. Generate TTS audio
|
|
127
|
+
try {
|
|
128
|
+
const ttsResult = await generateSpeech({
|
|
129
|
+
adapter: geminiSpeech("gemini-2.5-flash-preview-tts"),
|
|
130
|
+
text: responseText,
|
|
131
|
+
})
|
|
132
|
+
|
|
133
|
+
const rawFormat = ttsResult.format || "wav"
|
|
134
|
+
console.log(
|
|
135
|
+
`[voice-prompt] session=${sessionId} TTS complete: format=${rawFormat}, audioLength=${ttsResult.audio?.length ?? 0} chars`,
|
|
136
|
+
)
|
|
137
|
+
|
|
138
|
+
// Gemini TTS returns raw PCM with a format string like
|
|
139
|
+
// "L16;codec=pcm;rate=24000". AVAudioPlayer can't play raw PCM,
|
|
140
|
+
// so wrap it in a WAV header.
|
|
141
|
+
let resultAudio = ttsResult.audio
|
|
142
|
+
let audioMime = `audio/${rawFormat}`
|
|
143
|
+
const isRawPcm = /l16|pcm|raw/i.test(rawFormat)
|
|
144
|
+
if (isRawPcm) {
|
|
145
|
+
// Extract sample rate from format string (e.g. "rate=24000"), default 24kHz
|
|
146
|
+
const rateMatch = rawFormat.match(/rate=(\d+)/)
|
|
147
|
+
const sampleRate = rateMatch ? parseInt(rateMatch[1], 10) : 24000
|
|
148
|
+
resultAudio = wrapPcmInWav(ttsResult.audio, sampleRate, 1, 16)
|
|
149
|
+
audioMime = "audio/wav"
|
|
150
|
+
console.log(`[voice-prompt] session=${sessionId} wrapped raw PCM in WAV header (sampleRate=${sampleRate})`)
|
|
151
|
+
}
|
|
152
|
+
|
|
153
|
+
return {
|
|
154
|
+
action: "responded",
|
|
155
|
+
text: responseText,
|
|
156
|
+
audioData: resultAudio,
|
|
157
|
+
mimeType: audioMime,
|
|
158
|
+
}
|
|
159
|
+
} catch (err) {
|
|
160
|
+
console.error(`[voice-prompt] session=${sessionId} TTS failed:`, err)
|
|
161
|
+
// Fall back to forwarding if TTS fails
|
|
162
|
+
sendPrompt(
|
|
163
|
+
client,
|
|
164
|
+
sessionId,
|
|
165
|
+
[{ type: "text", text: transcription }],
|
|
166
|
+
directory,
|
|
167
|
+
model,
|
|
168
|
+
).catch(() => {})
|
|
169
|
+
return { action: "forwarded" }
|
|
170
|
+
}
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
/**
|
|
174
|
+
* Wraps raw PCM (signed 16-bit LE) base64 audio data in a WAV container
|
|
175
|
+
* so AVAudioPlayer can decode it.
|
|
176
|
+
*/
|
|
177
|
+
function wrapPcmInWav(
|
|
178
|
+
pcmBase64: string,
|
|
179
|
+
sampleRate: number,
|
|
180
|
+
numChannels: number,
|
|
181
|
+
bitsPerSample: number,
|
|
182
|
+
): string {
|
|
183
|
+
const pcmData = Buffer.from(pcmBase64, "base64")
|
|
184
|
+
const byteRate = sampleRate * numChannels * (bitsPerSample / 8)
|
|
185
|
+
const blockAlign = numChannels * (bitsPerSample / 8)
|
|
186
|
+
const dataSize = pcmData.length
|
|
187
|
+
|
|
188
|
+
// 44-byte WAV header
|
|
189
|
+
const header = Buffer.alloc(44)
|
|
190
|
+
header.write("RIFF", 0)
|
|
191
|
+
header.writeUInt32LE(36 + dataSize, 4)
|
|
192
|
+
header.write("WAVE", 8)
|
|
193
|
+
header.write("fmt ", 12)
|
|
194
|
+
header.writeUInt32LE(16, 16) // PCM subchunk size
|
|
195
|
+
header.writeUInt16LE(1, 20) // PCM format
|
|
196
|
+
header.writeUInt16LE(numChannels, 22)
|
|
197
|
+
header.writeUInt32LE(sampleRate, 24)
|
|
198
|
+
header.writeUInt32LE(byteRate, 28)
|
|
199
|
+
header.writeUInt16LE(blockAlign, 32)
|
|
200
|
+
header.writeUInt16LE(bitsPerSample, 34)
|
|
201
|
+
header.write("data", 36)
|
|
202
|
+
header.writeUInt32LE(dataSize, 40)
|
|
203
|
+
|
|
204
|
+
return Buffer.concat([header, pcmData]).toString("base64")
|
|
205
|
+
}
|
|
206
|
+
|
|
207
|
+
function buildContextSummary(messages?: Message[]): string {
|
|
208
|
+
if (!messages?.length) return "(no conversation history)"
|
|
209
|
+
|
|
210
|
+
const recent = messages.slice(-10)
|
|
211
|
+
return recent
|
|
212
|
+
.map((m) => {
|
|
213
|
+
const textParts = m.parts
|
|
214
|
+
.filter((p): p is { type: "text"; id: string; text: string } => p.type === "text")
|
|
215
|
+
.map((p) => p.text)
|
|
216
|
+
.join(" ")
|
|
217
|
+
if (!textParts) return null
|
|
218
|
+
return `${m.role}: ${textParts.slice(0, 300)}`
|
|
219
|
+
})
|
|
220
|
+
.filter(Boolean)
|
|
221
|
+
.join("\n")
|
|
222
|
+
}
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
import { chat } from "@tanstack/ai"
|
|
2
|
+
import { geminiText } from "@tanstack/ai-gemini"
|
|
3
|
+
import { env } from "./env"
|
|
4
|
+
|
|
5
|
+
// System prompt for the Gemini slug-generation call in generateWorktreeSlug.
// The output-format rules here are part of the contract: the caller expects a
// bare kebab-case slug and post-sanitizes anything outside [a-z0-9-].
const SYSTEM_PROMPT = `\
You are a git branch name generator for a coding assistant.

Given a description of a coding task, generate a concise kebab-case slug (3-6 words) that describes the task.

Rules:
- Output ONLY the slug, nothing else — no explanation, no punctuation, no quotes
- Use lowercase letters, numbers, and hyphens only
- 3-6 words separated by hyphens
- Be specific and descriptive about the task
- Do not include words like "task", "feature", "branch", "worktree", "implement", "add", "fix" unless they are truly essential to the meaning

Examples:
"Add a button to the home screen for settings" -> home-screen-settings-button
"Fix the login bug where users can't sign in" -> login-sign-in-bug-fix
"Refactor the database connection pooling logic" -> database-connection-pool-refactor
"Update the user profile page to show avatars" -> user-profile-avatar-display`
|
|
22
|
+
|
|
23
|
+
/**
|
|
24
|
+
* Generate a descriptive kebab-case slug for a worktree branch name using Gemini.
|
|
25
|
+
*
|
|
26
|
+
* @param promptText The user's initial prompt text describing the coding task.
|
|
27
|
+
* @returns A 3-6 word kebab-case slug, e.g. `"home-screen-settings-button"`.
|
|
28
|
+
* @throws If `GEMINI_API_KEY` is not set or the Gemini API call fails.
|
|
29
|
+
*/
|
|
30
|
+
export async function generateWorktreeSlug(promptText: string): Promise<string> {
|
|
31
|
+
if (!env.GEMINI_API_KEY) {
|
|
32
|
+
throw new Error("GEMINI_API_KEY is not set — cannot generate worktree name")
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
const result = await chat({
|
|
36
|
+
adapter: geminiText("gemini-3-flash-preview"),
|
|
37
|
+
systemPrompts: [SYSTEM_PROMPT],
|
|
38
|
+
messages: [
|
|
39
|
+
{
|
|
40
|
+
role: "user",
|
|
41
|
+
content: promptText,
|
|
42
|
+
},
|
|
43
|
+
],
|
|
44
|
+
stream: false,
|
|
45
|
+
})
|
|
46
|
+
|
|
47
|
+
// Sanitize: lowercase, strip anything not [a-z0-9-], collapse repeated hyphens,
|
|
48
|
+
// trim leading/trailing hyphens, truncate to 60 chars
|
|
49
|
+
const slug = result
|
|
50
|
+
.trim()
|
|
51
|
+
.toLowerCase()
|
|
52
|
+
.replace(/[^a-z0-9-]+/g, "-")
|
|
53
|
+
.replace(/-{2,}/g, "-")
|
|
54
|
+
.replace(/^-+|-+$/g, "")
|
|
55
|
+
.slice(0, 60)
|
|
56
|
+
|
|
57
|
+
if (!slug) {
|
|
58
|
+
throw new Error("Gemini returned an empty slug for worktree name generation")
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
return slug
|
|
62
|
+
}
|