@bprp/flockcode 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,100 @@
1
+ import { chat } from "@tanstack/ai"
2
+ import { geminiText } from "@tanstack/ai-gemini"
3
+ import type { Message } from "./types"
4
+ import { env } from "./env"
5
+
6
/**
 * Transcribes a recorded voice message into the text that should be forwarded
 * to the coding agent.
 *
 * The audio is sent to the Gemini text adapter (model taken from
 * `env.TRANSCRIPTION_MODEL`) in a single non-streaming `chat` call, together
 * with a system prompt describing the transcription rules.
 *
 * @param base64Audio - Audio payload, passed to the model as inline data.
 * @param mimeType - MIME type reported for the recording; normalized via
 *   `normalizeMimeType` before being sent.
 * @param conversationContext - Optional recent messages. When they yield a
 *   non-empty summary, it is sent ahead of the audio so the model can resolve
 *   file names and technical terms.
 * @returns The model output with surrounding whitespace trimmed.
 */
export async function transcribeAudio(
  base64Audio: string,
  mimeType: string,
  conversationContext?: Message[],
): Promise<string> {
  // Normalize MIME types that Gemini doesn't recognize
  const normalizedMime = normalizeMimeType(mimeType)
  const contextSummary = buildContextSummary(conversationContext)

  const systemPrompt = `\
## General Instructions
You are a voice transcription layer between a human user and an AI coding agent.
The user is speaking voice commands/messages that will be forwarded to the coding agent.
You will be provided recent conversation history to help you resolve ambiguous words, technical terms,
variable names, file paths, and other domain-specific vocabulary.
The final message will be an audio recording from the user to transcribe.

## Transcription Intent Correction
Transcribe the user's audio message. Transcribe the intent of it - you should not transcribe umms.
If the user corrects themselves in natural speech, only output the correction.

## Reference Resolution
Resolve the user's references to real files that may have been recently referenced. If the user says "Add server dot env to gitignore",
and the agent recently edited \`./server/.env\`, then output \`Add ./server/.env to .gitignore\`.

## Output Instructions
Output ONLY the text that you believe should be forwarded to the coding agent, nothing else.
If the audio is unclear or empty, respond with an empty string and nothing will happen.`

  // Only prepend the context exchange when there is something to show; an
  // empty "Recent conversation for context:" turn adds tokens and no signal.
  const contextMessages = contextSummary
    ? ([
        {
          role: "user",
          content: `Recent conversation for context:\n${contextSummary}`,
        },
        {
          role: "assistant",
          content: "Understood, I'll use this context to help transcribe the next audio message.",
        },
      ] as const)
    : ([] as const)

  const result = await chat({
    // NOTE(review): `as any` bypasses the adapter's model-name typing because
    // the model id comes from the environment — consider validating it in env.
    adapter: geminiText(env.TRANSCRIPTION_MODEL as any),
    systemPrompts: [systemPrompt],
    messages: [
      ...contextMessages,
      {
        role: "user",
        content: [
          {
            type: "audio",
            source: {
              type: "data",
              value: base64Audio,
              mimeType: normalizedMime,
            },
          },
        ],
      },
    ],
    stream: false,
  })

  return result.trim()
}
70
+
71
/**
 * Aliases for audio MIME types that need rewriting before being sent to Gemini.
 * Gemini only supports: wav, mp3, aiff, aac, ogg, flac.
 * iOS records m4a (AAC in MP4 container) — map to audio/aac.
 * A Map is used so that lookups never hit inherited Object.prototype members.
 */
const GEMINI_MIME_ALIASES: ReadonlyMap<string, string> = new Map([
  ["audio/x-m4a", "audio/aac"],
  ["audio/m4a", "audio/aac"],
  ["audio/mp4", "audio/aac"],
  ["audio/x-caf", "audio/aac"],
  ["audio/mpeg", "audio/mp3"],
])

/**
 * Maps an audio MIME type onto the equivalent name listed in the alias table.
 *
 * Matching ignores letter case, surrounding whitespace and any parameters
 * after `;` (e.g. `audio/mp4; codecs=mp4a.40.2`), since MIME type names are
 * case-insensitive and may carry parameters.
 *
 * @param mime - MIME type as reported by the recorder/client.
 * @returns The aliased MIME type, or `mime` unchanged when no alias applies.
 */
function normalizeMimeType(mime: string): string {
  const paramStart = mime.indexOf(";")
  const essence = (paramStart === -1 ? mime : mime.slice(0, paramStart)).trim().toLowerCase()
  return GEMINI_MIME_ALIASES.get(essence) ?? mime
}
83
+
84
/**
 * Cuts `text` to at most `maxLength` UTF-16 code units without ending on a
 * dangling high surrogate (which would leave half of an astral character).
 */
function truncateContextText(text: string, maxLength: number): string {
  if (text.length <= maxLength) return text
  const lastUnit = text.charCodeAt(maxLength - 1)
  const endsOnHighSurrogate = lastUnit >= 0xd800 && lastUnit <= 0xdbff
  return text.slice(0, endsOnHighSurrogate ? maxLength - 1 : maxLength)
}

/**
 * Builds a compact, line-per-message summary of the recent conversation.
 *
 * Takes the last 10 messages, joins each message's text parts with a space,
 * and emits `role: text` with the text limited to 200 characters. Messages
 * whose text is empty or whitespace-only are skipped.
 *
 * @param messages - Conversation history, oldest first; may be omitted.
 * @returns The summary lines joined with `\n`, or `""` when there is nothing
 *   to summarize.
 */
function buildContextSummary(messages?: Message[]): string {
  if (!messages?.length) return ""

  const maxMessages = 10
  const maxCharsPerMessage = 200

  const lines: string[] = []
  for (const message of messages.slice(-maxMessages)) {
    const text = message.parts
      .filter((p): p is { type: "text"; id: string; text: string } => p.type === "text")
      .map((p) => p.text)
      .join(" ")
      .trim()
    // Skip messages with no usable text (tool-only turns, blank text parts).
    if (!text) continue
    lines.push(`${message.role}: ${truncateContextText(text, maxCharsPerMessage)}`)
  }
  return lines.join("\n")
}
package/src/types.ts ADDED
@@ -0,0 +1,430 @@
1
+ import { z } from "zod/v4"
2
+
3
+ /**
4
+ * Re-export SDK types so the native package can import them without
5
+ * depending on @opencode-ai/sdk directly.
6
+ */
7
+ export type { Project, Session, File } from "@opencode-ai/sdk/v2"
8
+
9
/** One changed file, with its change kind and line counts. */
export interface ChangedFile {
  /** Path of the changed file. */
  path: string
  /** Kind of change recorded for the file. */
  status: "added" | "deleted" | "modified"
  /** Count of added lines (presumably; unit not shown here — TODO confirm). */
  added: number
  /** Count of removed lines (presumably; unit not shown here — TODO confirm). */
  removed: number
}
15
+
16
+ // ---------- Zod Schemas for OpenCode API messages ----------
17
+
18
/**
 * Time range shared by message parts and tool states.
 * `start` is required; `end` may be omitted.
 */
const TimeRange = z.object({ start: z.number(), end: z.number().optional() })
23
+
24
// --- Tool state variants ---

/** String-keyed bag of arbitrary values; every tool state carries one as `input`. */
const ToolInput = z.record(z.string(), z.unknown())

/** Optional string-keyed metadata bag accepted by running/completed/error states. */
const ToolMetadata = z.record(z.string(), z.unknown()).optional()

/** Tool call in the "pending" state; only `input` and optional `raw` are present. */
const ToolStatePending = z.object({
  status: z.literal("pending"),
  input: ToolInput,
  raw: z.string().optional(),
})

/** Tool call in the "running" state; only a start time is recorded. */
const ToolStateRunning = z.object({
  status: z.literal("running"),
  input: ToolInput,
  title: z.string().optional(),
  metadata: ToolMetadata,
  time: z.object({ start: z.number() }),
})

/** Tool call in the "completed" state, with required `output` and `title`. */
const ToolStateCompleted = z.object({
  status: z.literal("completed"),
  input: ToolInput,
  output: z.string(),
  title: z.string(),
  metadata: ToolMetadata,
  time: z.object({
    start: z.number(),
    end: z.number(),
    compacted: z.number().optional(),
  }),
})

/** Tool call in the "error" state, with a required `error` string. */
const ToolStateError = z.object({
  status: z.literal("error"),
  input: ToolInput,
  error: z.string(),
  metadata: ToolMetadata,
  time: z.object({
    start: z.number(),
    end: z.number(),
  }),
})

/** Any tool state, discriminated on `status`. */
const ToolState = z.discriminatedUnion("status", [
  ToolStatePending,
  ToolStateRunning,
  ToolStateCompleted,
  ToolStateError,
])
70
+
71
// --- Token counts ---

/** Cache read/write token counters nested under `TokenCounts.cache`. */
const TokenCacheCounts = z.object({
  read: z.number(),
  write: z.number(),
})

/**
 * Token usage counters attached to step-finish parts and assistant messages.
 * `total` is optional; all other counters are required.
 */
const TokenCounts = z.object({
  total: z.number().optional(),
  input: z.number(),
  output: z.number(),
  reasoning: z.number(),
  cache: TokenCacheCounts,
})
83
+
84
// --- Part schemas ---

/**
 * Identity fields present on every part. Spread into each part schema right
 * after `type` so the declared key order stays type → id → sessionID → messageID.
 */
const partIdentity = {
  id: z.string(),
  sessionID: z.string(),
  messageID: z.string(),
}

/** Optional string-keyed metadata bag accepted by text, reasoning and tool parts. */
const PartMetadata = z.record(z.string(), z.unknown()).optional()

/** Text part, with optional `synthetic`/`ignored` flags and time range. */
const TextPart = z.object({
  type: z.literal("text"),
  ...partIdentity,
  text: z.string(),
  synthetic: z.boolean().optional(),
  ignored: z.boolean().optional(),
  time: TimeRange.optional(),
  metadata: PartMetadata,
})

/** Reasoning part carrying required `text`. */
const ReasoningPart = z.object({
  type: z.literal("reasoning"),
  ...partIdentity,
  text: z.string(),
  time: TimeRange.optional(),
  metadata: PartMetadata,
})

/** Tool invocation part: call id, tool name and current `ToolState`. */
const ToolPart = z.object({
  type: z.literal("tool"),
  ...partIdentity,
  callID: z.string(),
  tool: z.string(),
  state: ToolState,
  metadata: PartMetadata,
})

/** Step-start marker, optionally carrying a snapshot string. */
const StepStartPart = z.object({
  type: z.literal("step-start"),
  ...partIdentity,
  snapshot: z.string().optional(),
})

/** Step-finish marker with finish reason, cost and token usage. */
const StepFinishPart = z.object({
  type: z.literal("step-finish"),
  ...partIdentity,
  reason: z.string(),
  snapshot: z.string().optional(),
  cost: z.number(),
  tokens: TokenCounts,
})

/** Patch part: a hash plus the list of files it covers. */
const PatchPart = z.object({
  type: z.literal("patch"),
  ...partIdentity,
  hash: z.string(),
  files: z.array(z.string()),
})

/** Compaction part carrying a boolean `auto` flag. */
const CompactionPart = z.object({
  type: z.literal("compaction"),
  ...partIdentity,
  auto: z.boolean(),
})

/** Subtask part: prompt, description and agent, with optional model and command. */
const SubtaskPart = z.object({
  type: z.literal("subtask"),
  ...partIdentity,
  prompt: z.string(),
  description: z.string(),
  agent: z.string(),
  model: z
    .object({
      providerID: z.string(),
      modelID: z.string(),
    })
    .optional(),
  command: z.string().optional(),
})

// File-related parts (from SDK, not yet observed in test data but defined)

/** Text value together with numeric `start`/`end` bounds. */
const FilePartSourceText = z.object({
  value: z.string(),
  start: z.number(),
  end: z.number(),
})

/** File part source of type "file": source text plus a path. */
const FileSource = z.object({
  type: z.literal("file"),
  text: FilePartSourceText,
  path: z.string(),
})

/** Line/character position used for both ends of a symbol range. */
const SourcePosition = z.object({ line: z.number(), character: z.number() })

/** File part source of type "symbol": text, path, range, name and numeric kind. */
const SymbolSource = z.object({
  type: z.literal("symbol"),
  text: FilePartSourceText,
  path: z.string(),
  range: z.object({
    start: SourcePosition,
    end: SourcePosition,
  }),
  name: z.string(),
  kind: z.number(),
})

/** File part with MIME type and URL; `filename` and `source` are optional. */
const FilePart = z.object({
  type: z.literal("file"),
  ...partIdentity,
  mime: z.string(),
  filename: z.string().optional(),
  url: z.string(),
  source: z.discriminatedUnion("type", [FileSource, SymbolSource]).optional(),
})

/** Snapshot part carrying a required snapshot string. */
const SnapshotPart = z.object({
  type: z.literal("snapshot"),
  ...partIdentity,
  snapshot: z.string(),
})

/** Retry part: attempt number, error bag and creation time. */
const RetryPart = z.object({
  type: z.literal("retry"),
  ...partIdentity,
  attempt: z.number(),
  error: z.record(z.string(), z.unknown()),
  time: z.object({ created: z.number() }),
})

/** Agent part with the agent name and an optional source text span. */
const AgentPart = z.object({
  type: z.literal("agent"),
  ...partIdentity,
  name: z.string(),
  source: FilePartSourceText.optional(),
})

/** Any message part, discriminated on `type`. */
export const MessagePartSchema = z.discriminatedUnion("type", [
  TextPart,
  ReasoningPart,
  ToolPart,
  StepStartPart,
  StepFinishPart,
  PatchPart,
  CompactionPart,
  SubtaskPart,
  FilePart,
  SnapshotPart,
  RetryPart,
  AgentPart,
])
247
+
248
// --- Message error types ---

/** Payload shared by every error variant below: a single `message` string. */
const MessageErrorData = z.object({ message: z.string() })

// NOTE(review): the upstream SDK's error names and payload shapes should be
// checked against these literals (e.g. whether the API error is spelled
// "APIError" and whether every variant really carries `data.message`) — a
// mismatch would make parsing of assistant messages with errors fail. Confirm
// against the @opencode-ai/sdk generated types.

const ProviderAuthError = z.object({
  name: z.literal("ProviderAuthError"),
  data: MessageErrorData,
})

const MessageAbortedError = z.object({
  name: z.literal("MessageAbortedError"),
  data: MessageErrorData,
})

const MessageOutputLengthError = z.object({
  name: z.literal("MessageOutputLengthError"),
  data: MessageErrorData,
})

const ApiError = z.object({
  name: z.literal("ApiError"),
  data: MessageErrorData,
})

const UnknownError = z.object({
  name: z.literal("UnknownError"),
  data: MessageErrorData,
})

/** Any error attached to an assistant message, discriminated on `name`. */
const MessageError = z.discriminatedUnion("name", [
  ProviderAuthError,
  MessageAbortedError,
  MessageOutputLengthError,
  ApiError,
  UnknownError,
])
282
+
283
/**
 * File diff used in user message summaries and session diffs.
 *
 * Mirrors the fields validated by `FileDiffSchema`. `status` is optional here
 * so that existing code constructing a `FileDiff` without it keeps compiling.
 */
export interface FileDiff {
  /** File the diff applies to. */
  file: string
  /** Value before the change (presumably the file content — TODO confirm). */
  before: string
  /** Value after the change (presumably the file content — TODO confirm). */
  after: string
  /** Number of additions reported for the file. */
  additions: number
  /** Number of deletions reported for the file. */
  deletions: number
  /** Change kind as reported by the API: "added", "modified", "deleted", etc. */
  status?: string
}
293
+
294
/**
 * Runtime schema for a single file diff entry.
 * All fields are required, including `status`.
 */
const FileDiffSchema = z.object({
  file: z.string(),
  before: z.string(),
  after: z.string(),
  additions: z.number(),
  deletions: z.number(),
  // "added", "modified", "deleted", etc.
  status: z.string(),
})
302
+
303
/** Optional summary attached to a user message: title, body and file diffs. */
const UserMessageSummary = z.object({
  title: z.string().optional(),
  body: z.string().optional(),
  diffs: z.array(FileDiffSchema),
})

/** Provider/model pair recorded on a user message. */
const UserMessageModel = z.object({
  providerID: z.string(),
  modelID: z.string(),
})

/** User message schema used for message summaries. */
const UserMessageInfo = z.object({
  id: z.string(),
  sessionID: z.string(),
  role: z.literal("user"),
  time: z.object({ created: z.number() }),
  summary: UserMessageSummary.optional(),
  agent: z.string(),
  model: UserMessageModel,
  system: z.string().optional(),
  tools: z.record(z.string(), z.boolean()).optional(),
})
324
+
325
/** Creation time plus optional completion time of an assistant message. */
const AssistantMessageTime = z.object({
  created: z.number(),
  completed: z.number().optional(),
})

/** `cwd` and `root` paths recorded on an assistant message. */
const AssistantMessagePath = z.object({
  cwd: z.string(),
  root: z.string(),
})

/**
 * Assistant message metadata: parent message, model/provider, mode, paths,
 * cost and token usage, plus an optional error and finish reason.
 */
const AssistantMessageInfo = z.object({
  id: z.string(),
  sessionID: z.string(),
  role: z.literal("assistant"),
  time: AssistantMessageTime,
  error: MessageError.optional(),
  parentID: z.string(),
  modelID: z.string(),
  providerID: z.string(),
  mode: z.string(),
  agent: z.string().optional(),
  path: AssistantMessagePath,
  summary: z.boolean().optional(),
  cost: z.number(),
  tokens: TokenCounts,
  finish: z.string().optional(),
})
348
+
349
/** Message metadata for either author, discriminated on `role`. */
export const MessageInfoSchema = z.discriminatedUnion("role", [UserMessageInfo, AssistantMessageInfo])
353
+
354
/**
 * Top-level raw message envelope as returned by the API:
 * message metadata (`info`) plus the ordered list of `parts`.
 */
export const RawMessageSchema = z.object({ info: MessageInfoSchema, parts: z.array(MessagePartSchema) })
359
+
360
+ /**
361
+ * Mapped types used by our app.
362
+ */
363
+
364
/**
 * Simplified Message type consumed by the native app.
 * This is what mapMessage() in opencode.ts produces.
 */
export interface Message {
  id: string
  sessionId: string
  role: "user" | "assistant"
  parts: MessagePart[]
  /** Creation time (units not shown here; presumably copied from the raw message — TODO confirm). */
  createdAt: number

  /** Model info (present on both user and assistant messages). */
  modelID?: string
  providerID?: string

  /** Agent used for this message (present on user messages, optional on assistant). */
  agent?: string

  /** Assistant-specific: cost reported for the message. */
  cost?: number
  /** Assistant-specific: token usage counters. */
  tokens?: {
    input: number
    output: number
    reasoning: number
  }
  /** Assistant-specific: finish reason string. */
  finish?: string
}
389
+
390
/** Lifecycle states a tool call can be in. */
export type ToolCallStatus =
  | "pending"
  | "running"
  | "completed"
  | "error"
391
+
392
/**
 * Simplified message part used by the app-level `Message` type.
 * Discriminated on `type`; only the fields listed per variant are kept.
 */
export type MessagePart =
  // Plain text
  | { type: "text"; id: string; text: string }
  // Tool call with its flattened state (all state fields except `status` are optional)
  | {
      type: "tool"
      id: string
      tool: string
      state: {
        status: ToolCallStatus
        input?: Record<string, unknown>
        output?: string
        title?: string
        error?: string
        metadata?: Record<string, unknown>
        time?: { start: number; end?: number; compacted?: number }
      }
    }
  // Step boundary markers
  | { type: "step-start"; id: string }
  | { type: "step-finish"; id: string }
  // Reasoning; text may be absent
  | { type: "reasoning"; id: string; text?: string }
411
+
412
/** Text prompt part. */
export type TextPartInput = {
  type: "text"
  text: string
}

/** Audio prompt part. */
export type AudioPartInput = {
  type: "audio"
  /** Audio payload as a string (presumably base64-encoded — TODO confirm against the sender). */
  audioData: string
  /** MIME type of the recording, when known. */
  mimeType?: string
  /** Optional file/line range attached to the recording, optionally scoped to one diff side. */
  lineReference?: {
    file: string
    startLine: number
    endLine: number
    side?: "additions" | "deletions"
  }
}

/** Any prompt part: text or audio. */
export type PromptPartInput = TextPartInput | AudioPartInput
425
+
426
/** Inferred types from Zod schemas. */

/** Parsed shape of a full raw message envelope (`info` + `parts`). */
export type RawMessage = z.infer<typeof RawMessageSchema>

/** Parsed shape of the message metadata, for either role. */
export type RawMessageInfo = z.infer<typeof MessageInfoSchema>

/** Parsed shape of any raw message part. */
export type RawMessagePart = z.infer<typeof MessagePartSchema>

/** Parsed shape of any raw tool state. */
export type RawToolState = z.infer<typeof ToolState>