cursor-buddy 0.0.0-beta.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,22 @@
1
+ import { t as CursorBuddyHandler } from "../../types-COQKMo5C.mjs";
2
+
3
+ //#region src/server/adapters/next.d.ts
4
+ /**
5
+ * Convert a CursorBuddyHandler to Next.js App Router route handlers.
6
+ *
7
+ * @example
8
+ * ```ts
9
+ * // app/api/cursor-buddy/[...path]/route.ts
10
+ * import { toNextJsHandler } from "cursor-buddy/server/next"
11
+ * import { cursorBuddy } from "@/lib/cursor-buddy"
12
+ *
13
+ * export const { GET, POST } = toNextJsHandler(cursorBuddy)
14
+ * ```
15
+ */
16
+ declare function toNextJsHandler(cursorBuddy: CursorBuddyHandler): {
17
+ GET: (request: Request) => Promise<Response>;
18
+ POST: (request: Request) => Promise<Response>;
19
+ };
20
+ //#endregion
21
+ export { toNextJsHandler };
22
+ //# sourceMappingURL=next.d.mts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"next.d.mts","names":[],"sources":["../../../src/server/adapters/next.ts"],"mappings":";;;;;AAcA;;;;;;;;;;iBAAgB,eAAA,CAAgB,WAAA,EAAa,kBAAA;iBACjB,OAAA,KAAO,OAAA,CAAA,QAAA;kBAAP,OAAA,KAAO,OAAA,CAAA,QAAA;AAAA"}
@@ -0,0 +1,24 @@
1
+ //#region src/server/adapters/next.ts
2
/**
 * Convert a CursorBuddyHandler to Next.js App Router route handlers.
 *
 * Both returned methods delegate to `cursorBuddy.handler`. The handler is
 * looked up on every request and invoked as a method of `cursorBuddy`.
 *
 * @example
 * ```ts
 * // app/api/cursor-buddy/[...path]/route.ts
 * import { toNextJsHandler } from "cursor-buddy/server/next"
 * import { cursorBuddy } from "@/lib/cursor-buddy"
 *
 * export const { GET, POST } = toNextJsHandler(cursorBuddy)
 * ```
 *
 * @param cursorBuddy - Object exposing `handler(request)`, e.g. the value
 *   returned by `createCursorBuddyHandler`.
 * @returns An object whose `GET` and `POST` properties are the same route
 *   handler function.
 * @throws {TypeError} If `cursorBuddy` does not expose a `handler` function.
 */
function toNextJsHandler(cursorBuddy) {
  // Fail at setup time instead of on the first incoming request, where the
  // same mistake would surface as an opaque "is not a function" error.
  if (typeof cursorBuddy?.handler !== "function") {
    throw new TypeError("toNextJsHandler expects a CursorBuddyHandler with a handler function");
  }

  const handler = (request) => cursorBuddy.handler(request);

  return {
    GET: handler,
    POST: handler
  };
}
21
+ //#endregion
22
+ export { toNextJsHandler };
23
+
24
+ //# sourceMappingURL=next.mjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"next.mjs","names":[],"sources":["../../../src/server/adapters/next.ts"],"sourcesContent":["import type { CursorBuddyHandler } from \"../types\"\n\n/**\n * Convert a CursorBuddyHandler to Next.js App Router route handlers.\n *\n * @example\n * ```ts\n * // app/api/cursor-buddy/[...path]/route.ts\n * import { toNextJsHandler } from \"cursor-buddy/server/next\"\n * import { cursorBuddy } from \"@/lib/cursor-buddy\"\n *\n * export const { GET, POST } = toNextJsHandler(cursorBuddy)\n * ```\n */\nexport function toNextJsHandler(cursorBuddy: CursorBuddyHandler) {\n const handler = (request: Request) => cursorBuddy.handler(request)\n\n return {\n GET: handler,\n POST: handler,\n }\n}\n"],"mappings":";;;;;;;;;;;;;AAcA,SAAgB,gBAAgB,aAAiC;CAC/D,MAAM,WAAW,YAAqB,YAAY,QAAQ,QAAQ;AAElE,QAAO;EACL,KAAK;EACL,MAAM;EACP"}
@@ -0,0 +1,31 @@
1
+ import { n as pointTool, t as PointToolInput } from "../point-tool-kIviMn1q.mjs";
2
+ import { n as CursorBuddyHandlerConfig, t as CursorBuddyHandler } from "../types-COQKMo5C.mjs";
3
+
4
+ //#region src/server/handler.d.ts
5
+ /**
6
+ * Create a cursor buddy request handler.
7
+ *
8
+ * The handler responds to three routes based on the last path segment:
9
+ * - /chat - Screenshot + transcript → AI SSE stream
10
+ * - /transcribe - Audio → text
11
+ * - /tts - Text → audio
12
+ *
13
+ * @example
14
+ * ```ts
15
+ * import { createCursorBuddyHandler } from "cursor-buddy/server"
16
+ * import { openai } from "@ai-sdk/openai"
17
+ *
18
+ * const cursorBuddy = createCursorBuddyHandler({
19
+ * model: openai("gpt-4o"),
20
+ * speechModel: openai.speech("tts-1"), // optional for browser-only speech
21
+ * transcriptionModel: openai.transcription("whisper-1"),
22
+ * })
23
+ * ```
24
+ */
25
+ declare function createCursorBuddyHandler(config: CursorBuddyHandlerConfig): CursorBuddyHandler;
26
+ //#endregion
27
+ //#region src/server/system-prompt.d.ts
28
+ declare const DEFAULT_SYSTEM_PROMPT = "You are a helpful AI assistant that lives inside a web page as a cursor companion.\n\nYou can see the user's current screen and hear what they say. Respond conversationally. Your response will be spoken aloud with text-to-speech, so keep it natural, concise, and easy to follow.\n\n## Core behavior\n\n- Speak like a helpful companion, not a robot\n- Keep most responses to 1-3 short sentences\n- Focus on what is visible right now on the user's screen\n- If something is unclear or not visible, say that plainly\n- Do not mention screenshots, overlays, annotations, or internal helper data\n- Do not mention marker numbers to the user\n\n## The point tool\n\nYou have a `point` tool that can visually indicate something on the user's screen.\n\nUse the `point` tool when the user is asking you to identify, locate, indicate, highlight, or show something visible on screen.\n\nCommon cases where you should use `point`:\n- the user asks where something is\n- the user asks what to click\n- the user says things like \"show me\", \"point to it\", \"where is it\", \"which one\", \"what should I click\", or \"highlight that\"\n\nDo not use the `point` tool when spoken guidance alone is enough and the user is not asking you to identify a specific on-screen target.\n\nExamples where spoken guidance alone may be enough:\n- explaining what a page does\n- answering a general question about what is on screen\n- giving brief next-step advice that does not depend on locating a specific element\n\nIf using the `point` tool:\n- first give the spoken response\n- then call the tool\n- call it at most once per response\n- point only at the most relevant target\n- never replace the tool call with plain text like \"(point here)\" or \"I\u2019m pointing at it now\"\n\n\nIf the user asks where something is on screen, what to click, or asks you to point something out, you should usually use the point tool rather than only describing it in words.\nDo not say 
things like \"I can point to it if you want\" when the user already asked where it is. In that case, answer briefly and use the point tool.\n\n## How to point\n\nPrefer marker-based pointing for interactive elements when a marker is available.\nInteractive elements may include buttons, links, inputs, tabs, menus, toggles, and other clickable controls.\n\nUse:\n- `type: \"marker\"` for interactive elements that have a marker\n- `type: \"coordinates\"` only for visible non-interactive content without a marker\n\nNever use coordinates for an interactive element if a marker is available.\n\nCoordinates must refer to the center of the target area.\n\nWhen calling the point tool, choose exactly one mode:\n\n- Marker mode:\n - use type \"marker\"\n - provide markerId\n - do not provide x or y\n\n- Coordinates mode:\n - use type \"coordinates\"\n - provide x and y\n - do not provide markerId\n\nNever combine markerId with x or y in the same tool call.\n\n## What to say\n\nWhen the user asks you to point something out:\n- briefly answer in a natural spoken way\n- then use the tool if the request is about locating or indicating something on screen\n\nGood spoken style:\n- \"Click this button right here.\"\n- \"The error message is over here.\"\n- \"This is the field you want.\"\n- \"That setting is in this section.\"\n\nAvoid:\n- mentioning marker IDs\n- mentioning internal tools\n- describing internal reasoning\n- saying you are looking at a screenshot\n\n## If the target is not clear\n\nIf you cannot confidently find the requested thing on screen:\n- say you cannot see it clearly or cannot find it\n- do not point at a random or uncertain target\n\n## Priority\n\nYour first priority is being helpful and correct.\nYour second priority is using the `point` tool whenever the user is asking you to visually identify a specific thing on screen.\n";
29
+ //#endregion
30
+ export { type CursorBuddyHandler, type CursorBuddyHandlerConfig, DEFAULT_SYSTEM_PROMPT, type PointToolInput, createCursorBuddyHandler, pointTool };
31
+ //# sourceMappingURL=index.d.mts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.mts","names":[],"sources":["../../src/server/handler.ts","../../src/server/system-prompt.ts"],"mappings":";;;;;;;AAyBA;;;;;;;;;;;;ACzBA;;;;;iBDyBgB,wBAAA,CACd,MAAA,EAAQ,wBAAA,GACP,kBAAA;;;cC3BU,qBAAA"}
@@ -0,0 +1,278 @@
1
+ import { t as pointTool } from "../point-tool-DtHgq6gQ.mjs";
2
+ import { experimental_generateSpeech, experimental_transcribe, streamText } from "ai";
3
+ //#region src/server/system-prompt.ts
4
+ const DEFAULT_SYSTEM_PROMPT = `You are a helpful AI assistant that lives inside a web page as a cursor companion.
5
+
6
+ You can see the user's current screen and hear what they say. Respond conversationally. Your response will be spoken aloud with text-to-speech, so keep it natural, concise, and easy to follow.
7
+
8
+ ## Core behavior
9
+
10
+ - Speak like a helpful companion, not a robot
11
+ - Keep most responses to 1-3 short sentences
12
+ - Focus on what is visible right now on the user's screen
13
+ - If something is unclear or not visible, say that plainly
14
+ - Do not mention screenshots, overlays, annotations, or internal helper data
15
+ - Do not mention marker numbers to the user
16
+
17
+ ## The point tool
18
+
19
+ You have a \`point\` tool that can visually indicate something on the user's screen.
20
+
21
+ Use the \`point\` tool when the user is asking you to identify, locate, indicate, highlight, or show something visible on screen.
22
+
23
+ Common cases where you should use \`point\`:
24
+ - the user asks where something is
25
+ - the user asks what to click
26
+ - the user says things like "show me", "point to it", "where is it", "which one", "what should I click", or "highlight that"
27
+
28
+ Do not use the \`point\` tool when spoken guidance alone is enough and the user is not asking you to identify a specific on-screen target.
29
+
30
+ Examples where spoken guidance alone may be enough:
31
+ - explaining what a page does
32
+ - answering a general question about what is on screen
33
+ - giving brief next-step advice that does not depend on locating a specific element
34
+
35
+ If using the \`point\` tool:
36
+ - first give the spoken response
37
+ - then call the tool
38
+ - call it at most once per response
39
+ - point only at the most relevant target
40
+ - never replace the tool call with plain text like "(point here)" or "I’m pointing at it now"
41
+
42
+
43
+ If the user asks where something is on screen, what to click, or asks you to point something out, you should usually use the point tool rather than only describing it in words.
44
+ Do not say things like "I can point to it if you want" when the user already asked where it is. In that case, answer briefly and use the point tool.
45
+
46
+ ## How to point
47
+
48
+ Prefer marker-based pointing for interactive elements when a marker is available.
49
+ Interactive elements may include buttons, links, inputs, tabs, menus, toggles, and other clickable controls.
50
+
51
+ Use:
52
+ - \`type: "marker"\` for interactive elements that have a marker
53
+ - \`type: "coordinates"\` only for visible non-interactive content without a marker
54
+
55
+ Never use coordinates for an interactive element if a marker is available.
56
+
57
+ Coordinates must refer to the center of the target area.
58
+
59
+ When calling the point tool, choose exactly one mode:
60
+
61
+ - Marker mode:
62
+ - use type "marker"
63
+ - provide markerId
64
+ - do not provide x or y
65
+
66
+ - Coordinates mode:
67
+ - use type "coordinates"
68
+ - provide x and y
69
+ - do not provide markerId
70
+
71
+ Never combine markerId with x or y in the same tool call.
72
+
73
+ ## What to say
74
+
75
+ When the user asks you to point something out:
76
+ - briefly answer in a natural spoken way
77
+ - then use the tool if the request is about locating or indicating something on screen
78
+
79
+ Good spoken style:
80
+ - "Click this button right here."
81
+ - "The error message is over here."
82
+ - "This is the field you want."
83
+ - "That setting is in this section."
84
+
85
+ Avoid:
86
+ - mentioning marker IDs
87
+ - mentioning internal tools
88
+ - describing internal reasoning
89
+ - saying you are looking at a screenshot
90
+
91
+ ## If the target is not clear
92
+
93
+ If you cannot confidently find the requested thing on screen:
94
+ - say you cannot see it clearly or cannot find it
95
+ - do not point at a random or uncertain target
96
+
97
+ ## Priority
98
+
99
+ Your first priority is being helpful and correct.
100
+ Your second priority is using the \`point\` tool whenever the user is asking you to visually identify a specific thing on screen.
101
+ `;
102
+ //#endregion
103
+ //#region src/server/routes/chat.ts
104
/**
 * Repair callback for malformed `point` tool calls.
 *
 * Re-serializes the call input keeping only the fields that belong to the
 * selected mode (`marker`: markerId/label, `coordinates`: x/y/label), which
 * drops anything else the model sent, such as x/y alongside a markerId.
 *
 * @returns The tool call with a cleaned `input`, or `null` when the call is
 *   not a `point` call, is not parseable JSON, or has an unknown `type`.
 */
async function repairPointToolCall({ toolCall }) {
  if (toolCall.toolName !== "point") return null;

  let parsed;
  try {
    parsed = JSON.parse(toolCall.input);
  } catch {
    return null;
  }
  if (!parsed || typeof parsed !== "object") return null;

  let repaired;
  if (parsed.type === "marker") {
    repaired = {
      type: "marker",
      markerId: parsed.markerId,
      label: parsed.label
    };
  } else if (parsed.type === "coordinates") {
    repaired = {
      type: "coordinates",
      x: parsed.x,
      y: parsed.y,
      label: parsed.label
    };
  } else {
    return null;
  }

  return {
    ...toolCall,
    input: JSON.stringify(repaired)
  };
}

/**
 * Handle chat requests: screenshot + transcript → AI SSE stream
 *
 * Reads a JSON body with `screenshot`, `transcript` and, optionally,
 * `history`, `capture` (`width`/`height`) and `markerContext`, then streams
 * the model response. The final user message is built as: optional capture
 * context text, the screenshot image, the transcript text.
 *
 * @param request - Incoming request whose body is the JSON chat payload.
 * @param config - Handler configuration. `system` may be a string or a
 *   function receiving `{ defaultPrompt }`; `maxHistory` counts exchanges
 *   (two messages each, default 10); `tools` are merged after the built-in
 *   `point` tool, so a `point` key in `tools` replaces it.
 * @returns The UI message stream response, or a 400 JSON error response when
 *   the body is not valid JSON or lacks a usable screenshot, transcript or
 *   history.
 */
async function handleChat(request, config) {
  const badRequest = (message) => new Response(JSON.stringify({ error: message }), {
    status: 400,
    headers: { "Content-Type": "application/json" }
  });

  let body;
  try {
    body = await request.json();
  } catch {
    return badRequest("Invalid JSON body");
  }
  if (!body || typeof body !== "object") return badRequest("Invalid JSON body");

  const { screenshot, transcript, capture, markerContext } = body;
  // A first message legitimately has no history; treat it as empty.
  const history = body.history ?? [];
  if (!screenshot) return badRequest("No screenshot provided");
  if (typeof transcript !== "string") return badRequest("No transcript provided");
  if (!Array.isArray(history)) return badRequest("history must be an array");

  const systemPrompt = typeof config.system === "function"
    ? config.system({ defaultPrompt: DEFAULT_SYSTEM_PROMPT })
    : config.system ?? DEFAULT_SYSTEM_PROMPT;

  // maxHistory counts exchanges, hence the factor of two. The zero case is
  // handled explicitly: slice(-0) is slice(0) and would keep every message.
  const maxMessages = Math.max(0, (config.maxHistory ?? 10) * 2);
  const trimmedHistory = maxMessages > 0 ? history.slice(-maxMessages) : [];

  const captureContextParts = [];
  if (capture) captureContextParts.push(`Screenshot size: ${capture.width}x${capture.height} pixels.`);
  // The empty entry yields a blank line between the size line and the markers.
  if (markerContext) captureContextParts.push("", markerContext);
  const captureContext = captureContextParts.length > 0 ? captureContextParts.join("\n") : null;

  const messages = [
    ...trimmedHistory.map((msg) => ({
      role: msg.role,
      content: msg.content
    })),
    {
      role: "user",
      content: [
        ...(captureContext ? [{ type: "text", text: captureContext }] : []),
        { type: "image", image: screenshot },
        { type: "text", text: transcript }
      ]
    }
  ];

  return streamText({
    model: config.model,
    system: systemPrompt,
    providerOptions: config.modelProviderMetadata,
    messages,
    tools: {
      point: pointTool,
      ...config.tools
    },
    experimental_repairToolCall: repairPointToolCall
  }).toUIMessageStreamResponse();
}
182
+ //#endregion
183
+ //#region src/server/routes/transcribe.ts
184
/**
 * Handle transcription requests: audio file → text
 *
 * Reads the `audio` entry of the request's form data, passes its bytes to
 * the configured transcription model and answers with `{ text }` as JSON.
 *
 * @param request - Incoming request carrying form data with an `audio` file.
 * @param config - Handler configuration; `transcriptionModel` must be set
 *   for server-side transcription.
 * @returns 501 when no transcription model is configured, 400 when the body
 *   cannot be read as form data or has no `audio` file entry, otherwise a
 *   JSON response containing the transcript text.
 */
async function handleTranscribe(request, config) {
  const jsonHeaders = { "Content-Type": "application/json" };

  if (!config.transcriptionModel) {
    return new Response(JSON.stringify({ error: "Server transcription is not configured. Provide a transcriptionModel or use browser transcription only." }), {
      status: 501,
      headers: jsonHeaders
    });
  }

  // formData() rejects when the body is missing or is not form-encoded;
  // report that as a client error instead of an unhandled exception.
  let formData;
  try {
    formData = await request.formData();
  } catch {
    return new Response(JSON.stringify({ error: "Invalid form data" }), {
      status: 400,
      headers: jsonHeaders
    });
  }

  const audioFile = formData.get("audio");
  // File entries are Blobs. Testing against Blob avoids depending on the
  // `File` global, which some server runtimes do not define.
  if (!(audioFile instanceof Blob)) {
    return new Response(JSON.stringify({ error: "No audio file provided" }), {
      status: 400,
      headers: jsonHeaders
    });
  }

  const audioBuffer = await audioFile.arrayBuffer();
  const result = await experimental_transcribe({
    model: config.transcriptionModel,
    audio: new Uint8Array(audioBuffer)
  });

  return new Response(JSON.stringify({ text: result.text }), { headers: jsonHeaders });
}
204
+ //#endregion
205
+ //#region src/server/routes/tts.ts
206
/**
 * Handle TTS requests: text → audio
 *
 * Reads `{ text }` from the JSON body, asks the configured speech model for
 * "wav" output and returns the generated bytes with an `audio/wav` content
 * type.
 *
 * @param request - Incoming request whose JSON body contains `text`.
 * @param config - Handler configuration; `speechModel` must be set for
 *   server-side speech.
 * @returns 501 when no speech model is configured, 400 when the body is not
 *   valid JSON or `text` is not a non-empty string, otherwise the audio
 *   response.
 */
async function handleTTS(request, config) {
  const jsonError = (message, status) => new Response(JSON.stringify({ error: message }), {
    status,
    headers: { "Content-Type": "application/json" }
  });

  if (!config.speechModel) {
    return jsonError("Server speech is not configured. Provide a speechModel or use browser speech only.", 501);
  }

  const outputFormat = "wav";

  let body;
  try {
    body = await request.json();
  } catch {
    return jsonError("Invalid JSON body", 400);
  }

  // `body?.` covers a literal JSON `null`; the type check keeps numbers,
  // arrays and objects from being forwarded to the speech model as text.
  const text = body?.text;
  if (typeof text !== "string" || text.length === 0) {
    return jsonError("No text provided", 400);
  }

  const result = await experimental_generateSpeech({
    model: config.speechModel,
    text,
    outputFormat
  });

  // Copy into a fresh Uint8Array before handing the bytes to Response.
  const audioData = new Uint8Array(result.audio.uint8Array);
  return new Response(audioData, { headers: { "Content-Type": "audio/wav" } });
}
228
+ //#endregion
229
+ //#region src/server/handler.ts
230
/**
 * Create a cursor buddy request handler.
 *
 * The handler responds to three routes based on the last path segment:
 * - /chat - Screenshot + transcript → AI SSE stream
 * - /transcribe - Audio → text
 * - /tts - Text → audio
 *
 * Every route reads a request body, so known routes only accept POST; other
 * methods get a 405 JSON response with an `Allow: POST` header. Unknown
 * paths get a 404 JSON response listing the available routes.
 *
 * @example
 * ```ts
 * import { createCursorBuddyHandler } from "cursor-buddy/server"
 * import { openai } from "@ai-sdk/openai"
 *
 * const cursorBuddy = createCursorBuddyHandler({
 *   model: openai("gpt-4o"),
 *   speechModel: openai.speech("tts-1"), // optional for browser-only speech
 *   transcriptionModel: openai.transcription("whisper-1"),
 * })
 * ```
 *
 * @param config - Handler configuration, passed unchanged to each route.
 * @returns An object with the request `handler` and the same `config`
 *   object that was passed in.
 */
function createCursorBuddyHandler(config) {
  // A Map lookup only matches its own keys, so path segments such as
  // "constructor" can never resolve to a route.
  const routes = new Map([
    ["chat", handleChat],
    ["transcribe", handleTranscribe],
    ["tts", handleTTS]
  ]);

  const handler = async (request) => {
    // filter(Boolean) drops empty segments, so a trailing slash is ignored.
    const pathSegments = new URL(request.url).pathname.split("/").filter(Boolean);
    const route = routes.get(pathSegments[pathSegments.length - 1]);

    if (!route) {
      return new Response(JSON.stringify({
        error: "Not found",
        availableRoutes: [
          "/chat",
          "/transcribe",
          "/tts"
        ]
      }), {
        status: 404,
        headers: { "Content-Type": "application/json" }
      });
    }

    // Without this check a GET would reach a route and fail while the
    // route tries to parse a body that is not there.
    if (request.method !== "POST") {
      return new Response(JSON.stringify({ error: "Method not allowed" }), {
        status: 405,
        headers: {
          "Content-Type": "application/json",
          Allow: "POST"
        }
      });
    }

    return route(request, config);
  };

  return {
    handler,
    config
  };
}
275
+ //#endregion
276
+ export { DEFAULT_SYSTEM_PROMPT, createCursorBuddyHandler, pointTool };
277
+
278
+ //# sourceMappingURL=index.mjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.mjs","names":["transcribe","generateSpeech"],"sources":["../../src/server/system-prompt.ts","../../src/server/routes/chat.ts","../../src/server/routes/transcribe.ts","../../src/server/routes/tts.ts","../../src/server/handler.ts"],"sourcesContent":["export const DEFAULT_SYSTEM_PROMPT = `You are a helpful AI assistant that lives inside a web page as a cursor companion.\n\nYou can see the user's current screen and hear what they say. Respond conversationally. Your response will be spoken aloud with text-to-speech, so keep it natural, concise, and easy to follow.\n\n## Core behavior\n\n- Speak like a helpful companion, not a robot\n- Keep most responses to 1-3 short sentences\n- Focus on what is visible right now on the user's screen\n- If something is unclear or not visible, say that plainly\n- Do not mention screenshots, overlays, annotations, or internal helper data\n- Do not mention marker numbers to the user\n\n## The point tool\n\nYou have a \\`point\\` tool that can visually indicate something on the user's screen.\n\nUse the \\`point\\` tool when the user is asking you to identify, locate, indicate, highlight, or show something visible on screen.\n\nCommon cases where you should use \\`point\\`:\n- the user asks where something is\n- the user asks what to click\n- the user says things like \"show me\", \"point to it\", \"where is it\", \"which one\", \"what should I click\", or \"highlight that\"\n\nDo not use the \\`point\\` tool when spoken guidance alone is enough and the user is not asking you to identify a specific on-screen target.\n\nExamples where spoken guidance alone may be enough:\n- explaining what a page does\n- answering a general question about what is on screen\n- giving brief next-step advice that does not depend on locating a specific element\n\nIf using the \\`point\\` tool:\n- first give the spoken response\n- then call the tool\n- call it at most once per response\n- point only at the most relevant target\n- 
never replace the tool call with plain text like \"(point here)\" or \"I’m pointing at it now\"\n\n\nIf the user asks where something is on screen, what to click, or asks you to point something out, you should usually use the point tool rather than only describing it in words.\nDo not say things like \"I can point to it if you want\" when the user already asked where it is. In that case, answer briefly and use the point tool.\n\n## How to point\n\nPrefer marker-based pointing for interactive elements when a marker is available.\nInteractive elements may include buttons, links, inputs, tabs, menus, toggles, and other clickable controls.\n\nUse:\n- \\`type: \"marker\"\\` for interactive elements that have a marker\n- \\`type: \"coordinates\"\\` only for visible non-interactive content without a marker\n\nNever use coordinates for an interactive element if a marker is available.\n\nCoordinates must refer to the center of the target area.\n\nWhen calling the point tool, choose exactly one mode:\n\n- Marker mode:\n - use type \"marker\"\n - provide markerId\n - do not provide x or y\n\n- Coordinates mode:\n - use type \"coordinates\"\n - provide x and y\n - do not provide markerId\n\nNever combine markerId with x or y in the same tool call.\n\n## What to say\n\nWhen the user asks you to point something out:\n- briefly answer in a natural spoken way\n- then use the tool if the request is about locating or indicating something on screen\n\nGood spoken style:\n- \"Click this button right here.\"\n- \"The error message is over here.\"\n- \"This is the field you want.\"\n- \"That setting is in this section.\"\n\nAvoid:\n- mentioning marker IDs\n- mentioning internal tools\n- describing internal reasoning\n- saying you are looking at a screenshot\n\n## If the target is not clear\n\nIf you cannot confidently find the requested thing on screen:\n- say you cannot see it clearly or cannot find it\n- do not point at a random or uncertain target\n\n## Priority\n\nYour first 
priority is being helpful and correct.\nYour second priority is using the \\`point\\` tool whenever the user is asking you to visually identify a specific thing on screen.\n`\n","import { streamText } from \"ai\"\nimport { pointTool } from \"../../shared/point-tool\"\nimport { DEFAULT_SYSTEM_PROMPT } from \"../system-prompt\"\nimport type { ChatRequestBody, CursorBuddyHandlerConfig } from \"../types\"\n\n/**\n * Handle chat requests: screenshot + transcript → AI SSE stream\n */\nexport async function handleChat(\n request: Request,\n config: CursorBuddyHandlerConfig,\n): Promise<Response> {\n const body = (await request.json()) as ChatRequestBody\n const { screenshot, transcript, history, capture, markerContext } = body\n\n // Resolve system prompt (string or function)\n const systemPrompt =\n typeof config.system === \"function\"\n ? config.system({ defaultPrompt: DEFAULT_SYSTEM_PROMPT })\n : (config.system ?? DEFAULT_SYSTEM_PROMPT)\n\n // Trim history to maxHistory (default 10 exchanges = 20 messages)\n const maxMessages = (config.maxHistory ?? 10) * 2\n const trimmedHistory = history.slice(-maxMessages)\n\n // Build capture context with marker information\n const captureContextParts: string[] = []\n\n if (capture) {\n captureContextParts.push(\n `Screenshot size: ${capture.width}x${capture.height} pixels.`,\n )\n }\n\n if (markerContext) {\n captureContextParts.push(\"\", markerContext)\n }\n\n const captureContext =\n captureContextParts.length > 0 ? captureContextParts.join(\"\\n\") : null\n\n // Build messages array with vision content\n const messages = [\n ...trimmedHistory.map((msg) => ({\n role: msg.role as \"user\" | \"assistant\",\n content: msg.content,\n })),\n {\n role: \"user\" as const,\n content: [\n ...(captureContext\n ? 
[\n {\n type: \"text\" as const,\n text: captureContext,\n },\n ]\n : []),\n {\n type: \"image\" as const,\n image: screenshot,\n },\n {\n type: \"text\" as const,\n text: transcript,\n },\n ],\n },\n ]\n\n const result = streamText({\n model: config.model,\n system: systemPrompt,\n providerOptions: config?.modelProviderMetadata,\n messages,\n tools: {\n point: pointTool,\n ...config.tools,\n },\n experimental_repairToolCall: async ({ toolCall }) => {\n if (toolCall.toolName !== \"point\") return null\n\n let parsed: unknown\n try {\n parsed = JSON.parse(toolCall.input)\n } catch {\n return null\n }\n\n if (!parsed || typeof parsed !== \"object\") return null\n\n const input = parsed as Record<string, unknown>\n\n if (input.type === \"marker\") {\n const repaired = {\n type: \"marker\",\n markerId: input.markerId,\n label: input.label,\n }\n\n return {\n ...toolCall,\n input: JSON.stringify(repaired),\n }\n }\n\n if (input.type === \"coordinates\") {\n const repaired = {\n type: \"coordinates\",\n x: input.x,\n y: input.y,\n label: input.label,\n }\n\n return {\n ...toolCall,\n input: JSON.stringify(repaired),\n }\n }\n\n return null\n },\n })\n\n return result.toUIMessageStreamResponse()\n}\n","import { experimental_transcribe as transcribe } from \"ai\"\nimport type { CursorBuddyHandlerConfig, TranscribeResponse } from \"../types\"\n\n/**\n * Handle transcription requests: audio file → text\n */\nexport async function handleTranscribe(\n request: Request,\n config: CursorBuddyHandlerConfig,\n): Promise<Response> {\n if (!config.transcriptionModel) {\n return new Response(\n JSON.stringify({\n error:\n \"Server transcription is not configured. 
Provide a transcriptionModel or use browser transcription only.\",\n }),\n {\n status: 501,\n headers: { \"Content-Type\": \"application/json\" },\n },\n )\n }\n\n const formData = await request.formData()\n const audioFile = formData.get(\"audio\")\n\n if (!audioFile || !(audioFile instanceof File)) {\n return new Response(JSON.stringify({ error: \"No audio file provided\" }), {\n status: 400,\n headers: { \"Content-Type\": \"application/json\" },\n })\n }\n\n const audioBuffer = await audioFile.arrayBuffer()\n\n const result = await transcribe({\n model: config.transcriptionModel,\n audio: new Uint8Array(audioBuffer),\n })\n\n const response: TranscribeResponse = { text: result.text }\n\n return new Response(JSON.stringify(response), {\n headers: { \"Content-Type\": \"application/json\" },\n })\n}\n","import { experimental_generateSpeech as generateSpeech } from \"ai\"\nimport type { CursorBuddyHandlerConfig, TTSRequestBody } from \"../types\"\n\n/**\n * Handle TTS requests: text → audio\n */\nexport async function handleTTS(\n request: Request,\n config: CursorBuddyHandlerConfig,\n): Promise<Response> {\n if (!config.speechModel) {\n return new Response(\n JSON.stringify({\n error:\n \"Server speech is not configured. 
Provide a speechModel or use browser speech only.\",\n }),\n {\n status: 501,\n headers: { \"Content-Type\": \"application/json\" },\n },\n )\n }\n\n const outputFormat = \"wav\"\n const body = (await request.json()) as TTSRequestBody\n const { text } = body\n\n if (!text) {\n return new Response(JSON.stringify({ error: \"No text provided\" }), {\n status: 400,\n headers: { \"Content-Type\": \"application/json\" },\n })\n }\n\n const result = await generateSpeech({\n model: config.speechModel,\n text,\n outputFormat,\n })\n\n // Create a new ArrayBuffer copy to satisfy TypeScript's strict typing\n const audioData = new Uint8Array(result.audio.uint8Array)\n\n return new Response(audioData, {\n headers: {\n \"Content-Type\": \"audio/wav\",\n },\n })\n}\n","import { handleChat } from \"./routes/chat\"\nimport { handleTranscribe } from \"./routes/transcribe\"\nimport { handleTTS } from \"./routes/tts\"\nimport type { CursorBuddyHandler, CursorBuddyHandlerConfig } from \"./types\"\n\n/**\n * Create a cursor buddy request handler.\n *\n * The handler responds to three routes based on the last path segment:\n * - /chat - Screenshot + transcript → AI SSE stream\n * - /transcribe - Audio → text\n * - /tts - Text → audio\n *\n * @example\n * ```ts\n * import { createCursorBuddyHandler } from \"cursor-buddy/server\"\n * import { openai } from \"@ai-sdk/openai\"\n *\n * const cursorBuddy = createCursorBuddyHandler({\n * model: openai(\"gpt-4o\"),\n * speechModel: openai.speech(\"tts-1\"), // optional for browser-only speech\n * transcriptionModel: openai.transcription(\"whisper-1\"),\n * })\n * ```\n */\nexport function createCursorBuddyHandler(\n config: CursorBuddyHandlerConfig,\n): CursorBuddyHandler {\n const handler = async (request: Request): Promise<Response> => {\n const url = new URL(request.url)\n const pathSegments = url.pathname.split(\"/\").filter(Boolean)\n const route = pathSegments[pathSegments.length - 1]\n\n switch (route) {\n case \"chat\":\n return 
handleChat(request, config)\n\n case \"transcribe\":\n return handleTranscribe(request, config)\n\n case \"tts\":\n return handleTTS(request, config)\n\n default:\n return new Response(\n JSON.stringify({\n error: \"Not found\",\n availableRoutes: [\"/chat\", \"/transcribe\", \"/tts\"],\n }),\n {\n status: 404,\n headers: { \"Content-Type\": \"application/json\" },\n },\n )\n }\n }\n\n return { handler, config }\n}\n"],"mappings":";;;AAAA,MAAa,wBAAwB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;ACQrC,eAAsB,WACpB,SACA,QACmB;CAEnB,MAAM,EAAE,YAAY,YAAY,SAAS,SAAS,kBADpC,MAAM,QAAQ,MAAM;CAIlC,MAAM,eACJ,OAAO,OAAO,WAAW,aACrB,OAAO,OAAO,EAAE,eAAe,uBAAuB,CAAC,GACtD,OAAO,UAAU;CAGxB,MAAM,eAAe,OAAO,cAAc,MAAM;CAChD,MAAM,iBAAiB,QAAQ,MAAM,CAAC,YAAY;CAGlD,MAAM,sBAAgC,EAAE;AAExC,KAAI,QACF,qBAAoB,KAClB,oBAAoB,QAAQ,MAAM,GAAG,QAAQ,OAAO,UACrD;AAGH,KAAI,cACF,qBAAoB,KAAK,IAAI,cAAc;CAG7C,MAAM,iBACJ,oBAAoB,SAAS,IAAI,oBAAoB,KAAK,KAAK,GAAG;CAGpE,MAAM,WAAW,CACf,GAAG,eAAe,KAAK,SAAS;EAC9B,MAAM,IAAI;EACV,SAAS,IAAI;EACd,EAAE,EACH;EACE,MAAM;EACN,SAAS;GACP,GAAI,iBACA,CACE;IACE,MAAM;IACN,MAAM;IACP,CACF,GACD,EAAE;GACN;IACE,MAAM;IACN,OAAO;IACR;GACD;IACE,MAAM;IACN,MAAM;IACP;GACF;EACF,CACF;AAwDD,QAtDe,WAAW;EACxB,OAAO,OAAO;EACd,QAAQ;EACR,iBAAiB,QAAQ;EACzB;EACA,OAAO;GACL,OAAO;GACP,GAAG,OAAO;GACX;EACD,6BAA6B,OAAO,EAAE,eAAe;AACnD,OAAI,SAAS,aAAa,QAAS,QAAO;GAE1C,IAAI;AACJ,OAAI;AACF,aAAS,KAAK,MAAM,SAAS,MAAM;WAC7B;AACN,WAAO;;AAGT,OAAI,CAAC,UAAU,OAAO,WAAW,SAAU,QAAO;GAElD,MAAM,QAAQ;AAEd,OAAI,MAAM,SAAS,UAAU;IAC3B,MAAM,WAAW;KACf,MAAM;KACN,UAAU,MAAM;KAChB,OAAO,MAAM;KACd;AAED,WAAO;KACL,GAAG;KACH,OAAO,KAAK,UAAU,SAAS;KAChC;;AAGH,OAAI,MAAM,SAAS,eAAe;IAChC,MAAM,WAAW;KACf,MAAM;KACN,GAAG,MAAM;KACT,GAAG,MAAM;KACT,OAAO,MAAM;KACd;AAED,WAAO;KACL,GAAG;KACH,OAAO,KAAK,UAAU,SAAS;KAChC;;AAGH,UAAO;;EAEV,CAAC,CAEY,2BAA2B;;;;;;;ACtH3C,eAAsB,iBACpB,SACA,QACmB;AACnB,KAAI,CAAC,OAAO,mBACV,QAAO,IAAI,SACT,KAAK,UAAU,EACb,OACE,2GACH,CAAC,EACF;EACE,QAAQ;EACR,SAAS,EAAE,gBAAgB,oBAAoB;EAChD,CAC
F;CAIH,MAAM,aADW,MAAM,QAAQ,UAAU,EACd,IAAI,QAAQ;AAEvC,KAAI,CAAC,aAAa,EAAE,qBAAqB,MACvC,QAAO,IAAI,SAAS,KAAK,UAAU,EAAE,OAAO,0BAA0B,CAAC,EAAE;EACvE,QAAQ;EACR,SAAS,EAAE,gBAAgB,oBAAoB;EAChD,CAAC;CAGJ,MAAM,cAAc,MAAM,UAAU,aAAa;CAOjD,MAAM,WAA+B,EAAE,OALxB,MAAMA,wBAAW;EAC9B,OAAO,OAAO;EACd,OAAO,IAAI,WAAW,YAAY;EACnC,CAAC,EAEkD,MAAM;AAE1D,QAAO,IAAI,SAAS,KAAK,UAAU,SAAS,EAAE,EAC5C,SAAS,EAAE,gBAAgB,oBAAoB,EAChD,CAAC;;;;;;;ACtCJ,eAAsB,UACpB,SACA,QACmB;AACnB,KAAI,CAAC,OAAO,YACV,QAAO,IAAI,SACT,KAAK,UAAU,EACb,OACE,sFACH,CAAC,EACF;EACE,QAAQ;EACR,SAAS,EAAE,gBAAgB,oBAAoB;EAChD,CACF;CAGH,MAAM,eAAe;CAErB,MAAM,EAAE,SADM,MAAM,QAAQ,MAAM;AAGlC,KAAI,CAAC,KACH,QAAO,IAAI,SAAS,KAAK,UAAU,EAAE,OAAO,oBAAoB,CAAC,EAAE;EACjE,QAAQ;EACR,SAAS,EAAE,gBAAgB,oBAAoB;EAChD,CAAC;CAGJ,MAAM,SAAS,MAAMC,4BAAe;EAClC,OAAO,OAAO;EACd;EACA;EACD,CAAC;CAGF,MAAM,YAAY,IAAI,WAAW,OAAO,MAAM,WAAW;AAEzD,QAAO,IAAI,SAAS,WAAW,EAC7B,SAAS,EACP,gBAAgB,aACjB,EACF,CAAC;;;;;;;;;;;;;;;;;;;;;;;;ACtBJ,SAAgB,yBACd,QACoB;CACpB,MAAM,UAAU,OAAO,YAAwC;EAE7D,MAAM,eADM,IAAI,IAAI,QAAQ,IAAI,CACP,SAAS,MAAM,IAAI,CAAC,OAAO,QAAQ;AAG5D,UAFc,aAAa,aAAa,SAAS,IAEjD;GACE,KAAK,OACH,QAAO,WAAW,SAAS,OAAO;GAEpC,KAAK,aACH,QAAO,iBAAiB,SAAS,OAAO;GAE1C,KAAK,MACH,QAAO,UAAU,SAAS,OAAO;GAEnC,QACE,QAAO,IAAI,SACT,KAAK,UAAU;IACb,OAAO;IACP,iBAAiB;KAAC;KAAS;KAAe;KAAO;IAClD,CAAC,EACF;IACE,QAAQ;IACR,SAAS,EAAE,gBAAgB,oBAAoB;IAChD,CACF;;;AAIP,QAAO;EAAE;EAAS;EAAQ"}
@@ -0,0 +1,44 @@
1
+ import { LanguageModel, SpeechModel, Tool, TranscriptionModel } from "ai";
2
+
3
+ //#region src/server/types.d.ts
4
+ /**
5
+ * Configuration for createCursorBuddyHandler. Only `model` is required; every other field is optional.
6
+ */
7
+ interface CursorBuddyHandlerConfig {
8
+ /** AI SDK language model used for chat (e.g., openai("gpt-4o")). Required. */
9
+ model: LanguageModel;
10
+ modelProviderMetadata?: Record<string, any>; // NOTE(review): undocumented in the source; presumably provider-specific options for the chat model call — confirm against the chat route
11
+ /**
12
+ * AI SDK speech model for TTS (e.g., openai.speech("tts-1")).
13
+ * Optional; may be omitted when clients use browser-only speech.
14
+ */
15
+ speechModel?: SpeechModel;
16
+ /**
17
+ * AI SDK transcription model (e.g., openai.transcription("whisper-1")).
18
+ * Optional; may be omitted when clients use browser-only transcription.
19
+ */
20
+ transcriptionModel?: TranscriptionModel;
21
+ /**
22
+ * System prompt for the AI. Either a fixed string, or a function that receives
23
+ * `{ defaultPrompt }` (the default prompt) and returns the possibly modified prompt string.
24
+ */
25
+ system?: string | ((ctx: {
26
+ defaultPrompt: string;
27
+ }) => string);
28
+ /** AI SDK tools made available to the model, as a string-keyed record */
29
+ tools?: Record<string, Tool>;
30
+ /** Maximum number of conversation history messages to include (default: 10) */
31
+ maxHistory?: number;
32
+ }
33
+ /**
34
+ * Return type of createCursorBuddyHandler: the request handler together with the configuration it uses.
35
+ */
36
+ interface CursorBuddyHandler {
37
+ /** The main request handler: dispatches to the chat, transcribe, or tts route and answers any other route with a 404 JSON error */
38
+ handler: (request: Request) => Promise<Response>;
39
+ /** The resolved configuration, which the handler passes to each route */
40
+ config: CursorBuddyHandlerConfig;
41
+ }
42
+ //#endregion
43
+ export { CursorBuddyHandlerConfig as n, CursorBuddyHandler as t };
44
+ //# sourceMappingURL=types-COQKMo5C.d.mts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types-COQKMo5C.d.mts","names":[],"sources":["../src/server/types.ts"],"mappings":";;;;;AAKA;UAAiB,wBAAA;;EAEf,KAAA,EAAO,aAAA;EACP,qBAAA,GAAwB,MAAA;EAMV;;;;EAAd,WAAA,GAAc,WAAA;EAeA;;;;EATd,kBAAA,GAAqB,kBAAA;EANrB;;;;EAYA,MAAA,cAAoB,GAAA;IAAO,aAAA;EAAA;EAG3B;EAAA,KAAA,GAAQ,MAAA,SAAe,IAAA;EAAA;EAGvB,UAAA;AAAA;;AAMF;;UAAiB,kBAAA;EAEI;EAAnB,OAAA,GAAU,OAAA,EAAS,OAAA,KAAY,OAAA,CAAQ,QAAA;EAAR;EAG/B,MAAA,EAAQ,wBAAA;AAAA"}
package/package.json ADDED
@@ -0,0 +1,108 @@
1
+ {
2
+ "name": "cursor-buddy",
3
+ "version": "0.0.0-beta.2",
4
+ "description": "AI-powered cursor companion for web apps",
5
+ "type": "module",
6
+ "license": "MIT",
7
+ "homepage": "https://github.com/leojuriolli7/cursor-buddy#readme",
8
+ "bugs": {
9
+ "url": "https://github.com/leojuriolli7/cursor-buddy/issues"
10
+ },
11
+ "repository": {
12
+ "type": "git",
13
+ "url": "git+https://github.com/leojuriolli7/cursor-buddy.git",
14
+ "directory": "packages/cursor-buddy"
15
+ },
16
+ "keywords": [
17
+ "ai",
18
+ "assistant",
19
+ "cursor",
20
+ "react",
21
+ "voice",
22
+ "screen-capture"
23
+ ],
24
+ "sideEffects": false,
25
+ "scripts": {
26
+ "build": "tsdown",
27
+ "dev": "tsdown --watch",
28
+ "test": "vitest run",
29
+ "test:watch": "vitest",
30
+ "typecheck": "tsc --noEmit",
31
+ "prepack": "pnpm build",
32
+ "release:check": "pnpm typecheck && npm pack --dry-run",
33
+ "release:publish": "pnpm release:check && npm publish",
34
+ "release:patch": "npm version patch",
35
+ "release:minor": "npm version minor",
36
+ "release:major": "npm version major"
37
+ },
38
+ "files": [
39
+ "dist",
40
+ "README.md"
41
+ ],
42
+ "publishConfig": {
43
+ "access": "public"
44
+ },
45
+ "main": "./dist/index.mjs",
46
+ "module": "./dist/index.mjs",
47
+ "types": "./dist/index.d.mts",
48
+ "exports": {
49
+ ".": {
50
+ "types": "./dist/index.d.mts",
51
+ "default": "./dist/index.mjs"
52
+ },
53
+ "./react": {
54
+ "types": "./dist/react/index.d.mts",
55
+ "default": "./dist/react/index.mjs"
56
+ },
57
+ "./server": {
58
+ "types": "./dist/server/index.d.mts",
59
+ "default": "./dist/server/index.mjs"
60
+ },
61
+ "./server/next": {
62
+ "types": "./dist/server/adapters/next.d.mts",
63
+ "default": "./dist/server/adapters/next.mjs"
64
+ }
65
+ },
66
+ "typesVersions": {
67
+ "*": {
68
+ "react": [
69
+ "./dist/react/index.d.mts"
70
+ ],
71
+ "server": [
72
+ "./dist/server/index.d.mts"
73
+ ],
74
+ "server/next": [
75
+ "./dist/server/adapters/next.d.mts"
76
+ ]
77
+ }
78
+ },
79
+ "dependencies": {
80
+ "@nanostores/react": "^1.1.0",
81
+ "ai": "^6.0.158",
82
+ "html2canvas-pro": "^2.0.2",
83
+ "nanostores": "^1.2.0",
84
+ "zod": "^3.24.0"
85
+ },
86
+ "devDependencies": {
87
+ "@types/react": "^19.0.8",
88
+ "@types/react-dom": "^19.2.3",
89
+ "happy-dom": "^20.9.0",
90
+ "react": "^19.0.0",
91
+ "react-dom": "^19.0.0",
92
+ "tsdown": "^0.21.7",
93
+ "typescript": "^5.7.3",
94
+ "vitest": "^3.2.4"
95
+ },
96
+ "peerDependencies": {
97
+ "react": "^18.0.0 || ^19.0.0",
98
+ "react-dom": "^18.0.0 || ^19.0.0"
99
+ },
100
+ "peerDependenciesMeta": {
101
+ "react": {
102
+ "optional": true
103
+ },
104
+ "react-dom": {
105
+ "optional": true
106
+ }
107
+ }
108
+ }