cursor-buddy 0.0.8 → 0.0.9-beta.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -12
- package/dist/{client-D73KQZf8.mjs → client-CliXcNch.mjs} +296 -389
- package/dist/client-CliXcNch.mjs.map +1 -0
- package/dist/{client-Crn8tW7w.d.mts → client-sjVVGYPU.d.mts} +7 -39
- package/dist/client-sjVVGYPU.d.mts.map +1 -0
- package/dist/index.d.mts +3 -2
- package/dist/index.mjs +3 -2
- package/dist/point-tool-DZJmhD8e.mjs +16 -0
- package/dist/point-tool-DZJmhD8e.mjs.map +1 -0
- package/dist/point-tool-l3FewgM9.d.mts +22 -0
- package/dist/point-tool-l3FewgM9.d.mts.map +1 -0
- package/dist/react/index.d.mts +1 -1
- package/dist/react/index.mjs +1 -1
- package/dist/server/adapters/next.d.mts +2 -3
- package/dist/server/adapters/next.d.mts.map +1 -1
- package/dist/server/adapters/next.mjs +2 -5
- package/dist/server/adapters/next.mjs.map +1 -1
- package/dist/server/index.d.mts +4 -7
- package/dist/server/index.d.mts.map +1 -1
- package/dist/server/index.mjs +127 -39
- package/dist/server/index.mjs.map +1 -1
- package/dist/{types-BxBhjZju.d.mts → types-BJfkApb_.d.mts} +2 -1
- package/dist/types-BJfkApb_.d.mts.map +1 -0
- package/package.json +3 -2
- package/dist/client-Crn8tW7w.d.mts.map +0 -1
- package/dist/client-D73KQZf8.mjs.map +0 -1
- package/dist/types-BxBhjZju.d.mts.map +0 -1
package/dist/server/index.mjs
CHANGED

@@ -1,53 +1,119 @@
-import { experimental_generateSpeech, experimental_transcribe, streamText } from "ai";
+import { t as pointTool } from "../point-tool-DZJmhD8e.mjs";
+import { experimental_generateSpeech, experimental_transcribe, stepCountIs, streamText } from "ai";
 //#region src/server/system-prompt.ts
-/**
-* Default system prompt for the cursor buddy AI.
-* Instructs the model on how to respond conversationally and use POINT tags.
-*/
 const DEFAULT_SYSTEM_PROMPT = `You are a helpful AI assistant that lives inside a web page as a cursor companion.
 
-You can see screenshots of the user's viewport and hear their voice. Respond conversationally — your responses will be spoken aloud via text-to-speech, so keep them concise and natural.
+You can see the user's current screen and hear what they say. Respond conversationally. Your response will be spoken aloud with text-to-speech, so keep it natural, concise, and easy to follow.
+
+## Core behavior
+
+- Speak like a helpful companion, not a robot
+- Keep most responses to 1-3 short sentences
+- Focus on what is visible right now on the user's screen
+- If something is unclear or not visible, say that plainly
+- Do not mention screenshots, overlays, internal helper data, or the DOM snapshot to the user
+- Never describe the internal element IDs to the user - they are for your reference only
+
+## Visual Context: DOM Snapshot
+
+You receive a screenshot of the user's viewport along with a DOM snapshot that lists visible elements in a compact, hierarchical format. The DOM snapshot looks like this:
+
+\`\`\`
+# viewport 1440x900
+@1 nav "Sidebar"
+ @2 link "Projects" [x=24 y=96 w=96 h=28]
+ @3 link "Tasks" [x=24 y=132 w=72 h=28]
+@4 main
+ @5 heading "Q2 Roadmap"
+ @6 textbox "Search tasks" [x=320 y=120 w=280 h=36]
+ @7 button "Filter" [x=612 y=120 w=84 h=36] [expanded=false]
+ @8 checkbox "Selected" [checked=false] [x=340 y=220 w=16 h=16]
+\`\`\`
+
+**How to read the DOM snapshot:**
+- Each element starts with \`@X\` where X is its unique ID
+- The element's role follows (button, link, textbox, heading, nav, main, etc.)
+- Text content is in quotes after the role
+- \`[x=... y=... w=... h=...]\` shows the element's position and size for your reference
+- \`[key=value]\` brackets show element state (checked, expanded, disabled, etc.)
+- Indentation shows parent-child relationships
+
+**The DOM snapshot is invisible to the user.** It helps you understand the page structure and identify specific elements to point at. Never mention it to the user.
+
+## The point tool
 
-## Pointing at Elements
+You have a \`point\` tool that can visually indicate an element on the user's screen.
 
-When you want to direct the user's attention to something on screen, add a pointing tag at the END of your response. Only ONE pointing tag is allowed per response.
+Use the \`point\` tool when the user is asking you to identify, locate, indicate, highlight, or show a specific visible target on screen.
 
-### Interactive Elements (Preferred)
-Interactive elements (buttons, links, inputs, etc.) have invisible reference markers. Use the marker number to point at these:
+Common cases where you should use \`point\`:
+- the user asks where something is
+- the user asks what to click
+- the user says things like "show me", "point to it", "where is it", "which one", "what should I click", or "highlight that"
 
-[POINT:marker_number:label]
+Do not use the \`point\` tool when spoken guidance alone is enough and the user is not asking you to identify a specific on-screen target.
 
-Example: "Click this button right here. [POINT:5:Submit]"
+Examples where spoken guidance alone may be enough:
+- explaining what a page does
+- answering a general question about what is on screen
+- giving brief next-step advice that does not depend on locating a specific element
 
-This is the most accurate pointing method — always prefer it when pointing at interactive elements.
+If using the \`point\` tool:
+- first give the spoken response
+- then call the tool
+- call it at most once per response
+- point only at the most relevant target
+- never replace the tool call with plain text like "(point here)" or "I'm pointing at it now"
 
-### Anywhere Else (Fallback)
-For non-interactive content (text, images, areas without markers), use pixel coordinates:
+If the user asks where something is on screen, what to click, or asks you to point something out, you should usually use the point tool rather than only describing it in words.
+Do not say things like "I can point to it if you want" when the user already asked where it is. In that case, answer briefly and use the point tool.
 
-[POINT:x,y:label]
+## How to point using the point tool
 
-Where x,y are coordinates in screenshot image pixels (top-left origin).
+The point tool accepts an \`elementId\` parameter which is the numeric ID from the DOM snapshot (the number after \`@\`).
 
-Example: "The error message is shown here. [POINT:450,320:Error text]"
+**Example:** To point at the "Filter" button from the example above (which is \`@7\`):
+\`\`\`
+elementId: 7
+label: "Filter button"
+\`\`\`
 
-### Guidelines
-- NEVER mention the numbered markers or annotations to the user — these are invisible helpers for you only
-- Only point when it genuinely helps answer the user's specific question or request
-- Do NOT point at elements just because they have markers — point only when relevant to the conversation
-- Prefer marker-based pointing when the element has a marker and pointing is appropriate
-- Only use coordinates when pointing at unmarked content
-- Use natural descriptions ("this button", "over here", "right there")
-- Coordinates should be the CENTER of the element you're pointing at
-- Keep labels short (2-4 words)
+**Steps:**
+1. Find the element in the DOM snapshot by reading its text/role
+2. Note its \`@X\` ID
+3. Call the point tool with that numeric ID (just the number, without the @ symbol)
+4. Provide a brief, natural label describing what you're pointing at
 
-## Response Style
+The element's position is resolved in real-time when the cursor moves, so it will point accurately even if the page has changed slightly.
 
-- Be concise — aim for 1-3 sentences
-- Sound natural when spoken aloud
-- Avoid technical jargon unless the user is technical
-- If you can't see something clearly, say so
-- Never mention that you're looking at a "screenshot" — say "I can see..." or "Looking at your screen..."
-- Never mention the numbered markers or annotations you see on elements
+## What to say
+
+When the user asks you to point something out:
+- briefly answer in a natural spoken way
+- then use the tool if the request is about locating or indicating something on screen
+
+Good spoken style:
+- "Click this button right here."
+- "The error message is over here."
+- "This is the field you want."
+- "That setting is in this section."
+
+Avoid:
+- mentioning element IDs (like "@5" or "element 12")
+- mentioning internal tools
+- describing internal reasoning
+- saying you are looking at a screenshot
+
+## If the target is not clear
+
+If you cannot confidently find the requested thing on screen:
+- say you cannot see it clearly or cannot find it
+- do not point at a random or uncertain target
+
+## Priority
+
+Your first priority is being helpful and correct.
+Your second priority is using the \`point\` tool whenever the user is asking you to visually identify a specific thing on screen.
 `;
 //#endregion
 //#region src/server/routes/chat.ts
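The rewrite above swaps the old POINT-tag prompt for DOM-snapshot instructions, but the override hook is unchanged: the chat route (next hunk) still resolves `config.system` as either a string or a function that receives the default prompt. A minimal sketch of extending rather than replacing the shipped prompt; the `openai` provider is illustrative and not part of this diff:

```ts
import { createCursorBuddyHandler } from "cursor-buddy/server"
import { openai } from "@ai-sdk/openai" // illustrative model provider

const cursorBuddy = createCursorBuddyHandler({
  model: openai("gpt-4o"),
  // `system` may be a function; it receives the built-in prompt, so app
  // rules can be appended without restating the DOM-snapshot instructions.
  system: ({ defaultPrompt }) => `${defaultPrompt}\n- Always answer in English.`,
})
```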
@@ -55,13 +121,13 @@ Example: "The error message is shown here. [POINT:450,320:Error text]"
 * Handle chat requests: screenshot + transcript → AI SSE stream
 */
 async function handleChat(request, config) {
-const { screenshot, transcript, history, capture, markerContext } = await request.json();
+const { screenshot, transcript, history, capture, domSnapshot } = await request.json();
 const systemPrompt = typeof config.system === "function" ? config.system({ defaultPrompt: DEFAULT_SYSTEM_PROMPT }) : config.system ?? DEFAULT_SYSTEM_PROMPT;
 const maxMessages = (config.maxHistory ?? 10) * 2;
 const trimmedHistory = history.slice(-maxMessages);
 const captureContextParts = [];
 if (capture) captureContextParts.push(`Screenshot size: ${capture.width}x${capture.height} pixels.`);
-if (markerContext) captureContextParts.push("", markerContext);
+if (domSnapshot) captureContextParts.push("", "Visible page structure (each element has @X ID for pointing):", domSnapshot);
 const captureContext = captureContextParts.length > 0 ? captureContextParts.join("\n") : null;
 const messages = [...trimmedHistory.map((msg) => ({
 role: msg.role,
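In practice this means a client now POSTs `domSnapshot` where 0.0.8 sent `markerContext`. A sketch of the expected payload; the field names mirror the destructuring in `handleChat`, while the endpoint path and values are placeholders (the handler only matches the final `/chat` path segment):

```ts
// Hypothetical client call; field names follow handleChat's destructuring.
const res = await fetch("/api/cursor-buddy/chat", {
  method: "POST",
  headers: { "Content-Type": "application/json" },
  body: JSON.stringify({
    screenshot: "data:image/png;base64,...",    // viewport capture, sent as image content
    transcript: "Where is the filter button?",  // user speech, already as text
    history: [{ role: "user", content: "hi" }], // prior turns; trimmed server-side
    capture: { width: 1440, height: 900 },      // screenshot dimensions
    domSnapshot: '# viewport 1440x900\n@7 button "Filter" [x=612 y=120 w=84 h=36]',
  }),
})
```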
@@ -83,13 +149,34 @@ async function handleChat(request, config) {
 }
 ]
 }];
+const tools = {
+point: pointTool,
+...config.tools
+};
+const mustContinueUntilText = ({ steps }) => {
+const lastStep = steps.at(-1);
+if (!lastStep) return false;
+const stepText = typeof lastStep.text === "string" ? lastStep.text.trim() : "";
+const hadToolResults = Array.isArray(lastStep.toolResults) && lastStep.toolResults.length > 0;
+if (stepText.length > 0) return true;
+if (hadToolResults) return false;
+return false;
+};
 return streamText({
 model: config.model,
 system: systemPrompt,
 providerOptions: config?.modelProviderMetadata,
 messages,
-tools: config.tools
-}).toTextStreamResponse();
+tools,
+stopWhen: [mustContinueUntilText, stepCountIs(3)],
+prepareStep: async ({ stepNumber, steps }) => {
+if (stepNumber === 0) return {};
+const previousStep = steps.at(-1);
+const prevText = typeof previousStep?.text === "string" ? previousStep.text.trim() : "";
+if ((previousStep?.toolCalls?.some((call) => call.toolName === "point") ?? false) && prevText.length === 0) return { activeTools: Object.keys(tools).filter((name) => name !== "point") };
+return {};
+}
+}).toUIMessageStreamResponse();
 }
 //#endregion
 //#region src/server/routes/transcribe.ts
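The hunk above replaces the old single-step text stream with a UI-message stream plus two-part loop control: `stopWhen` keeps the loop running until a step actually produced text (capped by `stepCountIs(3)`), and `prepareStep` withholds the `point` tool after a silent tool call so the follow-up step must speak. A condensed, readable sketch of the same control flow, assuming the AI SDK `streamText` options used above; `pointTool`, `model`, `systemPrompt`, and `messages` stand in for the values built in `handleChat`:

```ts
import { stepCountIs, streamText, type StopCondition } from "ai"

const tools = { point: pointTool } // plus any user-supplied config.tools

// Stop once a step contains spoken text; a tool-only step keeps the loop going.
const mustContinueUntilText: StopCondition<typeof tools> = ({ steps }) => {
  const last = steps.at(-1)
  return typeof last?.text === "string" && last.text.trim().length > 0
}

streamText({
  model,                // assumed in scope, as assembled in handleChat
  system: systemPrompt,
  messages,
  tools,
  stopWhen: [mustContinueUntilText, stepCountIs(3)], // hard cap at three steps
  prepareStep: async ({ stepNumber, steps }) => {
    if (stepNumber === 0) return {} // first pass: speak and optionally point
    const prev = steps.at(-1)
    const pointedSilently =
      (prev?.toolCalls?.some((c) => c.toolName === "point") ?? false) &&
      !(typeof prev?.text === "string" && prev.text.trim().length > 0)
    // A silent point forces a text-only follow-up: withhold the point tool.
    if (pointedSilently)
      return { activeTools: Object.keys(tools).filter((n) => n !== "point") }
    return {}
  },
}).toUIMessageStreamResponse()
```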
@@ -132,6 +219,7 @@ async function handleTTS(request, config) {
 const result = await experimental_generateSpeech({
 model: config.speechModel,
 text,
+voice: config?.speechVoice,
 outputFormat
 });
 const audioData = new Uint8Array(result.audio.uint8Array);
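`handleTTS` now forwards an optional voice to `experimental_generateSpeech`, matching the new `speechVoice?: string` config field in the type definitions further down. A sketch of opting in; the provider calls mirror the JSDoc example in this file, and the voice name is illustrative:

```ts
import { createCursorBuddyHandler } from "cursor-buddy/server"
import { openai } from "@ai-sdk/openai" // illustrative provider

const cursorBuddy = createCursorBuddyHandler({
  model: openai("gpt-4o"),
  speechModel: openai.speech("tts-1"),
  speechVoice: "alloy", // new in 0.0.9-beta.1; passed through as `voice`
})
```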
@@ -185,6 +273,6 @@ function createCursorBuddyHandler(config) {
 };
 }
 //#endregion
-export { DEFAULT_SYSTEM_PROMPT, createCursorBuddyHandler };
+export { DEFAULT_SYSTEM_PROMPT, createCursorBuddyHandler, pointTool };
 
 //# sourceMappingURL=index.mjs.map
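With `pointTool` now re-exported from the server entry (bundled out of the new `point-tool-DZJmhD8e.mjs` chunk), the same tool definition can be reused outside `createCursorBuddyHandler`, for example in a custom route built directly on `streamText`. A minimal sketch:

```ts
import { streamText, type LanguageModel } from "ai"
import { pointTool } from "cursor-buddy/server" // re-exported in 0.0.9-beta.1

// Reuse the packaged tool definition in a custom AI route.
function customChat(model: LanguageModel, prompt: string) {
  return streamText({ model, tools: { point: pointTool }, prompt })
}
```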

package/dist/server/index.mjs.map
CHANGED

@@ -1 +1 @@
-
{"version":3,"file":"index.mjs","names":["transcribe","generateSpeech"],"sources":["../../src/server/system-prompt.ts","../../src/server/routes/chat.ts","../../src/server/routes/transcribe.ts","../../src/server/routes/tts.ts","../../src/server/handler.ts"],"sourcesContent":["/**\n * Default system prompt for the cursor buddy AI.\n * Instructs the model on how to respond conversationally and use POINT tags.\n */\nexport const DEFAULT_SYSTEM_PROMPT = `You are a helpful AI assistant that lives inside a web page as a cursor companion.\n\nYou can see screenshots of the user's viewport and hear their voice. Respond conversationally — your responses will be spoken aloud via text-to-speech, so keep them concise and natural.\n\n## Pointing at Elements\n\nWhen you want to direct the user's attention to something on screen, add a pointing tag at the END of your response. Only ONE pointing tag is allowed per response.\n\n### Interactive Elements (Preferred)\nInteractive elements (buttons, links, inputs, etc.) have invisible reference markers. Use the marker number to point at these:\n\n[POINT:marker_number:label]\n\nExample: \"Click this button right here. [POINT:5:Submit]\"\n\nThis is the most accurate pointing method — always prefer it when pointing at interactive elements.\n\n### Anywhere Else (Fallback)\nFor non-interactive content (text, images, areas without markers), use pixel coordinates:\n\n[POINT:x,y:label]\n\nWhere x,y are coordinates in screenshot image pixels (top-left origin).\n\nExample: \"The error message is shown here. [POINT:450,320:Error text]\"\n\n### Guidelines\n- NEVER mention the numbered markers or annotations to the user — these are invisible helpers for you only\n- Only point when it genuinely helps answer the user's specific question or request\n- Do NOT point at elements just because they have markers — point only when relevant to the conversation\n- Prefer marker-based pointing when the element has a marker and pointing is appropriate\n- Only use coordinates when pointing at unmarked content\n- Use natural descriptions (\"this button\", \"over here\", \"right there\")\n- Coordinates should be the CENTER of the element you're pointing at\n- Keep labels short (2-4 words)\n\n## Response Style\n\n- Be concise — aim for 1-3 sentences\n- Sound natural when spoken aloud\n- Avoid technical jargon unless the user is technical\n- If you can't see something clearly, say so\n- Never mention that you're looking at a \"screenshot\" — say \"I can see...\" or \"Looking at your screen...\"\n- Never mention the numbered markers or annotations you see on elements\n`\n","import { streamText } from \"ai\"\nimport { DEFAULT_SYSTEM_PROMPT } from \"../system-prompt\"\nimport type { ChatRequestBody, CursorBuddyHandlerConfig } from \"../types\"\n\n/**\n * Handle chat requests: screenshot + transcript → AI SSE stream\n */\nexport async function handleChat(\n request: Request,\n config: CursorBuddyHandlerConfig,\n): Promise<Response> {\n const body = (await request.json()) as ChatRequestBody\n const { screenshot, transcript, history, capture, markerContext } = body\n\n // Resolve system prompt (string or function)\n const systemPrompt =\n typeof config.system === \"function\"\n ? config.system({ defaultPrompt: DEFAULT_SYSTEM_PROMPT })\n : (config.system ?? DEFAULT_SYSTEM_PROMPT)\n\n // Trim history to maxHistory (default 10 exchanges = 20 messages)\n const maxMessages = (config.maxHistory ?? 
10) * 2\n const trimmedHistory = history.slice(-maxMessages)\n\n // Build capture context with marker information\n const captureContextParts: string[] = []\n\n if (capture) {\n captureContextParts.push(\n `Screenshot size: ${capture.width}x${capture.height} pixels.`,\n )\n }\n\n if (markerContext) {\n captureContextParts.push(\"\", markerContext)\n }\n\n const captureContext =\n captureContextParts.length > 0 ? captureContextParts.join(\"\\n\") : null\n\n // Build messages array with vision content\n const messages = [\n ...trimmedHistory.map((msg) => ({\n role: msg.role as \"user\" | \"assistant\",\n content: msg.content,\n })),\n {\n role: \"user\" as const,\n content: [\n ...(captureContext\n ? [\n {\n type: \"text\" as const,\n text: captureContext,\n },\n ]\n : []),\n {\n type: \"image\" as const,\n image: screenshot,\n },\n {\n type: \"text\" as const,\n text: transcript,\n },\n ],\n },\n ]\n\n const result = streamText({\n model: config.model,\n system: systemPrompt,\n providerOptions: config?.modelProviderMetadata,\n messages,\n tools: config.tools,\n })\n\n return result.toTextStreamResponse()\n}\n","import { experimental_transcribe as transcribe } from \"ai\"\nimport type { CursorBuddyHandlerConfig, TranscribeResponse } from \"../types\"\n\n/**\n * Handle transcription requests: audio file → text\n */\nexport async function handleTranscribe(\n request: Request,\n config: CursorBuddyHandlerConfig,\n): Promise<Response> {\n if (!config.transcriptionModel) {\n return new Response(\n JSON.stringify({\n error:\n \"Server transcription is not configured. Provide a transcriptionModel or use browser transcription only.\",\n }),\n {\n status: 501,\n headers: { \"Content-Type\": \"application/json\" },\n },\n )\n }\n\n const formData = await request.formData()\n const audioFile = formData.get(\"audio\")\n\n if (!audioFile || !(audioFile instanceof File)) {\n return new Response(JSON.stringify({ error: \"No audio file provided\" }), {\n status: 400,\n headers: { \"Content-Type\": \"application/json\" },\n })\n }\n\n const audioBuffer = await audioFile.arrayBuffer()\n\n const result = await transcribe({\n model: config.transcriptionModel,\n audio: new Uint8Array(audioBuffer),\n })\n\n const response: TranscribeResponse = { text: result.text }\n\n return new Response(JSON.stringify(response), {\n headers: { \"Content-Type\": \"application/json\" },\n })\n}\n","import { experimental_generateSpeech as generateSpeech } from \"ai\"\nimport type { CursorBuddyHandlerConfig, TTSRequestBody } from \"../types\"\n\n/**\n * Handle TTS requests: text → audio\n */\nexport async function handleTTS(\n request: Request,\n config: CursorBuddyHandlerConfig,\n): Promise<Response> {\n if (!config.speechModel) {\n return new Response(\n JSON.stringify({\n error:\n \"Server speech is not configured. 
Provide a speechModel or use browser speech only.\",\n }),\n {\n status: 501,\n headers: { \"Content-Type\": \"application/json\" },\n },\n )\n }\n\n const outputFormat = \"wav\"\n const body = (await request.json()) as TTSRequestBody\n const { text } = body\n\n if (!text) {\n return new Response(JSON.stringify({ error: \"No text provided\" }), {\n status: 400,\n headers: { \"Content-Type\": \"application/json\" },\n })\n }\n\n const result = await generateSpeech({\n model: config.speechModel,\n text,\n outputFormat,\n })\n\n // Create a new ArrayBuffer copy to satisfy TypeScript's strict typing\n const audioData = new Uint8Array(result.audio.uint8Array)\n\n return new Response(audioData, {\n headers: {\n \"Content-Type\": \"audio/wav\",\n },\n })\n}\n","import { handleChat } from \"./routes/chat\"\nimport { handleTranscribe } from \"./routes/transcribe\"\nimport { handleTTS } from \"./routes/tts\"\nimport type { CursorBuddyHandler, CursorBuddyHandlerConfig } from \"./types\"\n\n/**\n * Create a cursor buddy request handler.\n *\n * The handler responds to three routes based on the last path segment:\n * - /chat - Screenshot + transcript → AI SSE stream\n * - /transcribe - Audio → text\n * - /tts - Text → audio\n *\n * @example\n * ```ts\n * import { createCursorBuddyHandler } from \"cursor-buddy/server\"\n * import { openai } from \"@ai-sdk/openai\"\n *\n * const cursorBuddy = createCursorBuddyHandler({\n * model: openai(\"gpt-4o\"),\n * speechModel: openai.speech(\"tts-1\"), // optional for browser-only speech\n * transcriptionModel: openai.transcription(\"whisper-1\"),\n * })\n * ```\n */\nexport function createCursorBuddyHandler(\n config: CursorBuddyHandlerConfig,\n): CursorBuddyHandler {\n const handler = async (request: Request): Promise<Response> => {\n const url = new URL(request.url)\n const pathSegments = url.pathname.split(\"/\").filter(Boolean)\n const route = pathSegments[pathSegments.length - 1]\n\n switch (route) {\n case \"chat\":\n return handleChat(request, config)\n\n case \"transcribe\":\n return handleTranscribe(request, config)\n\n case \"tts\":\n return handleTTS(request, config)\n\n default:\n return new Response(\n JSON.stringify({\n error: \"Not found\",\n availableRoutes: [\"/chat\", \"/transcribe\", \"/tts\"],\n }),\n {\n status: 404,\n headers: { \"Content-Type\": \"application/json\" },\n },\n )\n }\n }\n\n return { handler, config 
}\n}\n"],"mappings":";;;;;;AAIA,MAAa,wBAAwB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;ACGrC,eAAsB,WACpB,SACA,QACmB;CAEnB,MAAM,EAAE,YAAY,YAAY,SAAS,SAAS,kBADpC,MAAM,QAAQ,MAAM;CAIlC,MAAM,eACJ,OAAO,OAAO,WAAW,aACrB,OAAO,OAAO,EAAE,eAAe,uBAAuB,CAAC,GACtD,OAAO,UAAU;CAGxB,MAAM,eAAe,OAAO,cAAc,MAAM;CAChD,MAAM,iBAAiB,QAAQ,MAAM,CAAC,YAAY;CAGlD,MAAM,sBAAgC,EAAE;AAExC,KAAI,QACF,qBAAoB,KAClB,oBAAoB,QAAQ,MAAM,GAAG,QAAQ,OAAO,UACrD;AAGH,KAAI,cACF,qBAAoB,KAAK,IAAI,cAAc;CAG7C,MAAM,iBACJ,oBAAoB,SAAS,IAAI,oBAAoB,KAAK,KAAK,GAAG;CAGpE,MAAM,WAAW,CACf,GAAG,eAAe,KAAK,SAAS;EAC9B,MAAM,IAAI;EACV,SAAS,IAAI;EACd,EAAE,EACH;EACE,MAAM;EACN,SAAS;GACP,GAAI,iBACA,CACE;IACE,MAAM;IACN,MAAM;IACP,CACF,GACD,EAAE;GACN;IACE,MAAM;IACN,OAAO;IACR;GACD;IACE,MAAM;IACN,MAAM;IACP;GACF;EACF,CACF;AAUD,QARe,WAAW;EACxB,OAAO,OAAO;EACd,QAAQ;EACR,iBAAiB,QAAQ;EACzB;EACA,OAAO,OAAO;EACf,CAAC,CAEY,sBAAsB;;;;;;;ACvEtC,eAAsB,iBACpB,SACA,QACmB;AACnB,KAAI,CAAC,OAAO,mBACV,QAAO,IAAI,SACT,KAAK,UAAU,EACb,OACE,2GACH,CAAC,EACF;EACE,QAAQ;EACR,SAAS,EAAE,gBAAgB,oBAAoB;EAChD,CACF;CAIH,MAAM,aADW,MAAM,QAAQ,UAAU,EACd,IAAI,QAAQ;AAEvC,KAAI,CAAC,aAAa,EAAE,qBAAqB,MACvC,QAAO,IAAI,SAAS,KAAK,UAAU,EAAE,OAAO,0BAA0B,CAAC,EAAE;EACvE,QAAQ;EACR,SAAS,EAAE,gBAAgB,oBAAoB;EAChD,CAAC;CAGJ,MAAM,cAAc,MAAM,UAAU,aAAa;CAOjD,MAAM,WAA+B,EAAE,OALxB,MAAMA,wBAAW;EAC9B,OAAO,OAAO;EACd,OAAO,IAAI,WAAW,YAAY;EACnC,CAAC,EAEkD,MAAM;AAE1D,QAAO,IAAI,SAAS,KAAK,UAAU,SAAS,EAAE,EAC5C,SAAS,EAAE,gBAAgB,oBAAoB,EAChD,CAAC;;;;;;;ACtCJ,eAAsB,UACpB,SACA,QACmB;AACnB,KAAI,CAAC,OAAO,YACV,QAAO,IAAI,SACT,KAAK,UAAU,EACb,OACE,sFACH,CAAC,EACF;EACE,QAAQ;EACR,SAAS,EAAE,gBAAgB,oBAAoB;EAChD,CACF;CAGH,MAAM,eAAe;CAErB,MAAM,EAAE,SADM,MAAM,QAAQ,MAAM;AAGlC,KAAI,CAAC,KACH,QAAO,IAAI,SAAS,KAAK,UAAU,EAAE,OAAO,oBAAoB,CAAC,EAAE;EACjE,QAAQ;EACR,SAAS,EAAE,gBAAgB,oBAAoB;EAChD,CAAC;CAGJ,MAAM,SAAS,MAAMC,4BAAe;EAClC,OAAO,OAAO;EACd;EACA;EACD,CAAC;CAGF,MAAM,YAAY,IAAI,WAAW,OAAO,MAAM,WAAW;AAEzD,QAAO,IAAI,SAAS,WAAW,EAC7B,SAAS,EACP,gBAAgB,aACjB,EACF,CAAC;;;;;;;;;;;;;;;;;;;;;;;;ACtBJ,SAAgB,yBACd,QACoB;CACpB,MAAM,UAAU,OAAO,YAAwC;EAE7D,MAAM,eADM,IAAI,IAAI,QAAQ,IAAI,CACP,SAAS,MAAM,IAAI,CAAC,OAAO,QAAQ;AAG5D,UAFc,aAAa,aAAa,SAAS,IAEjD;GACE,KAAK,OACH,QAAO,WAAW,SAAS,OAAO;GAEpC,KAAK,aACH,QAAO,iBAAiB,SAAS,OAAO;GAE1C,KAAK,MACH,QAAO,UAAU,SAAS,OAAO;GAEnC,QACE,QAAO,IAAI,SACT,KAAK,UAAU;IACb,OAAO;IACP,iBAAiB;KAAC;KAAS;KAAe;KAAO;IAClD,CAAC,EACF;IACE,QAAQ;IACR,SAAS,EAAE,gBAAgB,oBAAoB;IAChD,CACF;;;AAIP,QAAO;EAAE;EAAS;EAAQ"}
+
{"version":3,"file":"index.mjs","names":["transcribe","generateSpeech"],"sources":["../../src/server/system-prompt.ts","../../src/server/routes/chat.ts","../../src/server/routes/transcribe.ts","../../src/server/routes/tts.ts","../../src/server/handler.ts"],"sourcesContent":["export const DEFAULT_SYSTEM_PROMPT = `You are a helpful AI assistant that lives inside a web page as a cursor companion.\n\nYou can see the user's current screen and hear what they say. Respond conversationally. Your response will be spoken aloud with text-to-speech, so keep it natural, concise, and easy to follow.\n\n## Core behavior\n\n- Speak like a helpful companion, not a robot\n- Keep most responses to 1-3 short sentences\n- Focus on what is visible right now on the user's screen\n- If something is unclear or not visible, say that plainly\n- Do not mention screenshots, overlays, internal helper data, or the DOM snapshot to the user\n- Never describe the internal element IDs to the user - they are for your reference only\n\n## Visual Context: DOM Snapshot\n\nYou receive a screenshot of the user's viewport along with a DOM snapshot that lists visible elements in a compact, hierarchical format. The DOM snapshot looks like this:\n\n\\`\\`\\`\n# viewport 1440x900\n@1 nav \"Sidebar\"\n @2 link \"Projects\" [x=24 y=96 w=96 h=28]\n @3 link \"Tasks\" [x=24 y=132 w=72 h=28]\n@4 main\n @5 heading \"Q2 Roadmap\"\n @6 textbox \"Search tasks\" [x=320 y=120 w=280 h=36]\n @7 button \"Filter\" [x=612 y=120 w=84 h=36] [expanded=false]\n @8 checkbox \"Selected\" [checked=false] [x=340 y=220 w=16 h=16]\n\\`\\`\\`\n\n**How to read the DOM snapshot:**\n- Each element starts with \\`@X\\` where X is its unique ID\n- The element's role follows (button, link, textbox, heading, nav, main, etc.)\n- Text content is in quotes after the role\n- \\`[x=... y=... w=... h=...]\\` shows the element's position and size for your reference\n- \\`[key=value]\\` brackets show element state (checked, expanded, disabled, etc.)\n- Indentation shows parent-child relationships\n\n**The DOM snapshot is invisible to the user.** It helps you understand the page structure and identify specific elements to point at. 
Never mention it to the user.\n\n## The point tool\n\nYou have a \\`point\\` tool that can visually indicate an element on the user's screen.\n\nUse the \\`point\\` tool when the user is asking you to identify, locate, indicate, highlight, or show a specific visible target on screen.\n\nCommon cases where you should use \\`point\\`:\n- the user asks where something is\n- the user asks what to click\n- the user says things like \"show me\", \"point to it\", \"where is it\", \"which one\", \"what should I click\", or \"highlight that\"\n\nDo not use the \\`point\\` tool when spoken guidance alone is enough and the user is not asking you to identify a specific on-screen target.\n\nExamples where spoken guidance alone may be enough:\n- explaining what a page does\n- answering a general question about what is on screen\n- giving brief next-step advice that does not depend on locating a specific element\n\nIf using the \\`point\\` tool:\n- first give the spoken response\n- then call the tool\n- call it at most once per response\n- point only at the most relevant target\n- never replace the tool call with plain text like \"(point here)\" or \"I'm pointing at it now\"\n\nIf the user asks where something is on screen, what to click, or asks you to point something out, you should usually use the point tool rather than only describing it in words.\nDo not say things like \"I can point to it if you want\" when the user already asked where it is. In that case, answer briefly and use the point tool.\n\n## How to point using the point tool\n\nThe point tool accepts an \\`elementId\\` parameter which is the numeric ID from the DOM snapshot (the number after \\`@\\`).\n\n**Example:** To point at the \"Filter\" button from the example above (which is \\`@7\\`):\n\\`\\`\\`\nelementId: 7\nlabel: \"Filter button\"\n\\`\\`\\`\n\n**Steps:**\n1. Find the element in the DOM snapshot by reading its text/role\n2. Note its \\`@X\\` ID\n3. Call the point tool with that numeric ID (just the number, without the @ symbol)\n4. 
Provide a brief, natural label describing what you're pointing at\n\nThe element's position is resolved in real-time when the cursor moves, so it will point accurately even if the page has changed slightly.\n\n## What to say\n\nWhen the user asks you to point something out:\n- briefly answer in a natural spoken way\n- then use the tool if the request is about locating or indicating something on screen\n\nGood spoken style:\n- \"Click this button right here.\"\n- \"The error message is over here.\"\n- \"This is the field you want.\"\n- \"That setting is in this section.\"\n\nAvoid:\n- mentioning element IDs (like \"@5\" or \"element 12\")\n- mentioning internal tools\n- describing internal reasoning\n- saying you are looking at a screenshot\n\n## If the target is not clear\n\nIf you cannot confidently find the requested thing on screen:\n- say you cannot see it clearly or cannot find it\n- do not point at a random or uncertain target\n\n## Priority\n\nYour first priority is being helpful and correct.\nYour second priority is using the \\`point\\` tool whenever the user is asking you to visually identify a specific thing on screen.\n`\n","import { type StopCondition, stepCountIs, streamText } from \"ai\"\nimport { pointTool } from \"../../shared/point-tool\"\nimport { DEFAULT_SYSTEM_PROMPT } from \"../system-prompt\"\nimport type { ChatRequestBody, CursorBuddyHandlerConfig } from \"../types\"\n\n/**\n * Handle chat requests: screenshot + transcript → AI SSE stream\n */\nexport async function handleChat(\n request: Request,\n config: CursorBuddyHandlerConfig,\n): Promise<Response> {\n const body = (await request.json()) as ChatRequestBody\n const { screenshot, transcript, history, capture, domSnapshot } = body\n\n // Resolve system prompt (string or function)\n const systemPrompt =\n typeof config.system === \"function\"\n ? config.system({ defaultPrompt: DEFAULT_SYSTEM_PROMPT })\n : (config.system ?? DEFAULT_SYSTEM_PROMPT)\n\n // Trim history to maxHistory (default 10 exchanges = 20 messages)\n const maxMessages = (config.maxHistory ?? 10) * 2\n const trimmedHistory = history.slice(-maxMessages)\n\n // Build capture context with DOM snapshot\n const captureContextParts: string[] = []\n\n if (capture) {\n captureContextParts.push(\n `Screenshot size: ${capture.width}x${capture.height} pixels.`,\n )\n }\n\n if (domSnapshot) {\n captureContextParts.push(\n \"\",\n \"Visible page structure (each element has @X ID for pointing):\",\n domSnapshot,\n )\n }\n\n const captureContext =\n captureContextParts.length > 0 ? captureContextParts.join(\"\\n\") : null\n\n // Build messages array with vision content\n const messages = [\n ...trimmedHistory.map((msg) => ({\n role: msg.role as \"user\" | \"assistant\",\n content: msg.content,\n })),\n {\n role: \"user\" as const,\n content: [\n ...(captureContext\n ? [\n {\n type: \"text\" as const,\n text: captureContext,\n },\n ]\n : []),\n { type: \"image\" as const, image: screenshot },\n { type: \"text\" as const, text: transcript },\n ],\n },\n ]\n\n const tools = {\n point: pointTool,\n ...config.tools,\n }\n\n const mustContinueUntilText: StopCondition<typeof tools> = ({ steps }) => {\n const lastStep = steps.at(-1)\n if (!lastStep) return false\n\n const stepText =\n typeof lastStep.text === \"string\" ? 
lastStep.text.trim() : \"\"\n const hadToolResults =\n Array.isArray(lastStep.toolResults) && lastStep.toolResults.length > 0\n\n // Stop only after we have actual assistant text.\n // If the step was tool-only, continue the loop.\n if (stepText.length > 0) return true\n if (hadToolResults) return false\n\n return false\n }\n\n const result = streamText({\n model: config.model,\n system: systemPrompt,\n providerOptions: config?.modelProviderMetadata,\n messages,\n tools,\n\n // Allow a follow-up step after tool use instead of the default single step.\n stopWhen: [mustContinueUntilText, stepCountIs(3)],\n\n prepareStep: async ({ stepNumber, steps }) => {\n // Normal first pass: let the model speak and optionally point.\n if (stepNumber === 0) {\n return {}\n }\n\n const previousStep = steps.at(-1)\n\n const prevText =\n typeof previousStep?.text === \"string\" ? previousStep.text.trim() : \"\"\n\n const usedPoint =\n previousStep?.toolCalls?.some((call) => call.toolName === \"point\") ??\n false\n\n // If the previous step pointed but did not speak, force the next step\n // to be text-only by removing the point tool.\n if (usedPoint && prevText.length === 0) {\n const toolNames = Object.keys(tools) as Array<keyof typeof tools>\n\n return {\n activeTools: toolNames.filter((name) => name !== \"point\"),\n }\n }\n\n return {}\n },\n })\n\n return result.toUIMessageStreamResponse()\n}\n","import { experimental_transcribe as transcribe } from \"ai\"\nimport type { CursorBuddyHandlerConfig, TranscribeResponse } from \"../types\"\n\n/**\n * Handle transcription requests: audio file → text\n */\nexport async function handleTranscribe(\n request: Request,\n config: CursorBuddyHandlerConfig,\n): Promise<Response> {\n if (!config.transcriptionModel) {\n return new Response(\n JSON.stringify({\n error:\n \"Server transcription is not configured. Provide a transcriptionModel or use browser transcription only.\",\n }),\n {\n status: 501,\n headers: { \"Content-Type\": \"application/json\" },\n },\n )\n }\n\n const formData = await request.formData()\n const audioFile = formData.get(\"audio\")\n\n if (!audioFile || !(audioFile instanceof File)) {\n return new Response(JSON.stringify({ error: \"No audio file provided\" }), {\n status: 400,\n headers: { \"Content-Type\": \"application/json\" },\n })\n }\n\n const audioBuffer = await audioFile.arrayBuffer()\n\n const result = await transcribe({\n model: config.transcriptionModel,\n audio: new Uint8Array(audioBuffer),\n })\n\n const response: TranscribeResponse = { text: result.text }\n\n return new Response(JSON.stringify(response), {\n headers: { \"Content-Type\": \"application/json\" },\n })\n}\n","import { experimental_generateSpeech as generateSpeech } from \"ai\"\nimport type { CursorBuddyHandlerConfig, TTSRequestBody } from \"../types\"\n\n/**\n * Handle TTS requests: text → audio\n */\nexport async function handleTTS(\n request: Request,\n config: CursorBuddyHandlerConfig,\n): Promise<Response> {\n if (!config.speechModel) {\n return new Response(\n JSON.stringify({\n error:\n \"Server speech is not configured. 
Provide a speechModel or use browser speech only.\",\n }),\n {\n status: 501,\n headers: { \"Content-Type\": \"application/json\" },\n },\n )\n }\n\n const outputFormat = \"wav\"\n const body = (await request.json()) as TTSRequestBody\n const { text } = body\n\n if (!text) {\n return new Response(JSON.stringify({ error: \"No text provided\" }), {\n status: 400,\n headers: { \"Content-Type\": \"application/json\" },\n })\n }\n\n const result = await generateSpeech({\n model: config.speechModel,\n text,\n voice: config?.speechVoice,\n outputFormat,\n })\n\n // Create a new ArrayBuffer copy to satisfy TypeScript's strict typing\n const audioData = new Uint8Array(result.audio.uint8Array)\n\n return new Response(audioData, {\n headers: {\n \"Content-Type\": \"audio/wav\",\n },\n })\n}\n","import { handleChat } from \"./routes/chat\"\nimport { handleTranscribe } from \"./routes/transcribe\"\nimport { handleTTS } from \"./routes/tts\"\nimport type { CursorBuddyHandler, CursorBuddyHandlerConfig } from \"./types\"\n\n/**\n * Create a cursor buddy request handler.\n *\n * The handler responds to three routes based on the last path segment:\n * - /chat - Screenshot + transcript → AI SSE stream\n * - /transcribe - Audio → text\n * - /tts - Text → audio\n *\n * @example\n * ```ts\n * import { createCursorBuddyHandler } from \"cursor-buddy/server\"\n * import { openai } from \"@ai-sdk/openai\"\n *\n * const cursorBuddy = createCursorBuddyHandler({\n * model: openai(\"gpt-4o\"),\n * speechModel: openai.speech(\"tts-1\"), // optional for browser-only speech\n * transcriptionModel: openai.transcription(\"whisper-1\"),\n * })\n * ```\n */\nexport function createCursorBuddyHandler(\n config: CursorBuddyHandlerConfig,\n): CursorBuddyHandler {\n const handler = async (request: Request): Promise<Response> => {\n const url = new URL(request.url)\n const pathSegments = url.pathname.split(\"/\").filter(Boolean)\n const route = pathSegments[pathSegments.length - 1]\n\n switch (route) {\n case \"chat\":\n return handleChat(request, config)\n\n case \"transcribe\":\n return handleTranscribe(request, config)\n\n case \"tts\":\n return handleTTS(request, config)\n\n default:\n return new Response(\n JSON.stringify({\n error: \"Not found\",\n availableRoutes: [\"/chat\", \"/transcribe\", \"/tts\"],\n }),\n {\n status: 404,\n headers: { \"Content-Type\": \"application/json\" },\n },\n )\n }\n }\n\n return { handler, config 
}\n}\n"],"mappings":";;;AAAA,MAAa,wBAAwB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;ACQrC,eAAsB,WACpB,SACA,QACmB;CAEnB,MAAM,EAAE,YAAY,YAAY,SAAS,SAAS,gBADpC,MAAM,QAAQ,MAAM;CAIlC,MAAM,eACJ,OAAO,OAAO,WAAW,aACrB,OAAO,OAAO,EAAE,eAAe,uBAAuB,CAAC,GACtD,OAAO,UAAU;CAGxB,MAAM,eAAe,OAAO,cAAc,MAAM;CAChD,MAAM,iBAAiB,QAAQ,MAAM,CAAC,YAAY;CAGlD,MAAM,sBAAgC,EAAE;AAExC,KAAI,QACF,qBAAoB,KAClB,oBAAoB,QAAQ,MAAM,GAAG,QAAQ,OAAO,UACrD;AAGH,KAAI,YACF,qBAAoB,KAClB,IACA,iEACA,YACD;CAGH,MAAM,iBACJ,oBAAoB,SAAS,IAAI,oBAAoB,KAAK,KAAK,GAAG;CAGpE,MAAM,WAAW,CACf,GAAG,eAAe,KAAK,SAAS;EAC9B,MAAM,IAAI;EACV,SAAS,IAAI;EACd,EAAE,EACH;EACE,MAAM;EACN,SAAS;GACP,GAAI,iBACA,CACE;IACE,MAAM;IACN,MAAM;IACP,CACF,GACD,EAAE;GACN;IAAE,MAAM;IAAkB,OAAO;IAAY;GAC7C;IAAE,MAAM;IAAiB,MAAM;IAAY;GAC5C;EACF,CACF;CAED,MAAM,QAAQ;EACZ,OAAO;EACP,GAAG,OAAO;EACX;CAED,MAAM,yBAAsD,EAAE,YAAY;EACxE,MAAM,WAAW,MAAM,GAAG,GAAG;AAC7B,MAAI,CAAC,SAAU,QAAO;EAEtB,MAAM,WACJ,OAAO,SAAS,SAAS,WAAW,SAAS,KAAK,MAAM,GAAG;EAC7D,MAAM,iBACJ,MAAM,QAAQ,SAAS,YAAY,IAAI,SAAS,YAAY,SAAS;AAIvE,MAAI,SAAS,SAAS,EAAG,QAAO;AAChC,MAAI,eAAgB,QAAO;AAE3B,SAAO;;AA0CT,QAvCe,WAAW;EACxB,OAAO,OAAO;EACd,QAAQ;EACR,iBAAiB,QAAQ;EACzB;EACA;EAGA,UAAU,CAAC,uBAAuB,YAAY,EAAE,CAAC;EAEjD,aAAa,OAAO,EAAE,YAAY,YAAY;AAE5C,OAAI,eAAe,EACjB,QAAO,EAAE;GAGX,MAAM,eAAe,MAAM,GAAG,GAAG;GAEjC,MAAM,WACJ,OAAO,cAAc,SAAS,WAAW,aAAa,KAAK,MAAM,GAAG;AAQtE,QALE,cAAc,WAAW,MAAM,SAAS,KAAK,aAAa,QAAQ,IAClE,UAIe,SAAS,WAAW,EAGnC,QAAO,EACL,aAHgB,OAAO,KAAK,MAAM,CAGX,QAAQ,SAAS,SAAS,QAAQ,EAC1D;AAGH,UAAO,EAAE;;EAEZ,CAAC,CAEY,2BAA2B;;;;;;;AC3H3C,eAAsB,iBACpB,SACA,QACmB;AACnB,KAAI,CAAC,OAAO,mBACV,QAAO,IAAI,SACT,KAAK,UAAU,EACb,OACE,2GACH,CAAC,EACF;EACE,QAAQ;EACR,SAAS,EAAE,gBAAgB,oBAAoB;EAChD,CACF;CAIH,MAAM,aADW,MAAM,QAAQ,UAAU,EACd,IAAI,QAAQ;AAEvC,KAAI,CAAC,aAAa,EAAE,qBAAqB,MACvC,QAAO,IAAI,SAAS,KAAK,UAAU,EAAE,OAAO,0BAA0B,CAAC,EAAE;EACvE,QAAQ;EACR,SAAS,EAAE,gBAAgB,oBAAoB;EAChD,CAAC;CAGJ,MAAM,cAAc,MAAM,UAAU,aAAa;CAOjD,MAAM,WAA+B,EAAE,OALxB,MAAMA,wBAAW;EAC9B,OAAO,OAAO;EACd,OAAO,IAAI,WAAW,YAAY;EACnC,CAAC,EAEkD,MAAM;AAE1D,QAAO,IAAI,SAAS,KAAK,UAAU,SAAS,EAAE,EAC5C,SAAS,EAAE,gBAAgB,oBAAoB,EAChD,CAAC;;;;;;;ACtCJ,eAAsB,UACpB,SACA,QACmB;AACnB,KAAI,CAAC,OAAO,YACV,QAAO,IAAI,SACT,KAAK,UAAU,EACb,OACE,sFACH,CAAC,EACF;EACE,QAAQ;EACR,SAAS,EAAE,gBAAgB,oBAAoB;EAChD,CACF;CAGH,MAAM,eAAe;CAErB,MAAM,EAAE,SADM,MAAM,QAAQ,MAAM;AAGlC,KAAI,CAAC,KACH,QAAO,IAAI,SAAS,KAAK,UAAU,EAAE,OAAO,oBAAoB,CAAC,EAAE;EACjE,QAAQ;EACR,SAAS,EAAE,gBAAgB,oBAAoB;EAChD,CAAC;CAGJ,MAAM,SAAS,MAAMC,4BAAe;EAClC,OAAO,OAAO;EACd;EACA,OAAO,QAAQ;EACf;EACD,CAAC;CAGF,MAAM,YAAY,IAAI,WAAW,OAAO,MAAM,WAAW;AAEzD,QAAO,IAAI,SAAS,WAAW,EAC7B,SAAS,EACP,gBAAgB,aACjB,EACF,CAAC;;;;;;;;;;;;;;;;;;;;;;;;ACvBJ,SAAgB,yBACd,QACoB;CACpB,MAAM,UAAU,OAAO,YAAwC;EAE7D,MAAM,eADM,IAAI,IAAI,QAAQ,IAAI,CACP,SAAS,MAAM,IAAI,CAAC,OAAO,QAAQ;AAG5D,UAFc,aAAa,aAAa,SAAS,IAEjD;GACE,KAAK,OACH,QAAO,WAAW,SAAS,OAAO;GAEpC,KAAK,aACH,QAAO,iBAAiB,SAAS,OAAO;GAE1C,KAAK,MACH,QAAO,UAAU,SAAS,OAAO;GAEnC,QACE,QAAO,IAAI,SACT,KAAK,UAAU;IACb,OAAO;IACP,iBAAiB;KAAC;KAAS;KAAe;KAAO;IAClD,CAAC,EACF;IACE,QAAQ;IACR,SAAS,EAAE,gBAAgB,oBAAoB;IAChD,CACF;;;AAIP,QAAO;EAAE;EAAS;EAAQ"}

package/dist/{types-BxBhjZju.d.mts → types-BJfkApb_.d.mts}
CHANGED

@@ -13,6 +13,7 @@ interface CursorBuddyHandlerConfig {
 * Optional when clients use browser-only speech.
 */
 speechModel?: SpeechModel;
+speechVoice?: string;
 /**
 * AI SDK transcription model (e.g., openai.transcription("whisper-1")).
 * Optional when clients use browser-only transcription.
@@ -41,4 +42,4 @@ interface CursorBuddyHandler {
 }
 //#endregion
 export { CursorBuddyHandlerConfig as n, CursorBuddyHandler as t };
-//# sourceMappingURL=types-BxBhjZju.d.mts.map
+//# sourceMappingURL=types-BJfkApb_.d.mts.map

package/dist/types-BJfkApb_.d.mts.map
ADDED

@@ -0,0 +1 @@
+
{"version":3,"file":"types-BJfkApb_.d.mts","names":[],"sources":["../src/server/types.ts"],"mappings":";;;;;AAKA;UAAiB,wBAAA;;EAEf,KAAA,EAAO,aAAA;EACP,qBAAA,GAAwB,MAAA;EAMV;;;;EAAd,WAAA,GAAc,WAAA;EACd,WAAA;EARA;;;;EAcA,kBAAA,GAAqB,kBAAA;EAPP;;;;EAad,MAAA,cAAoB,GAAA;IAAO,aAAA;EAAA;EAG3B;EAAA,KAAA,GAAQ,MAAA,SAAe,IAAA;EAAA;EAGvB,UAAA;AAAA;;AAMF;;UAAiB,kBAAA;EAEI;EAAnB,OAAA,GAAU,OAAA,EAAS,OAAA,KAAY,OAAA,CAAQ,QAAA;EAAR;EAG/B,MAAA,EAAQ,wBAAA;AAAA"}
package/package.json
CHANGED

@@ -1,6 +1,6 @@
 {
 "name": "cursor-buddy",
-"version": "0.0.8",
+"version": "0.0.9-beta.1",
 "description": "AI-powered cursor companion for web apps",
 "type": "module",
 "license": "MIT",
@@ -80,7 +80,8 @@
 "@nanostores/react": "^1.1.0",
 "ai": "^6.0.158",
 "html2canvas-pro": "^2.0.2",
-"nanostores": "^1.2.0"
+"nanostores": "^1.2.0",
+"zod": "^3.24.0"
 },
 "devDependencies": {
 "@types/react": "^19.0.8",

package/dist/client-Crn8tW7w.d.mts.map
DELETED

@@ -1 +0,0 @@
-
{"version":3,"file":"client-Crn8tW7w.d.mts","names":[],"sources":["../src/core/utils/elements.ts","../src/core/types.ts","../src/core/client.ts"],"mappings":";;AAgEA;;;;;;UAAiB,aAAA;EAMf;EAJA,EAAA;EAMA;EAJA,OAAA,EAAS,OAAA;EAIE;EAFX,IAAA,EAAM,OAAA;EAQa;EANnB,WAAA;AAAA;;;;KAMU,SAAA,GAAY,GAAA,SAAY,aAAA;;;;AAdpC;;KC7DY,UAAA;;;;KAKA,UAAA;EACN,IAAA;AAAA;EACA,IAAA;AAAA;EACA,IAAA;AAAA;EACA,IAAA;AAAA;EACA,IAAA;EAAe,KAAA,EAAO,KAAA;AAAA;;;AAV5B;;;;UAkBiB,cAAA;EAbL;EAeV,CAAA;;EAEA,CAAA;EAhBI;EAkBJ,KAAA;AAAA;;;AAaF;UAAiB,KAAA;EACf,CAAA;EACA,CAAA;AAAA;AAMF;;;AAAA,UAAiB,gBAAA;EAEf;EAAA,SAAA;EAIA;EAFA,KAAA;EAMA;EAJA,MAAA;EAIc;EAFd,aAAA;EAWyC;EATzC,cAAA;AAAA;;;;UASe,yBAAA,SAAkC,gBAAA;EAIpC;EAFb,SAAA,EAFyC,SAAA;EAkBX;EAd9B,aAAA;AAAA;;;;KAcU,oBAAA;;;;UAKK,8BAAA;EAgDA;;;;;;;;;;;;;EAlCf,IAAA,GAAO,oBAAA;AAAA;;;;UAMQ,uBAAA;EAsCA;;;;;;;;;;;EA1Bf,IAAA,GAAO,oBAAA;EA2BU;;;;;AAOnB;;;EAxBE,cAAA;AAAA;;;;UAMe,gBAAA;EACf,KAAA,IAAS,OAAA;EACT,IAAA,IAAQ,OAAA,CAAQ,IAAA;EAChB,OAAA,CAAQ,QAAA,GAAW,KAAA;EACnB,OAAA;AAAA;;AAyBF;;UAnBiB,iBAAA;EACf,IAAA,CAAK,IAAA,EAAM,IAAA,EAAM,MAAA,GAAS,WAAA,GAAc,OAAA;EACxC,IAAA;AAAA;;;;UAMe,qBAAA;EACf,WAAA;EACA,KAAA,IAAS,OAAA;EACT,IAAA,IAAQ,OAAA;EACR,SAAA,CAAU,QAAA,GAAW,IAAA;EACrB,OAAA;AAAA;;;;UAMe,iBAAA;EACf,WAAA;EACA,KAAA,CAAM,IAAA,UAAc,MAAA,GAAS,WAAA,GAAc,OAAA;EAC3C,IAAA;AAAA;;;;UAMe,iBAAA;EACf,OAAA,IAAW,OAAA,CAAQ,gBAAA;EACnB,gBAAA,IAAoB,OAAA,CAAQ,yBAAA;AAAA;;;;UAMb,qBAAA;EACf,OAAA,CAAQ,MAAA,EAAQ,cAAA;EAChB,OAAA;EACA,UAAA;EACA,SAAA,CAAU,QAAA;EACV,oBAAA;AAAA;;;;UAMe,mBAAA;EACf,YAAA,GAAe,gBAAA;EACf,aAAA,GAAgB,iBAAA;EAChB,iBAAA,GAAoB,qBAAA;EACpB,aAAA,GAAgB,iBAAA;EAChB,aAAA,GAAgB,iBAAA;EAChB,iBAAA,GAAoB,qBAAA;AAAA;;;;UAML,iBAAA;EAXA;EAaf,KAAA,EAAO,UAAA;EAZS;EAchB,UAAA;EAboB;EAepB,QAAA;EAdgB;EAgBhB,KAAA;AAAA;;;;UAMe,uBAAA;EAdA;EAgBf,IAAA;;EAEA,SAAA;EAhBA;EAkBA,OAAA;AAAA;;;;UAMe,mBAAA;EAZA;EAcf,UAAA;;EAEA,WAAA;AAAA;;;;UAMe,wBAAA;EAVA;;;;;EAgBf,aAAA,GAAgB,8BAAA;EANuB;;;;;;EAavC,MAAA,GAAS,uBAAA;EAUc;EARvB,YAAA,IAAgB,IAAA;EAThB;EAWA,UAAA,IAAc,IAAA;EAJd;EAMA,OAAA,IAAW,MAAA,EAAQ,cAAA;EAJnB;EAMA,aAAA,IAAiB,KAAA,EAAO,UAAA;EAJxB;EAMA,OAAA,IAAW,KAAA,EAAO,KAAA;AAAA;;;;UAMH,mBAAA;EARE;EAUjB,KAAA,EAAO,UAAA;EARW;;;;EAalB,cAAA;EAPkC;EASlC,UAAA;EAIY;EAFZ,QAAA;EATO;EAWP,KAAA,EAAO,KAAA;EAJP;EAMA,UAAA;EAFA;EAIA,SAAA;AAAA;;;;;;;;;;;;cC9LW,iBAAA;EAAA,QACH,QAAA;EAAA,QACA,OAAA;EAAA,QAGA,YAAA;EAAA,QACA,aAAA;EAAA,QACA,aAAA;EAAA,QACA,iBAAA;EAAA,QACA,aAAA;EAAA,QACA,iBAAA;EAAA,QACA,YAAA;EAAA,QAGA,cAAA;EAAA,QACA,UAAA;EAAA,QACA,QAAA;EAAA,QACA,KAAA;EAAA,QACA,eAAA;EAAA,QACA,uBAAA;EAAA,QACA,qBAAA;EAAA,QACA,iBAAA;EAAA,QAGA,cAAA;EAAA,QAGA,SAAA;cAGN,QAAA,UACA,OAAA,GAAS,wBAAA,EACT,QAAA,GAAU,mBAAA;EDxHR;;;;ECmKJ,cAAA,CAAA;EDjK+B;;AAQjC;EC8LQ,aAAA,CAAA,GAAiB,OAAA;;;;EAoIvB,UAAA,CAAW,OAAA;ED5TX;;;ECoUA,OAAA,CAAQ,CAAA,UAAW,CAAA,UAAW,KAAA;EDvTV;;;EC8TpB,eAAA,CAAA;EDtTe;;;EC6Tf,KAAA,CAAA;ED3TA;;;;EC2UA,oBAAA,CAAA;EDnUc;;AAShB;ECiUE,SAAA,CAAU,QAAA;;;;;EASV,WAAA,CAAA,GAAe,mBAAA;EDtUf;;;EAAA,QC6UQ,aAAA;EAAA,QAcA,KAAA;;;;ADxUV;;UC6VU,oBAAA;EAAA,QAcM,UAAA;ED7Va;AAM7B;;;EAN6B,QCmXb,YAAA;EDjWd;;;EAAA,QCwcc,gBAAA;ED9bA;AAMhB;;;;;;;;;EANgB,QC0dN,iBAAA;EDldR;;;;;;;EAAA,QC+ec,oBAAA;ED7eP;AAMT;;;EANS,QC+fO,uBAAA;EDxfY;;;EAAA,QCogBZ,wBAAA;EDpgBd;;;;;;;EAAA,QCkhBc,qBAAA;EDjhBV;AAMN;;;EANM,QC0jBI,qBAAA;EAAA,QAYA,WAAA;ED9jBR;;;EAAA,QCykBQ,oBAAA;EDvkBR;;;EAAA,QC8kBQ,aAAA;ED7kBD;;AAMT;EANS,QColBC,wBAAA;;;;UAOA,iCAAA;EDnlBF;;;EAAA,QC0lBE,8BAAA;EDzlBR;;;AAMF;;;EANE,QCmmBc,qBAAA;ED5lBH;;;;EAAA,QC0oBG,qBAAA;ED1oBd;;;;;;;;EAAA,QCuqBc,iBAAA;EAAA,QAmBN,cAAA;EAAA,QAOA,MAAA;AAAA"}