@kirosnn/mosaic 0.0.91 → 0.71.0

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (56):
  1. package/LICENSE +1 -1
  2. package/README.md +2 -2
  3. package/package.json +52 -47
  4. package/src/agent/prompts/systemPrompt.ts +198 -68
  5. package/src/agent/prompts/toolsPrompt.ts +217 -135
  6. package/src/agent/provider/anthropic.ts +19 -15
  7. package/src/agent/provider/google.ts +21 -17
  8. package/src/agent/provider/ollama.ts +80 -41
  9. package/src/agent/provider/openai.ts +107 -67
  10. package/src/agent/provider/reasoning.ts +29 -0
  11. package/src/agent/provider/xai.ts +19 -15
  12. package/src/agent/tools/definitions.ts +9 -5
  13. package/src/agent/tools/executor.ts +655 -46
  14. package/src/agent/tools/exploreExecutor.ts +12 -12
  15. package/src/agent/tools/fetch.ts +58 -0
  16. package/src/agent/tools/glob.ts +20 -4
  17. package/src/agent/tools/grep.ts +62 -8
  18. package/src/agent/tools/plan.ts +27 -0
  19. package/src/agent/tools/read.ts +2 -0
  20. package/src/agent/types.ts +6 -6
  21. package/src/components/App.tsx +67 -25
  22. package/src/components/CustomInput.tsx +274 -68
  23. package/src/components/Main.tsx +323 -168
  24. package/src/components/ShortcutsModal.tsx +11 -8
  25. package/src/components/main/ChatPage.tsx +217 -58
  26. package/src/components/main/HomePage.tsx +5 -1
  27. package/src/components/main/ThinkingIndicator.tsx +11 -1
  28. package/src/components/main/types.ts +11 -10
  29. package/src/index.tsx +3 -5
  30. package/src/utils/approvalBridge.ts +29 -8
  31. package/src/utils/approvalModeBridge.ts +17 -0
  32. package/src/utils/commands/approvals.ts +48 -0
  33. package/src/utils/commands/image.ts +109 -0
  34. package/src/utils/commands/index.ts +5 -1
  35. package/src/utils/diffRendering.tsx +13 -14
  36. package/src/utils/history.ts +82 -40
  37. package/src/utils/imageBridge.ts +28 -0
  38. package/src/utils/images.ts +31 -0
  39. package/src/utils/models.ts +0 -7
  40. package/src/utils/notificationBridge.ts +23 -0
  41. package/src/utils/toolFormatting.ts +162 -43
  42. package/src/web/app.tsx +94 -34
  43. package/src/web/assets/css/ChatPage.css +102 -30
  44. package/src/web/assets/css/MessageItem.css +26 -29
  45. package/src/web/assets/css/ThinkingIndicator.css +44 -6
  46. package/src/web/assets/css/ToolMessage.css +36 -14
  47. package/src/web/components/ChatPage.tsx +228 -105
  48. package/src/web/components/HomePage.tsx +6 -6
  49. package/src/web/components/MessageItem.tsx +88 -89
  50. package/src/web/components/Setup.tsx +1 -1
  51. package/src/web/components/Sidebar.tsx +1 -1
  52. package/src/web/components/ThinkingIndicator.tsx +40 -21
  53. package/src/web/router.ts +1 -1
  54. package/src/web/server.tsx +187 -39
  55. package/src/web/storage.ts +23 -1
  56. package/src/web/types.ts +7 -6
@@ -1,139 +1,221 @@
1
1
  export const TOOLS_PROMPT = `
2
- AVAILABLE TOOLS:
3
-
4
- You have access to the following tools to interact with the workspace:
5
-
6
- FILE READING:
7
- 1. read: Read the complete contents of a file
8
- - Use this to examine existing code, configuration, or documentation
9
- - Parameters: path (string)
10
-
11
- FILE WRITING & EDITING:
12
- 2. write: Write or overwrite a file (with advanced features)
13
- - Creates parent directories automatically if they don't exist
14
- - Can append to existing files instead of overwriting
15
- - Parameters: path (string), content (string, optional), append (boolean, optional)
16
- - Note: content can be omitted or empty to create an empty file
17
-
18
- 3. edit: Edit a specific part of a file without rewriting everything
19
- - More efficient for targeted changes - replace specific text content
20
- - Parameters: path (string), old_content (string), new_content (string), occurrence (number, optional)
21
-
22
- FILE & DIRECTORY OPERATIONS:
23
- 4. list: List files and directories with filtering
24
- - Supports recursive listing through subdirectories
25
- - Can filter by glob patterns (e.g., "*.ts")
26
- - Can include or exclude hidden files
27
- - Parameters: path (string), recursive (boolean, optional), filter (string, optional), include_hidden (boolean, optional)
28
-
29
- SEARCH & DISCOVERY:
30
- 5. glob: Fast file pattern matching
31
- - Find files matching a glob pattern
32
- - REQUIRED: pattern (string) - Glob pattern to match files (e.g., "*.ts", "**/*.tsx", "src/**/*.js")
33
- - OPTIONAL: path (string) - Directory to search in (default: workspace root)
34
-
35
- Examples:
36
- - Find all TypeScript files: glob(pattern="**/*.ts")
37
- - Find React components: glob(pattern="**/*.tsx")
38
- - Search in specific directory: glob(pattern="*.js", path="src")
39
-
40
- 6. grep: Search for text content within files
41
- - Search for text within files matching a glob pattern
42
- - REQUIRED: pattern (string) - Glob pattern to match files (e.g., "*.ts", "**/*.tsx")
43
- - REQUIRED: query (string) - Text content to search for
44
- - OPTIONAL: path (string) - Directory to search in (default: workspace root)
45
- - OPTIONAL: case_sensitive (boolean) - Case-sensitive search (default: false)
46
- - OPTIONAL: max_results (number) - Maximum results (default: 100)
47
-
48
- Examples:
49
- - Find interface in TypeScript files: grep(pattern="**/*.ts", query="interface User")
50
- - Search in specific directory: grep(pattern="*.js", query="console.log", path="src")
51
- - Case-sensitive search: grep(pattern="**/*.ts", query="UserModel", case_sensitive=true)
52
-
53
- AUTONOMOUS EXPLORATION:
54
- 7. explore: Launch an autonomous exploration agent
55
- - Explores the codebase iteratively to gather information
56
- - The agent uses read, glob, grep, and list tools autonomously
57
- - Continues until it has enough information or reaches its limit
58
- - Use for open-ended exploration tasks
59
- - Parameters: purpose (string) - The goal of the exploration
60
-
61
- Examples:
62
- - Understand project structure:
63
- explore(purpose="Understand the project structure and main entry points")
64
- - Find implementations:
65
- explore(purpose="Find all React components that handle user authentication")
66
- - Investigate code patterns:
67
- explore(purpose="Find how errors are handled throughout the codebase")
68
-
69
- COMMAND EXECUTION:
70
- 8. bash: Execute a shell command
71
- - Use this to run build tools, tests, git commands, or other CLI tools
72
- - Parameters: command (string)
73
- - CRITICAL: You MUST add --timeout <ms> at the END of commands that might hang:
74
- * Dev servers: ALWAYS add --timeout 5000
75
- Example: bash(command="npm run dev --timeout 5000")
76
- * Build commands: ALWAYS add --timeout 120000
77
- Example: bash(command="npm run build --timeout 120000")
78
- * Test runners: ALWAYS add --timeout 60000
79
- Example: bash(command="pytest tests/ --timeout 60000")
80
- * Package installs: ALWAYS add --timeout 120000
81
- Example: bash(command="npm install --timeout 120000")
82
- * Interactive CLIs: ALWAYS add --timeout 5000 or avoid entirely
83
- Example: bash(command="npx create-react-app myapp --timeout 5000")
84
- - Quick commands (ls, cat, git status, echo): No --timeout needed (default: 30s)
85
-
86
- USER INTERACTION:
87
- 9. question: Ask the user a question with predefined options
88
- - CRITICAL: This is the ONLY way to ask the user questions. NEVER ask questions in plain text.
89
- - MANDATORY usage scenarios:
90
- * When you need user to pick between choices
91
- * When you need user's confirmation or approval
92
- * When you need clarification on ambiguous requests
93
- * When you're unsure how to proceed
94
- * When a tool operation is rejected and you need to know why
95
- * When multiple approaches are possible and user input is needed
96
- - The UI will show the prompt and options and return the selected option
97
- - Parameters:
98
- - prompt (string) - The question to ask in the user's language
99
- - options (array of objects) - At least 2 options required:
100
- - label (string) - The option text shown to user
101
- - value (string | null) - Optional value returned (use null if not needed)
102
- - Returns: { id, index, label, value }
103
- - Example: question(prompt="Which approach do you prefer?", options=[{label:"Approach A", value:"a"}, {label:"Approach B", value:"b"}])
104
-
105
- TOOL USAGE GUIDELINES:
106
-
107
- - Use explore for open-ended exploration tasks (autonomous agent)
108
- - Use glob to find files by pattern (fast file discovery)
109
- - Use grep to search for text content within files
110
- - Use edit for small changes to avoid rewriting entire files
111
- - Always use read before modifying files to understand the current state
112
- - When writing files, preserve existing code structure and style
113
- - Use list with recursive:true to explore deep directory structures
114
- - All file paths are relative to the workspace root: {{WORKSPACE}}
115
-
116
- ERRORS:
117
- - Some tools return an object like {"error": "..."} when something went wrong. Treat this as a TOOL ERROR (not an API error).
118
- - When a tool returns an error, continue the task using that information (e.g., adjust path, create missing parent directory, retry with correct tool).
119
-
120
- WORKFLOW BEST PRACTICES:
121
-
122
- 1. Discover: Use explore for open-ended exploration, or glob/grep for targeted searches
123
- 2. Understand: Use read to examine files
124
- 3. Plan: Think through modifications before acting
125
- 4. Execute: Use edit for small changes, write for new/complete rewrites
126
- 5. Verify: Use bash to run tests and verify changes
127
- 6. Communicate: Explain your actions to the user in their language
128
-
129
- CRITICAL REMINDERS:
130
- - NEVER ask questions in plain text - ALWAYS use the question tool
131
- - When write/edit/bash operations are rejected by the user, IMMEDIATELY use the question tool to understand why and what to do instead
132
- - The question tool is NOT optional - it's MANDATORY for any user interaction requiring a response
133
- - If you catch yourself about to ask something in text, STOP and use the question tool instead
134
-
135
- Remember: The user can see your tool usage, so be transparent about what you're doing and why.`;
2
+ # Available Tools
3
+
4
+ ## File Operations
5
+
6
+ ### read
7
+ Read file contents. ALWAYS read before modifying.
8
+ - path (string, required): File path relative to workspace
9
+ - start_line (number, optional): Start reading from this line (1-based)
10
+ - end_line (number, optional): End reading at this line (1-based)
11
+
12
+ ### write
13
+ Create or overwrite a file. Creates parent directories automatically.
14
+ - path (string, required): File path
15
+ - content (string, optional): File content (empty to create empty file)
16
+ - append (boolean, optional): Append instead of overwrite
17
+
18
+ ### edit
19
+ Replace specific text in a file. Preferred for targeted changes.
20
+ - path (string, required): File path
21
+ - old_content (string, required): Exact text to replace
22
+ - new_content (string, required): Replacement text
23
+ - occurrence (number, optional): Which occurrence (default: 1)
24
+
25
+ ### list
26
+ List directory contents.
27
+ - path (string, required): Directory path
28
+ - recursive (boolean, optional): Include subdirectories
29
+ - filter (string, optional): Glob pattern filter
30
+ - include_hidden (boolean, optional): Include hidden files
31
+
32
+ ## Search & Discovery
33
+
34
+ ### explore (RECOMMENDED for understanding context)
35
+ Autonomous exploration agent that intelligently searches the codebase.
36
+ - purpose (string, required): What to find/understand
37
+
38
+ USE EXPLORE WHEN:
39
+ - Starting work on an unfamiliar codebase
40
+ - Understanding how something works
41
+ - Finding related code, patterns, or architecture
42
+ - You're unsure where to make changes
43
+
44
+ Examples:
45
+ - explore(purpose="Find API endpoints and understand routing")
46
+ - explore(purpose="Understand the authentication flow")
47
+ - explore(purpose="Find UserService and all its usages")
48
+
49
+ The explore tool is INTELLIGENT - it autonomously reads files, follows imports, and builds understanding. This is MORE EFFICIENT than manual glob/grep/read cycles.
50
+
51
+ ### glob
52
+ Find files by name pattern. Fast file discovery.
53
+ - pattern (string, required): Glob pattern with **/ for recursive search
54
+ - path (string, optional): Directory to search
55
+
56
+ IMPORTANT: Use "**/" prefix for recursive search:
57
+ - "**/*.ts" - All TypeScript files (recursive)
58
+ - "*.ts" - Only in current directory (NOT recursive)
59
+
60
+ ### grep
61
+ Search for text within files.
62
+ - query (string, required): Text to search for
63
+ - file_type (string, optional): ts, tsx, js, jsx, py, java, go, etc.
64
+ - pattern (string, optional): Glob pattern for files
65
+ - regex (boolean, optional): Treat query as regex
66
+ - context (number, optional): Lines around matches
67
+ - output_mode (string, optional): "matches", "files", or "count"
68
+
69
+ RECOMMENDED: Use file_type for best results:
70
+ - grep(query="handleClick", file_type="tsx")
71
+ - grep(query="interface User", file_type="ts")
72
+
73
+ TOOL SELECTION:
74
+ | Need to understand how X works | explore |
75
+ | Find specific file by name | glob |
76
+ | Find specific text in code | grep |
77
+
78
+ ## Planning
79
+
80
+ ### plan
81
+ Track progress on multi-step tasks.
82
+ - explanation (string, optional): Context about the plan
83
+ - plan (array, required): Steps with statuses
84
+ - step (string): Action description
85
+ - status: "pending" | "in_progress" | "completed"
86
+
87
+ Use plan for tasks with 3+ steps. Update as you progress.
88
+ Always update the plan after each step is completed.
89
+
90
+ ## Web Access
91
+
92
+ ### fetch
93
+ Retrieve web content as markdown.
94
+ - url (string, required): URL to fetch
95
+ - max_length (number, optional): Max chars (default: 10000)
96
+ - start_index (number, optional): For pagination
97
+ - raw (boolean, optional): Return raw HTML
98
+ - timeout (number, optional): Timeout in ms (default: 30000)
99
+
100
+ ## Command Execution
101
+
102
+ ### bash
103
+ Execute shell commands. Adapt to OS ({{OS}}).
104
+ - command (string, required): Command to execute
105
+
106
+ Timeouts (add --timeout <ms> to long commands):
107
+ - Dev servers: 5000
108
+ - Builds: 120000
109
+ - Tests: 60000
110
+ - Package installs: 120000
111
+
112
+ ## User Interaction
113
+
114
+ ### question
115
+ Ask user with predefined options. ONLY way to ask questions.
116
+ - prompt (string, required): Question in user's language
117
+ - options (array, required): At least 2 options
118
+ - label (string): Display text
119
+ - value (string|null): Return value
120
+
121
+ CRITICAL: Never ask questions in plain text. Always use this tool.
122
+
123
+ # Tool Selection Guide
124
+
125
+ | Task | Tool | Example |
126
+ |------|------|---------|
127
+ | Understand codebase/architecture | explore | explore(purpose="How does auth work?") |
128
+ | Find files by name | glob | glob(pattern="**/*.config.ts") |
129
+ | Find specific text | grep | grep(query="handleSubmit", file_type="tsx") |
130
+ | Read file contents | read | read(path="src/auth.ts") |
131
+ | Small targeted edit | edit | edit(path="...", old_content="...", new_content="...") |
132
+ | New file or full rewrite | write | write(path="...", content="...") |
133
+ | Run commands/tests | bash | bash(command="npm test") |
134
+ | Track multi-step work | plan | plan(plan=[...]) |
135
+ | Need user input | question | question(prompt="...", options=[...]) |
136
+
137
+ PREFER EXPLORE for understanding context before making changes.
138
+ PREFER grep with file_type for targeted text searches.
139
+
140
+ # Continuation - CRITICAL
141
+
142
+ NEVER stop after using a tool. ALWAYS continue to the next step in the SAME response.
143
+
144
+ Pattern: text → tool → text → tool → text → tool → ... → completion
145
+
146
+ CORRECT:
147
+ "Searching for config files." → [glob] → "Found 3 files. Reading the main one." → [read] → "I see the issue. Fixing now." → [edit] → "Done."
148
+
149
+ WRONG:
150
+ "Searching for config files." → [glob] → "Found 3 files. I'll read them next." → [STOP]
151
+
152
+ After EVERY tool result, you must either:
153
+ 1. Continue with the next action (use another tool), OR
154
+ 2. Complete the task with a summary, OR
155
+ 3. Ask the user via question tool if genuinely blocked
156
+
157
+ FORBIDDEN:
158
+ - Stopping mid-task after announcing what you'll do next
159
+ - Ending with "I'll do X next" without actually doing X
160
+ - Waiting for implicit user approval to continue
161
+
162
+ # Communication with Tools
163
+
164
+ BEFORE using tools:
165
+ - Brief explanation of what you're doing
166
+ - Then IMMEDIATELY use the tool in the same response
167
+
168
+ AFTER tool results:
169
+ - Brief comment on result if needed
170
+ - Then IMMEDIATELY continue to next action
171
+
172
+ AFTER tool errors:
173
+ - Explain what went wrong
174
+ - Then IMMEDIATELY retry with different approach
175
+
176
+ # File Modification - MANDATORY RULE
177
+
178
+ You MUST read a file BEFORE modifying it. This is NOT optional.
179
+
180
+ Correct workflow:
181
+ 1. "Let me examine the current implementation." → read(path="src/auth.ts")
182
+ 2. "I see the issue. I'll fix the validation logic." → edit(path="src/auth.ts", ...)
183
+
184
+ WRONG (will fail):
185
+ - Using edit or write on a file you haven't read in this conversation
186
+ - Assuming you know what's in a file without reading it
187
+
188
+ # Error Recovery
189
+
190
+ When a tool returns {"error": "..."}:
191
+ 1. Tell the user what went wrong
192
+ 2. Explain your retry strategy
193
+ 3. Try with adjusted parameters
194
+ 4. After 2-3 failures, explain the blocker and ask for help
195
+
196
+ # Question Tool - When to Use
197
+
198
+ USE question tool:
199
+ - Multiple valid approaches need user preference
200
+ - Destructive action needs confirmation
201
+ - Requirements are genuinely ambiguous
202
+ - A tool was rejected and you need to understand why
203
+
204
+ DO NOT use question tool:
205
+ - You can figure out the answer by exploring
206
+ - The path forward is reasonably clear
207
+ - It's a standard implementation decision
208
+
209
+ NEVER ask questions in plain text. The question tool is MANDATORY.
210
+
211
+ # Workflow Summary
212
+
213
+ 1. COMMUNICATE: Say what you're about to do
214
+ 2. READ: Always read files before modifying
215
+ 3. ACT: Use the appropriate tool
216
+ 4. VERIFY: Run tests/builds to confirm
217
+ 5. REPORT: Summarize what was done`;
136
218
 
137
219
  export function getToolsPrompt(): string {
138
220
  return TOOLS_PROMPT;
139
- }
221
+ }
@@ -1,6 +1,7 @@
1
- import { streamText, CoreMessage } from 'ai';
2
- import { createAnthropic } from '@ai-sdk/anthropic';
3
- import { AgentEvent, Provider, ProviderConfig, ProviderSendOptions } from '../types';
1
+ import { streamText, CoreMessage } from 'ai';
2
+ import { createAnthropic } from '@ai-sdk/anthropic';
3
+ import { AgentEvent, Provider, ProviderConfig, ProviderSendOptions } from '../types';
4
+ import { shouldEnableReasoning } from './reasoning';
4
5
 
5
6
  export class AnthropicProvider implements Provider {
6
7
  async *sendMessage(
@@ -8,8 +9,9 @@ export class AnthropicProvider implements Provider {
8
9
  config: ProviderConfig,
9
10
  options?: ProviderSendOptions
10
11
  ): AsyncGenerator<AgentEvent> {
11
- const cleanApiKey = config.apiKey?.trim().replace(/[\r\n]+/g, '');
12
- const cleanModel = config.model.trim().replace(/[\r\n]+/g, '');
12
+ const cleanApiKey = config.apiKey?.trim().replace(/[\r\n]+/g, '');
13
+ const cleanModel = config.model.trim().replace(/[\r\n]+/g, '');
14
+ const reasoningEnabled = await shouldEnableReasoning(config.provider, cleanModel);
13
15
 
14
16
  const anthropic = createAnthropic({
15
17
  apiKey: cleanApiKey,
@@ -19,15 +21,17 @@ export class AnthropicProvider implements Provider {
19
21
  model: anthropic(cleanModel),
20
22
  messages: messages,
21
23
  system: config.systemPrompt,
22
- tools: config.tools,
23
- maxSteps: config.maxSteps || 10,
24
- abortSignal: options?.abortSignal,
25
- experimental_providerMetadata: {
26
- anthropic: {
27
- thinkingBudgetTokens: 10000,
28
- },
29
- },
30
- });
24
+ tools: config.tools,
25
+ maxSteps: config.maxSteps || 10,
26
+ abortSignal: options?.abortSignal,
27
+ experimental_providerMetadata: reasoningEnabled
28
+ ? {
29
+ anthropic: {
30
+ thinkingBudgetTokens: 10000,
31
+ },
32
+ }
33
+ : undefined,
34
+ });
31
35
 
32
36
  try {
33
37
  let stepCounter = 0;
@@ -119,4 +123,4 @@ export class AnthropicProvider implements Provider {
119
123
  };
120
124
  }
121
125
  }
122
- }
126
+ }
@@ -1,6 +1,7 @@
1
- import { streamText, CoreMessage } from 'ai';
2
- import { createGoogleGenerativeAI } from '@ai-sdk/google';
3
- import { AgentEvent, Provider, ProviderConfig, ProviderSendOptions } from '../types';
1
+ import { streamText, CoreMessage } from 'ai';
2
+ import { createGoogleGenerativeAI } from '@ai-sdk/google';
3
+ import { AgentEvent, Provider, ProviderConfig, ProviderSendOptions } from '../types';
4
+ import { shouldEnableReasoning } from './reasoning';
4
5
 
5
6
  export class GoogleProvider implements Provider {
6
7
  async *sendMessage(
@@ -8,8 +9,9 @@ export class GoogleProvider implements Provider {
8
9
  config: ProviderConfig,
9
10
  options?: ProviderSendOptions
10
11
  ): AsyncGenerator<AgentEvent> {
11
- const cleanApiKey = config.apiKey?.trim().replace(/[\r\n]+/g, '');
12
- const cleanModel = config.model.trim().replace(/[\r\n]+/g, '');
12
+ const cleanApiKey = config.apiKey?.trim().replace(/[\r\n]+/g, '');
13
+ const cleanModel = config.model.trim().replace(/[\r\n]+/g, '');
14
+ const reasoningEnabled = await shouldEnableReasoning(config.provider, cleanModel);
13
15
 
14
16
  const google = createGoogleGenerativeAI({
15
17
  apiKey: cleanApiKey,
@@ -19,17 +21,19 @@ export class GoogleProvider implements Provider {
19
21
  model: google(cleanModel),
20
22
  messages: messages,
21
23
  system: config.systemPrompt,
22
- tools: config.tools,
23
- maxSteps: config.maxSteps || 10,
24
- abortSignal: options?.abortSignal,
25
- providerOptions: {
26
- google: {
27
- thinkingConfig: {
28
- style: 'THINKING_STYLE_DETAILED',
29
- },
30
- },
31
- },
32
- });
24
+ tools: config.tools,
25
+ maxSteps: config.maxSteps || 10,
26
+ abortSignal: options?.abortSignal,
27
+ providerOptions: reasoningEnabled
28
+ ? {
29
+ google: {
30
+ thinkingConfig: {
31
+ style: 'THINKING_STYLE_DETAILED',
32
+ },
33
+ },
34
+ }
35
+ : undefined,
36
+ });
33
37
 
34
38
  try {
35
39
  let stepCounter = 0;
@@ -121,4 +125,4 @@ export class GoogleProvider implements Provider {
121
125
  };
122
126
  }
123
127
  }
124
- }
128
+ }
@@ -1,7 +1,10 @@
1
1
  import { Ollama } from 'ollama';
2
2
  import { spawn } from 'child_process';
3
- import { CoreMessage, CoreTool } from 'ai';
4
- import { AgentEvent, Provider, ProviderConfig, ProviderSendOptions } from '../types';
3
+ import { CoreMessage, CoreTool } from 'ai';
4
+ import { AgentEvent, Provider, ProviderConfig, ProviderSendOptions } from '../types';
5
+ import { zodToJsonSchema } from 'zod-to-json-schema';
6
+ import { z } from 'zod';
7
+ import { shouldEnableReasoning } from './reasoning';
5
8
 
6
9
  let serveStartPromise: Promise<void> | null = null;
7
10
  const pullPromises = new Map<string, Promise<void>>();
@@ -194,9 +197,9 @@ async function ensureOllamaModelAvailable(ollamaClient: Ollama, model: string):
194
197
  return p;
195
198
  }
196
199
 
197
- function contentToString(content: CoreMessage['content']): string {
198
- if (typeof content === 'string') return content;
199
- if (!content) return '';
200
+ function contentToString(content: CoreMessage['content']): string {
201
+ if (typeof content === 'string') return content;
202
+ if (!content) return '';
200
203
 
201
204
  if (Array.isArray(content)) {
202
205
  const text = content
@@ -215,30 +218,50 @@ function contentToString(content: CoreMessage['content']): string {
215
218
  return JSON.stringify(content);
216
219
  } catch {
217
220
  return String(content);
218
- }
219
- }
221
+ }
222
+ }
223
+
224
+ function imagePartToBase64(image: any): string | undefined {
225
+ if (!image) return undefined;
226
+ if (typeof image === 'string') return image;
227
+ if (Buffer.isBuffer(image)) return image.toString('base64');
228
+ if (image instanceof Uint8Array) return Buffer.from(image).toString('base64');
229
+ if (image instanceof ArrayBuffer) return Buffer.from(new Uint8Array(image)).toString('base64');
230
+ return undefined;
231
+ }
220
232
 
221
233
  function toOllamaTools(tools?: Record<string, CoreTool>): any[] | undefined {
222
234
  if (!tools) return undefined;
223
235
 
224
- return Object.entries(tools).map(([name, tool]) => ({
225
- type: 'function',
226
- function: {
227
- name,
228
- description: String((tool as any)?.description ?? name),
229
- parameters: (() => {
230
- const params = (tool as any)?.parameters;
231
- if (params && typeof params === 'object' && 'type' in params) return params;
232
- return { type: 'object', properties: {} };
233
- })(),
234
- },
235
- }));
236
+ return Object.entries(tools).map(([name, tool]) => {
237
+ const params = (tool as any)?.parameters;
238
+ let jsonSchema: any = { type: 'object', properties: {} };
239
+
240
+ if (params) {
241
+ if (params instanceof z.ZodType) {
242
+ const converted = zodToJsonSchema(params, { target: 'openApi3' });
243
+ jsonSchema = converted;
244
+ if ('$schema' in jsonSchema) delete jsonSchema.$schema;
245
+ } else if (typeof params === 'object' && 'type' in params) {
246
+ jsonSchema = params;
247
+ }
248
+ }
249
+
250
+ return {
251
+ type: 'function',
252
+ function: {
253
+ name,
254
+ description: String((tool as any)?.description ?? name),
255
+ parameters: jsonSchema,
256
+ },
257
+ };
258
+ });
236
259
  }
237
260
 
238
- function coreMessagesToOllamaMessages(messages: CoreMessage[]): any[] {
239
- return messages
240
- .map((message) => {
241
- if (message.role === 'tool') {
261
+ function coreMessagesToOllamaMessages(messages: CoreMessage[]): any[] {
262
+ return messages
263
+ .map((message) => {
264
+ if (message.role === 'tool') {
242
265
  const content: any = message.content;
243
266
  const part = Array.isArray(content) ? content?.[0] : undefined;
244
267
  const toolName = part?.toolName ?? part?.tool_name;
@@ -269,13 +292,28 @@ function coreMessagesToOllamaMessages(messages: CoreMessage[]): any[] {
269
292
  };
270
293
  }
271
294
 
272
- return {
273
- role: message.role,
274
- content: contentToString(message.content),
275
- };
276
- })
277
- .filter(Boolean);
278
- }
295
+ if (message.role === 'user' && Array.isArray(message.content)) {
296
+ const textParts = message.content
297
+ .map((part: any) => (part && typeof part.text === 'string' ? part.text : ''))
298
+ .filter(Boolean)
299
+ .join('');
300
+ const images = message.content
301
+ .map((part: any) => (part && part.type === 'image' ? imagePartToBase64(part.image) : undefined))
302
+ .filter(Boolean);
303
+ const msg: any = { role: 'user', content: textParts };
304
+ if (images.length > 0) {
305
+ msg.images = images;
306
+ }
307
+ return msg;
308
+ }
309
+
310
+ return {
311
+ role: message.role,
312
+ content: contentToString(message.content),
313
+ };
314
+ })
315
+ .filter(Boolean);
316
+ }
279
317
 
280
318
  export async function checkAndStartOllama(): Promise<{ running: boolean; started: boolean; error?: string }> {
281
319
  const ollamaClient = new Ollama();
@@ -306,8 +344,9 @@ export class OllamaProvider implements Provider {
306
344
  config: ProviderConfig,
307
345
  options?: ProviderSendOptions
308
346
  ): AsyncGenerator<AgentEvent> {
309
- const apiKey = config.apiKey?.trim().replace(/[\r\n]+/g, '');
310
- const cleanModel = config.model.trim().replace(/[\r\n]+/g, '');
347
+ const apiKey = config.apiKey?.trim().replace(/[\r\n]+/g, '');
348
+ const cleanModel = config.model.trim().replace(/[\r\n]+/g, '');
349
+ const reasoningEnabled = await shouldEnableReasoning(config.provider, cleanModel);
311
350
 
312
351
  if (options?.abortSignal?.aborted) {
313
352
  return;
@@ -396,14 +435,14 @@ export class OllamaProvider implements Provider {
396
435
 
397
436
  const stream = await retry(
398
437
  () =>
399
- ollamaClient.chat({
400
- model: requestModel,
401
- messages: ollamaMessages,
402
- tools: toolsSchema,
403
- stream: true,
404
- think: true,
405
- signal: options?.abortSignal,
406
- } as any) as any,
438
+ ollamaClient.chat({
439
+ model: requestModel,
440
+ messages: ollamaMessages,
441
+ tools: toolsSchema,
442
+ stream: true,
443
+ think: reasoningEnabled,
444
+ signal: options?.abortSignal,
445
+ } as any) as any,
407
446
  2,
408
447
  500
409
448
  );
@@ -528,4 +567,4 @@ export class OllamaProvider implements Provider {
528
567
  };
529
568
  }
530
569
  }
531
- }
570
+ }