otherwise-cli 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +193 -0
- package/bin/otherwise.js +5 -0
- package/frontend/404.html +84 -0
- package/frontend/assets/OpenDyslexic3-Bold-CDyRs55Y.ttf +0 -0
- package/frontend/assets/OpenDyslexic3-Regular-CIBXa4WE.ttf +0 -0
- package/frontend/assets/__vite-browser-external-BIHI7g3E.js +1 -0
- package/frontend/assets/conversational-worker-CeKiciGk.js +2929 -0
- package/frontend/assets/dictation-worker-D0aYfq8b.js +29 -0
- package/frontend/assets/gemini-color-CgSQmmva.png +0 -0
- package/frontend/assets/index-BLux5ps4.js +21 -0
- package/frontend/assets/index-Blh8_TEM.js +5272 -0
- package/frontend/assets/index-BpQ1PuKu.js +18 -0
- package/frontend/assets/index-Df737c8w.css +1 -0
- package/frontend/assets/index-xaYHL6wb.js +113 -0
- package/frontend/assets/ort-wasm-simd-threaded.asyncify-BynIiDiv.wasm +0 -0
- package/frontend/assets/ort-wasm-simd-threaded.jsep-B0T3yYHD.wasm +0 -0
- package/frontend/assets/transformers-tULNc5V3.js +31 -0
- package/frontend/assets/tts-worker-DPJWqT7N.js +2899 -0
- package/frontend/assets/voice-mode-worker-GzvIE_uh.js +2927 -0
- package/frontend/assets/worker-2d5ABSLU.js +31 -0
- package/frontend/banner.png +0 -0
- package/frontend/favicon.svg +3 -0
- package/frontend/google55e5ec47ee14a5f8.html +1 -0
- package/frontend/index.html +234 -0
- package/frontend/manifest.json +17 -0
- package/frontend/pdf.worker.min.mjs +21 -0
- package/frontend/robots.txt +5 -0
- package/frontend/sitemap.xml +27 -0
- package/package.json +81 -0
- package/src/agent/index.js +1066 -0
- package/src/agent/location.js +51 -0
- package/src/agent/prompt.js +548 -0
- package/src/agent/tools.js +4372 -0
- package/src/browser/detect.js +68 -0
- package/src/browser/session.js +1109 -0
- package/src/config.js +137 -0
- package/src/email/client.js +503 -0
- package/src/index.js +557 -0
- package/src/inference/anthropic.js +113 -0
- package/src/inference/google.js +373 -0
- package/src/inference/index.js +81 -0
- package/src/inference/ollama.js +383 -0
- package/src/inference/openai.js +140 -0
- package/src/inference/openrouter.js +378 -0
- package/src/inference/xai.js +200 -0
- package/src/logBridge.js +9 -0
- package/src/models.js +146 -0
- package/src/remote/client.js +225 -0
- package/src/scheduler/cron.js +243 -0
- package/src/server.js +3876 -0
- package/src/storage/db.js +1135 -0
- package/src/storage/supabase.js +364 -0
- package/src/tunnel/cloudflare.js +241 -0
- package/src/ui/components/App.jsx +687 -0
- package/src/ui/components/BrowserSelect.jsx +111 -0
- package/src/ui/components/FilePicker.jsx +472 -0
- package/src/ui/components/Header.jsx +444 -0
- package/src/ui/components/HelpPanel.jsx +173 -0
- package/src/ui/components/HistoryPanel.jsx +158 -0
- package/src/ui/components/MessageList.jsx +235 -0
- package/src/ui/components/ModelSelector.jsx +304 -0
- package/src/ui/components/PromptInput.jsx +515 -0
- package/src/ui/components/StreamingResponse.jsx +134 -0
- package/src/ui/components/ThinkingIndicator.jsx +365 -0
- package/src/ui/components/ToolExecution.jsx +714 -0
- package/src/ui/components/index.js +82 -0
- package/src/ui/context/TerminalContext.jsx +150 -0
- package/src/ui/context/index.js +13 -0
- package/src/ui/hooks/index.js +16 -0
- package/src/ui/hooks/useChatState.js +675 -0
- package/src/ui/hooks/useCommands.js +280 -0
- package/src/ui/hooks/useFileAttachments.js +216 -0
- package/src/ui/hooks/useKeyboardShortcuts.js +173 -0
- package/src/ui/hooks/useNotifications.js +185 -0
- package/src/ui/hooks/useTerminalSize.js +151 -0
- package/src/ui/hooks/useWebSocket.js +273 -0
- package/src/ui/index.js +94 -0
- package/src/ui/ink-runner.js +22 -0
- package/src/ui/utils/formatters.js +424 -0
- package/src/ui/utils/index.js +6 -0
- package/src/ui/utils/markdown.js +166 -0
|
@@ -0,0 +1,383 @@
|
|
|
1
|
+
import { TOOLS } from "../agent/tools.js";
|
|
2
|
+
|
|
3
|
+
/**
 * Check if a model is a reasoning/thinking model that supports the think parameter
 * @param {string} model - Model name (e.g., 'deepseek-r1', 'qwq', 'gpt-oss')
 * @returns {boolean} - Whether model supports thinking
 */
export function isOllamaReasoningModel(model) {
  const lower = model.toLowerCase();

  // Coding/specialized variants (e.g. qwen3-coder) do not support thinking,
  // even when their name contains a reasoning-family substring.
  if (lower.includes("coder") || lower.includes("code")) {
    return false;
  }

  // Base qwen3 tags (qwen3:8b, qwen3:32b) support thinking; hyphenated
  // qwen3 variants are specialized builds and are excluded.
  const isBaseQwen3 = lower.includes("qwen3") && !lower.includes("-");

  const reasoningMarkers = [
    "deepseek-r1",
    "deepseek-v3",
    "qwq",
    "gpt-oss",
    "thinking",
    "reason",
  ];
  return isBaseQwen3 || reasoningMarkers.some((marker) => lower.includes(marker));
}
|
|
29
|
+
|
|
30
|
+
/**
 * Map our parameter types to Ollama-supported JSON Schema types.
 * Ollama expects: string, integer, boolean. "number" and "array" can cause
 * "expected element type <function> but have <parameter>" from schema validation.
 * @param {object} paramSpec - Parameter spec with an optional `type` field
 * @returns {string} - One of "string", "integer", "boolean"
 */
function ollamaParamType(paramSpec) {
  const declared = (paramSpec.type || "string").toLowerCase();
  switch (declared) {
    case "string":
    case "integer":
    case "boolean":
      // Already a type Ollama supports; pass it through unchanged.
      return declared;
    case "number":
      // Ollama has no "number"; integer is the closest supported type.
      return "integer";
    default:
      // array/object (and anything unknown): describe as string so the
      // model passes JSON; avoids schema validation errors.
      return "string";
  }
}
|
|
43
|
+
|
|
44
|
+
/**
 * Convert our TOOLS format to Ollama's tool format.
 * Uses only Ollama-supported types (string, integer, boolean) to avoid
 * "expected element type <function> but have <parameter>" errors.
 * @returns {Array} - Array of tools in Ollama format
 */
function getOllamaTools() {
  const declaredType = (spec) => (spec.type || "string").toLowerCase();

  return Object.entries(TOOLS).map(([name, tool]) => {
    const properties = {};
    const required = [];

    for (const [paramName, paramSpec] of Object.entries(tool.parameters || {})) {
      let description = paramSpec.description || "";
      // Arrays are downgraded to string; tell the model to pass JSON text.
      if (declaredType(paramSpec) === "array") {
        description =
          (description ? description + ". " : "") + "Pass a JSON array string.";
      }
      properties[paramName] = {
        type: ollamaParamType(paramSpec),
        description: description.trim() || "Parameter value",
      };
      if (paramSpec.required) {
        required.push(paramName);
      }
    }

    return {
      type: "function",
      function: {
        name,
        description: tool.description,
        parameters: { type: "object", properties, required },
      },
    };
  });
}
|
|
88
|
+
|
|
89
|
+
/**
 * Convert a native tool call to XML format for the agent
 * @param {object} toolCall - Ollama tool call object
 * @returns {string} - XML formatted tool call
 */
function toolCallToXml(toolCall) {
  const name = toolCall.function?.name || toolCall.name;
  const args = toolCall.function?.arguments || toolCall.arguments || {};

  const paramLines = Object.entries(args)
    .filter(([, value]) => value !== null && value !== undefined)
    .map(([key, value]) => {
      // Content/code may contain special chars; include it raw —
      // the agent's XML parser handles this.
      const text =
        typeof value === "object" ? JSON.stringify(value) : String(value);
      return `<${key}>${text}</${key}>\n`;
    })
    .join("");

  return `<tool_call>\n<name>${name}</name>\n${paramLines}</tool_call>`;
}
|
|
113
|
+
|
|
114
|
+
/**
 * Stream chat completion from local Ollama server.
 * Supports reasoning/thinking models with separate thinking tokens and
 * native tool calling for models that support it.
 * @param {string} model - Model name (e.g., 'llama3', 'deepseek-r1', 'gpt-oss')
 * @param {Array} messages - Array of message objects with role and content
 * @param {string} systemPrompt - System prompt
 * @param {object} config - Configuration with settings
 * @yields {object} - Chunks with type and content (including thinking tokens for reasoning models)
 */
export async function* streamOllama(model, messages, systemPrompt, config) {
  const baseUrl = config.ollamaUrl || "http://localhost:11434";

  // Check if this is a reasoning model that supports thinking
  let isReasoningModel = isOllamaReasoningModel(model);

  // Helper: extract base64 from data URL for Ollama (expects raw base64 strings)
  const toOllamaImages = (images) => {
    if (!images?.length) return undefined;
    return images
      .map((img) => {
        if (typeof img !== "string") return null;
        const match = img.match(/^data:image\/[^;]+;base64,(.+)$/);
        return match ? match[1] : img;
      })
      .filter(Boolean);
  };

  // Convert messages to Ollama format (vision: user messages can have images)
  const ollamaMessages = [
    { role: "system", content: systemPrompt },
    ...messages.map((m) => {
      const role = m.role === "user" ? "user" : "assistant";
      const base = { role, content: m.content };
      const imgList = toOllamaImages(m.images);
      if (imgList?.length) {
        base.images = imgList;
      }
      return base;
    }),
  ];

  // Get tools in Ollama format (unless explicitly disabled, e.g. during recovery)
  const tools = config.disableTools ? undefined : getOllamaTools();

  // Build request body - add think: true for reasoning models
  const requestBody = {
    model,
    messages: ollamaMessages,
    stream: true,
    ...(tools ? { tools } : {}),
    options: {
      num_predict: config.maxTokens || 8192,
      // BUGFIX: use ?? (not ||) so an explicit temperature of 0 is honored
      temperature: config.temperature ?? 0.7,
    },
  };

  // Enable thinking for reasoning models (Ollama's native thinking support)
  if (isReasoningModel) {
    requestBody.think = true;
  }

  // Single place to POST the (possibly mutated) request body.
  const postChat = () =>
    fetch(`${baseUrl}/api/chat`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify(requestBody),
    });

  let response = await postChat();

  // Handle "does not support tools" and "does not support thinking" by retrying without those params
  if (!response.ok) {
    let errorText = await response.text();

    // Retry without tools if model doesn't support tool calling (e.g. gemma3)
    if (errorText.includes("does not support tools") && requestBody.tools) {
      console.warn(
        `[Ollama] Model ${model} does not support tools, retrying without tools`,
      );
      delete requestBody.tools;
      response = await postChat();
      if (!response.ok) errorText = await response.text();
    }

    // Retry without think if model doesn't support thinking
    if (
      !response.ok &&
      errorText.includes("does not support thinking") &&
      requestBody.think
    ) {
      console.warn(
        `[Ollama] Model ${model} does not support thinking, retrying without think parameter`,
      );
      delete requestBody.think;
      isReasoningModel = false;
      response = await postChat();
      if (!response.ok) errorText = await response.text();
    }

    if (!response.ok) {
      throw new Error(`Ollama error: ${errorText}`);
    }
  }

  // Signal that streaming is starting (include reasoning model info)
  yield { type: "start", model, isReasoningModel };

  const reader = response.body.getReader();
  const decoder = new TextDecoder();
  let totalThinkingChars = 0;
  let hasContent = false;
  const pendingToolCalls = []; // Collect tool calls for end of stream
  // Ollama /api/chat streams cumulative message.content (and message.thinking); emit only deltas
  let lastTextContent = "";
  let lastThinkingContent = "";
  // BUGFIX: an NDJSON line can be split across network reads; the original
  // split each read chunk on "\n" and silently dropped the partial line as
  // "malformed JSON". Buffer the trailing partial line between reads instead.
  let lineBuffer = "";

  // Processes one complete NDJSON line; returns the chunks to yield,
  // or null when the line is malformed / carries nothing to emit.
  // (Kept inline below because the handler must mutate closure state.)
  while (true) {
    const { done, value } = await reader.read();
    if (done) break;

    lineBuffer += decoder.decode(value, { stream: true });
    const lines = lineBuffer.split("\n");
    lineBuffer = lines.pop() ?? ""; // keep incomplete tail for the next read

    for (const line of lines) {
      if (!line.trim()) continue;
      try {
        const data = JSON.parse(line);

        // Handle thinking tokens (reasoning models with think: true)
        // Ollama returns cumulative message.thinking; emit only the new part
        if (data.message?.thinking) {
          const full = data.message.thinking;
          const delta = full.startsWith(lastThinkingContent)
            ? full.slice(lastThinkingContent.length)
            : full;
          lastThinkingContent = full;
          totalThinkingChars += delta.length;
          if (delta) {
            yield {
              type: "thinking",
              content: delta,
              totalChars: totalThinkingChars,
            };
          }
        }

        // Handle regular content: Ollama sends cumulative message.content; yield only the new part
        if (data.message?.content) {
          hasContent = true;
          const full = data.message.content;
          const delta = full.startsWith(lastTextContent)
            ? full.slice(lastTextContent.length)
            : full;
          lastTextContent = full;
          if (delta) {
            yield { type: "text", content: delta };
          }
        }

        // Handle native tool calls from Ollama
        // Tool calls come in message.tool_calls array
        if (
          data.message?.tool_calls &&
          Array.isArray(data.message.tool_calls)
        ) {
          for (const toolCall of data.message.tool_calls) {
            pendingToolCalls.push(toolCall);
          }
        }

        if (data.done) {
          // If we have tool calls, convert them to XML and yield as text
          // This allows the existing agent loop to handle them
          if (pendingToolCalls.length > 0) {
            console.log(
              `[Ollama] Converting ${pendingToolCalls.length} native tool call(s) to XML format`,
            );
            for (const toolCall of pendingToolCalls) {
              const xmlToolCall = toolCallToXml(toolCall);
              yield { type: "text", content: xmlToolCall };
              hasContent = true;
            }
          }

          // If reasoning model produced thinking but no content or tool calls,
          // yield a special marker so the agent can handle recovery
          if (
            isReasoningModel &&
            totalThinkingChars > 0 &&
            !hasContent &&
            pendingToolCalls.length === 0
          ) {
            console.warn(
              `[Ollama] Reasoning model produced ${totalThinkingChars} thinking chars but no content or tool calls.`,
            );
            // Yield a marker that the agent can use to trigger recovery
            // Don't yield text - the agent will handle prompting the model
            yield {
              type: "thinking_only",
              thinkingChars: totalThinkingChars,
              message: "Model produced reasoning but no response or tool calls",
            };
          }

          // Ollama provides token counts when done
          // eval_count = output tokens, prompt_eval_count = input tokens
          yield {
            type: "usage",
            inputTokens: data.prompt_eval_count || 0,
            outputTokens: data.eval_count || 0,
            totalTokens: (data.prompt_eval_count || 0) + (data.eval_count || 0),
            thinkingChars: totalThinkingChars,
          };
          return;
        }
      } catch (e) {
        // Skip malformed JSON lines
      }
    }
  }
}
|
|
340
|
+
|
|
341
|
+
/**
 * Check if Ollama is available by probing the /api/tags endpoint.
 * Aborts the request after 2 seconds so an unreachable server never blocks.
 * @param {object} config - Configuration (reads config.ollamaUrl)
 * @returns {Promise<boolean>} - true when the server responds OK
 */
export async function checkOllamaAvailable(config) {
  const baseUrl = config.ollamaUrl || "http://localhost:11434";
  const controller = new AbortController();
  const timeout = setTimeout(() => controller.abort(), 2000);
  try {
    const response = await fetch(`${baseUrl}/api/tags`, {
      signal: controller.signal,
    });
    return response.ok;
  } catch {
    return false;
  } finally {
    // BUGFIX: originally the timer was cleared only on the success path, so a
    // failed fetch left a pending 2s timer keeping the event loop alive.
    clearTimeout(timeout);
  }
}
|
|
358
|
+
|
|
359
|
+
/**
 * List available Ollama models from the local server's /api/tags endpoint.
 * @param {object} config - Configuration (reads config.ollamaUrl)
 * @returns {Promise<Array<{id: string, name: string, size: number}>>}
 *   Models as {id, name, size}; empty array when unreachable or on any error.
 */
export async function listOllamaModels(config) {
  const baseUrl = config.ollamaUrl || "http://localhost:11434";
  try {
    const response = await fetch(`${baseUrl}/api/tags`);
    if (!response.ok) return [];
    const data = await response.json();
    const models = data.models || [];
    return models.map((m) => {
      // Prefix with "ollama:" so the id is namespaced across providers.
      return { id: `ollama:${m.name}`, name: m.name, size: m.size };
    });
  } catch {
    // Best-effort listing: any network/parse failure just means "no models".
    return [];
  }
}
|
|
377
|
+
|
|
378
|
+
// Aggregate default export; each function is also available as a named export.
export default {
  streamOllama,
  checkOllamaAvailable,
  listOllamaModels,
  isOllamaReasoningModel,
};
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
import OpenAI from 'openai';
|
|
2
|
+
|
|
3
|
+
/**
 * Check if a model is an image generation model
 * @param {string} model - OpenAI model identifier
 * @returns {boolean} - true for gpt-image-1 and any dall-e variant
 */
export function isOpenAIImageModel(model) {
  if (model === 'gpt-image-1') {
    return true;
  }
  return model.startsWith('dall-e');
}
|
|
9
|
+
|
|
10
|
+
/**
 * Generate an image using OpenAI image models (gpt-image-1, dall-e-3)
 * @param {string} model - Model identifier (e.g., 'gpt-image-1')
 * @param {string} prompt - Text prompt for image generation
 * @param {object} config - Configuration with API keys and settings
 * @yields {object} - Chunks with type and content (image as base64, or
 *   image_url when the API returns a hosted URL instead)
 * @throws {Error} when the API key is missing or no image data is returned
 */
export async function* generateOpenAIImage(model, prompt, config) {
  const apiKey = config.apiKeys?.openai;
  if (!apiKey) {
    throw new Error('OpenAI API key not configured. Run: otherwise config set openai <key>');
  }

  const client = new OpenAI({ apiKey });

  try {
    const response = await client.images.generate({
      model,
      prompt,
      n: 1,
      // BUGFIX: gpt-image-1 rejects the response_format parameter (it always
      // returns base64-encoded images); only dall-e models accept it.
      ...(model.startsWith('dall-e') ? { response_format: 'b64_json' } : {}),
      size: '1024x1024', // Default size
    });

    const imageData = response.data?.[0];

    if (imageData?.b64_json) {
      yield { type: 'text', content: 'Here\'s the generated image:\n\n' };
      yield {
        type: 'image',
        content: imageData.b64_json,
        mimeType: 'image/png',
        revisedPrompt: imageData.revised_prompt || prompt,
      };
    } else if (imageData?.url) {
      // Fallback: some responses carry a hosted URL rather than inline base64.
      yield { type: 'text', content: 'Here\'s the generated image:\n\n' };
      yield {
        type: 'image_url',
        content: imageData.url,
        revisedPrompt: imageData.revised_prompt || prompt,
      };
    } else {
      throw new Error('No image data returned from OpenAI');
    }
  } catch (error) {
    // Log for diagnostics, then propagate so the caller can surface it.
    console.error('[OpenAI Image] Generation error:', error);
    throw error;
  }
}
|
|
59
|
+
|
|
60
|
+
/**
 * Stream chat completion from OpenAI GPT.
 * Routes image-generation models (gpt-image-1, dall-e-*) to
 * generateOpenAIImage, builds multimodal content blocks for user messages
 * that carry images, and yields token-usage stats from the final chunk.
 * @param {string} model - Model identifier (e.g., 'gpt-4o')
 * @param {Array} messages - Array of message objects with role and content
 * @param {string} systemPrompt - System prompt
 * @param {object} config - Configuration with API keys and settings
 * @yields {object} - Chunks with type and content
 * @throws {Error} when the OpenAI API key is not configured
 */
export async function* streamOpenAI(model, messages, systemPrompt, config) {
  const apiKey = config.apiKeys?.openai;
  if (!apiKey) {
    throw new Error('OpenAI API key not configured. Run: otherwise config set openai <key>');
  }

  // Route image models to the image generation function
  if (isOpenAIImageModel(model)) {
    // Use the most recent user message as the image prompt.
    const lastUserMessage = [...messages].reverse().find(m => m.role === 'user');
    const prompt = lastUserMessage?.content || 'Generate an image';
    yield* generateOpenAIImage(model, prompt, config);
    return;
  }

  const client = new OpenAI({ apiKey });

  // Convert messages to OpenAI format (vision: user messages can have images)
  const openaiMessages = [
    { role: 'system', content: systemPrompt },
    ...messages.map(m => {
      const role = m.role === 'user' ? 'user' : 'assistant';
      // Vision: user message with images -> multimodal content array
      if (m.images && m.images.length > 0 && m.role === 'user') {
        const imageBlocks = m.images.map((imgDataUrl) => ({
          type: 'image_url',
          image_url: { url: imgDataUrl },
        }));
        return {
          role,
          content: [...imageBlocks, { type: 'text', text: m.content || '' }],
        };
      }
      return { role, content: m.content };
    }),
  ];

  // Some models (o-series, GPT-5) only support temperature=1
  const isRestrictedTemp = /^o[0-9]|^gpt-5/.test(model);

  const stream = await client.chat.completions.create({
    model,
    messages: openaiMessages,
    max_completion_tokens: config.maxTokens || 8192,
    stream: true,
    stream_options: { include_usage: true }, // Get real token counts
    // BUGFIX: ?? (not ||) so an explicit temperature of 0 is honored
    ...(isRestrictedTemp ? {} : { temperature: config.temperature ?? 0.7 }),
  });

  let usage = null;

  for await (const chunk of stream) {
    const content = chunk.choices?.[0]?.delta?.content;
    if (content) {
      yield { type: 'text', content };
    }
    // Capture usage from final chunk (when stream_options.include_usage is true)
    if (chunk.usage) {
      usage = chunk.usage;
    }
  }

  // Yield usage stats at the end
  if (usage) {
    yield {
      type: 'usage',
      inputTokens: usage.prompt_tokens || 0,
      outputTokens: usage.completion_tokens || 0,
      totalTokens: usage.total_tokens || 0,
    };
  }
}
|
|
139
|
+
|
|
140
|
+
// Aggregate default export; each function is also available as a named export.
export default { streamOpenAI, generateOpenAIImage, isOpenAIImageModel };
|