zubo 0.1.19 → 0.1.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,6 +8,47 @@ import { FailoverProvider } from "./failover";
8
8
  import { SmartRouterProvider } from "./smart-router";
9
9
  import { logger } from "../util/logger";
10
10
 
11
/**
 * Query Ollama's `/api/show` endpoint to find the context window for a model.
 *
 * Resolution order:
 *  1. An explicit `num_ctx` entry in the model's `parameters` text. This is the
 *     context size the model is configured to run with, so it takes precedence
 *     over the architecture's trained maximum.
 *  2. The first positive numeric entry in `model_info` (then `details`) whose
 *     key contains `context_length` or equals `num_ctx`
 *     (e.g. `llama.context_length`).
 *
 * This is a best-effort probe: any network error, timeout (3 s), non-2xx
 * response or unexpected payload shape yields `null` instead of throwing.
 *
 * @param model   Model name as known to the Ollama server.
 * @param baseUrl Server root URL without the `/api` suffix; trailing slashes
 *                are tolerated.
 * @returns The detected context length in tokens, or `null` if unavailable.
 */
export async function detectOllamaContextWindow(
  model: string,
  baseUrl: string = "http://localhost:11434"
): Promise<number | null> {
  try {
    // Strip trailing slashes so we never request "//api/show".
    const endpoint = `${baseUrl.replace(/\/+$/, "")}/api/show`;
    const res = await fetch(endpoint, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      // `model` is the current field name; `name` is kept for older servers.
      body: JSON.stringify({ model, name: model }),
      signal: AbortSignal.timeout(3000),
    });
    if (!res.ok) return null;

    const data: unknown = await res.json();
    if (typeof data !== "object" || data === null) return null;
    const show = data as Record<string, unknown>;

    // 1. Explicit Modelfile override, e.g. "num_ctx                 8192".
    const parameters = show["parameters"];
    if (typeof parameters === "string") {
      const match = /\bnum_ctx\s+(\d+)/.exec(parameters);
      const explicit = Number.parseInt(match?.[1] ?? "", 10);
      // NaN (no match) and 0 both fail this comparison and fall through.
      if (explicit > 0) return explicit;
    }

    // 2. Architecture metadata, falling back to `details` when `model_info`
    //    is missing or carries no usable key.
    for (const section of [show["model_info"], show["details"]]) {
      if (typeof section !== "object" || section === null) continue;
      for (const [key, val] of Object.entries(section)) {
        const isContextKey = key.includes("context_length") || key === "num_ctx";
        if (isContextKey && typeof val === "number" && val > 0) {
          return val;
        }
      }
    }

    return null;
  } catch {
    // Deliberate best-effort: detection failure must never block startup.
    return null;
  }
}
51
+
11
52
  const KNOWN_BASE_URLS: Record<string, string> = {
12
53
  openai: "https://api.openai.com/v1",
13
54
  groq: "https://api.groq.com/openai/v1",
@@ -67,7 +108,7 @@ function buildSingleProvider(
67
108
  });
68
109
  }
69
110
 
70
- export function createProvider(config: ZuboConfig): LlmProvider {
111
+ export async function createProvider(config: ZuboConfig): Promise<LlmProvider> {
71
112
  // New multi-provider config
72
113
  if (config.providers && config.activeProvider) {
73
114
  const activeCfg = config.providers[config.activeProvider];
@@ -78,7 +119,18 @@ export function createProvider(config: ZuboConfig): LlmProvider {
78
119
  }
79
120
 
80
121
  const primary = buildSingleProvider(config.activeProvider, activeCfg);
81
- logger.info(`LLM provider: ${primary.providerName}/${primary.model}`);
122
+
123
+ // Auto-detect context window for local providers if not explicitly set
124
+ if (!activeCfg.contextWindow && (config.activeProvider === "ollama" || config.activeProvider === "lmstudio")) {
125
+ const ollamaBase = activeCfg.baseUrl?.replace(/\/v1\/?$/, "") ?? "http://localhost:11434";
126
+ const detected = await detectOllamaContextWindow(activeCfg.model, ollamaBase);
127
+ if (detected) {
128
+ primary.contextWindow = detected;
129
+ logger.info(`Auto-detected context window for ${activeCfg.model}: ${detected} tokens`);
130
+ }
131
+ }
132
+
133
+ logger.info(`LLM provider: ${primary.providerName}/${primary.model} (context: ${primary.contextWindow})`);
82
134
 
83
135
  // Build failover chain
84
136
  let provider: LlmProvider = primary;
@@ -128,3 +180,30 @@ export function createProvider(config: ZuboConfig): LlmProvider {
128
180
  "No LLM provider configured. Run 'zubo setup' or add a providers section to config.json"
129
181
  );
130
182
  }
183
+
184
/**
 * Quick connectivity check: sends a minimal chat request to verify that the
 * provider is reachable and the credentials/model are accepted.
 *
 * The error text is classified in this order:
 *  1. authentication problems (401, "unauthorized", invalid/incorrect API key),
 *  2. unknown model (404, "not found"),
 *  3. connection problems (ECONNREFUSED, "fetch failed", "connection refused"),
 *  4. anything else is reported verbatim.
 *
 * Never throws: whatever `provider.chat` throws, including non-Error values
 * such as `null`, is converted into a message.
 *
 * @param provider Provider to probe.
 * @returns `null` on success, or a friendly error message on failure.
 */
export async function validateProvider(provider: LlmProvider): Promise<string | null> {
  try {
    await provider.chat({
      system: "Respond with OK.",
      messages: [{ role: "user", content: "Say OK" }],
      maxTokens: 8,
    });
    return null;
  } catch (err: unknown) {
    // Extract a message without assuming the thrown value is an Error.
    let msg: string;
    if (err instanceof Error) {
      msg = err.message;
    } else if (
      typeof err === "object" &&
      err !== null &&
      typeof (err as { message?: unknown }).message === "string"
    ) {
      msg = (err as { message: string }).message;
    } else {
      msg = String(err);
    }

    // Only key-related "invalid" messages count as auth failures; a generic
    // "invalid request/model/parameter" must not be reported as a bad key.
    const authFailure =
      /\b401\b|unauthorized|authentication|(?:invalid|incorrect)[\w\s-]{0,12}key|key not valid/i;
    if (authFailure.test(msg)) {
      return `API key is invalid. Double-check your key for ${provider.providerName}.`;
    }
    if (/\b404\b|not[\s_]found/i.test(msg)) {
      return `Model "${provider.model}" not found on ${provider.providerName}. Check the model name.`;
    }
    if (/ECONNREFUSED|fetch failed|connection refused/i.test(msg)) {
      return `Cannot reach ${provider.providerName}. Make sure the server is running.`;
    }
    return `${provider.providerName} test failed: ${msg}`;
  }
}
@@ -1,10 +1,21 @@
1
1
  import type { LlmProvider, LlmRequest, LlmResponse, LlmStreamEvent } from "./provider";
2
+ import { compactMessages } from "../agent/compaction";
2
3
  import { logger } from "../util/logger";
3
4
 
5
/**
 * Fit a request to a specific provider by running its messages through
 * `compactMessages` with that provider's context window.
 *
 * Returns the very same request object when the message array comes back
 * unchanged (same reference); otherwise returns a shallow copy of the request
 * carrying the compacted messages.
 */
function fitToProvider(request: LlmRequest, provider: LlmProvider): LlmRequest {
  const { messages } = request;
  const fitted = compactMessages(messages, provider.contextWindow);
  return fitted === messages ? request : { ...request, messages: fitted };
}
11
+
12
+ const PRIMARY_RETRY_INTERVAL_MS = 60_000; // Retry primary after 60 seconds
13
+
4
14
  export class FailoverProvider implements LlmProvider {
5
15
  providerName: string;
6
16
  model: string;
7
17
  contextWindow: number;
18
+ private failedOverAt: number = 0;
8
19
 
9
20
  constructor(
10
21
  private primary: LlmProvider,
@@ -15,9 +26,40 @@ export class FailoverProvider implements LlmProvider {
15
26
  this.contextWindow = primary.contextWindow;
16
27
  }
17
28
 
29
+ private get isOnPrimary(): boolean {
30
+ return this.providerName === this.primary.providerName &&
31
+ this.model === this.primary.model;
32
+ }
33
+
34
+ private restorePrimary(): void {
35
+ this.providerName = this.primary.providerName;
36
+ this.model = this.primary.model;
37
+ this.contextWindow = this.primary.contextWindow;
38
+ this.failedOverAt = 0;
39
+ logger.info(`Recovered to primary provider: ${this.primary.providerName}/${this.primary.model}`);
40
+ }
41
+
42
+ private shouldRetryPrimary(): boolean {
43
+ return !this.isOnPrimary &&
44
+ this.failedOverAt > 0 &&
45
+ Date.now() - this.failedOverAt >= PRIMARY_RETRY_INTERVAL_MS;
46
+ }
47
+
18
48
  async chat(request: LlmRequest): Promise<LlmResponse> {
49
+ // If we're on a fallback, periodically retry the primary
50
+ if (this.shouldRetryPrimary()) {
51
+ try {
52
+ const result = await this.primary.chat(fitToProvider(request, this.primary));
53
+ this.restorePrimary();
54
+ return result;
55
+ } catch {
56
+ // Primary still down, continue with fallbacks below
57
+ this.failedOverAt = Date.now();
58
+ }
59
+ }
60
+
19
61
  try {
20
- return await this.primary.chat(request);
62
+ return await this.primary.chat(fitToProvider(request, this.primary));
21
63
  } catch (err: any) {
22
64
  logger.warn(`Primary provider (${this.primary.providerName}) failed`, {
23
65
  error: err.message,
@@ -26,9 +68,10 @@ export class FailoverProvider implements LlmProvider {
26
68
  for (const fb of this.fallbacks) {
27
69
  try {
28
70
  logger.info(`Trying fallback: ${fb.providerName}/${fb.model}`);
29
- const result = await fb.chat(request);
71
+ const result = await fb.chat(fitToProvider(request, fb));
30
72
  this.providerName = fb.providerName;
31
73
  this.model = fb.model;
74
+ this.failedOverAt = Date.now();
32
75
  return result;
33
76
  } catch (fbErr: any) {
34
77
  logger.warn(`Fallback ${fb.providerName} also failed`, {
@@ -52,7 +95,7 @@ export class FailoverProvider implements LlmProvider {
52
95
  if (!provider.chatStream) return null;
53
96
  const events: LlmStreamEvent[] = [];
54
97
  try {
55
- for await (const event of provider.chatStream(request)) {
98
+ for await (const event of provider.chatStream(fitToProvider(request, provider))) {
56
99
  if (events.length >= MAX_STREAM_EVENTS) {
57
100
  throw new Error(`Stream exceeded maximum event limit (${MAX_STREAM_EVENTS})`);
58
101
  }
@@ -67,6 +110,17 @@ export class FailoverProvider implements LlmProvider {
67
110
  }
68
111
  }
69
112
 
113
+ // If we're on a fallback, periodically retry the primary
114
+ if (this.shouldRetryPrimary()) {
115
+ const retryEvents = await collectStream(this.primary);
116
+ if (retryEvents) {
117
+ this.restorePrimary();
118
+ for (const event of retryEvents) yield event;
119
+ return;
120
+ }
121
+ this.failedOverAt = Date.now();
122
+ }
123
+
70
124
  // Try primary
71
125
  const primaryEvents = await collectStream(this.primary);
72
126
  if (primaryEvents) {
@@ -80,12 +134,13 @@ export class FailoverProvider implements LlmProvider {
80
134
  if (fbEvents) {
81
135
  this.providerName = fb.providerName;
82
136
  this.model = fb.model;
137
+ this.failedOverAt = Date.now();
83
138
  for (const event of fbEvents) yield event;
84
139
  return;
85
140
  }
86
141
  }
87
142
 
88
- // If no provider supports streaming, fall back to non-streaming
143
+ // If no provider supports streaming, fall back to non-streaming (chat() already handles fitToProvider)
89
144
  logger.info("No streaming providers available, falling back to non-streaming");
90
145
  const response = await this.chat(request);
91
146
  for (const block of response.content) {
@@ -4,8 +4,16 @@ import type {
4
4
  LlmResponse,
5
5
  LlmStreamEvent,
6
6
  } from "./provider";
7
+ import { compactMessages } from "../agent/compaction";
7
8
  import { logger } from "../util/logger";
8
9
 
10
/**
 * Adapt a request to the target provider's context window.
 *
 * The messages are passed through `compactMessages` together with
 * `provider.contextWindow`. If that call hands back the identical array, the
 * original request is returned untouched; if not, a shallow clone with the
 * compacted messages is returned.
 */
function fitToProvider(request: LlmRequest, provider: LlmProvider): LlmRequest {
  const original = request.messages;
  const trimmed = compactMessages(original, provider.contextWindow);
  if (trimmed !== original) {
    return { ...request, messages: trimmed };
  }
  return request;
}
16
+
9
17
  const CODE_MARKERS = [
10
18
  "```",
11
19
  "function ",
@@ -163,16 +171,16 @@ export class SmartRouterProvider implements LlmProvider {
163
171
 
164
172
  if (provider === this.fast) {
165
173
  try {
166
- return await provider.chat(request);
174
+ return await provider.chat(fitToProvider(request, provider));
167
175
  } catch (err: any) {
168
176
  logger.warn("Fast model failed, falling back to primary", {
169
177
  error: err.message,
170
178
  });
171
- return this.primary.chat(request);
179
+ return this.primary.chat(fitToProvider(request, this.primary));
172
180
  }
173
181
  }
174
182
 
175
- return provider.chat(request);
183
+ return provider.chat(fitToProvider(request, provider));
176
184
  }
177
185
 
178
186
  async *chatStream(request: LlmRequest): AsyncIterable<LlmStreamEvent> {
@@ -185,7 +193,7 @@ export class SmartRouterProvider implements LlmProvider {
185
193
  const events: LlmStreamEvent[] = [];
186
194
  let succeeded = false;
187
195
  try {
188
- for await (const event of provider.chatStream(request)) {
196
+ for await (const event of provider.chatStream(fitToProvider(request, provider))) {
189
197
  if (events.length >= MAX_STREAM_EVENTS) {
190
198
  throw new Error(`Stream exceeded maximum event limit (${MAX_STREAM_EVENTS})`);
191
199
  }
@@ -214,9 +222,9 @@ export class SmartRouterProvider implements LlmProvider {
214
222
 
215
223
  // Use primary model (streaming or non-streaming fallback)
216
224
  if (this.primary.chatStream) {
217
- yield* this.primary.chatStream(request);
225
+ yield* this.primary.chatStream(fitToProvider(request, this.primary));
218
226
  } else {
219
- const response = await this.primary.chat(request);
227
+ const response = await this.primary.chat(fitToProvider(request, this.primary));
220
228
  for (const block of response.content) {
221
229
  if (block.type === "text" && block.text) {
222
230
  yield { type: "text_delta", text: block.text };
@@ -326,7 +326,7 @@ export function getGraph(
326
326
  export function findMentionedEntities(db: Database, text: string, limit: number = 5): KgEntity[] {
327
327
  try {
328
328
  const allNames = db.query(
329
- "SELECT DISTINCT name FROM kg_entities ORDER BY length(name) DESC"
329
+ "SELECT DISTINCT name FROM kg_entities ORDER BY length(name) DESC LIMIT 500"
330
330
  ).all() as { name: string }[];
331
331
 
332
332
  const lowerText = text.toLowerCase();
@@ -41,7 +41,7 @@ export async function vectorSearch(
41
41
 
42
42
  const rows = db
43
43
  .query(
44
- "SELECT id, content, source_file, embedding FROM memory_chunks WHERE embedding IS NOT NULL ORDER BY id DESC LIMIT 5000"
44
+ "SELECT id, content, source_file, embedding FROM memory_chunks WHERE embedding IS NOT NULL ORDER BY id DESC LIMIT 500"
45
45
  )
46
46
  .all() as Array<{
47
47
  id: number;
@@ -154,7 +154,7 @@ async function executeAgentStep(
154
154
  const { agentLoop } = await import("../agent/loop");
155
155
 
156
156
  const appConfig = await loadConfig();
157
- const llm = createProvider(appConfig);
157
+ const llm = await createProvider(appConfig);
158
158
  const result = await agentLoop(llm, `workflow:${workflowId}`, resolvedPrompt);
159
159
 
160
160
  const output = result.reply;