@bike4mind/cli 0.2.70 → 0.2.71-feat-7629-unified-streaming-model-fallback.22033

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8091,7 +8091,8 @@ const CliConfigSchema = z.object({
8091
8091
  features: z.object({ tavern: z.boolean().optional() }).optional().prefault({}),
8092
8092
  trustedTools: z.array(z.string()).optional().prefault([]),
8093
8093
  sandbox: SandboxConfigSchema.optional(),
8094
- additionalDirectories: z.array(z.string()).optional().prefault([])
8094
+ additionalDirectories: z.array(z.string()).optional().prefault([]),
8095
+ fallbackModels: z.array(z.string()).optional()
8095
8096
  });
8096
8097
  /**
8097
8098
  * Zod schema for ProjectConfig validation
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env node
2
- import { i as version, n as fetchLatestVersion, r as forceCheckForUpdate } from "../updateChecker-BEb2EBef.mjs";
2
+ import { i as version, n as fetchLatestVersion, r as forceCheckForUpdate } from "../updateChecker-CRjs4C6H.mjs";
3
3
  import { execSync } from "child_process";
4
4
  import { constants, existsSync, promises } from "fs";
5
5
  import { homedir } from "os";
@@ -1,6 +1,6 @@
1
1
  #!/usr/bin/env node
2
- import { I as isReadOnlyTool, L as ReActAgent, M as setWebSocketToolExecutor, P as buildCoreSystemPrompt, R as CustomCommandStore, S as getApiUrl, T as generateCliTools, V as SessionStore, _ as McpManager, a as createBackgroundAgentTools, c as AgentStore, f as ApiClient, g as ServerLlmBackend, h as WebSocketLlmBackend, i as createWriteTodosTool, l as SubagentOrchestrator, m as WebSocketConnectionManager, n as createFindDefinitionTool, o as BackgroundAgentManager, p as WebSocketToolExecutor, r as createTodoStore, s as createAgentDelegateTool, t as createGetFileStructureTool, u as createSkillTool, w as PermissionManager, x as loadContextFiles, z as CheckpointStore } from "../tools-CJquAyBq.mjs";
3
- import { n as logger, t as ConfigStore } from "../ConfigStore-DH64GYfC.mjs";
2
+ import { B as CheckpointStore, C as getApiUrl, E as generateCliTools, F as buildCoreSystemPrompt, H as SessionStore, L as isReadOnlyTool, N as setWebSocketToolExecutor, R as ReActAgent, S as loadContextFiles, T as PermissionManager, _ as ServerLlmBackend, a as createBackgroundAgentTools, c as AgentStore, f as ApiClient, g as WebSocketLlmBackend, h as FallbackLlmBackend, i as createWriteTodosTool, l as SubagentOrchestrator, m as WebSocketConnectionManager, n as createFindDefinitionTool, o as BackgroundAgentManager, p as WebSocketToolExecutor, r as createTodoStore, s as createAgentDelegateTool, t as createGetFileStructureTool, u as createSkillTool, v as McpManager, z as CustomCommandStore } from "../tools-8CXe7kfN.mjs";
3
+ import { n as logger, t as ConfigStore } from "../ConfigStore-DCZ0ojCS.mjs";
4
4
  import { t as DEFAULT_SANDBOX_CONFIG } from "../types-DBEjF9YS.mjs";
5
5
  import { t as createSandboxRuntime } from "../SandboxRuntimeAdapter-C1B4t20N.mjs";
6
6
  import { t as SandboxOrchestrator } from "../SandboxOrchestrator-BEW3rqYi.mjs";
@@ -97,6 +97,9 @@ async function handleHeadlessCommand(options) {
97
97
  if (models.length === 0) throw new Error("No models available from server.");
98
98
  const modelInfo = models.find((m) => m.id === config.defaultModel) ?? models[0];
99
99
  llm.currentModel = modelInfo.id;
100
+ const effectiveLlm = config.fallbackModels && config.fallbackModels.length > 0 ? new FallbackLlmBackend(llm, config.fallbackModels, (fromModel, toModel, error) => {
101
+ process.stderr.write(`⚠️ Model "${fromModel}" failed (${error.message}). Falling back to "${toModel}"...\n`);
102
+ }) : llm;
100
103
  const session = {
101
104
  id: v4(),
102
105
  name: `Headless ${(/* @__PURE__ */ new Date()).toISOString()}`,
@@ -195,7 +198,7 @@ async function handleHeadlessCommand(options) {
195
198
  const agent = new ReActAgent({
196
199
  userId: config.userId,
197
200
  logger: silentLogger,
198
- llm,
201
+ llm: effectiveLlm,
199
202
  model: modelInfo.id,
200
203
  tools: allTools,
201
204
  maxIterations,
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env node
2
- import { t as ConfigStore } from "../ConfigStore-DH64GYfC.mjs";
2
+ import { t as ConfigStore } from "../ConfigStore-DCZ0ojCS.mjs";
3
3
  //#region src/commands/mcpCommand.ts
4
4
  /**
5
5
  * External MCP commands (b4m mcp list, b4m mcp add, etc.)
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env node
2
- import { i as version, r as forceCheckForUpdate } from "../updateChecker-BEb2EBef.mjs";
2
+ import { i as version, r as forceCheckForUpdate } from "../updateChecker-CRjs4C6H.mjs";
3
3
  import { execSync } from "child_process";
4
4
  //#region src/commands/updateCommand.ts
5
5
  /**
package/dist/index.mjs CHANGED
@@ -1,8 +1,8 @@
1
1
  #!/usr/bin/env node
2
2
  import { n as useCliStore, t as selectActiveBackgroundAgents } from "./store-Dw1nZX2Y.mjs";
3
- import { A as clearFeatureModuleTools, B as CommandHistoryStore, C as getEnvironmentName, D as DEFAULT_AGENT_MODEL, E as ALWAYS_DENIED_FOR_AGENTS, F as buildSkillsPromptSection, G as searchCommands, H as OAuthClient, I as isReadOnlyTool, J as searchFiles, K as mergeCommands, L as ReActAgent, M as setWebSocketToolExecutor, N as OllamaBackend, O as DEFAULT_MAX_ITERATIONS, P as buildCoreSystemPrompt, R as CustomCommandStore, S as getApiUrl, T as generateCliTools, U as hasFileReferences, V as SessionStore, W as processFileReferences, Y as warmFileCache, _ as McpManager, a as createBackgroundAgentTools, b as extractCompactInstructions, c as AgentStore, d as parseAgentConfig, f as ApiClient, g as ServerLlmBackend, h as WebSocketLlmBackend, i as createWriteTodosTool, j as registerFeatureModuleTools, k as DEFAULT_THOROUGHNESS, l as SubagentOrchestrator, m as WebSocketConnectionManager, n as createFindDefinitionTool, o as BackgroundAgentManager, p as WebSocketToolExecutor, q as formatFileSize, r as createTodoStore, s as createAgentDelegateTool, t as createGetFileStructureTool, u as createSkillTool, v as substituteArguments, w as PermissionManager, x as loadContextFiles, y as formatStep, z as CheckpointStore } from "./tools-CJquAyBq.mjs";
4
- import { Dt as validateNotebookPath$1, Et as validateJupyterKernelName, g as ChatModels, m as CREDIT_DEDUCT_TRANSACTION_TYPES, n as logger, t as ConfigStore } from "./ConfigStore-DH64GYfC.mjs";
5
- import { i as version, t as checkForUpdate } from "./updateChecker-BEb2EBef.mjs";
3
+ import { A as DEFAULT_THOROUGHNESS, B as CheckpointStore, C as getApiUrl, D as ALWAYS_DENIED_FOR_AGENTS, E as generateCliTools, F as buildCoreSystemPrompt, G as processFileReferences, H as SessionStore, I as buildSkillsPromptSection, J as formatFileSize, K as searchCommands, L as isReadOnlyTool, M as registerFeatureModuleTools, N as setWebSocketToolExecutor, O as DEFAULT_AGENT_MODEL, P as OllamaBackend, R as ReActAgent, S as loadContextFiles, T as PermissionManager, U as OAuthClient, V as CommandHistoryStore, W as hasFileReferences, X as warmFileCache, Y as searchFiles, _ as ServerLlmBackend, a as createBackgroundAgentTools, b as formatStep, c as AgentStore, d as parseAgentConfig, f as ApiClient, g as WebSocketLlmBackend, h as FallbackLlmBackend, i as createWriteTodosTool, j as clearFeatureModuleTools, k as DEFAULT_MAX_ITERATIONS, l as SubagentOrchestrator, m as WebSocketConnectionManager, n as createFindDefinitionTool, o as BackgroundAgentManager, p as WebSocketToolExecutor, q as mergeCommands, r as createTodoStore, s as createAgentDelegateTool, t as createGetFileStructureTool, u as createSkillTool, v as McpManager, w as getEnvironmentName, x as extractCompactInstructions, y as substituteArguments, z as CustomCommandStore } from "./tools-8CXe7kfN.mjs";
4
+ import { Dt as validateNotebookPath$1, Et as validateJupyterKernelName, g as ChatModels, m as CREDIT_DEDUCT_TRANSACTION_TYPES, n as logger, t as ConfigStore } from "./ConfigStore-DCZ0ojCS.mjs";
5
+ import { i as version, t as checkForUpdate } from "./updateChecker-CRjs4C6H.mjs";
6
6
  import React, { useCallback, useEffect, useMemo, useReducer, useRef, useState } from "react";
7
7
  import { Box, Static, Text, render, useApp, useInput } from "ink";
8
8
  import { execSync } from "child_process";
@@ -4606,7 +4606,9 @@ function CliApp() {
4606
4606
  const agentDelegateTool = createAgentDelegateTool(orchestrator, agentStore, newSession.id, backgroundManager);
4607
4607
  const dynamicAgentTool = config.preferences.enableDynamicAgentCreation === true ? createDynamicAgentTool(orchestrator, newSession.id, backgroundManager) : null;
4608
4608
  const backgroundTools = createBackgroundAgentTools(backgroundManager);
4609
- const notifyingLlm = new NotifyingLlmBackend(llm, backgroundManager);
4609
+ const notifyingLlm = new NotifyingLlmBackend(config.fallbackModels && config.fallbackModels.length > 0 ? new FallbackLlmBackend(llm, config.fallbackModels, (fromModel, toModel) => {
4610
+ logger.warn(`⚠️ Model "${fromModel}" failed — falling back to "${toModel}"`);
4611
+ }) : llm, backgroundManager);
4610
4612
  const writeTodosTool = createWriteTodosTool(createTodoStore());
4611
4613
  const enableSkillTool = config.preferences.enableSkillTool !== false;
4612
4614
  const skillTool = enableSkillTool ? createSkillTool({
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env node
2
- import { $ as RegInviteEvents, A as ImageGenerationUsageTransaction, B as OpenAIEmbeddingModel, C as FileEvents, Ct as isGPTImageModel, D as GenericCreditAddTransaction, E as GenerateImageToolCallSchema, F as KnowledgeType, G as ProjectEvents, H as Permission, I as LLMEvents, J as QuestMasterParamsSchema, K as PromptMetaZodSchema, L as MiscEvents, M as InboxEvents, N as InviteEvents, O as GenericCreditDeductTransaction, Ot as CollectionType, P as InviteType, Q as RechartsChartTypeList, R as ModalEvents, S as FeedbackEvents, St as getViewById, T as GEMINI_IMAGE_MODELS, Tt as sanitizeTelemetryError, U as PermissionDeniedError, V as OpenAIImageGenerationInput, W as ProfileEvents, X as RealtimeVoiceUsageTransaction, Y as REASONING_SUPPORTED_MODELS, Z as ReceivedCreditTransaction, _ as CompletionApiUsageTransaction, _t as VoyageAIEmbeddingModel, a as ApiKeyEvents, at as SpeechToTextModels, b as FIXED_TEMPERATURE_MODELS, bt as getDataLakeTags, c as AppFileEvents, ct as TagType, d as BFL_IMAGE_MODELS, dt as ToolUsageTransaction, et as ResearchModeParamsSchema, f as BFL_SAFETY_TOLERANCE, ft as TransferCreditTransaction, g as ChatModels, gt as VideoModels, h as ChatCompletionCreateInputSchema, ht as VideoGenerationUsageTransaction, i as AiEvents, it as SessionEvents, j as ImageModels, k as ImageEditUsageTransaction, l as ArtifactTypeSchema, lt as TaskScheduleHandler, mt as VIDEO_SIZE_CONSTRAINTS, n as logger, nt as ResearchTaskPeriodicFrequencyType, o as ApiKeyScope, ot as SubscriptionCreditTransaction, p as BedrockEmbeddingModel, pt as UiNavigationEvents, q as PurchaseTransaction, r as ALERT_THRESHOLDS, rt as ResearchTaskType, s as ApiKeyType, st as SupportedFabFileMimeTypes, t as ConfigStore, tt as ResearchTaskExecutionType, u as AuthEvents, ut as TextGenerationUsageTransaction, v as DashboardParamsSchema, vt as XAI_IMAGE_MODELS, w as FriendshipEvents, wt as resolveNavigationIntents, x as FavoriteDocumentType, xt as getMcpProviderMetadata, y as ElabsEvents, yt as 
b4mLLMTools, z as ModelBackend } from "./ConfigStore-DH64GYfC.mjs";
2
+ import { $ as RegInviteEvents, A as ImageGenerationUsageTransaction, B as OpenAIEmbeddingModel, C as FileEvents, Ct as isGPTImageModel, D as GenericCreditAddTransaction, E as GenerateImageToolCallSchema, F as KnowledgeType, G as ProjectEvents, H as Permission, I as LLMEvents, J as QuestMasterParamsSchema, K as PromptMetaZodSchema, L as MiscEvents, M as InboxEvents, N as InviteEvents, O as GenericCreditDeductTransaction, Ot as CollectionType, P as InviteType, Q as RechartsChartTypeList, R as ModalEvents, S as FeedbackEvents, St as getViewById, T as GEMINI_IMAGE_MODELS, Tt as sanitizeTelemetryError, U as PermissionDeniedError, V as OpenAIImageGenerationInput, W as ProfileEvents, X as RealtimeVoiceUsageTransaction, Y as REASONING_SUPPORTED_MODELS, Z as ReceivedCreditTransaction, _ as CompletionApiUsageTransaction, _t as VoyageAIEmbeddingModel, a as ApiKeyEvents, at as SpeechToTextModels, b as FIXED_TEMPERATURE_MODELS, bt as getDataLakeTags, c as AppFileEvents, ct as TagType, d as BFL_IMAGE_MODELS, dt as ToolUsageTransaction, et as ResearchModeParamsSchema, f as BFL_SAFETY_TOLERANCE, ft as TransferCreditTransaction, g as ChatModels, gt as VideoModels, h as ChatCompletionCreateInputSchema, ht as VideoGenerationUsageTransaction, i as AiEvents, it as SessionEvents, j as ImageModels, k as ImageEditUsageTransaction, l as ArtifactTypeSchema, lt as TaskScheduleHandler, mt as VIDEO_SIZE_CONSTRAINTS, n as logger, nt as ResearchTaskPeriodicFrequencyType, o as ApiKeyScope, ot as SubscriptionCreditTransaction, p as BedrockEmbeddingModel, pt as UiNavigationEvents, q as PurchaseTransaction, r as ALERT_THRESHOLDS, rt as ResearchTaskType, s as ApiKeyType, st as SupportedFabFileMimeTypes, t as ConfigStore, tt as ResearchTaskExecutionType, u as AuthEvents, ut as TextGenerationUsageTransaction, v as DashboardParamsSchema, vt as XAI_IMAGE_MODELS, w as FriendshipEvents, wt as resolveNavigationIntents, x as FavoriteDocumentType, xt as getMcpProviderMetadata, y as ElabsEvents, yt as 
b4mLLMTools, z as ModelBackend } from "./ConfigStore-DCZ0ojCS.mjs";
3
3
  import { n as isPathAllowed, t as assertPathAllowed } from "./pathValidation-CIytuhr3-Dt5dntLx.mjs";
4
4
  import { execFile, execFileSync, spawn } from "child_process";
5
5
  import { createHash, randomBytes } from "crypto";
@@ -18188,16 +18188,16 @@ var StreamLogger = class StreamLogger {
18188
18188
  }
18189
18189
  };
18190
18190
  //#endregion
18191
- //#region src/llm/ServerLlmBackend.ts
18191
+ //#region src/llm/streamAccumulator.ts
18192
18192
  /**
18193
- * Strip <think>...</think> blocks from text
18194
- * Claude's extended thinking should not be shown in final output
18193
+ * Strip <think>...</think> blocks from text.
18194
+ * Claude's extended thinking should not be shown in final output.
18195
18195
  */
18196
- function stripThinkingBlocks$1(text) {
18196
+ function stripThinkingBlocks(text) {
18197
18197
  return text.replace(/<think>[\s\S]*?<\/think>/g, "").trim();
18198
18198
  }
18199
18199
  /**
18200
- * Extract usage and credit information from SSE event
18200
+ * Extract usage and credit information into CompletionInfo shape.
18201
18201
  */
18202
18202
  function extractUsageInfo(parsed) {
18203
18203
  return {
@@ -18208,6 +18208,64 @@ function extractUsageInfo(parsed) {
18208
18208
  };
18209
18209
  }
18210
18210
  /**
18211
+ * Accumulates streaming LLM response chunks (text, tool calls, thinking blocks, usage)
18212
+ * and fires the completion callback once at the end.
18213
+ *
18214
+ * Shared between ServerLlmBackend (SSE) and WebSocketLlmBackend (WebSocket frames)
18215
+ * so accumulation logic lives in exactly one place.
18216
+ */
18217
var StreamAccumulator = class {
  constructor() {
    // Raw concatenation of every text chunk; may still contain <think>...</think> blocks.
    this.accumulatedText = "";
    // Tool calls from the most recent non-empty tool_use chunk (replaced, not appended).
    this.toolsUsed = [];
    // Thinking blocks from the most recent tool_use chunk that carried any.
    this.thinkingBlocks = [];
    // Latest usage/credit info produced by extractUsageInfo; {} until a chunk carries some.
    this.lastUsageInfo = {};
  }

  /**
   * Records a plain content chunk.
   *
   * @param text    Text fragment to append. Empty or missing values are ignored so that
   *                a chunk without text never appends the literal "undefined"/"null"
   *                (same guard as onToolUse).
   * @param usage   Optional usage payload; when present the stored usage info is replaced.
   * @param credits Optional credits payload; when present the stored usage info is replaced.
   */
  onContent(text, usage, credits) {
    if (text) this.accumulatedText += text;
    if (usage || credits) {
      this.lastUsageInfo = extractUsageInfo({
        usage,
        credits
      });
    }
  }

  /**
   * Records a tool_use chunk.
   *
   * @param text     Optional text that accompanied the tool call; appended when non-empty.
   * @param tools    Tool calls; a non-empty array replaces the previously stored list.
   * @param thinking Thinking blocks; a non-empty array replaces the previously stored list.
   * @param usage    Optional usage payload (see onContent).
   * @param credits  Optional credits payload (see onContent).
   */
  onToolUse(text, tools, thinking, usage, credits) {
    if (text) this.accumulatedText += text;
    if (tools && tools.length > 0) this.toolsUsed = tools;
    if (thinking && thinking.length > 0) this.thinkingBlocks = thinking;
    if (usage || credits) {
      this.lastUsageInfo = extractUsageInfo({
        usage,
        credits
      });
    }
  }

  /**
   * True when neither text nor tools have been accumulated (stream produced nothing useful).
   * The check uses the raw text, i.e. before thinking blocks are stripped.
   */
  isEmpty() {
    return this.accumulatedText.trim().length === 0 && this.toolsUsed.length === 0;
  }

  /** Length of the raw accumulated text. */
  get accumulatedLength() {
    return this.accumulatedText.length;
  }

  /** Number of tool calls currently stored. */
  get toolCount() {
    return this.toolsUsed.length;
  }

  /**
   * Calls the completion callback with all accumulated content.
   * Strips thinking blocks from text before delivering.
   *
   * - With tools: the callback always runs (even when the cleaned text is empty) and
   *   receives toolsUsed, thinking (undefined when none) and the usage info.
   * - Without tools: the callback runs only when the cleaned text is non-empty.
   *
   * @param callback async (texts, info) => void; its rejection propagates to the caller.
   */
  async finalize(callback) {
    const cleanedText = stripThinkingBlocks(this.accumulatedText);
    if (this.toolsUsed.length > 0) {
      const info = {
        toolsUsed: this.toolsUsed,
        thinking: this.thinkingBlocks.length > 0 ? this.thinkingBlocks : void 0,
        ...this.lastUsageInfo
      };
      await callback([cleanedText], info);
      return;
    }
    if (cleanedText) await callback([cleanedText], this.lastUsageInfo);
  }
};
18266
+ //#endregion
18267
+ //#region src/llm/ServerLlmBackend.ts
18268
+ /**
18211
18269
  * Server-side LLM backend that proxies requests through Bike4Mind API
18212
18270
  * Uses Server-Sent Events (SSE) for streaming responses
18213
18271
  * API keys remain secure on server - never exposed to CLI
@@ -18310,10 +18368,7 @@ var ServerLlmBackend = class ServerLlmBackend {
18310
18368
  const streamLogger = new StreamLogger(logger, "ServerLlmBackend", process.env.B4M_VERBOSE === "1", process.env.B4M_DEBUG_STREAM === "1");
18311
18369
  streamLogger.streamStart();
18312
18370
  let eventCount = 0;
18313
- let accumulatedText = "";
18314
- let lastUsageInfo = {};
18315
- let toolsUsed = [];
18316
- let thinkingBlocks = [];
18371
+ const accumulator = new StreamAccumulator();
18317
18372
  let receivedDone = false;
18318
18373
  const parser = createParser({ onEvent: (event) => {
18319
18374
  eventCount++;
@@ -18321,28 +18376,15 @@ var ServerLlmBackend = class ServerLlmBackend {
18321
18376
  const data = event.data;
18322
18377
  if (data === "[DONE]") {
18323
18378
  receivedDone = true;
18324
- streamLogger.onCriticalEvent(eventCount, "[DONE]", `accumulated text length: ${accumulatedText.length}`);
18325
- const cleanedText = stripThinkingBlocks$1(accumulatedText);
18326
- streamLogger.streamComplete(accumulatedText);
18327
- if (toolsUsed.length > 0) {
18328
- const info = {
18329
- toolsUsed,
18330
- thinking: thinkingBlocks.length > 0 ? thinkingBlocks : void 0,
18331
- ...lastUsageInfo
18332
- };
18333
- logger.debug(`[ServerLlmBackend] Calling callback with tools, thinking blocks: ${thinkingBlocks.length}`);
18334
- callback([cleanedText], info).catch((err) => {
18335
- logger.error("[ServerLlmBackend] Callback error:", err);
18336
- reject(err);
18337
- }).then(() => {
18338
- logger.debug("[ServerLlmBackend] Callback completed, resolving");
18339
- resolve();
18340
- });
18341
- } else if (cleanedText) callback([cleanedText], lastUsageInfo).catch((err) => {
18379
+ streamLogger.onCriticalEvent(eventCount, "[DONE]", `accumulated text length: ${accumulator.accumulatedLength}`);
18380
+ streamLogger.streamComplete("");
18381
+ accumulator.finalize(callback).catch((err) => {
18342
18382
  logger.error("[ServerLlmBackend] Callback error:", err);
18343
18383
  reject(err);
18344
- }).then(() => resolve());
18345
- else resolve();
18384
+ }).then(() => {
18385
+ logger.debug("[ServerLlmBackend] Callback completed, resolving");
18386
+ resolve();
18387
+ });
18346
18388
  return;
18347
18389
  }
18348
18390
  try {
@@ -18354,9 +18396,8 @@ var ServerLlmBackend = class ServerLlmBackend {
18354
18396
  }
18355
18397
  if (parsed.type === "content") {
18356
18398
  const textChunk = parsed.text || "";
18357
- accumulatedText += textChunk;
18358
- if (parsed.usage || parsed.credits) lastUsageInfo = extractUsageInfo(parsed);
18359
- streamLogger.onContent(eventCount, textChunk, accumulatedText);
18399
+ accumulator.onContent(textChunk, parsed.usage, parsed.credits);
18400
+ streamLogger.onContent(eventCount, textChunk, "");
18360
18401
  } else if (parsed.type === "tool_use") {
18361
18402
  streamLogger.onCriticalEvent(eventCount, "TOOL_USE", `tools: ${parsed.tools?.length}`);
18362
18403
  if (parsed.tools && parsed.tools.length > 0) for (const tool of parsed.tools) {
@@ -18368,14 +18409,8 @@ var ServerLlmBackend = class ServerLlmBackend {
18368
18409
  logger.debug(` Params: [Unable to stringify]`);
18369
18410
  }
18370
18411
  }
18371
- const textChunk = parsed.text || "";
18372
- if (textChunk) accumulatedText += textChunk;
18373
- if (parsed.tools && parsed.tools.length > 0) toolsUsed = parsed.tools;
18374
- if (parsed.thinking && parsed.thinking.length > 0) {
18375
- thinkingBlocks = parsed.thinking;
18376
- streamLogger.onCriticalEvent(eventCount, "THINKING", `${thinkingBlocks.length} thinking blocks`);
18377
- }
18378
- if (parsed.usage || parsed.credits) lastUsageInfo = extractUsageInfo(parsed);
18412
+ accumulator.onToolUse(parsed.text || "", parsed.tools, parsed.thinking, parsed.usage, parsed.credits);
18413
+ if (parsed.thinking && parsed.thinking.length > 0) streamLogger.onCriticalEvent(eventCount, "THINKING", `${parsed.thinking.length} thinking blocks`);
18379
18414
  }
18380
18415
  } catch (parseError) {
18381
18416
  streamLogger.streamError(parseError);
@@ -18402,20 +18437,10 @@ var ServerLlmBackend = class ServerLlmBackend {
18402
18437
  });
18403
18438
  response.data.on("end", () => {
18404
18439
  if (!receivedDone) {
18405
- const hasAccumulatedData = accumulatedText.trim().length > 0 || toolsUsed.length > 0;
18406
- logger.warn(`[ServerLlmBackend] Stream ended without [DONE] signal. Accumulated text: ${accumulatedText.length} chars, tools: ${toolsUsed.length}`);
18407
- if (hasAccumulatedData) {
18408
- const cleanedText = stripThinkingBlocks$1(accumulatedText);
18409
- streamLogger.streamComplete(accumulatedText);
18410
- if (toolsUsed.length > 0) {
18411
- const info = {
18412
- toolsUsed,
18413
- thinking: thinkingBlocks.length > 0 ? thinkingBlocks : void 0,
18414
- ...lastUsageInfo
18415
- };
18416
- callback([cleanedText], info).then(() => resolve(), reject);
18417
- } else if (cleanedText) callback([cleanedText], lastUsageInfo).then(() => resolve(), reject);
18418
- else resolve();
18440
+ logger.warn(`[ServerLlmBackend] Stream ended without [DONE] signal. Accumulated text: ${accumulator.accumulatedLength} chars, tools: ${accumulator.toolCount}`);
18441
+ if (!accumulator.isEmpty()) {
18442
+ streamLogger.streamComplete("");
18443
+ accumulator.finalize(callback).then(() => resolve(), reject);
18419
18444
  } else reject(/* @__PURE__ */ new Error("Stream ended prematurely without receiving any data. The server may be experiencing issues."));
18420
18445
  } else logger.debug("[ServerLlmBackend] Stream ended, [DONE] handler will resolve");
18421
18446
  });
@@ -18536,9 +18561,6 @@ var ServerLlmBackend = class ServerLlmBackend {
18536
18561
  };
18537
18562
  //#endregion
18538
18563
  //#region src/llm/WebSocketLlmBackend.ts
18539
- function stripThinkingBlocks(text) {
18540
- return text.replace(/<think>[\s\S]*?<\/think>/g, "").trim();
18541
- }
18542
18564
  /**
18543
18565
  * Hybrid HTTP + WebSocket LLM backend for CLI completions.
18544
18566
  *
@@ -18572,10 +18594,7 @@ var WebSocketLlmBackend = class {
18572
18594
  const streamLogger = new StreamLogger(logger, "WebSocketLlmBackend", process.env.B4M_VERBOSE === "1", process.env.B4M_DEBUG_STREAM === "1");
18573
18595
  streamLogger.streamStart();
18574
18596
  let eventCount = 0;
18575
- let accumulatedText = "";
18576
- let lastUsageInfo = {};
18577
- let toolsUsed = [];
18578
- let thinkingBlocks = [];
18597
+ const accumulator = new StreamAccumulator();
18579
18598
  let settled = false;
18580
18599
  const settle = (action) => {
18581
18600
  if (settled) return;
@@ -18603,12 +18622,6 @@ var WebSocketLlmBackend = class {
18603
18622
  }
18604
18623
  options.abortSignal.addEventListener("abort", abortHandler, { once: true });
18605
18624
  }
18606
- const updateUsage = (usage) => {
18607
- if (usage) lastUsageInfo = {
18608
- inputTokens: usage.inputTokens,
18609
- outputTokens: usage.outputTokens
18610
- };
18611
- };
18612
18625
  this.wsManager.onRequest(requestId, (message) => {
18613
18626
  if (options.abortSignal?.aborted) return;
18614
18627
  const action = message.action;
@@ -18616,28 +18629,20 @@ var WebSocketLlmBackend = class {
18616
18629
  eventCount++;
18617
18630
  const chunk = message.chunk;
18618
18631
  streamLogger.onEvent(eventCount, JSON.stringify(chunk));
18619
- const textChunk = chunk.text || "";
18620
- if (textChunk) accumulatedText += textChunk;
18621
- updateUsage(chunk.usage);
18622
- if (chunk.type === "content") streamLogger.onContent(eventCount, textChunk, accumulatedText);
18623
- else if (chunk.type === "tool_use") {
18632
+ if (chunk.type === "content") {
18633
+ accumulator.onContent(chunk.text || "", chunk.usage);
18634
+ streamLogger.onContent(eventCount, chunk.text || "", "");
18635
+ } else if (chunk.type === "tool_use") {
18624
18636
  streamLogger.onCriticalEvent(eventCount, "TOOL_USE", `tools: ${chunk.tools?.length}`);
18625
- if (chunk.tools && chunk.tools.length > 0) toolsUsed = chunk.tools;
18626
- if (chunk.thinking && chunk.thinking.length > 0) thinkingBlocks = chunk.thinking;
18637
+ accumulator.onToolUse(chunk.text || "", chunk.tools, chunk.thinking, chunk.usage);
18627
18638
  }
18628
18639
  } else if (action === "cli_completion_done") {
18629
- streamLogger.streamComplete(accumulatedText);
18630
- const cleanedText = stripThinkingBlocks(accumulatedText);
18631
- if (!cleanedText && toolsUsed.length === 0) {
18640
+ streamLogger.streamComplete("");
18641
+ if (accumulator.isEmpty()) {
18632
18642
  settleResolve();
18633
18643
  return;
18634
18644
  }
18635
- const info = {
18636
- ...lastUsageInfo,
18637
- ...toolsUsed.length > 0 && { toolsUsed },
18638
- ...thinkingBlocks.length > 0 && { thinking: thinkingBlocks }
18639
- };
18640
- callback([cleanedText], info).then(() => settleResolve()).catch((err) => settleReject(err));
18645
+ accumulator.finalize(callback).then(() => settleResolve()).catch((err) => settleReject(err));
18641
18646
  } else if (action === "cli_completion_error") {
18642
18647
  const errorMsg = message.error || "Server error";
18643
18648
  streamLogger.onCriticalEvent(eventCount, "ERROR", errorMsg);
@@ -18738,6 +18743,59 @@ var WebSocketLlmBackend = class {
18738
18743
  }
18739
18744
  };
18740
18745
  //#endregion
18746
+ //#region src/llm/FallbackLlmBackend.ts
18747
+ /**
18748
+ * LLM backend decorator that provides model-level fallback routing.
18749
+ *
18750
+ * When the primary model fails (after the inner backend's own retries are exhausted),
18751
+ * FallbackLlmBackend tries the next model in the configured fallback chain.
18752
+ *
18753
+ * Example chain: Opus → Sonnet → Haiku (graceful degradation under rate limits)
18754
+ *
18755
+ * Configured via `CliConfig.fallbackModels`. Wraps any `ICompletionBackend`,
18756
+ * fitting cleanly into the existing decorator pattern (NotifyingLlmBackend, etc.).
18757
+ */
18758
var FallbackLlmBackend = class {
  /**
   * @param inner          Backend to delegate to; must expose complete, pushToolMessages,
   *                       getModelInfo and a currentModel property.
   * @param fallbackModels Ordered model ids to try after the requested model fails.
   * @param onFallback     Optional notifier called as (fromModel, toModel, error) each
   *                       time the chain advances to the next model.
   */
  constructor(inner, fallbackModels, onFallback) {
    this.inner = inner;
    this.fallbackModels = fallbackModels;
    this.onFallback = onFallback;
  }

  /** Mirrors the wrapped backend's current model. */
  get currentModel() {
    return this.inner.currentModel;
  }

  set currentModel(model) {
    this.inner.currentModel = model;
  }

  /**
   * Runs the completion on `model`, then on each configured fallback in order until one
   * attempt succeeds.
   *
   * - Each model is attempted at most once: the requested model and repeated entries
   *   in `fallbackModels` are de-duplicated while preserving order.
   * - `options` may be omitted; a missing abort signal is treated as "not aborted".
   * - If the signal is already aborted, the call is delegated once, unchanged, to the
   *   inner backend. If the signal is aborted after a failed attempt, that attempt's
   *   error is rethrown as-is and no further model is tried.
   *
   * @throws the last attempt's error (non-Error values are wrapped in Error) when every
   *         model fails.
   */
  async complete(model, messages, options, callback) {
    const signal = options?.abortSignal;
    if (signal?.aborted) return this.inner.complete(model, messages, options, callback);

    // Set keeps insertion order, so the requested model stays first and fallbacks keep
    // their configured priority while duplicates are dropped.
    const modelsToTry = [...new Set([model, ...(this.fallbackModels ?? [])])];
    let lastError;
    for (const [index, modelToTry] of modelsToTry.entries()) {
      try {
        await this.inner.complete(modelToTry, messages, options, callback);
        return;
      } catch (error) {
        // A cancellation is not a model failure: surface it instead of falling back.
        if (signal?.aborted) throw error;
        lastError = error instanceof Error ? error : new Error(String(error));
        const nextModel = modelsToTry[index + 1];
        if (nextModel) {
          logger.warn(`[FallbackLlmBackend] Model "${modelToTry}" failed: ${lastError.message}`);
          // The notifier is optional; its absence must not break the fallback chain.
          this.onFallback?.(modelToTry, nextModel, lastError);
        }
      }
    }
    throw lastError ?? /* @__PURE__ */ new Error("All fallback models exhausted");
  }

  /** Pass-through to the wrapped backend. */
  pushToolMessages(messages, tool, result, thinkingBlocks) {
    this.inner.pushToolMessages(messages, tool, result, thinkingBlocks);
  }

  /** Pass-through to the wrapped backend. */
  async getModelInfo() {
    return this.inner.getModelInfo();
  }
};
18798
+ //#endregion
18741
18799
  //#region src/ws/WebSocketConnectionManager.ts
18742
18800
  const useWsPolyfill = typeof globalThis.WebSocket === "undefined";
18743
18801
  const WS = useWsPolyfill ? WsWebSocket : globalThis.WebSocket;
@@ -20919,4 +20977,4 @@ function createGetFileStructureTool() {
20919
20977
  };
20920
20978
  }
20921
20979
  //#endregion
20922
- export { clearFeatureModuleTools as A, CommandHistoryStore as B, getEnvironmentName as C, DEFAULT_AGENT_MODEL as D, ALWAYS_DENIED_FOR_AGENTS as E, buildSkillsPromptSection as F, searchCommands as G, OAuthClient as H, isReadOnlyTool as I, searchFiles as J, mergeCommands as K, ReActAgent as L, setWebSocketToolExecutor as M, OllamaBackend as N, DEFAULT_MAX_ITERATIONS as O, buildCoreSystemPrompt as P, CustomCommandStore as R, getApiUrl as S, generateCliTools as T, hasFileReferences as U, SessionStore as V, processFileReferences as W, warmFileCache as Y, McpManager as _, createBackgroundAgentTools as a, extractCompactInstructions as b, AgentStore as c, parseAgentConfig as d, ApiClient as f, ServerLlmBackend as g, WebSocketLlmBackend as h, createWriteTodosTool as i, registerFeatureModuleTools as j, DEFAULT_THOROUGHNESS as k, SubagentOrchestrator as l, WebSocketConnectionManager as m, createFindDefinitionTool as n, BackgroundAgentManager as o, WebSocketToolExecutor as p, formatFileSize$1 as q, createTodoStore as r, createAgentDelegateTool as s, createGetFileStructureTool as t, createSkillTool as u, substituteArguments as v, PermissionManager as w, loadContextFiles as x, formatStep as y, CheckpointStore as z };
20980
+ export { DEFAULT_THOROUGHNESS as A, CheckpointStore as B, getApiUrl as C, ALWAYS_DENIED_FOR_AGENTS as D, generateCliTools as E, buildCoreSystemPrompt as F, processFileReferences as G, SessionStore as H, buildSkillsPromptSection as I, formatFileSize$1 as J, searchCommands as K, isReadOnlyTool as L, registerFeatureModuleTools as M, setWebSocketToolExecutor as N, DEFAULT_AGENT_MODEL as O, OllamaBackend as P, ReActAgent as R, loadContextFiles as S, PermissionManager as T, OAuthClient as U, CommandHistoryStore as V, hasFileReferences as W, warmFileCache as X, searchFiles as Y, ServerLlmBackend as _, createBackgroundAgentTools as a, formatStep as b, AgentStore as c, parseAgentConfig as d, ApiClient as f, WebSocketLlmBackend as g, FallbackLlmBackend as h, createWriteTodosTool as i, clearFeatureModuleTools as j, DEFAULT_MAX_ITERATIONS as k, SubagentOrchestrator as l, WebSocketConnectionManager as m, createFindDefinitionTool as n, BackgroundAgentManager as o, WebSocketToolExecutor as p, mergeCommands as q, createTodoStore as r, createAgentDelegateTool as s, createGetFileStructureTool as t, createSkillTool as u, McpManager as v, getEnvironmentName as w, extractCompactInstructions as x, substituteArguments as y, CustomCommandStore as z };
@@ -4,7 +4,7 @@ import { homedir } from "os";
4
4
  import path from "path";
5
5
  import axios from "axios";
6
6
  //#region package.json
7
- var version = "0.2.70";
7
+ var version = "0.2.71-feat-7629-unified-streaming-model-fallback.22033+9b05427c0";
8
8
  //#endregion
9
9
  //#region src/utils/updateChecker.ts
10
10
  /**
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@bike4mind/cli",
3
- "version": "0.2.70",
3
+ "version": "0.2.71-feat-7629-unified-streaming-model-fallback.22033+9b05427c0",
4
4
  "type": "module",
5
5
  "description": "Interactive CLI tool for Bike4Mind with ReAct agents",
6
6
  "license": "UNLICENSED",
@@ -60,7 +60,7 @@
60
60
  "@opensearch-project/opensearch": "2.11.0",
61
61
  "@smithy/node-http-handler": "^4.5.0",
62
62
  "async-mutex": "^0.5.0",
63
- "axios": "^1.13.6",
63
+ "axios": "1.14.0",
64
64
  "bcryptjs": "^3.0.2",
65
65
  "better-sqlite3": "^12.8.0",
66
66
  "cheerio": "1.0.0-rc.12",
@@ -115,11 +115,11 @@
115
115
  "zustand": "^4.5.4"
116
116
  },
117
117
  "devDependencies": {
118
- "@bike4mind/agents": "0.4.7",
119
- "@bike4mind/common": "2.78.0",
120
- "@bike4mind/mcp": "1.33.26",
121
- "@bike4mind/services": "2.71.1",
122
- "@bike4mind/utils": "2.16.8",
118
+ "@bike4mind/agents": "0.4.8-feat-7629-unified-streaming-model-fallback.22033+9b05427c0",
119
+ "@bike4mind/common": "2.78.1-feat-7629-unified-streaming-model-fallback.22033+9b05427c0",
120
+ "@bike4mind/mcp": "1.33.27-feat-7629-unified-streaming-model-fallback.22033+9b05427c0",
121
+ "@bike4mind/services": "2.71.2-feat-7629-unified-streaming-model-fallback.22033+9b05427c0",
122
+ "@bike4mind/utils": "2.16.9-feat-7629-unified-streaming-model-fallback.22033+9b05427c0",
123
123
  "@types/better-sqlite3": "^7.6.13",
124
124
  "@types/jsonwebtoken": "^9.0.4",
125
125
  "@types/node": "^22.9.0",
@@ -136,5 +136,5 @@
136
136
  "optionalDependencies": {
137
137
  "@vscode/ripgrep": "^1.17.1"
138
138
  },
139
- "gitHead": "3953e8b96bf559a76010be7880c2c1f74aec410d"
139
+ "gitHead": "9b05427c060904f024c75194742301784f85a0e3"
140
140
  }