cascade-ai 0.12.7 → 0.12.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.cjs CHANGED
@@ -101,7 +101,7 @@ var __export = (target, all) => {
101
101
  var CASCADE_VERSION, CASCADE_CONFIG_FILE, CASCADE_DB_FILE, CASCADE_DASHBOARD_SECRET_FILE, GLOBAL_CONFIG_DIR, GLOBAL_DB_FILE, GLOBAL_KEYSTORE_FILE, GLOBAL_RUNTIME_DB_FILE, DEFAULT_DASHBOARD_PORT, DEFAULT_CONTEXT_LIMIT, DEFAULT_AUTO_SUMMARIZE_AT, MODELS, T1_MODEL_PRIORITY, T2_MODEL_PRIORITY, T3_MODEL_PRIORITY, VISION_MODEL_PRIORITY, COMPLEXITY_T2_COUNT, THEME_NAMES, DEFAULT_THEME, OLLAMA_BASE_URL, LM_STUDIO_BASE_URL, AZURE_BASE_URL_TEMPLATE, TOOL_NAMES, DEFAULT_APPROVAL_REQUIRED;
102
102
  var init_constants = __esm({
103
103
  "src/constants.ts"() {
104
- CASCADE_VERSION = "0.12.7";
104
+ CASCADE_VERSION = "0.12.9";
105
105
  CASCADE_CONFIG_FILE = ".cascade/config.json";
106
106
  CASCADE_DB_FILE = ".cascade/memory.db";
107
107
  CASCADE_DASHBOARD_SECRET_FILE = ".cascade/dashboard-secret";
@@ -502,6 +502,12 @@ var anthropic_exports = {};
502
502
  __export(anthropic_exports, {
503
503
  AnthropicProvider: () => AnthropicProvider
504
504
  });
505
/**
 * Build the optional extended-thinking fragment of an Anthropic request.
 *
 * Thinking is only requested for model ids matching `claude-(opus|sonnet)-4`
 * (case-insensitive). The budget is `maxTokens - 1024`, capped at 8000 and
 * rounded down to a whole number. No thinking parameter is produced when the
 * budget would be below 1024, or when `maxTokens` is missing / not a number.
 *
 * @param {string} modelId - Model identifier to test against the pattern.
 * @param {number} maxTokens - Output token cap used for the request.
 * @returns {{thinking?: {type: "enabled", budget_tokens: number}}} Object
 *   intended to be spread into the request options; empty when thinking is
 *   not enabled.
 */
function anthropicThinkingParam(modelId, maxTokens) {
  if (!/claude-(opus|sonnet)-4/i.test(modelId)) return {};
  const budget = Math.floor(Math.min(8e3, maxTokens - 1024));
  // NaN compares false against everything, so `budget < 1024` alone would let
  // an undefined/NaN maxTokens through as `budget_tokens: NaN`.
  if (!Number.isFinite(budget) || budget < 1024) return {};
  return { thinking: { type: "enabled", budget_tokens: budget } };
}
505
511
  var AnthropicProvider;
506
512
  var init_anthropic = __esm({
507
513
  "src/providers/anthropic.ts"() {
@@ -536,13 +542,18 @@ var init_anthropic = __esm({
536
542
  let fullContent = "";
537
543
  let inputTokens = 0;
538
544
  let outputTokens = 0;
545
+ const maxTokens = options.maxTokens ?? this.model.maxOutputTokens;
546
+ const thinkParam = anthropicThinkingParam(this.model.id, maxTokens);
547
+ const useThinking = !!thinkParam.thinking;
539
548
  const stream = this.client.messages.stream({
540
549
  model: this.model.id,
541
- max_tokens: options.maxTokens ?? this.model.maxOutputTokens,
542
- temperature: options.temperature ?? 0.7,
550
+ max_tokens: maxTokens,
551
+ // Extended thinking requires temperature = 1; otherwise honor the request.
552
+ temperature: useThinking ? 1 : options.temperature ?? 0.7,
543
553
  system: options.systemPrompt,
544
554
  messages,
545
- tools: tools?.length ? tools : void 0
555
+ tools: tools?.length ? tools : void 0,
556
+ ...thinkParam
546
557
  }, { signal: options.signal });
547
558
  let isThinking = false;
548
559
  for await (const event of stream) {
@@ -3369,6 +3380,7 @@ var ModelSelector = class {
3369
3380
  if (lower.includes("claude")) providerStr = "anthropic";
3370
3381
  else if (lower.startsWith("gpt") || lower.startsWith("o1") || lower.startsWith("o3")) providerStr = "openai";
3371
3382
  else if (lower.includes("gemini")) providerStr = "gemini";
3383
+ else if ((lower.endsWith(".gguf") || actualId.includes("/") || actualId.includes("\\")) && this.availableProviders.has("openai-compatible")) providerStr = "openai-compatible";
3372
3384
  else if (this.availableProviders.has("ollama")) providerStr = "ollama";
3373
3385
  else if (this.availableProviders.has("openai-compatible")) providerStr = "openai-compatible";
3374
3386
  else if (this.availableProviders.size === 1) providerStr = Array.from(this.availableProviders)[0];
@@ -4051,6 +4063,11 @@ var CascadeRouter = class _CascadeRouter extends EventEmitter__default.default {
4051
4063
  if (availableProviders.has("ollama")) {
4052
4064
  await this.discoverOllamaModels(ollamaCfg);
4053
4065
  }
4066
+ if (availableProviders.has("openai-compatible")) {
4067
+ await Promise.all(
4068
+ config.providers.filter((p) => p.type === "openai-compatible").map((cfg) => this.discoverOpenAICompatibleModels(cfg))
4069
+ );
4070
+ }
4054
4071
  for (const tier of ["T1", "T2", "T3"]) {
4055
4072
  const override = tier === "T1" ? config.models.t1 : tier === "T2" ? config.models.t2 : config.models.t3;
4056
4073
  if (!override || override === "auto") continue;
@@ -4482,6 +4499,14 @@ var CascadeRouter = class _CascadeRouter extends EventEmitter__default.default {
4482
4499
  getModelsForProvider(provider) {
4483
4500
  return this.selector.getAvailableModelsForProvider(provider);
4484
4501
  }
4502
+ /**
4503
+ * Every model available across the configured + reachable providers, after
4504
+ * discovery (Ollama tags, OpenAI-compatible/llama.cpp models, cloud catalog).
4505
+ * Used to populate the desktop model pickers with the user's real models.
4506
+ */
4507
+ getAvailableModels() {
4508
+ return this.selector?.getAllAvailableModels() ?? [];
4509
+ }
4485
4510
  // ── Private ──────────────────────────────────
4486
4511
  async detectAvailableProviders(configs) {
4487
4512
  const available = /* @__PURE__ */ new Set();
@@ -4512,6 +4537,28 @@ var CascadeRouter = class _CascadeRouter extends EventEmitter__default.default {
4512
4537
  } catch {
4513
4538
  }
4514
4539
  }
4540
+ async discoverOpenAICompatibleModels(cfg) {
4541
+ try {
4542
+ const seed = {
4543
+ id: "openai-compatible",
4544
+ name: "openai-compatible",
4545
+ provider: "openai-compatible",
4546
+ contextWindow: 32e3,
4547
+ isVisionCapable: false,
4548
+ inputCostPer1kTokens: 0,
4549
+ outputCostPer1kTokens: 0,
4550
+ maxOutputTokens: 4e3,
4551
+ supportsStreaming: true,
4552
+ isLocal: false
4553
+ };
4554
+ const provider = new OpenAICompatibleProvider(cfg, seed);
4555
+ const models = await provider.listModels();
4556
+ for (const m of models) {
4557
+ this.selector.addDynamicModel(m);
4558
+ }
4559
+ } catch {
4560
+ }
4561
+ }
4515
4562
  ensureProvider(model, configs) {
4516
4563
  const key = `${model.provider}:${model.id}`;
4517
4564
  if (this.providers.has(key)) return;
@@ -4541,7 +4588,23 @@ var CascadeRouter = class _CascadeRouter extends EventEmitter__default.default {
4541
4588
  }
4542
4589
  }
4543
4590
  getAnyModelForProvider(type) {
4544
- return Object.values(MODELS).find((m) => m.provider === type);
4591
+ const fromCatalog = Object.values(MODELS).find((m) => m.provider === type);
4592
+ if (fromCatalog) return fromCatalog;
4593
+ if (type === "openai-compatible" || type === "azure") {
4594
+ return {
4595
+ id: type,
4596
+ name: type,
4597
+ provider: type,
4598
+ contextWindow: 32e3,
4599
+ isVisionCapable: false,
4600
+ inputCostPer1kTokens: 0,
4601
+ outputCostPer1kTokens: 0,
4602
+ maxOutputTokens: 4e3,
4603
+ supportsStreaming: true,
4604
+ isLocal: false
4605
+ };
4606
+ }
4607
+ return void 0;
4545
4608
  }
4546
4609
  recordStats(tier, model, usage) {
4547
4610
  this.stats.totalTokens += usage.totalTokens;
@@ -10410,7 +10473,11 @@ ${last.partialOutput}` : "");
10410
10473
  looksLikeConversational(prompt) {
10411
10474
  const LOW_COMPLEXITY = [
10412
10475
  /^(?:hi|hello|hey|thanks|thank you|ok|okay|yes|no|sure|got it|sounds good)\b/i,
10413
- /^(?:what is|what are|list|show me|tell me|who is|where is|when is|how do i)\b/i,
10476
+ /^(?:what is|what are|what'?s|list|show me|tell me|who is|who are|who'?re|where is|when is|how do i)\b/i,
10477
+ // Self-identity / capability questions ("who are you", "what can you do",
10478
+ // "who made you") are pure conversation — never a multi-agent build.
10479
+ /^(?:who|what)\b.*\byou\b/i,
10480
+ /^what can you\b/i,
10414
10481
  /\b(?:simple|quick|brief|small|single|one-line|typo|rename)\b/i
10415
10482
  ];
10416
10483
  const wordCount = prompt.trim().split(/\s+/).length;
@@ -10508,10 +10575,16 @@ ${prompt}` : prompt;
10508
10575
  temperature: 0
10509
10576
  });
10510
10577
  const content = result.content.trim();
10511
- const firstWord = (content.split(/[\s—–-]+/)[0] ?? "").toLowerCase();
10578
+ const match = content.toLowerCase().match(/\b(simple|moderate|complex)\b/);
10512
10579
  const reason = content.replace(/^\S+\s*[—–-]*\s*/, "").trim();
10513
- const verdict = firstWord.includes("simple") ? "Simple" : firstWord.includes("moderate") ? "Moderate" : "Complex";
10514
- this.recordDecision("complexity", `${verdict} \u2014 classifier: ${reason || "no reason given"}`);
10580
+ let verdict;
10581
+ if (match) {
10582
+ verdict = match[1] === "simple" ? "Simple" : match[1] === "moderate" ? "Moderate" : "Complex";
10583
+ this.recordDecision("complexity", `${verdict} \u2014 classifier: ${reason || "no reason given"}`);
10584
+ } else {
10585
+ verdict = prompt.trim().split(/\s+/).length <= 12 ? "Simple" : "Moderate";
10586
+ this.recordDecision("complexity", `${verdict} \u2014 classifier output unparseable; defaulted by length`);
10587
+ }
10515
10588
  return verdict;
10516
10589
  } catch {
10517
10590
  const followUpPrompt = /^(proceed|continue|go ahead|do it|yes|yep|ok|okay|carry on)$/i.test(prompt.trim());
@@ -13920,7 +13993,8 @@ function SetupWizard({ workspacePath, onComplete }) {
13920
13993
  ) })
13921
13994
  ] });
13922
13995
  }
13923
- const prompt = isAzure && fieldStage === "deploymentName" ? `Azure deployment name (${currentEntry.label})` : isAzure && fieldStage === "baseUrl" ? `Azure endpoint URL` : isAzure && fieldStage === "apiKey" ? `${currentEntry.label} API Key` : isAzure && fieldStage === "apiVersion" ? `Azure API version (e.g. 2024-08-01-preview)` : isCompat && fieldStage === "label" ? `Name for this endpoint (e.g. Groq)` : isCompat && fieldStage === "baseUrl" ? `Base URL (e.g. https://api.groq.com/openai/v1)` : isOllama ? `Ollama URL` : `${currentEntry.label} API Key`;
13996
+ const prompt = isAzure && fieldStage === "deploymentName" ? `Azure deployment name (${currentEntry.label})` : isAzure && fieldStage === "baseUrl" ? `Azure endpoint URL` : isAzure && fieldStage === "apiKey" ? `${currentEntry.label} API Key` : isAzure && fieldStage === "apiVersion" ? `Azure API version (e.g. 2024-08-01-preview)` : isCompat && fieldStage === "label" ? `Name for this endpoint (e.g. Groq)` : isCompat && fieldStage === "baseUrl" ? `Base URL (e.g. https://api.groq.com/openai/v1)` : isCompat && fieldStage === "apiKey" ? `${currentEntry.label} API Key (optional)` : isOllama ? `Ollama URL` : `${currentEntry.label} API Key`;
13997
+ const keyOptional = isCompat && fieldStage === "apiKey";
13924
13998
  const isMasked = fieldStage === "apiKey" && !isOllama;
13925
13999
  return /* @__PURE__ */ jsxRuntime.jsxs(Frame, { theme, phase: "keys", children: [
13926
14000
  doneEntries.length > 0 && /* @__PURE__ */ jsxRuntime.jsx(ink.Box, { flexDirection: "column", marginBottom: 1, children: doneEntries.map((e) => /* @__PURE__ */ jsxRuntime.jsxs(ink.Box, { children: [
@@ -13938,8 +14012,8 @@ function SetupWizard({ workspacePath, onComplete }) {
13938
14012
  {
13939
14013
  theme,
13940
14014
  label: prompt,
13941
- tag: isOllama ? "optional \u2014 Enter for default" : "required",
13942
- tagColor: isOllama ? theme.colors.muted : theme.colors.error,
14015
+ tag: isOllama ? "optional \u2014 Enter for default" : keyOptional ? "optional \u2014 Enter to skip" : "required",
14016
+ tagColor: isOllama || keyOptional ? theme.colors.muted : theme.colors.error,
13943
14017
  active: true,
13944
14018
  children: /* @__PURE__ */ jsxRuntime.jsx(
13945
14019
  SafeTextInput,