cascade-ai 0.12.7 → 0.12.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -77,7 +77,7 @@ var cron__default = /*#__PURE__*/_interopDefault(cron);
77
77
 
78
78
 
79
79
  // src/constants.ts
80
- var CASCADE_VERSION = "0.12.7";
80
+ var CASCADE_VERSION = "0.12.9";
81
81
  var CASCADE_CONFIG_DIR = ".cascade";
82
82
  var CASCADE_MD_FILE = "CASCADE.md";
83
83
  var CASCADE_IGNORE_FILE = ".cascadeignore";
@@ -477,6 +477,12 @@ var BaseProvider = class {
477
477
  };
478
478
 
479
479
  // src/providers/anthropic.ts
480
/**
 * Builds the optional extended-thinking fragment for an Anthropic request.
 *
 * The result is always safe to spread into the request payload: it is either
 * `{ thinking: { type: "enabled", budget_tokens } }` or an empty object.
 *
 * @param modelId   Model identifier. Thinking is only requested when it
 *                  contains `claude-opus-4` or `claude-sonnet-4`
 *                  (case-insensitive).
 * @param maxTokens Output-token cap that will be sent with the request.
 * @returns The thinking fragment, or `{}` when the model does not match or
 *          the cap leaves less than 1024 tokens of thinking budget.
 */
function anthropicThinkingParam(modelId, maxTokens) {
  if (!/claude-(opus|sonnet)-4/i.test(modelId)) return {};
  // Keep 1024 tokens of the cap outside the thinking budget, cap the budget at
  // 8000, and floor it so a fractional cap never produces a fractional budget.
  const budget = Math.floor(Math.min(8e3, maxTokens - 1024));
  // Deliberately a negated `>=`: a missing or non-numeric maxTokens makes the
  // budget NaN, and `NaN < 1024` is false, which previously let
  // `budget_tokens: NaN` through to the request.
  if (!(budget >= 1024)) return {};
  return { thinking: { type: "enabled", budget_tokens: budget } };
}
480
486
  var AnthropicProvider = class extends BaseProvider {
481
487
  client;
482
488
  constructor(config, model) {
@@ -506,13 +512,18 @@ var AnthropicProvider = class extends BaseProvider {
506
512
  let fullContent = "";
507
513
  let inputTokens = 0;
508
514
  let outputTokens = 0;
515
+ const maxTokens = options.maxTokens ?? this.model.maxOutputTokens;
516
+ const thinkParam = anthropicThinkingParam(this.model.id, maxTokens);
517
+ const useThinking = !!thinkParam.thinking;
509
518
  const stream = this.client.messages.stream({
510
519
  model: this.model.id,
511
- max_tokens: options.maxTokens ?? this.model.maxOutputTokens,
512
- temperature: options.temperature ?? 0.7,
520
+ max_tokens: maxTokens,
521
+ // Extended thinking requires temperature = 1; otherwise honor the request.
522
+ temperature: useThinking ? 1 : options.temperature ?? 0.7,
513
523
  system: options.systemPrompt,
514
524
  messages,
515
- tools: tools?.length ? tools : void 0
525
+ tools: tools?.length ? tools : void 0,
526
+ ...thinkParam
516
527
  }, { signal: options.signal });
517
528
  let isThinking = false;
518
529
  for await (const event of stream) {
@@ -1516,6 +1527,7 @@ var ModelSelector = class {
1516
1527
  if (lower.includes("claude")) providerStr = "anthropic";
1517
1528
  else if (lower.startsWith("gpt") || lower.startsWith("o1") || lower.startsWith("o3")) providerStr = "openai";
1518
1529
  else if (lower.includes("gemini")) providerStr = "gemini";
1530
+ else if ((lower.endsWith(".gguf") || actualId.includes("/") || actualId.includes("\\")) && this.availableProviders.has("openai-compatible")) providerStr = "openai-compatible";
1519
1531
  else if (this.availableProviders.has("ollama")) providerStr = "ollama";
1520
1532
  else if (this.availableProviders.has("openai-compatible")) providerStr = "openai-compatible";
1521
1533
  else if (this.availableProviders.size === 1) providerStr = Array.from(this.availableProviders)[0];
@@ -2203,6 +2215,11 @@ var CascadeRouter = class _CascadeRouter extends EventEmitter__default.default {
2203
2215
  if (availableProviders.has("ollama")) {
2204
2216
  await this.discoverOllamaModels(ollamaCfg);
2205
2217
  }
2218
+ if (availableProviders.has("openai-compatible")) {
2219
+ await Promise.all(
2220
+ config.providers.filter((p) => p.type === "openai-compatible").map((cfg) => this.discoverOpenAICompatibleModels(cfg))
2221
+ );
2222
+ }
2206
2223
  for (const tier of ["T1", "T2", "T3"]) {
2207
2224
  const override = tier === "T1" ? config.models.t1 : tier === "T2" ? config.models.t2 : config.models.t3;
2208
2225
  if (!override || override === "auto") continue;
@@ -2634,6 +2651,14 @@ var CascadeRouter = class _CascadeRouter extends EventEmitter__default.default {
2634
2651
  getModelsForProvider(provider) {
2635
2652
  return this.selector.getAvailableModelsForProvider(provider);
2636
2653
  }
2654
+ /**
2655
+ * Every model available across the configured + reachable providers, after
2656
+ * discovery (Ollama tags, OpenAI-compatible/llama.cpp models, cloud catalog).
2657
+ * Used to populate the desktop model pickers with the user's real models.
2658
+ */
2659
+ getAvailableModels() {
2660
+ return this.selector?.getAllAvailableModels() ?? [];
2661
+ }
2637
2662
  // ── Private ──────────────────────────────────
2638
2663
  async detectAvailableProviders(configs) {
2639
2664
  const available = /* @__PURE__ */ new Set();
@@ -2664,6 +2689,28 @@ var CascadeRouter = class _CascadeRouter extends EventEmitter__default.default {
2664
2689
  } catch {
2665
2690
  }
2666
2691
  }
2692
+ async discoverOpenAICompatibleModels(cfg) {
2693
+ try {
2694
+ const seed = {
2695
+ id: "openai-compatible",
2696
+ name: "openai-compatible",
2697
+ provider: "openai-compatible",
2698
+ contextWindow: 32e3,
2699
+ isVisionCapable: false,
2700
+ inputCostPer1kTokens: 0,
2701
+ outputCostPer1kTokens: 0,
2702
+ maxOutputTokens: 4e3,
2703
+ supportsStreaming: true,
2704
+ isLocal: false
2705
+ };
2706
+ const provider = new OpenAICompatibleProvider(cfg, seed);
2707
+ const models = await provider.listModels();
2708
+ for (const m of models) {
2709
+ this.selector.addDynamicModel(m);
2710
+ }
2711
+ } catch {
2712
+ }
2713
+ }
2667
2714
  ensureProvider(model, configs) {
2668
2715
  const key = `${model.provider}:${model.id}`;
2669
2716
  if (this.providers.has(key)) return;
@@ -2693,7 +2740,23 @@ var CascadeRouter = class _CascadeRouter extends EventEmitter__default.default {
2693
2740
  }
2694
2741
  }
2695
2742
  getAnyModelForProvider(type) {
2696
- return Object.values(MODELS).find((m) => m.provider === type);
2743
+ const fromCatalog = Object.values(MODELS).find((m) => m.provider === type);
2744
+ if (fromCatalog) return fromCatalog;
2745
+ if (type === "openai-compatible" || type === "azure") {
2746
+ return {
2747
+ id: type,
2748
+ name: type,
2749
+ provider: type,
2750
+ contextWindow: 32e3,
2751
+ isVisionCapable: false,
2752
+ inputCostPer1kTokens: 0,
2753
+ outputCostPer1kTokens: 0,
2754
+ maxOutputTokens: 4e3,
2755
+ supportsStreaming: true,
2756
+ isLocal: false
2757
+ };
2758
+ }
2759
+ return void 0;
2697
2760
  }
2698
2761
  recordStats(tier, model, usage) {
2699
2762
  this.stats.totalTokens += usage.totalTokens;
@@ -8834,7 +8897,11 @@ ${last.partialOutput}` : "");
8834
8897
  looksLikeConversational(prompt) {
8835
8898
  const LOW_COMPLEXITY = [
8836
8899
  /^(?:hi|hello|hey|thanks|thank you|ok|okay|yes|no|sure|got it|sounds good)\b/i,
8837
- /^(?:what is|what are|list|show me|tell me|who is|where is|when is|how do i)\b/i,
8900
+ /^(?:what is|what are|what'?s|list|show me|tell me|who is|who are|who'?re|where is|when is|how do i)\b/i,
8901
+ // Self-identity / capability questions ("who are you", "what can you do",
8902
+ // "who made you") are pure conversation — never a multi-agent build.
8903
+ /^(?:who|what)\b.*\byou\b/i,
8904
+ /^what can you\b/i,
8838
8905
  /\b(?:simple|quick|brief|small|single|one-line|typo|rename)\b/i
8839
8906
  ];
8840
8907
  const wordCount = prompt.trim().split(/\s+/).length;
@@ -8932,10 +8999,16 @@ ${prompt}` : prompt;
8932
8999
  temperature: 0
8933
9000
  });
8934
9001
  const content = result.content.trim();
8935
- const firstWord = (content.split(/[\s—–-]+/)[0] ?? "").toLowerCase();
9002
+ const match = content.toLowerCase().match(/\b(simple|moderate|complex)\b/);
8936
9003
  const reason = content.replace(/^\S+\s*[—–-]*\s*/, "").trim();
8937
- const verdict = firstWord.includes("simple") ? "Simple" : firstWord.includes("moderate") ? "Moderate" : "Complex";
8938
- this.recordDecision("complexity", `${verdict} \u2014 classifier: ${reason || "no reason given"}`);
9004
+ let verdict;
9005
+ if (match) {
9006
+ verdict = match[1] === "simple" ? "Simple" : match[1] === "moderate" ? "Moderate" : "Complex";
9007
+ this.recordDecision("complexity", `${verdict} \u2014 classifier: ${reason || "no reason given"}`);
9008
+ } else {
9009
+ verdict = prompt.trim().split(/\s+/).length <= 12 ? "Simple" : "Moderate";
9010
+ this.recordDecision("complexity", `${verdict} \u2014 classifier output unparseable; defaulted by length`);
9011
+ }
8939
9012
  return verdict;
8940
9013
  } catch {
8941
9014
  const followUpPrompt = /^(proceed|continue|go ahead|do it|yes|yep|ok|okay|carry on)$/i.test(prompt.trim());