cascade-ai 0.12.7 → 0.12.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.cts CHANGED
@@ -1084,8 +1084,15 @@ declare class CascadeRouter extends EventEmitter {
1084
1084
  * Useful for listing configured/usable models per provider.
1085
1085
  */
1086
1086
  getModelsForProvider(provider: ProviderType): ModelInfo[];
1087
+ /**
1088
+ * Every model available across the configured + reachable providers, after
1089
+ * discovery (Ollama tags, OpenAI-compatible/llama.cpp models, cloud catalog).
1090
+ * Used to populate the desktop model pickers with the user's real models.
1091
+ */
1092
+ getAvailableModels(): ModelInfo[];
1087
1093
  private detectAvailableProviders;
1088
1094
  private discoverOllamaModels;
1095
+ private discoverOpenAICompatibleModels;
1089
1096
  private ensureProvider;
1090
1097
  private getProvider;
1091
1098
  private createProvider;
package/dist/index.d.ts CHANGED
@@ -1084,8 +1084,15 @@ declare class CascadeRouter extends EventEmitter {
1084
1084
  * Useful for listing configured/usable models per provider.
1085
1085
  */
1086
1086
  getModelsForProvider(provider: ProviderType): ModelInfo[];
1087
+ /**
1088
+ * Every model available across the configured + reachable providers, after
1089
+ * discovery (Ollama tags, OpenAI-compatible/llama.cpp models, cloud catalog).
1090
+ * Used to populate the desktop model pickers with the user's real models.
1091
+ */
1092
+ getAvailableModels(): ModelInfo[];
1087
1093
  private detectAvailableProviders;
1088
1094
  private discoverOllamaModels;
1095
+ private discoverOpenAICompatibleModels;
1089
1096
  private ensureProvider;
1090
1097
  private getProvider;
1091
1098
  private createProvider;
package/dist/index.js CHANGED
@@ -35,7 +35,7 @@ import cron from 'node-cron';
35
35
 
36
36
 
37
37
  // src/constants.ts
38
- var CASCADE_VERSION = "0.12.7";
38
+ var CASCADE_VERSION = "0.12.9";
39
39
  var CASCADE_CONFIG_DIR = ".cascade";
40
40
  var CASCADE_MD_FILE = "CASCADE.md";
41
41
  var CASCADE_IGNORE_FILE = ".cascadeignore";
@@ -435,6 +435,12 @@ var BaseProvider = class {
435
435
  };
436
436
 
437
437
  // src/providers/anthropic.ts
438
+ function anthropicThinkingParam(modelId, maxTokens) {
439
+ if (!/claude-(opus|sonnet)-4/i.test(modelId)) return {};
440
+ const budget = Math.min(8e3, maxTokens - 1024);
441
+ if (budget < 1024) return {};
442
+ return { thinking: { type: "enabled", budget_tokens: budget } };
443
+ }
438
444
  var AnthropicProvider = class extends BaseProvider {
439
445
  client;
440
446
  constructor(config, model) {
@@ -464,13 +470,18 @@ var AnthropicProvider = class extends BaseProvider {
464
470
  let fullContent = "";
465
471
  let inputTokens = 0;
466
472
  let outputTokens = 0;
473
+ const maxTokens = options.maxTokens ?? this.model.maxOutputTokens;
474
+ const thinkParam = anthropicThinkingParam(this.model.id, maxTokens);
475
+ const useThinking = !!thinkParam.thinking;
467
476
  const stream = this.client.messages.stream({
468
477
  model: this.model.id,
469
- max_tokens: options.maxTokens ?? this.model.maxOutputTokens,
470
- temperature: options.temperature ?? 0.7,
478
+ max_tokens: maxTokens,
479
+ // Extended thinking requires temperature = 1; otherwise honor the request.
480
+ temperature: useThinking ? 1 : options.temperature ?? 0.7,
471
481
  system: options.systemPrompt,
472
482
  messages,
473
- tools: tools?.length ? tools : void 0
483
+ tools: tools?.length ? tools : void 0,
484
+ ...thinkParam
474
485
  }, { signal: options.signal });
475
486
  let isThinking = false;
476
487
  for await (const event of stream) {
@@ -1474,6 +1485,7 @@ var ModelSelector = class {
1474
1485
  if (lower.includes("claude")) providerStr = "anthropic";
1475
1486
  else if (lower.startsWith("gpt") || lower.startsWith("o1") || lower.startsWith("o3")) providerStr = "openai";
1476
1487
  else if (lower.includes("gemini")) providerStr = "gemini";
1488
+ else if ((lower.endsWith(".gguf") || actualId.includes("/") || actualId.includes("\\")) && this.availableProviders.has("openai-compatible")) providerStr = "openai-compatible";
1477
1489
  else if (this.availableProviders.has("ollama")) providerStr = "ollama";
1478
1490
  else if (this.availableProviders.has("openai-compatible")) providerStr = "openai-compatible";
1479
1491
  else if (this.availableProviders.size === 1) providerStr = Array.from(this.availableProviders)[0];
@@ -2161,6 +2173,11 @@ var CascadeRouter = class _CascadeRouter extends EventEmitter {
2161
2173
  if (availableProviders.has("ollama")) {
2162
2174
  await this.discoverOllamaModels(ollamaCfg);
2163
2175
  }
2176
+ if (availableProviders.has("openai-compatible")) {
2177
+ await Promise.all(
2178
+ config.providers.filter((p) => p.type === "openai-compatible").map((cfg) => this.discoverOpenAICompatibleModels(cfg))
2179
+ );
2180
+ }
2164
2181
  for (const tier of ["T1", "T2", "T3"]) {
2165
2182
  const override = tier === "T1" ? config.models.t1 : tier === "T2" ? config.models.t2 : config.models.t3;
2166
2183
  if (!override || override === "auto") continue;
@@ -2592,6 +2609,14 @@ var CascadeRouter = class _CascadeRouter extends EventEmitter {
2592
2609
  getModelsForProvider(provider) {
2593
2610
  return this.selector.getAvailableModelsForProvider(provider);
2594
2611
  }
2612
+ /**
2613
+ * Every model available across the configured + reachable providers, after
2614
+ * discovery (Ollama tags, OpenAI-compatible/llama.cpp models, cloud catalog).
2615
+ * Used to populate the desktop model pickers with the user's real models.
2616
+ */
2617
+ getAvailableModels() {
2618
+ return this.selector?.getAllAvailableModels() ?? [];
2619
+ }
2595
2620
  // ── Private ──────────────────────────────────
2596
2621
  async detectAvailableProviders(configs) {
2597
2622
  const available = /* @__PURE__ */ new Set();
@@ -2622,6 +2647,28 @@ var CascadeRouter = class _CascadeRouter extends EventEmitter {
2622
2647
  } catch {
2623
2648
  }
2624
2649
  }
2650
+ async discoverOpenAICompatibleModels(cfg) {
2651
+ try {
2652
+ const seed = {
2653
+ id: "openai-compatible",
2654
+ name: "openai-compatible",
2655
+ provider: "openai-compatible",
2656
+ contextWindow: 32e3,
2657
+ isVisionCapable: false,
2658
+ inputCostPer1kTokens: 0,
2659
+ outputCostPer1kTokens: 0,
2660
+ maxOutputTokens: 4e3,
2661
+ supportsStreaming: true,
2662
+ isLocal: false
2663
+ };
2664
+ const provider = new OpenAICompatibleProvider(cfg, seed);
2665
+ const models = await provider.listModels();
2666
+ for (const m of models) {
2667
+ this.selector.addDynamicModel(m);
2668
+ }
2669
+ } catch {
2670
+ }
2671
+ }
2625
2672
  ensureProvider(model, configs) {
2626
2673
  const key = `${model.provider}:${model.id}`;
2627
2674
  if (this.providers.has(key)) return;
@@ -2651,7 +2698,23 @@ var CascadeRouter = class _CascadeRouter extends EventEmitter {
2651
2698
  }
2652
2699
  }
2653
2700
  getAnyModelForProvider(type) {
2654
- return Object.values(MODELS).find((m) => m.provider === type);
2701
+ const fromCatalog = Object.values(MODELS).find((m) => m.provider === type);
2702
+ if (fromCatalog) return fromCatalog;
2703
+ if (type === "openai-compatible" || type === "azure") {
2704
+ return {
2705
+ id: type,
2706
+ name: type,
2707
+ provider: type,
2708
+ contextWindow: 32e3,
2709
+ isVisionCapable: false,
2710
+ inputCostPer1kTokens: 0,
2711
+ outputCostPer1kTokens: 0,
2712
+ maxOutputTokens: 4e3,
2713
+ supportsStreaming: true,
2714
+ isLocal: false
2715
+ };
2716
+ }
2717
+ return void 0;
2655
2718
  }
2656
2719
  recordStats(tier, model, usage) {
2657
2720
  this.stats.totalTokens += usage.totalTokens;
@@ -8792,7 +8855,11 @@ ${last.partialOutput}` : "");
8792
8855
  looksLikeConversational(prompt) {
8793
8856
  const LOW_COMPLEXITY = [
8794
8857
  /^(?:hi|hello|hey|thanks|thank you|ok|okay|yes|no|sure|got it|sounds good)\b/i,
8795
- /^(?:what is|what are|list|show me|tell me|who is|where is|when is|how do i)\b/i,
8858
+ /^(?:what is|what are|what'?s|list|show me|tell me|who is|who are|who'?re|where is|when is|how do i)\b/i,
8859
+ // Self-identity / capability questions ("who are you", "what can you do",
8860
+ // "who made you") are pure conversation — never a multi-agent build.
8861
+ /^(?:who|what)\b.*\byou\b/i,
8862
+ /^what can you\b/i,
8796
8863
  /\b(?:simple|quick|brief|small|single|one-line|typo|rename)\b/i
8797
8864
  ];
8798
8865
  const wordCount = prompt.trim().split(/\s+/).length;
@@ -8890,10 +8957,16 @@ ${prompt}` : prompt;
8890
8957
  temperature: 0
8891
8958
  });
8892
8959
  const content = result.content.trim();
8893
- const firstWord = (content.split(/[\s—–-]+/)[0] ?? "").toLowerCase();
8960
+ const match = content.toLowerCase().match(/\b(simple|moderate|complex)\b/);
8894
8961
  const reason = content.replace(/^\S+\s*[—–-]*\s*/, "").trim();
8895
- const verdict = firstWord.includes("simple") ? "Simple" : firstWord.includes("moderate") ? "Moderate" : "Complex";
8896
- this.recordDecision("complexity", `${verdict} \u2014 classifier: ${reason || "no reason given"}`);
8962
+ let verdict;
8963
+ if (match) {
8964
+ verdict = match[1] === "simple" ? "Simple" : match[1] === "moderate" ? "Moderate" : "Complex";
8965
+ this.recordDecision("complexity", `${verdict} \u2014 classifier: ${reason || "no reason given"}`);
8966
+ } else {
8967
+ verdict = prompt.trim().split(/\s+/).length <= 12 ? "Simple" : "Moderate";
8968
+ this.recordDecision("complexity", `${verdict} \u2014 classifier output unparseable; defaulted by length`);
8969
+ }
8897
8970
  return verdict;
8898
8971
  } catch {
8899
8972
  const followUpPrompt = /^(proceed|continue|go ahead|do it|yes|yep|ok|okay|carry on)$/i.test(prompt.trim());