cascade-ai 0.12.7 → 0.12.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -54,7 +54,7 @@ var __export = (target, all) => {
54
54
  var CASCADE_VERSION, CASCADE_CONFIG_FILE, CASCADE_DB_FILE, CASCADE_DASHBOARD_SECRET_FILE, GLOBAL_CONFIG_DIR, GLOBAL_DB_FILE, GLOBAL_KEYSTORE_FILE, GLOBAL_RUNTIME_DB_FILE, DEFAULT_DASHBOARD_PORT, DEFAULT_CONTEXT_LIMIT, DEFAULT_AUTO_SUMMARIZE_AT, MODELS, T1_MODEL_PRIORITY, T2_MODEL_PRIORITY, T3_MODEL_PRIORITY, VISION_MODEL_PRIORITY, COMPLEXITY_T2_COUNT, THEME_NAMES, DEFAULT_THEME, OLLAMA_BASE_URL, LM_STUDIO_BASE_URL, AZURE_BASE_URL_TEMPLATE, TOOL_NAMES, DEFAULT_APPROVAL_REQUIRED;
55
55
  var init_constants = __esm({
56
56
  "src/constants.ts"() {
57
- CASCADE_VERSION = "0.12.7";
57
+ CASCADE_VERSION = "0.12.9";
58
58
  CASCADE_CONFIG_FILE = ".cascade/config.json";
59
59
  CASCADE_DB_FILE = ".cascade/memory.db";
60
60
  CASCADE_DASHBOARD_SECRET_FILE = ".cascade/dashboard-secret";
@@ -455,6 +455,12 @@ var anthropic_exports = {};
455
455
  __export(anthropic_exports, {
456
456
  AnthropicProvider: () => AnthropicProvider
457
457
  });
458
/**
 * Build the optional extended-thinking fragment for an Anthropic request.
 *
 * The result is meant to be spread into the request object: it is either
 * `{}` (no thinking) or `{ thinking: { type: "enabled", budget_tokens } }`.
 * Thinking is only enabled when the model id matches `claude-(opus|sonnet)-4`
 * (case-insensitive) and `maxTokens` leaves room for a budget of at least
 * 1024 tokens after reserving 1024 tokens for the visible answer. The budget
 * is capped at 8000 tokens.
 *
 * @param {string} modelId - Model identifier the request will be sent to.
 * @param {number} maxTokens - The `max_tokens` value of the same request.
 * @returns {{thinking?: {type: "enabled", budget_tokens: number}}}
 *   Spread-safe request fragment; empty when thinking must stay off.
 */
function anthropicThinkingParam(modelId, maxTokens) {
  const MAX_BUDGET = 8e3;
  const MIN_BUDGET = 1024;
  const ANSWER_RESERVE = 1024;

  if (!/claude-(opus|sonnet)-4/i.test(modelId)) return {};

  // A missing/non-numeric maxTokens turns the arithmetic below into NaN, and
  // `NaN < MIN_BUDGET` is false — so without this guard NaN would be sent as
  // budget_tokens. Treat it as "no room for thinking" instead.
  const limit = Number(maxTokens);
  if (Number.isNaN(limit)) return {};

  // Floor so a fractional limit can never produce a fractional token budget.
  const budget = Math.min(MAX_BUDGET, Math.floor(limit) - ANSWER_RESERVE);
  if (budget < MIN_BUDGET) return {};

  return { thinking: { type: "enabled", budget_tokens: budget } };
}
458
464
  var AnthropicProvider;
459
465
  var init_anthropic = __esm({
460
466
  "src/providers/anthropic.ts"() {
@@ -489,13 +495,18 @@ var init_anthropic = __esm({
489
495
  let fullContent = "";
490
496
  let inputTokens = 0;
491
497
  let outputTokens = 0;
498
+ const maxTokens = options.maxTokens ?? this.model.maxOutputTokens;
499
+ const thinkParam = anthropicThinkingParam(this.model.id, maxTokens);
500
+ const useThinking = !!thinkParam.thinking;
492
501
  const stream = this.client.messages.stream({
493
502
  model: this.model.id,
494
- max_tokens: options.maxTokens ?? this.model.maxOutputTokens,
495
- temperature: options.temperature ?? 0.7,
503
+ max_tokens: maxTokens,
504
+ // Extended thinking requires temperature = 1; otherwise honor the request.
505
+ temperature: useThinking ? 1 : options.temperature ?? 0.7,
496
506
  system: options.systemPrompt,
497
507
  messages,
498
- tools: tools?.length ? tools : void 0
508
+ tools: tools?.length ? tools : void 0,
509
+ ...thinkParam
499
510
  }, { signal: options.signal });
500
511
  let isThinking = false;
501
512
  for await (const event of stream) {
@@ -3322,6 +3333,7 @@ var ModelSelector = class {
3322
3333
  if (lower.includes("claude")) providerStr = "anthropic";
3323
3334
  else if (lower.startsWith("gpt") || lower.startsWith("o1") || lower.startsWith("o3")) providerStr = "openai";
3324
3335
  else if (lower.includes("gemini")) providerStr = "gemini";
3336
+ else if ((lower.endsWith(".gguf") || actualId.includes("/") || actualId.includes("\\")) && this.availableProviders.has("openai-compatible")) providerStr = "openai-compatible";
3325
3337
  else if (this.availableProviders.has("ollama")) providerStr = "ollama";
3326
3338
  else if (this.availableProviders.has("openai-compatible")) providerStr = "openai-compatible";
3327
3339
  else if (this.availableProviders.size === 1) providerStr = Array.from(this.availableProviders)[0];
@@ -4004,6 +4016,11 @@ var CascadeRouter = class _CascadeRouter extends EventEmitter {
4004
4016
  if (availableProviders.has("ollama")) {
4005
4017
  await this.discoverOllamaModels(ollamaCfg);
4006
4018
  }
4019
+ if (availableProviders.has("openai-compatible")) {
4020
+ await Promise.all(
4021
+ config.providers.filter((p) => p.type === "openai-compatible").map((cfg) => this.discoverOpenAICompatibleModels(cfg))
4022
+ );
4023
+ }
4007
4024
  for (const tier of ["T1", "T2", "T3"]) {
4008
4025
  const override = tier === "T1" ? config.models.t1 : tier === "T2" ? config.models.t2 : config.models.t3;
4009
4026
  if (!override || override === "auto") continue;
@@ -4435,6 +4452,14 @@ var CascadeRouter = class _CascadeRouter extends EventEmitter {
4435
4452
  getModelsForProvider(provider) {
4436
4453
  return this.selector.getAvailableModelsForProvider(provider);
4437
4454
  }
4455
+ /**
4456
+ * Every model available across the configured + reachable providers, after
4457
+ * discovery (Ollama tags, OpenAI-compatible/llama.cpp models, cloud catalog).
4458
+ * Used to populate the desktop model pickers with the user's real models.
4459
+ */
4460
+ getAvailableModels() {
4461
+ return this.selector?.getAllAvailableModels() ?? [];
4462
+ }
4438
4463
  // ── Private ──────────────────────────────────
4439
4464
  async detectAvailableProviders(configs) {
4440
4465
  const available = /* @__PURE__ */ new Set();
@@ -4465,6 +4490,28 @@ var CascadeRouter = class _CascadeRouter extends EventEmitter {
4465
4490
  } catch {
4466
4491
  }
4467
4492
  }
4493
+ async discoverOpenAICompatibleModels(cfg) {
4494
+ try {
4495
+ const seed = {
4496
+ id: "openai-compatible",
4497
+ name: "openai-compatible",
4498
+ provider: "openai-compatible",
4499
+ contextWindow: 32e3,
4500
+ isVisionCapable: false,
4501
+ inputCostPer1kTokens: 0,
4502
+ outputCostPer1kTokens: 0,
4503
+ maxOutputTokens: 4e3,
4504
+ supportsStreaming: true,
4505
+ isLocal: false
4506
+ };
4507
+ const provider = new OpenAICompatibleProvider(cfg, seed);
4508
+ const models = await provider.listModels();
4509
+ for (const m of models) {
4510
+ this.selector.addDynamicModel(m);
4511
+ }
4512
+ } catch {
4513
+ }
4514
+ }
4468
4515
  ensureProvider(model, configs) {
4469
4516
  const key = `${model.provider}:${model.id}`;
4470
4517
  if (this.providers.has(key)) return;
@@ -4494,7 +4541,23 @@ var CascadeRouter = class _CascadeRouter extends EventEmitter {
4494
4541
  }
4495
4542
  }
4496
4543
  getAnyModelForProvider(type) {
4497
- return Object.values(MODELS).find((m) => m.provider === type);
4544
+ const fromCatalog = Object.values(MODELS).find((m) => m.provider === type);
4545
+ if (fromCatalog) return fromCatalog;
4546
+ if (type === "openai-compatible" || type === "azure") {
4547
+ return {
4548
+ id: type,
4549
+ name: type,
4550
+ provider: type,
4551
+ contextWindow: 32e3,
4552
+ isVisionCapable: false,
4553
+ inputCostPer1kTokens: 0,
4554
+ outputCostPer1kTokens: 0,
4555
+ maxOutputTokens: 4e3,
4556
+ supportsStreaming: true,
4557
+ isLocal: false
4558
+ };
4559
+ }
4560
+ return void 0;
4498
4561
  }
4499
4562
  recordStats(tier, model, usage) {
4500
4563
  this.stats.totalTokens += usage.totalTokens;
@@ -10363,7 +10426,11 @@ ${last.partialOutput}` : "");
10363
10426
  looksLikeConversational(prompt) {
10364
10427
  const LOW_COMPLEXITY = [
10365
10428
  /^(?:hi|hello|hey|thanks|thank you|ok|okay|yes|no|sure|got it|sounds good)\b/i,
10366
- /^(?:what is|what are|list|show me|tell me|who is|where is|when is|how do i)\b/i,
10429
+ /^(?:what is|what are|what'?s|list|show me|tell me|who is|who are|who'?re|where is|when is|how do i)\b/i,
10430
+ // Self-identity / capability questions ("who are you", "what can you do",
10431
+ // "who made you") are pure conversation — never a multi-agent build.
10432
+ /^(?:who|what)\b.*\byou\b/i,
10433
+ /^what can you\b/i,
10367
10434
  /\b(?:simple|quick|brief|small|single|one-line|typo|rename)\b/i
10368
10435
  ];
10369
10436
  const wordCount = prompt.trim().split(/\s+/).length;
@@ -10461,10 +10528,16 @@ ${prompt}` : prompt;
10461
10528
  temperature: 0
10462
10529
  });
10463
10530
  const content = result.content.trim();
10464
- const firstWord = (content.split(/[\s—–-]+/)[0] ?? "").toLowerCase();
10531
+ const match = content.toLowerCase().match(/\b(simple|moderate|complex)\b/);
10465
10532
  const reason = content.replace(/^\S+\s*[—–-]*\s*/, "").trim();
10466
- const verdict = firstWord.includes("simple") ? "Simple" : firstWord.includes("moderate") ? "Moderate" : "Complex";
10467
- this.recordDecision("complexity", `${verdict} \u2014 classifier: ${reason || "no reason given"}`);
10533
+ let verdict;
10534
+ if (match) {
10535
+ verdict = match[1] === "simple" ? "Simple" : match[1] === "moderate" ? "Moderate" : "Complex";
10536
+ this.recordDecision("complexity", `${verdict} \u2014 classifier: ${reason || "no reason given"}`);
10537
+ } else {
10538
+ verdict = prompt.trim().split(/\s+/).length <= 12 ? "Simple" : "Moderate";
10539
+ this.recordDecision("complexity", `${verdict} \u2014 classifier output unparseable; defaulted by length`);
10540
+ }
10468
10541
  return verdict;
10469
10542
  } catch {
10470
10543
  const followUpPrompt = /^(proceed|continue|go ahead|do it|yes|yep|ok|okay|carry on)$/i.test(prompt.trim());
@@ -13873,7 +13946,8 @@ function SetupWizard({ workspacePath, onComplete }) {
13873
13946
  ) })
13874
13947
  ] });
13875
13948
  }
13876
- const prompt = isAzure && fieldStage === "deploymentName" ? `Azure deployment name (${currentEntry.label})` : isAzure && fieldStage === "baseUrl" ? `Azure endpoint URL` : isAzure && fieldStage === "apiKey" ? `${currentEntry.label} API Key` : isAzure && fieldStage === "apiVersion" ? `Azure API version (e.g. 2024-08-01-preview)` : isCompat && fieldStage === "label" ? `Name for this endpoint (e.g. Groq)` : isCompat && fieldStage === "baseUrl" ? `Base URL (e.g. https://api.groq.com/openai/v1)` : isOllama ? `Ollama URL` : `${currentEntry.label} API Key`;
13949
+ const prompt = isAzure && fieldStage === "deploymentName" ? `Azure deployment name (${currentEntry.label})` : isAzure && fieldStage === "baseUrl" ? `Azure endpoint URL` : isAzure && fieldStage === "apiKey" ? `${currentEntry.label} API Key` : isAzure && fieldStage === "apiVersion" ? `Azure API version (e.g. 2024-08-01-preview)` : isCompat && fieldStage === "label" ? `Name for this endpoint (e.g. Groq)` : isCompat && fieldStage === "baseUrl" ? `Base URL (e.g. https://api.groq.com/openai/v1)` : isCompat && fieldStage === "apiKey" ? `${currentEntry.label} API Key (optional)` : isOllama ? `Ollama URL` : `${currentEntry.label} API Key`;
13950
+ const keyOptional = isCompat && fieldStage === "apiKey";
13877
13951
  const isMasked = fieldStage === "apiKey" && !isOllama;
13878
13952
  return /* @__PURE__ */ jsxs(Frame, { theme, phase: "keys", children: [
13879
13953
  doneEntries.length > 0 && /* @__PURE__ */ jsx(Box, { flexDirection: "column", marginBottom: 1, children: doneEntries.map((e) => /* @__PURE__ */ jsxs(Box, { children: [
@@ -13891,8 +13965,8 @@ function SetupWizard({ workspacePath, onComplete }) {
13891
13965
  {
13892
13966
  theme,
13893
13967
  label: prompt,
13894
- tag: isOllama ? "optional \u2014 Enter for default" : "required",
13895
- tagColor: isOllama ? theme.colors.muted : theme.colors.error,
13968
+ tag: isOllama ? "optional \u2014 Enter for default" : keyOptional ? "optional \u2014 Enter to skip" : "required",
13969
+ tagColor: isOllama || keyOptional ? theme.colors.muted : theme.colors.error,
13896
13970
  active: true,
13897
13971
  children: /* @__PURE__ */ jsx(
13898
13972
  SafeTextInput,