@joshuaswarren/openclaw-engram 9.0.13 → 9.0.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -985,6 +985,7 @@ var LocalLlmClient = class _LocalLlmClient {
985
985
  consecutive400s = 0;
986
986
  cooldownUntilMs = 0;
987
987
  modelRegistry;
988
+ _disableThinking = false;
988
989
  static HEALTH_CHECK_INTERVAL_MS = 6e4;
989
990
  // 1 minute
990
991
  static LMS_CACHE_INTERVAL_MS = 3e4;
@@ -993,6 +994,14 @@ var LocalLlmClient = class _LocalLlmClient {
993
994
  this.config = config;
994
995
  this.modelRegistry = modelRegistry;
995
996
  }
997
+ /**
998
+ * Disable thinking/reasoning mode for models that support it (e.g. Qwen 3.5).
999
+ * When enabled, adds chat_template_kwargs to suppress chain-of-thought,
1000
+ * reducing latency for fast-tier operations.
1001
+ */
1002
+ set disableThinking(value) {
1003
+ this._disableThinking = value;
1004
+ }
996
1005
  resolveHomeDir() {
997
1006
  return this.config.localLlmHomeDir || process.env.HOME || os.homedir();
998
1007
  }
@@ -1432,6 +1441,9 @@ var LocalLlmClient = class _LocalLlmClient {
1432
1441
  if (options.responseFormat?.type === "json_schema") {
1433
1442
  requestBody.response_format = options.responseFormat;
1434
1443
  }
1444
+ if (this._disableThinking) {
1445
+ requestBody.chat_template_kwargs = { enable_thinking: false };
1446
+ }
1435
1447
  const baseUrl = this.config.localLlmUrl.replace("localhost", "127.0.0.1").replace(/\/+$/, "");
1436
1448
  const chatUrl = `${baseUrl}/chat/completions`;
1437
1449
  const requestBodyJson = JSON.stringify(requestBody);
@@ -16495,10 +16507,14 @@ var Orchestrator = class _Orchestrator {
16495
16507
  this.policyRuntime = new PolicyRuntimeManager(config.memoryDir, config);
16496
16508
  this.summarizer = new HourlySummarizer(config, config.gatewayConfig, this.modelRegistry, this.transcript);
16497
16509
  this.localLlm = new LocalLlmClient(config, this.modelRegistry);
16498
- this.fastLlm = config.localLlmFastEnabled ? new LocalLlmClient(
16499
- { ...config, localLlmModel: config.localLlmFastModel || config.localLlmModel, localLlmUrl: config.localLlmFastUrl, localLlmTimeoutMs: config.localLlmFastTimeoutMs },
16500
- this.modelRegistry
16501
- ) : this.localLlm;
16510
+ this.fastLlm = config.localLlmFastEnabled ? (() => {
16511
+ const client = new LocalLlmClient(
16512
+ { ...config, localLlmModel: config.localLlmFastModel || config.localLlmModel, localLlmUrl: config.localLlmFastUrl, localLlmTimeoutMs: config.localLlmFastTimeoutMs },
16513
+ this.modelRegistry
16514
+ );
16515
+ client.disableThinking = true;
16516
+ return client;
16517
+ })() : this.localLlm;
16502
16518
  this.extraction = new ExtractionEngine(config, this.localLlm, config.gatewayConfig, this.modelRegistry);
16503
16519
  this.threading = new ThreadingManager(
16504
16520
  path30.join(config.memoryDir, "threads"),