npm - @joshuaswarren/openclaw-engram - Versions diffs - 9.0.13 → 9.0.14 - Mend

@joshuaswarren/openclaw-engram 9.0.13 → 9.0.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3) hide show

package/dist/index.js CHANGED Viewed

@@ -985,6 +985,7 @@ var LocalLlmClient = class _LocalLlmClient {
   consecutive400s = 0;
   cooldownUntilMs = 0;
   modelRegistry;
+  _disableThinking = false;
   static HEALTH_CHECK_INTERVAL_MS = 6e4;
   // 1 minute
   static LMS_CACHE_INTERVAL_MS = 3e4;
@@ -993,6 +994,14 @@ var LocalLlmClient = class _LocalLlmClient {
     this.config = config;
     this.modelRegistry = modelRegistry;
   }
+  /**
+   * Disable thinking/reasoning mode for models that support it (e.g. Qwen 3.5).
+   * When enabled, adds chat_template_kwargs to suppress chain-of-thought,
+   * reducing latency for fast-tier operations.
+   */
+  set disableThinking(value) {
+    this._disableThinking = value;
+  }
   resolveHomeDir() {
     return this.config.localLlmHomeDir || process.env.HOME || os.homedir();
   }
@@ -1432,6 +1441,9 @@ var LocalLlmClient = class _LocalLlmClient {
       if (options.responseFormat?.type === "json_schema") {
         requestBody.response_format = options.responseFormat;
       }
+      if (this._disableThinking) {
+        requestBody.chat_template_kwargs = { enable_thinking: false };
+      }
       const baseUrl = this.config.localLlmUrl.replace("localhost", "127.0.0.1").replace(/\/+$/, "");
       const chatUrl = `${baseUrl}/chat/completions`;
       const requestBodyJson = JSON.stringify(requestBody);
@@ -16495,10 +16507,14 @@ var Orchestrator = class _Orchestrator {
     this.policyRuntime = new PolicyRuntimeManager(config.memoryDir, config);
     this.summarizer = new HourlySummarizer(config, config.gatewayConfig, this.modelRegistry, this.transcript);
     this.localLlm = new LocalLlmClient(config, this.modelRegistry);
-    this.fastLlm = config.localLlmFastEnabled ? new LocalLlmClient(
-      { ...config, localLlmModel: config.localLlmFastModel || config.localLlmModel, localLlmUrl: config.localLlmFastUrl, localLlmTimeoutMs: config.localLlmFastTimeoutMs },
-      this.modelRegistry
-    ) : this.localLlm;
+    this.fastLlm = config.localLlmFastEnabled ? (() => {
+      const client = new LocalLlmClient(
+        { ...config, localLlmModel: config.localLlmFastModel || config.localLlmModel, localLlmUrl: config.localLlmFastUrl, localLlmTimeoutMs: config.localLlmFastTimeoutMs },
+        this.modelRegistry
+      );
+      client.disableThinking = true;
+      return client;
+    })() : this.localLlm;
     this.extraction = new ExtractionEngine(config, this.localLlm, config.gatewayConfig, this.modelRegistry);
     this.threading = new ThreadingManager(
       path30.join(config.memoryDir, "threads"),