npm - hammer-ai - Versions diffs - 0.2.7 → 0.2.8 - Mend

hammer-ai 0.2.7 → 0.2.8

This diff shows the differences between the contents of two publicly available package versions, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (4) hide show

package/dist/index.d.ts CHANGED Viewed

@@ -28,6 +28,20 @@ interface LLMProviderConfig {
     extraHeaders?: Record<string, string>;
     /** Custom fetch implementation (e.g. expo/fetch for RN streaming support). */
     fetchImpl?: FetchLike;
+    /**
+     * Explicitly enable or disable the provider's thinking/reasoning mode.
+     *
+     * - `false` — disables thinking (e.g. DashScope `enable_thinking: false` for
+     *   Qwen3 models, which have thinking on by default). Prevents the silent
+     *   multi-minute server-side CoT delay before the first token streams out.
+     * - `true`  — explicitly enables thinking with the provider's default budget.
+     * - `undefined` — no thinking-related field is sent; the provider uses its
+     *   own model default.
+     *
+     * Currently maps to `enable_thinking` in the request body, which is the
+     * DashScope OpenAI-compatible API parameter for Qwen3 models.
+     */
+    enableThinking?: boolean;
 }
 /** Options for a single chat completion request. */
 interface LLMRequestOptions {
@@ -49,6 +63,12 @@ interface StreamCallbacks {
      * accumulated so far.
      */
     onToken?: (token: string) => void | boolean;
+    /**
+     * Fired for every reasoning/thinking token received
+     * (delta.reasoning_content). Called before any content tokens arrive
+     * for models that emit a thinking phase (e.g. Qwen 3+).
+     */
+    onReasoningToken?: (token: string) => void;
     /**
      * Fired once when the first SSE data chunk arrives from the model.
      * Useful for closing premature-cancellation windows: Qwen 3+ models
@@ -1256,6 +1276,7 @@ interface ToolLoopRuntimeLLMRequest {
     frequencyPenalty?: number;
     presencePenalty?: number;
     onToken?: (token: string) => void | boolean;
+    onReasoningToken?: (token: string) => void;
     normalizeResponseContent?: (content: string) => string;
 }
 interface ToolLoopRuntimeLLMResponse {

package/dist/index.js CHANGED Viewed

@@ -176,6 +176,9 @@ function buildChatPayload(config, basePayload) {
   if (shouldOmitTemperature(config)) {
     delete payload.temperature;
   }
+  if (config.enableThinking !== void 0) {
+    payload.enable_thinking = config.enableThinking;
+  }
   return payload;
 }
 var LLMClient = class {
@@ -407,8 +410,12 @@ var LLMClient = class {
               try {
                 const parsed = JSON.parse(line.slice(6));
                 const delta = parsed.choices?.[0]?.delta;
+                const reasoningToken = delta?.reasoning_content;
+                if (reasoningToken) {
+                  callbacks?.onReasoningToken?.(reasoningToken);
+                }
                 const token = delta?.content;
-                if (!token && delta) {
+                if (!token && !reasoningToken && delta) {
                   log(`SSE delta (no content): ${JSON.stringify(delta).slice(0, 200)}`, "warn");
                 }
                 if (token) {
@@ -3981,17 +3988,19 @@ var ToolLoopAgentRuntime = class {
         `No llmClient is configured for ${this.constructor.name}. Override executeLLMStep() or provide llmClient in the constructor.`
       );
     }
+    const hasStreamCallbacks = Boolean(request.onToken ?? this.hooks.onToken ?? request.onReasoningToken);
     const response = await this.llmClient.chat(
       {
         messages: request.messages,
         temperature: request.temperature,
         maxTokens: request.maxTokens,
-        stream: Boolean(request.onToken ?? this.hooks.onToken),
+        stream: hasStreamCallbacks,
         frequencyPenalty: request.frequencyPenalty,
         presencePenalty: request.presencePenalty
       },
-      request.onToken || this.hooks.onToken ? {
-        onToken: request.onToken ?? this.hooks.onToken
+      hasStreamCallbacks ? {
+        onToken: request.onToken ?? this.hooks.onToken,
+        onReasoningToken: request.onReasoningToken
       } : void 0
     );
     return {
@@ -6635,14 +6644,31 @@ ${JSON.stringify(request.messages, null, 2)}`
     const runDetector = new StreamingToolParser({
       allowedRunTargets: this.getParserRunTargets()
     });
+    let reasoningStarted = false;
     return {
       messages,
       temperature: options.temperature,
       maxTokens: options.maxTokens,
       normalizeResponseContent: () => runDetector.getFullContent(),
+      onReasoningToken: (token) => {
+        if (runId === void 0) return;
+        if (!reasoningStarted) {
+          reasoningStarted = true;
+          this.appendStreamingToken(runId, "\u{1F4AD} thinking\u2026\n");
+        }
+        this.appendStreamingToken(runId, token);
+      },
       onToken: (token) => {
         runDetector.push(token);
         if (runId !== void 0) {
+          if (reasoningStarted) {
+            reasoningStarted = false;
+            this.clearPendingStreamingContent(runId);
+            this.updateSnapshotForRun(runId, (state) => ({
+              ...state,
+              streamingContent: ""
+            }));
+          }
           if (runDetector.sealed) {
             this.clearPendingStreamingContent(runId);
             this.updateSnapshotForRun(runId, (state) => ({