npm - hammer-ai - Versions diffs - 0.2.6 → 0.2.8 - Mend

hammer-ai 0.2.6 → 0.2.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4)

package/dist/index.d.ts CHANGED

@@ -28,6 +28,20 @@ interface LLMProviderConfig {
     extraHeaders?: Record<string, string>;
     /** Custom fetch implementation (e.g. expo/fetch for RN streaming support). */
     fetchImpl?: FetchLike;
+    /**
+     * Explicitly enable or disable the provider's thinking/reasoning mode.
+     *
+     * - `false` — disables thinking (e.g. DashScope `enable_thinking: false` for
+     *   Qwen3 models, which have thinking on by default). Prevents the silent
+     *   multi-minute server-side CoT delay before the first token streams out.
+     * - `true`  — explicitly enables thinking with the provider's default budget.
+     * - `undefined` — no thinking-related field is sent; the provider uses its
+     *   own model default.
+     *
+     * Currently maps to `enable_thinking` in the request body, which is the
+     * DashScope OpenAI-compatible API parameter for Qwen3 models.
+     */
+    enableThinking?: boolean;
 }
 /** Options for a single chat completion request. */
 interface LLMRequestOptions {
@@ -49,6 +63,12 @@ interface StreamCallbacks {
      * accumulated so far.
      */
     onToken?: (token: string) => void | boolean;
+    /**
+     * Fired for every reasoning/thinking token received
+     * (delta.reasoning_content). Called before any content tokens arrive
+     * for models that emit a thinking phase (e.g. Qwen 3+).
+     */
+    onReasoningToken?: (token: string) => void;
     /**
      * Fired once when the first SSE data chunk arrives from the model.
      * Useful for closing premature-cancellation windows: Qwen 3+ models
@@ -1256,6 +1276,7 @@ interface ToolLoopRuntimeLLMRequest {
     frequencyPenalty?: number;
     presencePenalty?: number;
     onToken?: (token: string) => void | boolean;
+    onReasoningToken?: (token: string) => void;
     normalizeResponseContent?: (content: string) => string;
 }
 interface ToolLoopRuntimeLLMResponse {
@@ -1303,9 +1324,9 @@ declare abstract class ToolLoopAgentRuntime<TMemory extends AgentMemoryLayer = A
     protected constructor(deps: ToolLoopAgentRuntimeDeps<TStepInput>);
     protected abstract createRuntimeSetup(): Promise<ToolLoopRuntimeSetup<TMemory, TEnforcer>>;
     protected abstract getToolDefinitions(): ToolDefinition[];
-    protected abstract buildSystemPrompt(context: ToolLoopRuntimeStepContext<TStepInput>): string;
-    protected abstract buildLLMRequest(context: ToolLoopRuntimeStepContext<TStepInput>, messages: ChatMessage[]): ToolLoopRuntimeLLMRequest;
-    protected abstract parseStepResponse(response: ToolLoopRuntimeLLMResponse, tools: ToolDefinition[]): ParsedStepInput;
+    protected buildSystemPrompt(_context: ToolLoopRuntimeStepContext<TStepInput>): string;
+    protected buildLLMRequest(_context: ToolLoopRuntimeStepContext<TStepInput>, _messages: ChatMessage[]): ToolLoopRuntimeLLMRequest;
+    protected parseStepResponse(_response: ToolLoopRuntimeLLMResponse, _tools: ToolDefinition[]): ParsedStepInput;
     protected get runtimeLoop(): AgentLoop | null;
     protected get runtimeMemory(): TMemory | null;
     protected get runtimeEnforcer(): TEnforcer | null;

package/dist/index.js CHANGED

@@ -176,6 +176,9 @@ function buildChatPayload(config, basePayload) {
   if (shouldOmitTemperature(config)) {
     delete payload.temperature;
   }
+  if (config.enableThinking !== void 0) {
+    payload.enable_thinking = config.enableThinking;
+  }
   return payload;
 }
 var LLMClient = class {
@@ -407,8 +410,12 @@ var LLMClient = class {
               try {
                 const parsed = JSON.parse(line.slice(6));
                 const delta = parsed.choices?.[0]?.delta;
+                const reasoningToken = delta?.reasoning_content;
+                if (reasoningToken) {
+                  callbacks?.onReasoningToken?.(reasoningToken);
+                }
                 const token = delta?.content;
-                if (!token && delta) {
+                if (!token && !reasoningToken && delta) {
                   log(`SSE delta (no content): ${JSON.stringify(delta).slice(0, 200)}`, "warn");
                 }
                 if (token) {
@@ -3929,6 +3936,21 @@ var ToolLoopAgentRuntime = class {
     this.hooks = deps.hooks ?? {};
     this.requireTodoListOnFirstResponse = deps.requireTodoListOnFirstResponse ?? false;
   }
+  buildSystemPrompt(_context) {
+    throw new Error(
+      `buildSystemPrompt is not implemented for ${this.constructor.name}`
+    );
+  }
+  buildLLMRequest(_context, _messages) {
+    throw new Error(
+      `buildLLMRequest is not implemented for ${this.constructor.name}`
+    );
+  }
+  parseStepResponse(_response, _tools) {
+    throw new Error(
+      `parseStepResponse is not implemented for ${this.constructor.name}`
+    );
+  }
   get runtimeLoop() {
     return this.infrastructure?.loop ?? null;
   }
@@ -3966,17 +3988,19 @@ var ToolLoopAgentRuntime = class {
         `No llmClient is configured for ${this.constructor.name}. Override executeLLMStep() or provide llmClient in the constructor.`
       );
     }
+    const hasStreamCallbacks = Boolean(request.onToken ?? this.hooks.onToken ?? request.onReasoningToken);
     const response = await this.llmClient.chat(
       {
         messages: request.messages,
         temperature: request.temperature,
         maxTokens: request.maxTokens,
-        stream: Boolean(request.onToken ?? this.hooks.onToken),
+        stream: hasStreamCallbacks,
         frequencyPenalty: request.frequencyPenalty,
         presencePenalty: request.presencePenalty
       },
-      request.onToken || this.hooks.onToken ? {
-        onToken: request.onToken ?? this.hooks.onToken
+      hasStreamCallbacks ? {
+        onToken: request.onToken ?? this.hooks.onToken,
+        onReasoningToken: request.onReasoningToken
       } : void 0
     );
     return {
@@ -6620,14 +6644,31 @@ ${JSON.stringify(request.messages, null, 2)}`
     const runDetector = new StreamingToolParser({
       allowedRunTargets: this.getParserRunTargets()
     });
+    let reasoningStarted = false;
     return {
       messages,
       temperature: options.temperature,
       maxTokens: options.maxTokens,
       normalizeResponseContent: () => runDetector.getFullContent(),
+      onReasoningToken: (token) => {
+        if (runId === void 0) return;
+        if (!reasoningStarted) {
+          reasoningStarted = true;
+          this.appendStreamingToken(runId, "\u{1F4AD} thinking\u2026\n");
+        }
+        this.appendStreamingToken(runId, token);
+      },
       onToken: (token) => {
         runDetector.push(token);
         if (runId !== void 0) {
+          if (reasoningStarted) {
+            reasoningStarted = false;
+            this.clearPendingStreamingContent(runId);
+            this.updateSnapshotForRun(runId, (state) => ({
+              ...state,
+              streamingContent: ""
+            }));
+          }
           if (runDetector.sealed) {
             this.clearPendingStreamingContent(runId);
             this.updateSnapshotForRun(runId, (state) => ({