npm - hammer-ai - Versions diffs - 0.2.7 → 0.2.9 - Mend

hammer-ai 0.2.7 → 0.2.9

This diff shows the differences between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.

Files changed (4)

package/dist/index.d.ts — CHANGED

@@ -28,6 +28,20 @@ interface LLMProviderConfig {
     extraHeaders?: Record<string, string>;
     /** Custom fetch implementation (e.g. expo/fetch for RN streaming support). */
     fetchImpl?: FetchLike;
+    /**
+     * Explicitly enable or disable the provider's thinking/reasoning mode.
+     *
+     * - `false` — disables thinking (e.g. DashScope `enable_thinking: false` for
+     *   Qwen3 models, which have thinking on by default). Prevents the silent
+     *   multi-minute server-side CoT delay before the first token streams out.
+     * - `true`  — explicitly enables thinking with the provider's default budget.
+     * - `undefined` — no thinking-related field is sent; the provider uses its
+     *   own model default.
+     *
+     * Currently maps to `enable_thinking` in the request body, which is the
+     * DashScope OpenAI-compatible API parameter for Qwen3 models.
+     */
+    enableThinking?: boolean;
 }
 /** Options for a single chat completion request. */
 interface LLMRequestOptions {
@@ -49,6 +63,12 @@ interface StreamCallbacks {
      * accumulated so far.
      */
     onToken?: (token: string) => void | boolean;
+    /**
+     * Fired for every reasoning/thinking token received
+     * (delta.reasoning_content). Called before any content tokens arrive
+     * for models that emit a thinking phase (e.g. Qwen 3+).
+     */
+    onReasoningToken?: (token: string) => void;
     /**
      * Fired once when the first SSE data chunk arrives from the model.
      * Useful for closing premature-cancellation windows: Qwen 3+ models
@@ -1256,6 +1276,7 @@ interface ToolLoopRuntimeLLMRequest {
     frequencyPenalty?: number;
     presencePenalty?: number;
     onToken?: (token: string) => void | boolean;
+    onReasoningToken?: (token: string) => void;
     normalizeResponseContent?: (content: string) => string;
 }
 interface ToolLoopRuntimeLLMResponse {

package/dist/index.js — CHANGED

@@ -168,16 +168,6 @@ var LLMResponseSchema = z.object({
 }).strict();
 // src/llm-client.ts
-function shouldOmitTemperature(config) {
-  return config.model === "kimi-k2.5";
-}
-function buildChatPayload(config, basePayload) {
-  const payload = { ...basePayload };
-  if (shouldOmitTemperature(config)) {
-    delete payload.temperature;
-  }
-  return payload;
-}
 var LLMClient = class {
   config;
   constructor(config) {
@@ -210,7 +200,7 @@ var LLMClient = class {
       presencePenalty = 0,
       signal
     } = options;
-    const payload = buildChatPayload(this.config, {
+    const payload = {
       model: this.config.model,
       messages,
       temperature,
@@ -218,7 +208,13 @@ var LLMClient = class {
       frequency_penalty: frequencyPenalty,
       presence_penalty: presencePenalty,
       stream
-    });
+    };
+    if (this.config.model === "kimi-k2.5") {
+      delete payload.temperature;
+    }
+    if (this.config.enableThinking !== void 0) {
+      payload.enable_thinking = this.config.enableThinking;
+    }
     const headers = {
       "Content-Type": "application/json",
       Authorization: `Bearer ${this.config.apiKey}`,
@@ -236,7 +232,7 @@ var LLMClient = class {
         }
         if (attempt > 1) {
           log(`Retry attempt ${attempt}/${maxRetries}\u2026`, "warn");
-          await sleep(1e3 * attempt);
+          await new Promise((r) => setTimeout(r, 1e3 * attempt));
         }
         const controller = new AbortController();
         const abortFetch = () => {
@@ -254,7 +250,7 @@ var LLMClient = class {
           if (!response.ok) {
             const errorText = await response.text();
             const err = new ApiError(response.status, errorText);
-            if (isRetryableStatus(response.status) && attempt < maxRetries) {
+            if ((response.status === 429 || response.status === 500 || response.status === 502 || response.status === 503 || response.status === 408) && attempt < maxRetries) {
               log(`Transient HTTP ${response.status}. Retrying\u2026`, "warn");
               lastError = err;
               continue;
@@ -407,8 +403,12 @@ var LLMClient = class {
               try {
                 const parsed = JSON.parse(line.slice(6));
                 const delta = parsed.choices?.[0]?.delta;
+                const reasoningToken = delta?.reasoning_content;
+                if (reasoningToken) {
+                  callbacks?.onReasoningToken?.(reasoningToken);
+                }
                 const token = delta?.content;
-                if (!token && delta) {
+                if (!token && !reasoningToken && delta) {
                   log(`SSE delta (no content): ${JSON.stringify(delta).slice(0, 200)}`, "warn");
                 }
                 if (token) {
@@ -448,9 +448,6 @@ var LLMClient = class {
     });
     try {
       await streamPromise;
-    } catch (err) {
-      if (err.message?.includes("getFirstChunkTimeout()")) throw err;
-      throw err;
     } finally {
       cleanup();
       signal?.removeEventListener("abort", abortStream);
@@ -476,9 +473,6 @@ var ApiError = class extends Error {
   }
   status;
 };
-function isRetryableStatus(status) {
-  return status === 429 || status === 500 || status === 502 || status === 503 || status === 408;
-}
 function isNetworkError(err) {
   const code = err.code ?? err.cause?.code ?? "";
   return code === "ENOTFOUND" || code === "ETIMEDOUT" || code === "ECONNREFUSED" || code === "UND_ERR_CONNECT_TIMEOUT" || typeof err.message === "string" && err.message.includes("fetch failed");
@@ -499,9 +493,6 @@ function createAbortError(reason) {
   error.name = "AbortError";
   return error;
 }
-function sleep(ms) {
-  return new Promise((r) => setTimeout(r, ms));
-}
 var AGENT_MACHINE_STATES = [
   "idle",
   "prompting",
@@ -3981,17 +3972,19 @@ var ToolLoopAgentRuntime = class {
         `No llmClient is configured for ${this.constructor.name}. Override executeLLMStep() or provide llmClient in the constructor.`
       );
     }
+    const hasStreamCallbacks = Boolean(request.onToken ?? this.hooks.onToken ?? request.onReasoningToken);
     const response = await this.llmClient.chat(
       {
         messages: request.messages,
         temperature: request.temperature,
         maxTokens: request.maxTokens,
-        stream: Boolean(request.onToken ?? this.hooks.onToken),
+        stream: hasStreamCallbacks,
         frequencyPenalty: request.frequencyPenalty,
         presencePenalty: request.presencePenalty
       },
-      request.onToken || this.hooks.onToken ? {
-        onToken: request.onToken ?? this.hooks.onToken
+      hasStreamCallbacks ? {
+        onToken: request.onToken ?? this.hooks.onToken,
+        onReasoningToken: request.onReasoningToken
       } : void 0
     );
     return {
@@ -6635,14 +6628,31 @@ ${JSON.stringify(request.messages, null, 2)}`
     const runDetector = new StreamingToolParser({
       allowedRunTargets: this.getParserRunTargets()
     });
+    let reasoningStarted = false;
     return {
       messages,
       temperature: options.temperature,
       maxTokens: options.maxTokens,
       normalizeResponseContent: () => runDetector.getFullContent(),
+      onReasoningToken: (token) => {
+        if (runId === void 0) return;
+        if (!reasoningStarted) {
+          reasoningStarted = true;
+          this.appendStreamingToken(runId, "\u{1F4AD} thinking\u2026\n");
+        }
+        this.appendStreamingToken(runId, token);
+      },
       onToken: (token) => {
         runDetector.push(token);
         if (runId !== void 0) {
+          if (reasoningStarted) {
+            reasoningStarted = false;
+            this.clearPendingStreamingContent(runId);
+            this.updateSnapshotForRun(runId, (state) => ({
+              ...state,
+              streamingContent: ""
+            }));
+          }
           if (runDetector.sealed) {
             this.clearPendingStreamingContent(runId);
             this.updateSnapshotForRun(runId, (state) => ({