hammer-ai 0.2.7 → 0.2.9

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -28,6 +28,20 @@ interface LLMProviderConfig {
28
28
  extraHeaders?: Record<string, string>;
29
29
  /** Custom fetch implementation (e.g. expo/fetch for RN streaming support). */
30
30
  fetchImpl?: FetchLike;
31
+ /**
32
+ * Explicitly enable or disable the provider's thinking/reasoning mode.
33
+ *
34
+ * - `false` — disables thinking (e.g. DashScope `enable_thinking: false` for
35
+ * Qwen3 models, which have thinking on by default). Prevents the silent
36
+ * multi-minute server-side CoT delay before the first token streams out.
37
+ * - `true` — explicitly enables thinking with the provider's default budget.
38
+ * - `undefined` — no thinking-related field is sent; the provider uses its
39
+ * own model default.
40
+ *
41
+ * Currently maps to `enable_thinking` in the request body, which is the
42
+ * DashScope OpenAI-compatible API parameter for Qwen3 models.
43
+ */
44
+ enableThinking?: boolean;
31
45
  }
32
46
  /** Options for a single chat completion request. */
33
47
  interface LLMRequestOptions {
@@ -49,6 +63,12 @@ interface StreamCallbacks {
49
63
  * accumulated so far.
50
64
  */
51
65
  onToken?: (token: string) => void | boolean;
66
+ /**
67
+ * Fired for every reasoning/thinking token received
68
+ * (delta.reasoning_content). Called before any content tokens arrive
69
+ * for models that emit a thinking phase (e.g. Qwen 3+).
70
+ */
71
+ onReasoningToken?: (token: string) => void;
52
72
  /**
53
73
  * Fired once when the first SSE data chunk arrives from the model.
54
74
  * Useful for closing premature-cancellation windows: Qwen 3+ models
@@ -1256,6 +1276,7 @@ interface ToolLoopRuntimeLLMRequest {
1256
1276
  frequencyPenalty?: number;
1257
1277
  presencePenalty?: number;
1258
1278
  onToken?: (token: string) => void | boolean;
1279
+ onReasoningToken?: (token: string) => void;
1259
1280
  normalizeResponseContent?: (content: string) => string;
1260
1281
  }
1261
1282
  interface ToolLoopRuntimeLLMResponse {
package/dist/index.js CHANGED
@@ -168,16 +168,6 @@ var LLMResponseSchema = z.object({
168
168
  }).strict();
169
169
 
170
170
  // src/llm-client.ts
171
- function shouldOmitTemperature(config) {
172
- return config.model === "kimi-k2.5";
173
- }
174
- function buildChatPayload(config, basePayload) {
175
- const payload = { ...basePayload };
176
- if (shouldOmitTemperature(config)) {
177
- delete payload.temperature;
178
- }
179
- return payload;
180
- }
181
171
  var LLMClient = class {
182
172
  config;
183
173
  constructor(config) {
@@ -210,7 +200,7 @@ var LLMClient = class {
210
200
  presencePenalty = 0,
211
201
  signal
212
202
  } = options;
213
- const payload = buildChatPayload(this.config, {
203
+ const payload = {
214
204
  model: this.config.model,
215
205
  messages,
216
206
  temperature,
@@ -218,7 +208,13 @@ var LLMClient = class {
218
208
  frequency_penalty: frequencyPenalty,
219
209
  presence_penalty: presencePenalty,
220
210
  stream
221
- });
211
+ };
212
+ if (this.config.model === "kimi-k2.5") {
213
+ delete payload.temperature;
214
+ }
215
+ if (this.config.enableThinking !== void 0) {
216
+ payload.enable_thinking = this.config.enableThinking;
217
+ }
222
218
  const headers = {
223
219
  "Content-Type": "application/json",
224
220
  Authorization: `Bearer ${this.config.apiKey}`,
@@ -236,7 +232,7 @@ var LLMClient = class {
236
232
  }
237
233
  if (attempt > 1) {
238
234
  log(`Retry attempt ${attempt}/${maxRetries}\u2026`, "warn");
239
- await sleep(1e3 * attempt);
235
+ await new Promise((r) => setTimeout(r, 1e3 * attempt));
240
236
  }
241
237
  const controller = new AbortController();
242
238
  const abortFetch = () => {
@@ -254,7 +250,7 @@ var LLMClient = class {
254
250
  if (!response.ok) {
255
251
  const errorText = await response.text();
256
252
  const err = new ApiError(response.status, errorText);
257
- if (isRetryableStatus(response.status) && attempt < maxRetries) {
253
+ if ((response.status === 429 || response.status === 500 || response.status === 502 || response.status === 503 || response.status === 408) && attempt < maxRetries) {
258
254
  log(`Transient HTTP ${response.status}. Retrying\u2026`, "warn");
259
255
  lastError = err;
260
256
  continue;
@@ -407,8 +403,12 @@ var LLMClient = class {
407
403
  try {
408
404
  const parsed = JSON.parse(line.slice(6));
409
405
  const delta = parsed.choices?.[0]?.delta;
406
+ const reasoningToken = delta?.reasoning_content;
407
+ if (reasoningToken) {
408
+ callbacks?.onReasoningToken?.(reasoningToken);
409
+ }
410
410
  const token = delta?.content;
411
- if (!token && delta) {
411
+ if (!token && !reasoningToken && delta) {
412
412
  log(`SSE delta (no content): ${JSON.stringify(delta).slice(0, 200)}`, "warn");
413
413
  }
414
414
  if (token) {
@@ -448,9 +448,6 @@ var LLMClient = class {
448
448
  });
449
449
  try {
450
450
  await streamPromise;
451
- } catch (err) {
452
- if (err.message?.includes("getFirstChunkTimeout()")) throw err;
453
- throw err;
454
451
  } finally {
455
452
  cleanup();
456
453
  signal?.removeEventListener("abort", abortStream);
@@ -476,9 +473,6 @@ var ApiError = class extends Error {
476
473
  }
477
474
  status;
478
475
  };
479
- function isRetryableStatus(status) {
480
- return status === 429 || status === 500 || status === 502 || status === 503 || status === 408;
481
- }
482
476
  function isNetworkError(err) {
483
477
  const code = err.code ?? err.cause?.code ?? "";
484
478
  return code === "ENOTFOUND" || code === "ETIMEDOUT" || code === "ECONNREFUSED" || code === "UND_ERR_CONNECT_TIMEOUT" || typeof err.message === "string" && err.message.includes("fetch failed");
@@ -499,9 +493,6 @@ function createAbortError(reason) {
499
493
  error.name = "AbortError";
500
494
  return error;
501
495
  }
502
- function sleep(ms) {
503
- return new Promise((r) => setTimeout(r, ms));
504
- }
505
496
  var AGENT_MACHINE_STATES = [
506
497
  "idle",
507
498
  "prompting",
@@ -3981,17 +3972,19 @@ var ToolLoopAgentRuntime = class {
3981
3972
  `No llmClient is configured for ${this.constructor.name}. Override executeLLMStep() or provide llmClient in the constructor.`
3982
3973
  );
3983
3974
  }
3975
+ const hasStreamCallbacks = Boolean(request.onToken ?? this.hooks.onToken ?? request.onReasoningToken);
3984
3976
  const response = await this.llmClient.chat(
3985
3977
  {
3986
3978
  messages: request.messages,
3987
3979
  temperature: request.temperature,
3988
3980
  maxTokens: request.maxTokens,
3989
- stream: Boolean(request.onToken ?? this.hooks.onToken),
3981
+ stream: hasStreamCallbacks,
3990
3982
  frequencyPenalty: request.frequencyPenalty,
3991
3983
  presencePenalty: request.presencePenalty
3992
3984
  },
3993
- request.onToken || this.hooks.onToken ? {
3994
- onToken: request.onToken ?? this.hooks.onToken
3985
+ hasStreamCallbacks ? {
3986
+ onToken: request.onToken ?? this.hooks.onToken,
3987
+ onReasoningToken: request.onReasoningToken
3995
3988
  } : void 0
3996
3989
  );
3997
3990
  return {
@@ -6635,14 +6628,31 @@ ${JSON.stringify(request.messages, null, 2)}`
6635
6628
  const runDetector = new StreamingToolParser({
6636
6629
  allowedRunTargets: this.getParserRunTargets()
6637
6630
  });
6631
+ let reasoningStarted = false;
6638
6632
  return {
6639
6633
  messages,
6640
6634
  temperature: options.temperature,
6641
6635
  maxTokens: options.maxTokens,
6642
6636
  normalizeResponseContent: () => runDetector.getFullContent(),
6637
+ onReasoningToken: (token) => {
6638
+ if (runId === void 0) return;
6639
+ if (!reasoningStarted) {
6640
+ reasoningStarted = true;
6641
+ this.appendStreamingToken(runId, "\u{1F4AD} thinking\u2026\n");
6642
+ }
6643
+ this.appendStreamingToken(runId, token);
6644
+ },
6643
6645
  onToken: (token) => {
6644
6646
  runDetector.push(token);
6645
6647
  if (runId !== void 0) {
6648
+ if (reasoningStarted) {
6649
+ reasoningStarted = false;
6650
+ this.clearPendingStreamingContent(runId);
6651
+ this.updateSnapshotForRun(runId, (state) => ({
6652
+ ...state,
6653
+ streamingContent: ""
6654
+ }));
6655
+ }
6646
6656
  if (runDetector.sealed) {
6647
6657
  this.clearPendingStreamingContent(runId);
6648
6658
  this.updateSnapshotForRun(runId, (state) => ({