hammer-ai 0.2.7 → 0.2.8

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -28,6 +28,20 @@ interface LLMProviderConfig {
28
28
  extraHeaders?: Record<string, string>;
29
29
  /** Custom fetch implementation (e.g. expo/fetch for RN streaming support). */
30
30
  fetchImpl?: FetchLike;
31
+ /**
32
+ * Explicitly enable or disable the provider's thinking/reasoning mode.
33
+ *
34
+ * - `false` — disables thinking (e.g. DashScope `enable_thinking: false` for
35
+ * Qwen3 models, which have thinking on by default). Prevents the silent
36
+ * multi-minute server-side CoT delay before the first token streams out.
37
+ * - `true` — explicitly enables thinking with the provider's default budget.
38
+ * - `undefined` — no thinking-related field is sent; the provider uses its
39
+ * own model default.
40
+ *
41
+ * Currently maps to `enable_thinking` in the request body, which is the
42
+ * DashScope OpenAI-compatible API parameter for Qwen3 models.
43
+ */
44
+ enableThinking?: boolean;
31
45
  }
32
46
  /** Options for a single chat completion request. */
33
47
  interface LLMRequestOptions {
@@ -49,6 +63,12 @@ interface StreamCallbacks {
49
63
  * accumulated so far.
50
64
  */
51
65
  onToken?: (token: string) => void | boolean;
66
+ /**
67
+ * Fired for every reasoning/thinking token received
68
+ * (delta.reasoning_content). Called before any content tokens arrive
69
+ * for models that emit a thinking phase (e.g. Qwen 3+).
70
+ */
71
+ onReasoningToken?: (token: string) => void;
52
72
  /**
53
73
  * Fired once when the first SSE data chunk arrives from the model.
54
74
  * Useful for closing premature-cancellation windows: Qwen 3+ models
@@ -1256,6 +1276,7 @@ interface ToolLoopRuntimeLLMRequest {
1256
1276
  frequencyPenalty?: number;
1257
1277
  presencePenalty?: number;
1258
1278
  onToken?: (token: string) => void | boolean;
1279
+ onReasoningToken?: (token: string) => void;
1259
1280
  normalizeResponseContent?: (content: string) => string;
1260
1281
  }
1261
1282
  interface ToolLoopRuntimeLLMResponse {
package/dist/index.js CHANGED
@@ -176,6 +176,9 @@ function buildChatPayload(config, basePayload) {
176
176
  if (shouldOmitTemperature(config)) {
177
177
  delete payload.temperature;
178
178
  }
179
+ if (config.enableThinking !== void 0) {
180
+ payload.enable_thinking = config.enableThinking;
181
+ }
179
182
  return payload;
180
183
  }
181
184
  var LLMClient = class {
@@ -407,8 +410,12 @@ var LLMClient = class {
407
410
  try {
408
411
  const parsed = JSON.parse(line.slice(6));
409
412
  const delta = parsed.choices?.[0]?.delta;
413
+ const reasoningToken = delta?.reasoning_content;
414
+ if (reasoningToken) {
415
+ callbacks?.onReasoningToken?.(reasoningToken);
416
+ }
410
417
  const token = delta?.content;
411
- if (!token && delta) {
418
+ if (!token && !reasoningToken && delta) {
412
419
  log(`SSE delta (no content): ${JSON.stringify(delta).slice(0, 200)}`, "warn");
413
420
  }
414
421
  if (token) {
@@ -3981,17 +3988,19 @@ var ToolLoopAgentRuntime = class {
3981
3988
  `No llmClient is configured for ${this.constructor.name}. Override executeLLMStep() or provide llmClient in the constructor.`
3982
3989
  );
3983
3990
  }
3991
+ const hasStreamCallbacks = Boolean(request.onToken ?? this.hooks.onToken ?? request.onReasoningToken);
3984
3992
  const response = await this.llmClient.chat(
3985
3993
  {
3986
3994
  messages: request.messages,
3987
3995
  temperature: request.temperature,
3988
3996
  maxTokens: request.maxTokens,
3989
- stream: Boolean(request.onToken ?? this.hooks.onToken),
3997
+ stream: hasStreamCallbacks,
3990
3998
  frequencyPenalty: request.frequencyPenalty,
3991
3999
  presencePenalty: request.presencePenalty
3992
4000
  },
3993
- request.onToken || this.hooks.onToken ? {
3994
- onToken: request.onToken ?? this.hooks.onToken
4001
+ hasStreamCallbacks ? {
4002
+ onToken: request.onToken ?? this.hooks.onToken,
4003
+ onReasoningToken: request.onReasoningToken
3995
4004
  } : void 0
3996
4005
  );
3997
4006
  return {
@@ -6635,14 +6644,31 @@ ${JSON.stringify(request.messages, null, 2)}`
6635
6644
  const runDetector = new StreamingToolParser({
6636
6645
  allowedRunTargets: this.getParserRunTargets()
6637
6646
  });
6647
+ let reasoningStarted = false;
6638
6648
  return {
6639
6649
  messages,
6640
6650
  temperature: options.temperature,
6641
6651
  maxTokens: options.maxTokens,
6642
6652
  normalizeResponseContent: () => runDetector.getFullContent(),
6653
+ onReasoningToken: (token) => {
6654
+ if (runId === void 0) return;
6655
+ if (!reasoningStarted) {
6656
+ reasoningStarted = true;
6657
+ this.appendStreamingToken(runId, "\u{1F4AD} thinking\u2026\n");
6658
+ }
6659
+ this.appendStreamingToken(runId, token);
6660
+ },
6643
6661
  onToken: (token) => {
6644
6662
  runDetector.push(token);
6645
6663
  if (runId !== void 0) {
6664
+ if (reasoningStarted) {
6665
+ reasoningStarted = false;
6666
+ this.clearPendingStreamingContent(runId);
6667
+ this.updateSnapshotForRun(runId, (state) => ({
6668
+ ...state,
6669
+ streamingContent: ""
6670
+ }));
6671
+ }
6646
6672
  if (runDetector.sealed) {
6647
6673
  this.clearPendingStreamingContent(runId);
6648
6674
  this.updateSnapshotForRun(runId, (state) => ({