hammer-ai 0.2.6 → 0.2.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -28,6 +28,20 @@ interface LLMProviderConfig {
28
28
  extraHeaders?: Record<string, string>;
29
29
  /** Custom fetch implementation (e.g. expo/fetch for RN streaming support). */
30
30
  fetchImpl?: FetchLike;
31
+ /**
32
+ * Explicitly enable or disable the provider's thinking/reasoning mode.
33
+ *
34
+ * - `false` — disables thinking (e.g. DashScope `enable_thinking: false` for
35
+ * Qwen3 models, which have thinking on by default). Prevents the silent
36
+ * multi-minute server-side CoT delay before the first token streams out.
37
+ * - `true` — explicitly enables thinking with the provider's default budget.
38
+ * - `undefined` — no thinking-related field is sent; the provider uses its
39
+ * own model default.
40
+ *
41
+ * Currently maps to `enable_thinking` in the request body, which is the
42
+ * DashScope OpenAI-compatible API parameter for Qwen3 models.
43
+ */
44
+ enableThinking?: boolean;
31
45
  }
32
46
  /** Options for a single chat completion request. */
33
47
  interface LLMRequestOptions {
@@ -49,6 +63,12 @@ interface StreamCallbacks {
49
63
  * accumulated so far.
50
64
  */
51
65
  onToken?: (token: string) => void | boolean;
66
+ /**
67
+ * Fired for every reasoning/thinking token received
68
+ * (delta.reasoning_content). Called before any content tokens arrive
69
+ * for models that emit a thinking phase (e.g. Qwen 3+).
70
+ */
71
+ onReasoningToken?: (token: string) => void;
52
72
  /**
53
73
  * Fired once when the first SSE data chunk arrives from the model.
54
74
  * Useful for closing premature-cancellation windows: Qwen 3+ models
@@ -1256,6 +1276,7 @@ interface ToolLoopRuntimeLLMRequest {
1256
1276
  frequencyPenalty?: number;
1257
1277
  presencePenalty?: number;
1258
1278
  onToken?: (token: string) => void | boolean;
1279
+ onReasoningToken?: (token: string) => void;
1259
1280
  normalizeResponseContent?: (content: string) => string;
1260
1281
  }
1261
1282
  interface ToolLoopRuntimeLLMResponse {
@@ -1303,9 +1324,9 @@ declare abstract class ToolLoopAgentRuntime<TMemory extends AgentMemoryLayer = A
1303
1324
  protected constructor(deps: ToolLoopAgentRuntimeDeps<TStepInput>);
1304
1325
  protected abstract createRuntimeSetup(): Promise<ToolLoopRuntimeSetup<TMemory, TEnforcer>>;
1305
1326
  protected abstract getToolDefinitions(): ToolDefinition[];
1306
- protected abstract buildSystemPrompt(context: ToolLoopRuntimeStepContext<TStepInput>): string;
1307
- protected abstract buildLLMRequest(context: ToolLoopRuntimeStepContext<TStepInput>, messages: ChatMessage[]): ToolLoopRuntimeLLMRequest;
1308
- protected abstract parseStepResponse(response: ToolLoopRuntimeLLMResponse, tools: ToolDefinition[]): ParsedStepInput;
1327
+ protected buildSystemPrompt(_context: ToolLoopRuntimeStepContext<TStepInput>): string;
1328
+ protected buildLLMRequest(_context: ToolLoopRuntimeStepContext<TStepInput>, _messages: ChatMessage[]): ToolLoopRuntimeLLMRequest;
1329
+ protected parseStepResponse(_response: ToolLoopRuntimeLLMResponse, _tools: ToolDefinition[]): ParsedStepInput;
1309
1330
  protected get runtimeLoop(): AgentLoop | null;
1310
1331
  protected get runtimeMemory(): TMemory | null;
1311
1332
  protected get runtimeEnforcer(): TEnforcer | null;
package/dist/index.js CHANGED
@@ -176,6 +176,9 @@ function buildChatPayload(config, basePayload) {
176
176
  if (shouldOmitTemperature(config)) {
177
177
  delete payload.temperature;
178
178
  }
179
+ if (config.enableThinking !== void 0) {
180
+ payload.enable_thinking = config.enableThinking;
181
+ }
179
182
  return payload;
180
183
  }
181
184
  var LLMClient = class {
@@ -407,8 +410,12 @@ var LLMClient = class {
407
410
  try {
408
411
  const parsed = JSON.parse(line.slice(6));
409
412
  const delta = parsed.choices?.[0]?.delta;
413
+ const reasoningToken = delta?.reasoning_content;
414
+ if (reasoningToken) {
415
+ callbacks?.onReasoningToken?.(reasoningToken);
416
+ }
410
417
  const token = delta?.content;
411
- if (!token && delta) {
418
+ if (!token && !reasoningToken && delta) {
412
419
  log(`SSE delta (no content): ${JSON.stringify(delta).slice(0, 200)}`, "warn");
413
420
  }
414
421
  if (token) {
@@ -3929,6 +3936,21 @@ var ToolLoopAgentRuntime = class {
3929
3936
  this.hooks = deps.hooks ?? {};
3930
3937
  this.requireTodoListOnFirstResponse = deps.requireTodoListOnFirstResponse ?? false;
3931
3938
  }
3939
+ buildSystemPrompt(_context) {
3940
+ throw new Error(
3941
+ `buildSystemPrompt is not implemented for ${this.constructor.name}`
3942
+ );
3943
+ }
3944
+ buildLLMRequest(_context, _messages) {
3945
+ throw new Error(
3946
+ `buildLLMRequest is not implemented for ${this.constructor.name}`
3947
+ );
3948
+ }
3949
+ parseStepResponse(_response, _tools) {
3950
+ throw new Error(
3951
+ `parseStepResponse is not implemented for ${this.constructor.name}`
3952
+ );
3953
+ }
3932
3954
  get runtimeLoop() {
3933
3955
  return this.infrastructure?.loop ?? null;
3934
3956
  }
@@ -3966,17 +3988,19 @@ var ToolLoopAgentRuntime = class {
3966
3988
  `No llmClient is configured for ${this.constructor.name}. Override executeLLMStep() or provide llmClient in the constructor.`
3967
3989
  );
3968
3990
  }
3991
+ const hasStreamCallbacks = Boolean(request.onToken ?? this.hooks.onToken ?? request.onReasoningToken);
3969
3992
  const response = await this.llmClient.chat(
3970
3993
  {
3971
3994
  messages: request.messages,
3972
3995
  temperature: request.temperature,
3973
3996
  maxTokens: request.maxTokens,
3974
- stream: Boolean(request.onToken ?? this.hooks.onToken),
3997
+ stream: hasStreamCallbacks,
3975
3998
  frequencyPenalty: request.frequencyPenalty,
3976
3999
  presencePenalty: request.presencePenalty
3977
4000
  },
3978
- request.onToken || this.hooks.onToken ? {
3979
- onToken: request.onToken ?? this.hooks.onToken
4001
+ hasStreamCallbacks ? {
4002
+ onToken: request.onToken ?? this.hooks.onToken,
4003
+ onReasoningToken: request.onReasoningToken
3980
4004
  } : void 0
3981
4005
  );
3982
4006
  return {
@@ -6620,14 +6644,31 @@ ${JSON.stringify(request.messages, null, 2)}`
6620
6644
  const runDetector = new StreamingToolParser({
6621
6645
  allowedRunTargets: this.getParserRunTargets()
6622
6646
  });
6647
+ let reasoningStarted = false;
6623
6648
  return {
6624
6649
  messages,
6625
6650
  temperature: options.temperature,
6626
6651
  maxTokens: options.maxTokens,
6627
6652
  normalizeResponseContent: () => runDetector.getFullContent(),
6653
+ onReasoningToken: (token) => {
6654
+ if (runId === void 0) return;
6655
+ if (!reasoningStarted) {
6656
+ reasoningStarted = true;
6657
+ this.appendStreamingToken(runId, "\u{1F4AD} thinking\u2026\n");
6658
+ }
6659
+ this.appendStreamingToken(runId, token);
6660
+ },
6628
6661
  onToken: (token) => {
6629
6662
  runDetector.push(token);
6630
6663
  if (runId !== void 0) {
6664
+ if (reasoningStarted) {
6665
+ reasoningStarted = false;
6666
+ this.clearPendingStreamingContent(runId);
6667
+ this.updateSnapshotForRun(runId, (state) => ({
6668
+ ...state,
6669
+ streamingContent: ""
6670
+ }));
6671
+ }
6631
6672
  if (runDetector.sealed) {
6632
6673
  this.clearPendingStreamingContent(runId);
6633
6674
  this.updateSnapshotForRun(runId, (state) => ({