hammer-ai 0.2.6 → 0.2.8
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +24 -3
- package/dist/index.js +45 -4
- package/dist/index.js.map +1 -1
- package/package.json +5 -2
package/dist/index.d.ts
CHANGED
|
@@ -28,6 +28,20 @@ interface LLMProviderConfig {
|
|
|
28
28
|
extraHeaders?: Record<string, string>;
|
|
29
29
|
/** Custom fetch implementation (e.g. expo/fetch for RN streaming support). */
|
|
30
30
|
fetchImpl?: FetchLike;
|
|
31
|
+
/**
|
|
32
|
+
* Explicitly enable or disable the provider's thinking/reasoning mode.
|
|
33
|
+
*
|
|
34
|
+
* - `false` — disables thinking (e.g. DashScope `enable_thinking: false` for
|
|
35
|
+
* Qwen3 models, which have thinking on by default). Prevents the silent
|
|
36
|
+
* multi-minute server-side CoT delay before the first token streams out.
|
|
37
|
+
* - `true` — explicitly enables thinking with the provider's default budget.
|
|
38
|
+
* - `undefined` — no thinking-related field is sent; the provider uses its
|
|
39
|
+
* own model default.
|
|
40
|
+
*
|
|
41
|
+
* Currently maps to `enable_thinking` in the request body, which is the
|
|
42
|
+
* DashScope OpenAI-compatible API parameter for Qwen3 models.
|
|
43
|
+
*/
|
|
44
|
+
enableThinking?: boolean;
|
|
31
45
|
}
|
|
32
46
|
/** Options for a single chat completion request. */
|
|
33
47
|
interface LLMRequestOptions {
|
|
@@ -49,6 +63,12 @@ interface StreamCallbacks {
|
|
|
49
63
|
* accumulated so far.
|
|
50
64
|
*/
|
|
51
65
|
onToken?: (token: string) => void | boolean;
|
|
66
|
+
/**
|
|
67
|
+
* Fired for every non-empty reasoning/thinking token received
|
|
68
|
+
* (delta.reasoning_content). Called before any content tokens arrive
|
|
69
|
+
* for models that emit a thinking phase (e.g. Qwen 3+).
|
|
70
|
+
*/
|
|
71
|
+
onReasoningToken?: (token: string) => void;
|
|
52
72
|
/**
|
|
53
73
|
* Fired once when the first SSE data chunk arrives from the model.
|
|
54
74
|
* Useful for closing premature-cancellation windows: Qwen 3+ models
|
|
@@ -1256,6 +1276,7 @@ interface ToolLoopRuntimeLLMRequest {
|
|
|
1256
1276
|
frequencyPenalty?: number;
|
|
1257
1277
|
presencePenalty?: number;
|
|
1258
1278
|
onToken?: (token: string) => void | boolean;
|
|
1279
|
+
onReasoningToken?: (token: string) => void;
|
|
1259
1280
|
normalizeResponseContent?: (content: string) => string;
|
|
1260
1281
|
}
|
|
1261
1282
|
interface ToolLoopRuntimeLLMResponse {
|
|
@@ -1303,9 +1324,9 @@ declare abstract class ToolLoopAgentRuntime<TMemory extends AgentMemoryLayer = A
|
|
|
1303
1324
|
protected constructor(deps: ToolLoopAgentRuntimeDeps<TStepInput>);
|
|
1304
1325
|
protected abstract createRuntimeSetup(): Promise<ToolLoopRuntimeSetup<TMemory, TEnforcer>>;
|
|
1305
1326
|
protected abstract getToolDefinitions(): ToolDefinition[];
|
|
1306
|
-
protected
|
|
1307
|
-
protected
|
|
1308
|
-
protected
|
|
1327
|
+
protected buildSystemPrompt(_context: ToolLoopRuntimeStepContext<TStepInput>): string;
|
|
1328
|
+
protected buildLLMRequest(_context: ToolLoopRuntimeStepContext<TStepInput>, _messages: ChatMessage[]): ToolLoopRuntimeLLMRequest;
|
|
1329
|
+
protected parseStepResponse(_response: ToolLoopRuntimeLLMResponse, _tools: ToolDefinition[]): ParsedStepInput;
|
|
1309
1330
|
protected get runtimeLoop(): AgentLoop | null;
|
|
1310
1331
|
protected get runtimeMemory(): TMemory | null;
|
|
1311
1332
|
protected get runtimeEnforcer(): TEnforcer | null;
|
package/dist/index.js
CHANGED
|
@@ -176,6 +176,9 @@ function buildChatPayload(config, basePayload) {
|
|
|
176
176
|
if (shouldOmitTemperature(config)) {
|
|
177
177
|
delete payload.temperature;
|
|
178
178
|
}
|
|
179
|
+
if (config.enableThinking !== void 0) {
|
|
180
|
+
payload.enable_thinking = config.enableThinking;
|
|
181
|
+
}
|
|
179
182
|
return payload;
|
|
180
183
|
}
|
|
181
184
|
var LLMClient = class {
|
|
@@ -407,8 +410,12 @@ var LLMClient = class {
|
|
|
407
410
|
try {
|
|
408
411
|
const parsed = JSON.parse(line.slice(6));
|
|
409
412
|
const delta = parsed.choices?.[0]?.delta;
|
|
413
|
+
const reasoningToken = delta?.reasoning_content;
|
|
414
|
+
if (reasoningToken) {
|
|
415
|
+
callbacks?.onReasoningToken?.(reasoningToken);
|
|
416
|
+
}
|
|
410
417
|
const token = delta?.content;
|
|
411
|
-
if (!token && delta) {
|
|
418
|
+
if (!token && !reasoningToken && delta) {
|
|
412
419
|
log(`SSE delta (no content): ${JSON.stringify(delta).slice(0, 200)}`, "warn");
|
|
413
420
|
}
|
|
414
421
|
if (token) {
|
|
@@ -3929,6 +3936,21 @@ var ToolLoopAgentRuntime = class {
|
|
|
3929
3936
|
this.hooks = deps.hooks ?? {};
|
|
3930
3937
|
this.requireTodoListOnFirstResponse = deps.requireTodoListOnFirstResponse ?? false;
|
|
3931
3938
|
}
|
|
3939
|
+
buildSystemPrompt(_context) {
|
|
3940
|
+
throw new Error(
|
|
3941
|
+
`buildSystemPrompt is not implemented for ${this.constructor.name}`
|
|
3942
|
+
);
|
|
3943
|
+
}
|
|
3944
|
+
buildLLMRequest(_context, _messages) {
|
|
3945
|
+
throw new Error(
|
|
3946
|
+
`buildLLMRequest is not implemented for ${this.constructor.name}`
|
|
3947
|
+
);
|
|
3948
|
+
}
|
|
3949
|
+
parseStepResponse(_response, _tools) {
|
|
3950
|
+
throw new Error(
|
|
3951
|
+
`parseStepResponse is not implemented for ${this.constructor.name}`
|
|
3952
|
+
);
|
|
3953
|
+
}
|
|
3932
3954
|
get runtimeLoop() {
|
|
3933
3955
|
return this.infrastructure?.loop ?? null;
|
|
3934
3956
|
}
|
|
@@ -3966,17 +3988,19 @@ var ToolLoopAgentRuntime = class {
|
|
|
3966
3988
|
`No llmClient is configured for ${this.constructor.name}. Override executeLLMStep() or provide llmClient in the constructor.`
|
|
3967
3989
|
);
|
|
3968
3990
|
}
|
|
3991
|
+
const hasStreamCallbacks = Boolean(request.onToken ?? this.hooks.onToken ?? request.onReasoningToken);
|
|
3969
3992
|
const response = await this.llmClient.chat(
|
|
3970
3993
|
{
|
|
3971
3994
|
messages: request.messages,
|
|
3972
3995
|
temperature: request.temperature,
|
|
3973
3996
|
maxTokens: request.maxTokens,
|
|
3974
|
-
stream:
|
|
3997
|
+
stream: hasStreamCallbacks,
|
|
3975
3998
|
frequencyPenalty: request.frequencyPenalty,
|
|
3976
3999
|
presencePenalty: request.presencePenalty
|
|
3977
4000
|
},
|
|
3978
|
-
|
|
3979
|
-
onToken: request.onToken ?? this.hooks.onToken
|
|
4001
|
+
hasStreamCallbacks ? {
|
|
4002
|
+
onToken: request.onToken ?? this.hooks.onToken,
|
|
4003
|
+
onReasoningToken: request.onReasoningToken
|
|
3980
4004
|
} : void 0
|
|
3981
4005
|
);
|
|
3982
4006
|
return {
|
|
@@ -6620,14 +6644,31 @@ ${JSON.stringify(request.messages, null, 2)}`
|
|
|
6620
6644
|
const runDetector = new StreamingToolParser({
|
|
6621
6645
|
allowedRunTargets: this.getParserRunTargets()
|
|
6622
6646
|
});
|
|
6647
|
+
let reasoningStarted = false;
|
|
6623
6648
|
return {
|
|
6624
6649
|
messages,
|
|
6625
6650
|
temperature: options.temperature,
|
|
6626
6651
|
maxTokens: options.maxTokens,
|
|
6627
6652
|
normalizeResponseContent: () => runDetector.getFullContent(),
|
|
6653
|
+
onReasoningToken: (token) => {
|
|
6654
|
+
if (runId === void 0) return;
|
|
6655
|
+
if (!reasoningStarted) {
|
|
6656
|
+
reasoningStarted = true;
|
|
6657
|
+
this.appendStreamingToken(runId, "\u{1F4AD} thinking\u2026\n");
|
|
6658
|
+
}
|
|
6659
|
+
this.appendStreamingToken(runId, token);
|
|
6660
|
+
},
|
|
6628
6661
|
onToken: (token) => {
|
|
6629
6662
|
runDetector.push(token);
|
|
6630
6663
|
if (runId !== void 0) {
|
|
6664
|
+
if (reasoningStarted) {
|
|
6665
|
+
reasoningStarted = false;
|
|
6666
|
+
this.clearPendingStreamingContent(runId);
|
|
6667
|
+
this.updateSnapshotForRun(runId, (state) => ({
|
|
6668
|
+
...state,
|
|
6669
|
+
streamingContent: ""
|
|
6670
|
+
}));
|
|
6671
|
+
}
|
|
6631
6672
|
if (runDetector.sealed) {
|
|
6632
6673
|
this.clearPendingStreamingContent(runId);
|
|
6633
6674
|
this.updateSnapshotForRun(runId, (state) => ({
|