hammer-ai 0.2.7 → 0.2.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +21 -0
- package/dist/index.js +37 -27
- package/dist/index.js.map +1 -1
- package/package.json +5 -2
package/dist/index.d.ts
CHANGED
|
@@ -28,6 +28,20 @@ interface LLMProviderConfig {
|
|
|
28
28
|
extraHeaders?: Record<string, string>;
|
|
29
29
|
/** Custom fetch implementation (e.g. expo/fetch for RN streaming support). */
|
|
30
30
|
fetchImpl?: FetchLike;
|
|
31
|
+
/**
|
|
32
|
+
* Explicitly enable or disable the provider's thinking/reasoning mode.
|
|
33
|
+
*
|
|
34
|
+
* - `false` — disables thinking (e.g. DashScope `enable_thinking: false` for
|
|
35
|
+
* Qwen3 models, which have thinking on by default). Prevents the silent
|
|
36
|
+
* multi-minute server-side CoT delay before the first token streams out.
|
|
37
|
+
* - `true` — explicitly enables thinking with the provider's default budget.
|
|
38
|
+
* - `undefined` — no thinking-related field is sent; the provider uses its
|
|
39
|
+
* own model default.
|
|
40
|
+
*
|
|
41
|
+
* Currently maps to `enable_thinking` in the request body, which is the
|
|
42
|
+
* DashScope OpenAI-compatible API parameter for Qwen3 models.
|
|
43
|
+
*/
|
|
44
|
+
enableThinking?: boolean;
|
|
31
45
|
}
|
|
32
46
|
/** Options for a single chat completion request. */
|
|
33
47
|
interface LLMRequestOptions {
|
|
@@ -49,6 +63,12 @@ interface StreamCallbacks {
|
|
|
49
63
|
* accumulated so far.
|
|
50
64
|
*/
|
|
51
65
|
onToken?: (token: string) => void | boolean;
|
|
66
|
+
/**
|
|
67
|
+
* Fired for every reasoning/thinking token received
|
|
68
|
+
* (delta.reasoning_content). Called before any content tokens arrive
|
|
69
|
+
* for models that emit a thinking phase (e.g. Qwen 3+).
|
|
70
|
+
*/
|
|
71
|
+
onReasoningToken?: (token: string) => void;
|
|
52
72
|
/**
|
|
53
73
|
* Fired once when the first SSE data chunk arrives from the model.
|
|
54
74
|
* Useful for closing premature-cancellation windows: Qwen 3+ models
|
|
@@ -1256,6 +1276,7 @@ interface ToolLoopRuntimeLLMRequest {
|
|
|
1256
1276
|
frequencyPenalty?: number;
|
|
1257
1277
|
presencePenalty?: number;
|
|
1258
1278
|
onToken?: (token: string) => void | boolean;
|
|
1279
|
+
onReasoningToken?: (token: string) => void;
|
|
1259
1280
|
normalizeResponseContent?: (content: string) => string;
|
|
1260
1281
|
}
|
|
1261
1282
|
interface ToolLoopRuntimeLLMResponse {
|
package/dist/index.js
CHANGED
|
@@ -168,16 +168,6 @@ var LLMResponseSchema = z.object({
|
|
|
168
168
|
}).strict();
|
|
169
169
|
|
|
170
170
|
// src/llm-client.ts
|
|
171
|
-
function shouldOmitTemperature(config) {
|
|
172
|
-
return config.model === "kimi-k2.5";
|
|
173
|
-
}
|
|
174
|
-
function buildChatPayload(config, basePayload) {
|
|
175
|
-
const payload = { ...basePayload };
|
|
176
|
-
if (shouldOmitTemperature(config)) {
|
|
177
|
-
delete payload.temperature;
|
|
178
|
-
}
|
|
179
|
-
return payload;
|
|
180
|
-
}
|
|
181
171
|
var LLMClient = class {
|
|
182
172
|
config;
|
|
183
173
|
constructor(config) {
|
|
@@ -210,7 +200,7 @@ var LLMClient = class {
|
|
|
210
200
|
presencePenalty = 0,
|
|
211
201
|
signal
|
|
212
202
|
} = options;
|
|
213
|
-
const payload = buildChatPayload(this.config, {
|
|
203
|
+
const payload = {
|
|
214
204
|
model: this.config.model,
|
|
215
205
|
messages,
|
|
216
206
|
temperature,
|
|
@@ -218,7 +208,13 @@ var LLMClient = class {
|
|
|
218
208
|
frequency_penalty: frequencyPenalty,
|
|
219
209
|
presence_penalty: presencePenalty,
|
|
220
210
|
stream
|
|
221
|
-
});
|
|
211
|
+
};
|
|
212
|
+
if (this.config.model === "kimi-k2.5") {
|
|
213
|
+
delete payload.temperature;
|
|
214
|
+
}
|
|
215
|
+
if (this.config.enableThinking !== void 0) {
|
|
216
|
+
payload.enable_thinking = this.config.enableThinking;
|
|
217
|
+
}
|
|
222
218
|
const headers = {
|
|
223
219
|
"Content-Type": "application/json",
|
|
224
220
|
Authorization: `Bearer ${this.config.apiKey}`,
|
|
@@ -236,7 +232,7 @@ var LLMClient = class {
|
|
|
236
232
|
}
|
|
237
233
|
if (attempt > 1) {
|
|
238
234
|
log(`Retry attempt ${attempt}/${maxRetries}\u2026`, "warn");
|
|
239
|
-
await sleep(1e3 * attempt);
|
|
235
|
+
await new Promise((r) => setTimeout(r, 1e3 * attempt));
|
|
240
236
|
}
|
|
241
237
|
const controller = new AbortController();
|
|
242
238
|
const abortFetch = () => {
|
|
@@ -254,7 +250,7 @@ var LLMClient = class {
|
|
|
254
250
|
if (!response.ok) {
|
|
255
251
|
const errorText = await response.text();
|
|
256
252
|
const err = new ApiError(response.status, errorText);
|
|
257
|
-
if (isRetryableStatus(response.status) && attempt < maxRetries) {
|
|
253
|
+
if ((response.status === 429 || response.status === 500 || response.status === 502 || response.status === 503 || response.status === 408) && attempt < maxRetries) {
|
|
258
254
|
log(`Transient HTTP ${response.status}. Retrying\u2026`, "warn");
|
|
259
255
|
lastError = err;
|
|
260
256
|
continue;
|
|
@@ -407,8 +403,12 @@ var LLMClient = class {
|
|
|
407
403
|
try {
|
|
408
404
|
const parsed = JSON.parse(line.slice(6));
|
|
409
405
|
const delta = parsed.choices?.[0]?.delta;
|
|
406
|
+
const reasoningToken = delta?.reasoning_content;
|
|
407
|
+
if (reasoningToken) {
|
|
408
|
+
callbacks?.onReasoningToken?.(reasoningToken);
|
|
409
|
+
}
|
|
410
410
|
const token = delta?.content;
|
|
411
|
-
if (!token && delta) {
|
|
411
|
+
if (!token && !reasoningToken && delta) {
|
|
412
412
|
log(`SSE delta (no content): ${JSON.stringify(delta).slice(0, 200)}`, "warn");
|
|
413
413
|
}
|
|
414
414
|
if (token) {
|
|
@@ -448,9 +448,6 @@ var LLMClient = class {
|
|
|
448
448
|
});
|
|
449
449
|
try {
|
|
450
450
|
await streamPromise;
|
|
451
|
-
} catch (err) {
|
|
452
|
-
if (err.message?.includes("getFirstChunkTimeout()")) throw err;
|
|
453
|
-
throw err;
|
|
454
451
|
} finally {
|
|
455
452
|
cleanup();
|
|
456
453
|
signal?.removeEventListener("abort", abortStream);
|
|
@@ -476,9 +473,6 @@ var ApiError = class extends Error {
|
|
|
476
473
|
}
|
|
477
474
|
status;
|
|
478
475
|
};
|
|
479
|
-
function isRetryableStatus(status) {
|
|
480
|
-
return status === 429 || status === 500 || status === 502 || status === 503 || status === 408;
|
|
481
|
-
}
|
|
482
476
|
function isNetworkError(err) {
|
|
483
477
|
const code = err.code ?? err.cause?.code ?? "";
|
|
484
478
|
return code === "ENOTFOUND" || code === "ETIMEDOUT" || code === "ECONNREFUSED" || code === "UND_ERR_CONNECT_TIMEOUT" || typeof err.message === "string" && err.message.includes("fetch failed");
|
|
@@ -499,9 +493,6 @@ function createAbortError(reason) {
|
|
|
499
493
|
error.name = "AbortError";
|
|
500
494
|
return error;
|
|
501
495
|
}
|
|
502
|
-
function sleep(ms) {
|
|
503
|
-
return new Promise((r) => setTimeout(r, ms));
|
|
504
|
-
}
|
|
505
496
|
var AGENT_MACHINE_STATES = [
|
|
506
497
|
"idle",
|
|
507
498
|
"prompting",
|
|
@@ -3981,17 +3972,19 @@ var ToolLoopAgentRuntime = class {
|
|
|
3981
3972
|
`No llmClient is configured for ${this.constructor.name}. Override executeLLMStep() or provide llmClient in the constructor.`
|
|
3982
3973
|
);
|
|
3983
3974
|
}
|
|
3975
|
+
const hasStreamCallbacks = Boolean(request.onToken ?? this.hooks.onToken ?? request.onReasoningToken);
|
|
3984
3976
|
const response = await this.llmClient.chat(
|
|
3985
3977
|
{
|
|
3986
3978
|
messages: request.messages,
|
|
3987
3979
|
temperature: request.temperature,
|
|
3988
3980
|
maxTokens: request.maxTokens,
|
|
3989
|
-
stream:
|
|
3981
|
+
stream: hasStreamCallbacks,
|
|
3990
3982
|
frequencyPenalty: request.frequencyPenalty,
|
|
3991
3983
|
presencePenalty: request.presencePenalty
|
|
3992
3984
|
},
|
|
3993
|
-
|
|
3994
|
-
onToken: request.onToken ?? this.hooks.onToken
|
|
3985
|
+
hasStreamCallbacks ? {
|
|
3986
|
+
onToken: request.onToken ?? this.hooks.onToken,
|
|
3987
|
+
onReasoningToken: request.onReasoningToken
|
|
3995
3988
|
} : void 0
|
|
3996
3989
|
);
|
|
3997
3990
|
return {
|
|
@@ -6635,14 +6628,31 @@ ${JSON.stringify(request.messages, null, 2)}`
|
|
|
6635
6628
|
const runDetector = new StreamingToolParser({
|
|
6636
6629
|
allowedRunTargets: this.getParserRunTargets()
|
|
6637
6630
|
});
|
|
6631
|
+
let reasoningStarted = false;
|
|
6638
6632
|
return {
|
|
6639
6633
|
messages,
|
|
6640
6634
|
temperature: options.temperature,
|
|
6641
6635
|
maxTokens: options.maxTokens,
|
|
6642
6636
|
normalizeResponseContent: () => runDetector.getFullContent(),
|
|
6637
|
+
onReasoningToken: (token) => {
|
|
6638
|
+
if (runId === void 0) return;
|
|
6639
|
+
if (!reasoningStarted) {
|
|
6640
|
+
reasoningStarted = true;
|
|
6641
|
+
this.appendStreamingToken(runId, "\u{1F4AD} thinking\u2026\n");
|
|
6642
|
+
}
|
|
6643
|
+
this.appendStreamingToken(runId, token);
|
|
6644
|
+
},
|
|
6643
6645
|
onToken: (token) => {
|
|
6644
6646
|
runDetector.push(token);
|
|
6645
6647
|
if (runId !== void 0) {
|
|
6648
|
+
if (reasoningStarted) {
|
|
6649
|
+
reasoningStarted = false;
|
|
6650
|
+
this.clearPendingStreamingContent(runId);
|
|
6651
|
+
this.updateSnapshotForRun(runId, (state) => ({
|
|
6652
|
+
...state,
|
|
6653
|
+
streamingContent: ""
|
|
6654
|
+
}));
|
|
6655
|
+
}
|
|
6646
6656
|
if (runDetector.sealed) {
|
|
6647
6657
|
this.clearPendingStreamingContent(runId);
|
|
6648
6658
|
this.updateSnapshotForRun(runId, (state) => ({
|