llmist 0.6.2 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-TSR25DAY.js → chunk-4IMGADVY.js} +2 -2
- package/dist/{chunk-DVK6ZQOV.js → chunk-62M4TDAK.js} +501 -78
- package/dist/chunk-62M4TDAK.js.map +1 -0
- package/dist/cli.cjs +946 -197
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.js +436 -110
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +511 -88
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +6 -9
- package/dist/index.d.ts +6 -9
- package/dist/index.js +2 -2
- package/dist/{mock-stream-B5R6XPif.d.cts → mock-stream-CjmvWDc3.d.cts} +91 -20
- package/dist/{mock-stream-B5R6XPif.d.ts → mock-stream-CjmvWDc3.d.ts} +91 -20
- package/dist/testing/index.cjs +497 -74
- package/dist/testing/index.cjs.map +1 -1
- package/dist/testing/index.d.cts +2 -2
- package/dist/testing/index.d.ts +2 -2
- package/dist/testing/index.js +2 -2
- package/package.json +2 -1
- package/dist/chunk-DVK6ZQOV.js.map +0 -1
- /package/dist/{chunk-TSR25DAY.js.map → chunk-4IMGADVY.js.map} +0 -0
package/dist/index.cjs
CHANGED
|
@@ -30,6 +30,20 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
|
|
|
30
30
|
));
|
|
31
31
|
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
32
32
|
|
|
33
|
+
// src/core/constants.ts
|
|
34
|
+
var GADGET_START_PREFIX, GADGET_END_PREFIX, DEFAULT_GADGET_OUTPUT_LIMIT, DEFAULT_GADGET_OUTPUT_LIMIT_PERCENT, CHARS_PER_TOKEN, FALLBACK_CONTEXT_WINDOW;
|
|
35
|
+
var init_constants = __esm({
|
|
36
|
+
"src/core/constants.ts"() {
|
|
37
|
+
"use strict";
|
|
38
|
+
GADGET_START_PREFIX = "!!!GADGET_START:";
|
|
39
|
+
GADGET_END_PREFIX = "!!!GADGET_END";
|
|
40
|
+
DEFAULT_GADGET_OUTPUT_LIMIT = true;
|
|
41
|
+
DEFAULT_GADGET_OUTPUT_LIMIT_PERCENT = 15;
|
|
42
|
+
CHARS_PER_TOKEN = 4;
|
|
43
|
+
FALLBACK_CONTEXT_WINDOW = 128e3;
|
|
44
|
+
}
|
|
45
|
+
});
|
|
46
|
+
|
|
33
47
|
// src/core/model-shortcuts.ts
|
|
34
48
|
function isKnownModelPattern(model) {
|
|
35
49
|
const normalized = model.toLowerCase();
|
|
@@ -344,20 +358,6 @@ var init_registry = __esm({
|
|
|
344
358
|
}
|
|
345
359
|
});
|
|
346
360
|
|
|
347
|
-
// src/core/constants.ts
|
|
348
|
-
var GADGET_START_PREFIX, GADGET_END_PREFIX, DEFAULT_GADGET_OUTPUT_LIMIT, DEFAULT_GADGET_OUTPUT_LIMIT_PERCENT, CHARS_PER_TOKEN, FALLBACK_CONTEXT_WINDOW;
|
|
349
|
-
var init_constants = __esm({
|
|
350
|
-
"src/core/constants.ts"() {
|
|
351
|
-
"use strict";
|
|
352
|
-
GADGET_START_PREFIX = "!!!GADGET_START:";
|
|
353
|
-
GADGET_END_PREFIX = "!!!GADGET_END";
|
|
354
|
-
DEFAULT_GADGET_OUTPUT_LIMIT = true;
|
|
355
|
-
DEFAULT_GADGET_OUTPUT_LIMIT_PERCENT = 15;
|
|
356
|
-
CHARS_PER_TOKEN = 4;
|
|
357
|
-
FALLBACK_CONTEXT_WINDOW = 128e3;
|
|
358
|
-
}
|
|
359
|
-
});
|
|
360
|
-
|
|
361
361
|
// src/core/prompt-config.ts
|
|
362
362
|
function resolvePromptTemplate(template, defaultValue, context) {
|
|
363
363
|
const resolved = template ?? defaultValue;
|
|
@@ -881,7 +881,7 @@ function findSafeDelimiter(content) {
|
|
|
881
881
|
}
|
|
882
882
|
let counter = 1;
|
|
883
883
|
while (counter < 1e3) {
|
|
884
|
-
const delimiter = `
|
|
884
|
+
const delimiter = `__GADGET_PARAM_${counter}__`;
|
|
885
885
|
const regex = new RegExp(`^${delimiter}\\s*$`);
|
|
886
886
|
const isUsed = lines.some((line) => regex.test(line));
|
|
887
887
|
if (!isUsed) {
|
|
@@ -939,6 +939,10 @@ function formatParamsAsYaml(params) {
|
|
|
939
939
|
}
|
|
940
940
|
return lines.join("\n");
|
|
941
941
|
}
|
|
942
|
+
function formatTomlInlineTable(obj) {
|
|
943
|
+
const entries = Object.entries(obj).map(([k, v]) => `${k} = ${formatTomlValue(v)}`);
|
|
944
|
+
return `{ ${entries.join(", ")} }`;
|
|
945
|
+
}
|
|
942
946
|
function formatTomlValue(value) {
|
|
943
947
|
if (typeof value === "string") {
|
|
944
948
|
if (value.includes("\n")) {
|
|
@@ -956,10 +960,17 @@ ${delimiter}`;
|
|
|
956
960
|
return '""';
|
|
957
961
|
}
|
|
958
962
|
if (Array.isArray(value)) {
|
|
959
|
-
|
|
963
|
+
if (value.length === 0) return "[]";
|
|
964
|
+
const items = value.map((item) => {
|
|
965
|
+
if (typeof item === "object" && item !== null && !Array.isArray(item)) {
|
|
966
|
+
return formatTomlInlineTable(item);
|
|
967
|
+
}
|
|
968
|
+
return formatTomlValue(item);
|
|
969
|
+
});
|
|
970
|
+
return `[${items.join(", ")}]`;
|
|
960
971
|
}
|
|
961
972
|
if (typeof value === "object") {
|
|
962
|
-
return
|
|
973
|
+
return formatTomlInlineTable(value);
|
|
963
974
|
}
|
|
964
975
|
return JSON.stringify(value);
|
|
965
976
|
}
|
|
@@ -977,7 +988,16 @@ var init_gadget = __esm({
|
|
|
977
988
|
yaml = __toESM(require("js-yaml"), 1);
|
|
978
989
|
init_schema_to_json();
|
|
979
990
|
init_schema_validator();
|
|
980
|
-
HEREDOC_DELIMITERS = [
|
|
991
|
+
HEREDOC_DELIMITERS = [
|
|
992
|
+
"__GADGET_PARAM_EOF__",
|
|
993
|
+
"__GADGET_PARAM_END__",
|
|
994
|
+
"__GADGET_PARAM_DOC__",
|
|
995
|
+
"__GADGET_PARAM_CONTENT__",
|
|
996
|
+
"__GADGET_PARAM_TEXT__",
|
|
997
|
+
"__GADGET_PARAM_HEREDOC__",
|
|
998
|
+
"__GADGET_PARAM_DATA__",
|
|
999
|
+
"__GADGET_PARAM_BLOCK__"
|
|
1000
|
+
];
|
|
981
1001
|
BaseGadget = class {
|
|
982
1002
|
/**
|
|
983
1003
|
* The name of the gadget. Used for identification when LLM calls it.
|
|
@@ -2005,6 +2025,14 @@ function preprocessTomlHeredoc(tomlStr) {
|
|
|
2005
2025
|
}
|
|
2006
2026
|
return result.join("\n");
|
|
2007
2027
|
}
|
|
2028
|
+
function stripMarkdownFences(content) {
|
|
2029
|
+
let cleaned = content.trim();
|
|
2030
|
+
const openingFence = /^```(?:toml|yaml|json)?\s*\n/i;
|
|
2031
|
+
const closingFence = /\n?```\s*$/;
|
|
2032
|
+
cleaned = cleaned.replace(openingFence, "");
|
|
2033
|
+
cleaned = cleaned.replace(closingFence, "");
|
|
2034
|
+
return cleaned.trim();
|
|
2035
|
+
}
|
|
2008
2036
|
var yaml2, import_js_toml, globalInvocationCounter, StreamParser;
|
|
2009
2037
|
var init_parser = __esm({
|
|
2010
2038
|
"src/gadgets/parser.ts"() {
|
|
@@ -2060,35 +2088,36 @@ var init_parser = __esm({
|
|
|
2060
2088
|
* Parse parameter string according to configured format
|
|
2061
2089
|
*/
|
|
2062
2090
|
parseParameters(raw) {
|
|
2091
|
+
const cleaned = stripMarkdownFences(raw);
|
|
2063
2092
|
if (this.parameterFormat === "json") {
|
|
2064
2093
|
try {
|
|
2065
|
-
return { parameters: JSON.parse(
|
|
2094
|
+
return { parameters: JSON.parse(cleaned) };
|
|
2066
2095
|
} catch (error) {
|
|
2067
2096
|
return { parseError: this.truncateParseError(error, "JSON") };
|
|
2068
2097
|
}
|
|
2069
2098
|
}
|
|
2070
2099
|
if (this.parameterFormat === "yaml") {
|
|
2071
2100
|
try {
|
|
2072
|
-
return { parameters: yaml2.load(preprocessYaml(
|
|
2101
|
+
return { parameters: yaml2.load(preprocessYaml(cleaned)) };
|
|
2073
2102
|
} catch (error) {
|
|
2074
2103
|
return { parseError: this.truncateParseError(error, "YAML") };
|
|
2075
2104
|
}
|
|
2076
2105
|
}
|
|
2077
2106
|
if (this.parameterFormat === "toml") {
|
|
2078
2107
|
try {
|
|
2079
|
-
return { parameters: (0, import_js_toml.load)(preprocessTomlHeredoc(
|
|
2108
|
+
return { parameters: (0, import_js_toml.load)(preprocessTomlHeredoc(cleaned)) };
|
|
2080
2109
|
} catch (error) {
|
|
2081
2110
|
return { parseError: this.truncateParseError(error, "TOML") };
|
|
2082
2111
|
}
|
|
2083
2112
|
}
|
|
2084
2113
|
try {
|
|
2085
|
-
return { parameters: JSON.parse(
|
|
2114
|
+
return { parameters: JSON.parse(cleaned) };
|
|
2086
2115
|
} catch {
|
|
2087
2116
|
try {
|
|
2088
|
-
return { parameters: (0, import_js_toml.load)(preprocessTomlHeredoc(
|
|
2117
|
+
return { parameters: (0, import_js_toml.load)(preprocessTomlHeredoc(cleaned)) };
|
|
2089
2118
|
} catch {
|
|
2090
2119
|
try {
|
|
2091
|
-
return { parameters: yaml2.load(preprocessYaml(
|
|
2120
|
+
return { parameters: yaml2.load(preprocessYaml(cleaned)) };
|
|
2092
2121
|
} catch (error) {
|
|
2093
2122
|
return { parseError: this.truncateParseError(error, "auto") };
|
|
2094
2123
|
}
|
|
@@ -2634,6 +2663,7 @@ var init_agent = __esm({
|
|
|
2634
2663
|
gadgetEndPrefix;
|
|
2635
2664
|
onHumanInputRequired;
|
|
2636
2665
|
textOnlyHandler;
|
|
2666
|
+
textWithGadgetsHandler;
|
|
2637
2667
|
stopOnGadgetError;
|
|
2638
2668
|
shouldContinueAfterError;
|
|
2639
2669
|
defaultGadgetTimeoutMs;
|
|
@@ -2664,6 +2694,7 @@ var init_agent = __esm({
|
|
|
2664
2694
|
this.gadgetEndPrefix = options.gadgetEndPrefix;
|
|
2665
2695
|
this.onHumanInputRequired = options.onHumanInputRequired;
|
|
2666
2696
|
this.textOnlyHandler = options.textOnlyHandler ?? "terminate";
|
|
2697
|
+
this.textWithGadgetsHandler = options.textWithGadgetsHandler;
|
|
2667
2698
|
this.stopOnGadgetError = options.stopOnGadgetError ?? true;
|
|
2668
2699
|
this.shouldContinueAfterError = options.shouldContinueAfterError;
|
|
2669
2700
|
this.defaultGadgetTimeoutMs = options.defaultGadgetTimeoutMs;
|
|
@@ -2851,6 +2882,17 @@ var init_agent = __esm({
|
|
|
2851
2882
|
}
|
|
2852
2883
|
}
|
|
2853
2884
|
if (result.didExecuteGadgets) {
|
|
2885
|
+
if (this.textWithGadgetsHandler) {
|
|
2886
|
+
const textContent = result.outputs.filter((output) => output.type === "text").map((output) => output.content).join("");
|
|
2887
|
+
if (textContent.trim()) {
|
|
2888
|
+
const { gadgetName, parameterMapping, resultMapping } = this.textWithGadgetsHandler;
|
|
2889
|
+
this.conversation.addGadgetCall(
|
|
2890
|
+
gadgetName,
|
|
2891
|
+
parameterMapping(textContent),
|
|
2892
|
+
resultMapping ? resultMapping(textContent) : textContent
|
|
2893
|
+
);
|
|
2894
|
+
}
|
|
2895
|
+
}
|
|
2854
2896
|
for (const output of result.outputs) {
|
|
2855
2897
|
if (output.type === "gadget_result") {
|
|
2856
2898
|
const gadgetResult = output.result;
|
|
@@ -2862,7 +2904,13 @@ var init_agent = __esm({
|
|
|
2862
2904
|
}
|
|
2863
2905
|
}
|
|
2864
2906
|
} else {
|
|
2865
|
-
|
|
2907
|
+
if (finalMessage.trim()) {
|
|
2908
|
+
this.conversation.addGadgetCall(
|
|
2909
|
+
"TellUser",
|
|
2910
|
+
{ message: finalMessage, done: false, type: "info" },
|
|
2911
|
+
`\u2139\uFE0F ${finalMessage}`
|
|
2912
|
+
);
|
|
2913
|
+
}
|
|
2866
2914
|
const shouldBreak = await this.handleTextOnlyResponse(finalMessage);
|
|
2867
2915
|
if (shouldBreak) {
|
|
2868
2916
|
break;
|
|
@@ -3057,7 +3105,8 @@ var init_anthropic_models = __esm({
|
|
|
3057
3105
|
pricing: {
|
|
3058
3106
|
input: 3,
|
|
3059
3107
|
output: 15,
|
|
3060
|
-
cachedInput: 0.3
|
|
3108
|
+
cachedInput: 0.3,
|
|
3109
|
+
cacheWriteInput: 3.75
|
|
3061
3110
|
},
|
|
3062
3111
|
knowledgeCutoff: "2025-01",
|
|
3063
3112
|
features: {
|
|
@@ -3081,7 +3130,8 @@ var init_anthropic_models = __esm({
|
|
|
3081
3130
|
pricing: {
|
|
3082
3131
|
input: 1,
|
|
3083
3132
|
output: 5,
|
|
3084
|
-
cachedInput: 0.1
|
|
3133
|
+
cachedInput: 0.1,
|
|
3134
|
+
cacheWriteInput: 1.25
|
|
3085
3135
|
},
|
|
3086
3136
|
knowledgeCutoff: "2025-02",
|
|
3087
3137
|
features: {
|
|
@@ -3105,7 +3155,8 @@ var init_anthropic_models = __esm({
|
|
|
3105
3155
|
pricing: {
|
|
3106
3156
|
input: 3,
|
|
3107
3157
|
output: 15,
|
|
3108
|
-
cachedInput: 0.3
|
|
3158
|
+
cachedInput: 0.3,
|
|
3159
|
+
cacheWriteInput: 3.75
|
|
3109
3160
|
},
|
|
3110
3161
|
knowledgeCutoff: "2025-03",
|
|
3111
3162
|
features: {
|
|
@@ -3129,7 +3180,8 @@ var init_anthropic_models = __esm({
|
|
|
3129
3180
|
pricing: {
|
|
3130
3181
|
input: 3,
|
|
3131
3182
|
output: 15,
|
|
3132
|
-
cachedInput: 0.3
|
|
3183
|
+
cachedInput: 0.3,
|
|
3184
|
+
cacheWriteInput: 3.75
|
|
3133
3185
|
},
|
|
3134
3186
|
knowledgeCutoff: "2024-11",
|
|
3135
3187
|
features: {
|
|
@@ -3153,7 +3205,8 @@ var init_anthropic_models = __esm({
|
|
|
3153
3205
|
pricing: {
|
|
3154
3206
|
input: 15,
|
|
3155
3207
|
output: 75,
|
|
3156
|
-
cachedInput: 1.5
|
|
3208
|
+
cachedInput: 1.5,
|
|
3209
|
+
cacheWriteInput: 18.75
|
|
3157
3210
|
},
|
|
3158
3211
|
knowledgeCutoff: "2025-01",
|
|
3159
3212
|
features: {
|
|
@@ -3177,7 +3230,8 @@ var init_anthropic_models = __esm({
|
|
|
3177
3230
|
pricing: {
|
|
3178
3231
|
input: 15,
|
|
3179
3232
|
output: 75,
|
|
3180
|
-
cachedInput: 1.5
|
|
3233
|
+
cachedInput: 1.5,
|
|
3234
|
+
cacheWriteInput: 18.75
|
|
3181
3235
|
},
|
|
3182
3236
|
knowledgeCutoff: "2025-03",
|
|
3183
3237
|
features: {
|
|
@@ -3200,7 +3254,8 @@ var init_anthropic_models = __esm({
|
|
|
3200
3254
|
pricing: {
|
|
3201
3255
|
input: 0.8,
|
|
3202
3256
|
output: 4,
|
|
3203
|
-
cachedInput: 0.08
|
|
3257
|
+
cachedInput: 0.08,
|
|
3258
|
+
cacheWriteInput: 1
|
|
3204
3259
|
},
|
|
3205
3260
|
knowledgeCutoff: "2024-07",
|
|
3206
3261
|
features: {
|
|
@@ -3223,7 +3278,8 @@ var init_anthropic_models = __esm({
|
|
|
3223
3278
|
pricing: {
|
|
3224
3279
|
input: 0.25,
|
|
3225
3280
|
output: 1.25,
|
|
3226
|
-
cachedInput: 0.025
|
|
3281
|
+
cachedInput: 0.025,
|
|
3282
|
+
cacheWriteInput: 0.3125
|
|
3227
3283
|
},
|
|
3228
3284
|
knowledgeCutoff: "2023-08",
|
|
3229
3285
|
features: {
|
|
@@ -3247,7 +3303,8 @@ var init_anthropic_models = __esm({
|
|
|
3247
3303
|
pricing: {
|
|
3248
3304
|
input: 1,
|
|
3249
3305
|
output: 5,
|
|
3250
|
-
cachedInput: 0.1
|
|
3306
|
+
cachedInput: 0.1,
|
|
3307
|
+
cacheWriteInput: 1.25
|
|
3251
3308
|
},
|
|
3252
3309
|
knowledgeCutoff: "2025-02",
|
|
3253
3310
|
features: {
|
|
@@ -3271,7 +3328,8 @@ var init_anthropic_models = __esm({
|
|
|
3271
3328
|
pricing: {
|
|
3272
3329
|
input: 3,
|
|
3273
3330
|
output: 15,
|
|
3274
|
-
cachedInput: 0.3
|
|
3331
|
+
cachedInput: 0.3,
|
|
3332
|
+
cacheWriteInput: 3.75
|
|
3275
3333
|
},
|
|
3276
3334
|
knowledgeCutoff: "2025-01",
|
|
3277
3335
|
features: {
|
|
@@ -3295,7 +3353,8 @@ var init_anthropic_models = __esm({
|
|
|
3295
3353
|
pricing: {
|
|
3296
3354
|
input: 5,
|
|
3297
3355
|
output: 25,
|
|
3298
|
-
cachedInput: 0.5
|
|
3356
|
+
cachedInput: 0.5,
|
|
3357
|
+
cacheWriteInput: 6.25
|
|
3299
3358
|
},
|
|
3300
3359
|
knowledgeCutoff: "2025-03",
|
|
3301
3360
|
features: {
|
|
@@ -3410,15 +3469,27 @@ var init_anthropic = __esm({
|
|
|
3410
3469
|
}
|
|
3411
3470
|
buildRequestPayload(options, descriptor, spec, messages) {
|
|
3412
3471
|
const systemMessages = messages.filter((message) => message.role === "system");
|
|
3413
|
-
const system = systemMessages.length > 0 ? systemMessages.map((m) =>
|
|
3414
|
-
|
|
3472
|
+
const system = systemMessages.length > 0 ? systemMessages.map((m, index) => ({
|
|
3473
|
+
type: "text",
|
|
3474
|
+
text: m.content,
|
|
3475
|
+
// Add cache_control to the LAST system message block
|
|
3476
|
+
...index === systemMessages.length - 1 ? { cache_control: { type: "ephemeral" } } : {}
|
|
3477
|
+
})) : void 0;
|
|
3478
|
+
const nonSystemMessages = messages.filter(
|
|
3415
3479
|
(message) => message.role !== "system"
|
|
3416
|
-
)
|
|
3480
|
+
);
|
|
3481
|
+
const lastUserIndex = nonSystemMessages.reduce(
|
|
3482
|
+
(lastIdx, msg, idx) => msg.role === "user" ? idx : lastIdx,
|
|
3483
|
+
-1
|
|
3484
|
+
);
|
|
3485
|
+
const conversation = nonSystemMessages.map((message, index) => ({
|
|
3417
3486
|
role: message.role,
|
|
3418
3487
|
content: [
|
|
3419
3488
|
{
|
|
3420
3489
|
type: "text",
|
|
3421
|
-
text: message.content
|
|
3490
|
+
text: message.content,
|
|
3491
|
+
// Add cache_control to the LAST user message
|
|
3492
|
+
...message.role === "user" && index === lastUserIndex ? { cache_control: { type: "ephemeral" } } : {}
|
|
3422
3493
|
}
|
|
3423
3494
|
]
|
|
3424
3495
|
}));
|
|
@@ -3444,15 +3515,22 @@ var init_anthropic = __esm({
|
|
|
3444
3515
|
async *wrapStream(iterable) {
|
|
3445
3516
|
const stream2 = iterable;
|
|
3446
3517
|
let inputTokens = 0;
|
|
3518
|
+
let cachedInputTokens = 0;
|
|
3519
|
+
let cacheCreationInputTokens = 0;
|
|
3447
3520
|
for await (const event of stream2) {
|
|
3448
3521
|
if (event.type === "message_start") {
|
|
3449
|
-
|
|
3522
|
+
const usage = event.message.usage;
|
|
3523
|
+
cachedInputTokens = usage.cache_read_input_tokens ?? 0;
|
|
3524
|
+
cacheCreationInputTokens = usage.cache_creation_input_tokens ?? 0;
|
|
3525
|
+
inputTokens = usage.input_tokens + cachedInputTokens + cacheCreationInputTokens;
|
|
3450
3526
|
yield {
|
|
3451
3527
|
text: "",
|
|
3452
3528
|
usage: {
|
|
3453
3529
|
inputTokens,
|
|
3454
3530
|
outputTokens: 0,
|
|
3455
|
-
totalTokens: inputTokens
|
|
3531
|
+
totalTokens: inputTokens,
|
|
3532
|
+
cachedInputTokens,
|
|
3533
|
+
cacheCreationInputTokens
|
|
3456
3534
|
},
|
|
3457
3535
|
rawEvent: event
|
|
3458
3536
|
};
|
|
@@ -3466,7 +3544,9 @@ var init_anthropic = __esm({
|
|
|
3466
3544
|
const usage = event.usage ? {
|
|
3467
3545
|
inputTokens,
|
|
3468
3546
|
outputTokens: event.usage.output_tokens,
|
|
3469
|
-
totalTokens: inputTokens + event.usage.output_tokens
|
|
3547
|
+
totalTokens: inputTokens + event.usage.output_tokens,
|
|
3548
|
+
cachedInputTokens,
|
|
3549
|
+
cacheCreationInputTokens
|
|
3470
3550
|
} : void 0;
|
|
3471
3551
|
if (event.delta.stop_reason || usage) {
|
|
3472
3552
|
yield {
|
|
@@ -3547,6 +3627,7 @@ var init_gemini_models = __esm({
|
|
|
3547
3627
|
"src/providers/gemini-models.ts"() {
|
|
3548
3628
|
"use strict";
|
|
3549
3629
|
GEMINI_MODELS = [
|
|
3630
|
+
// Gemini 3 Pro (Preview)
|
|
3550
3631
|
{
|
|
3551
3632
|
provider: "gemini",
|
|
3552
3633
|
modelId: "gemini-3-pro-preview",
|
|
@@ -3555,8 +3636,11 @@ var init_gemini_models = __esm({
|
|
|
3555
3636
|
maxOutputTokens: 65536,
|
|
3556
3637
|
pricing: {
|
|
3557
3638
|
input: 2,
|
|
3639
|
+
// $2.00 for prompts <= 200k, $4.00 for > 200k (using lower tier)
|
|
3558
3640
|
output: 12,
|
|
3641
|
+
// $12.00 for prompts <= 200k, $18.00 for > 200k
|
|
3559
3642
|
cachedInput: 0.2
|
|
3643
|
+
// $0.20 for prompts <= 200k
|
|
3560
3644
|
},
|
|
3561
3645
|
knowledgeCutoff: "2025-01",
|
|
3562
3646
|
features: {
|
|
@@ -3569,9 +3653,10 @@ var init_gemini_models = __esm({
|
|
|
3569
3653
|
metadata: {
|
|
3570
3654
|
family: "Gemini 3",
|
|
3571
3655
|
releaseDate: "2025-11-18",
|
|
3572
|
-
notes: "
|
|
3656
|
+
notes: "Best model for multimodal understanding, agentic and vibe-coding. Deep Think mode available."
|
|
3573
3657
|
}
|
|
3574
3658
|
},
|
|
3659
|
+
// Gemini 2.5 Pro
|
|
3575
3660
|
{
|
|
3576
3661
|
provider: "gemini",
|
|
3577
3662
|
modelId: "gemini-2.5-pro",
|
|
@@ -3580,8 +3665,11 @@ var init_gemini_models = __esm({
|
|
|
3580
3665
|
maxOutputTokens: 65536,
|
|
3581
3666
|
pricing: {
|
|
3582
3667
|
input: 1.25,
|
|
3668
|
+
// $1.25 for prompts <= 200k, $2.50 for > 200k
|
|
3583
3669
|
output: 10,
|
|
3670
|
+
// $10.00 for prompts <= 200k, $15.00 for > 200k
|
|
3584
3671
|
cachedInput: 0.125
|
|
3672
|
+
// $0.125 for prompts <= 200k
|
|
3585
3673
|
},
|
|
3586
3674
|
knowledgeCutoff: "2025-01",
|
|
3587
3675
|
features: {
|
|
@@ -3594,9 +3682,10 @@ var init_gemini_models = __esm({
|
|
|
3594
3682
|
metadata: {
|
|
3595
3683
|
family: "Gemini 2.5",
|
|
3596
3684
|
releaseDate: "2025-06",
|
|
3597
|
-
notes: "
|
|
3685
|
+
notes: "State-of-the-art multipurpose model. Excels at coding and complex reasoning."
|
|
3598
3686
|
}
|
|
3599
3687
|
},
|
|
3688
|
+
// Gemini 2.5 Flash
|
|
3600
3689
|
{
|
|
3601
3690
|
provider: "gemini",
|
|
3602
3691
|
modelId: "gemini-2.5-flash",
|
|
@@ -3605,8 +3694,10 @@ var init_gemini_models = __esm({
|
|
|
3605
3694
|
maxOutputTokens: 65536,
|
|
3606
3695
|
pricing: {
|
|
3607
3696
|
input: 0.3,
|
|
3697
|
+
// $0.30 for text/image/video, $1.00 for audio
|
|
3608
3698
|
output: 2.5,
|
|
3609
3699
|
cachedInput: 0.03
|
|
3700
|
+
// $0.03 for text/image/video
|
|
3610
3701
|
},
|
|
3611
3702
|
knowledgeCutoff: "2025-01",
|
|
3612
3703
|
features: {
|
|
@@ -3619,9 +3710,10 @@ var init_gemini_models = __esm({
|
|
|
3619
3710
|
metadata: {
|
|
3620
3711
|
family: "Gemini 2.5",
|
|
3621
3712
|
releaseDate: "2025-06",
|
|
3622
|
-
notes: "
|
|
3713
|
+
notes: "First hybrid reasoning model with 1M context and thinking budgets."
|
|
3623
3714
|
}
|
|
3624
3715
|
},
|
|
3716
|
+
// Gemini 2.5 Flash-Lite
|
|
3625
3717
|
{
|
|
3626
3718
|
provider: "gemini",
|
|
3627
3719
|
modelId: "gemini-2.5-flash-lite",
|
|
@@ -3630,8 +3722,10 @@ var init_gemini_models = __esm({
|
|
|
3630
3722
|
maxOutputTokens: 65536,
|
|
3631
3723
|
pricing: {
|
|
3632
3724
|
input: 0.1,
|
|
3725
|
+
// $0.10 for text/image/video, $0.30 for audio
|
|
3633
3726
|
output: 0.4,
|
|
3634
3727
|
cachedInput: 0.01
|
|
3728
|
+
// $0.01 for text/image/video
|
|
3635
3729
|
},
|
|
3636
3730
|
knowledgeCutoff: "2025-01",
|
|
3637
3731
|
features: {
|
|
@@ -3643,9 +3737,10 @@ var init_gemini_models = __esm({
|
|
|
3643
3737
|
metadata: {
|
|
3644
3738
|
family: "Gemini 2.5",
|
|
3645
3739
|
releaseDate: "2025-06",
|
|
3646
|
-
notes: "
|
|
3740
|
+
notes: "Smallest and most cost effective model, built for at scale usage."
|
|
3647
3741
|
}
|
|
3648
3742
|
},
|
|
3743
|
+
// Gemini 2.0 Flash
|
|
3649
3744
|
{
|
|
3650
3745
|
provider: "gemini",
|
|
3651
3746
|
modelId: "gemini-2.0-flash",
|
|
@@ -3654,8 +3749,10 @@ var init_gemini_models = __esm({
|
|
|
3654
3749
|
maxOutputTokens: 8192,
|
|
3655
3750
|
pricing: {
|
|
3656
3751
|
input: 0.1,
|
|
3752
|
+
// $0.10 for text/image/video, $0.70 for audio
|
|
3657
3753
|
output: 0.4,
|
|
3658
|
-
cachedInput: 0.
|
|
3754
|
+
cachedInput: 0.025
|
|
3755
|
+
// $0.025 for text/image/video
|
|
3659
3756
|
},
|
|
3660
3757
|
knowledgeCutoff: "2024-08",
|
|
3661
3758
|
features: {
|
|
@@ -3666,9 +3763,10 @@ var init_gemini_models = __esm({
|
|
|
3666
3763
|
},
|
|
3667
3764
|
metadata: {
|
|
3668
3765
|
family: "Gemini 2.0",
|
|
3669
|
-
notes: "
|
|
3766
|
+
notes: "Balanced multimodal model with 1M context, built for the era of Agents."
|
|
3670
3767
|
}
|
|
3671
3768
|
},
|
|
3769
|
+
// Gemini 2.0 Flash-Lite
|
|
3672
3770
|
{
|
|
3673
3771
|
provider: "gemini",
|
|
3674
3772
|
modelId: "gemini-2.0-flash-lite",
|
|
@@ -3677,8 +3775,8 @@ var init_gemini_models = __esm({
|
|
|
3677
3775
|
maxOutputTokens: 8192,
|
|
3678
3776
|
pricing: {
|
|
3679
3777
|
input: 0.075,
|
|
3680
|
-
output: 0.3
|
|
3681
|
-
|
|
3778
|
+
output: 0.3
|
|
3779
|
+
// No context caching available for 2.0-flash-lite
|
|
3682
3780
|
},
|
|
3683
3781
|
knowledgeCutoff: "2024-08",
|
|
3684
3782
|
features: {
|
|
@@ -3689,7 +3787,7 @@ var init_gemini_models = __esm({
|
|
|
3689
3787
|
},
|
|
3690
3788
|
metadata: {
|
|
3691
3789
|
family: "Gemini 2.0",
|
|
3692
|
-
notes: "
|
|
3790
|
+
notes: "Smallest and most cost effective 2.0 model for at scale usage."
|
|
3693
3791
|
}
|
|
3694
3792
|
}
|
|
3695
3793
|
];
|
|
@@ -3859,7 +3957,9 @@ var init_gemini = __esm({
|
|
|
3859
3957
|
return {
|
|
3860
3958
|
inputTokens: usageMetadata.promptTokenCount ?? 0,
|
|
3861
3959
|
outputTokens: usageMetadata.candidatesTokenCount ?? 0,
|
|
3862
|
-
totalTokens: usageMetadata.totalTokenCount ?? 0
|
|
3960
|
+
totalTokens: usageMetadata.totalTokenCount ?? 0,
|
|
3961
|
+
// Gemini returns cached token count in cachedContentTokenCount
|
|
3962
|
+
cachedInputTokens: usageMetadata.cachedContentTokenCount ?? 0
|
|
3863
3963
|
};
|
|
3864
3964
|
}
|
|
3865
3965
|
/**
|
|
@@ -3915,10 +4015,11 @@ var init_openai_models = __esm({
|
|
|
3915
4015
|
"src/providers/openai-models.ts"() {
|
|
3916
4016
|
"use strict";
|
|
3917
4017
|
OPENAI_MODELS = [
|
|
4018
|
+
// GPT-5 Family
|
|
3918
4019
|
{
|
|
3919
4020
|
provider: "openai",
|
|
3920
4021
|
modelId: "gpt-5.1",
|
|
3921
|
-
displayName: "GPT-5.1
|
|
4022
|
+
displayName: "GPT-5.1",
|
|
3922
4023
|
contextWindow: 128e3,
|
|
3923
4024
|
maxOutputTokens: 32768,
|
|
3924
4025
|
pricing: {
|
|
@@ -3938,34 +4039,7 @@ var init_openai_models = __esm({
|
|
|
3938
4039
|
metadata: {
|
|
3939
4040
|
family: "GPT-5",
|
|
3940
4041
|
releaseDate: "2025-11-12",
|
|
3941
|
-
notes: "
|
|
3942
|
-
supportsTemperature: false
|
|
3943
|
-
}
|
|
3944
|
-
},
|
|
3945
|
-
{
|
|
3946
|
-
provider: "openai",
|
|
3947
|
-
modelId: "gpt-5.1-thinking",
|
|
3948
|
-
displayName: "GPT-5.1 Thinking",
|
|
3949
|
-
contextWindow: 196e3,
|
|
3950
|
-
maxOutputTokens: 32768,
|
|
3951
|
-
pricing: {
|
|
3952
|
-
input: 1.25,
|
|
3953
|
-
output: 10,
|
|
3954
|
-
cachedInput: 0.125
|
|
3955
|
-
},
|
|
3956
|
-
knowledgeCutoff: "2024-09-30",
|
|
3957
|
-
features: {
|
|
3958
|
-
streaming: true,
|
|
3959
|
-
functionCalling: true,
|
|
3960
|
-
vision: true,
|
|
3961
|
-
reasoning: true,
|
|
3962
|
-
structuredOutputs: true,
|
|
3963
|
-
fineTuning: true
|
|
3964
|
-
},
|
|
3965
|
-
metadata: {
|
|
3966
|
-
family: "GPT-5",
|
|
3967
|
-
releaseDate: "2025-11-12",
|
|
3968
|
-
notes: "Advanced reasoning with thinking levels: Light, Standard, Extended, Heavy. Best for complex tasks.",
|
|
4042
|
+
notes: "Latest GPT-5 with improved instruction following. 2-3x faster than GPT-5.",
|
|
3969
4043
|
supportsTemperature: false
|
|
3970
4044
|
}
|
|
3971
4045
|
},
|
|
@@ -4045,6 +4119,255 @@ var init_openai_models = __esm({
|
|
|
4045
4119
|
notes: "Fastest, most cost-efficient version for well-defined tasks",
|
|
4046
4120
|
supportsTemperature: false
|
|
4047
4121
|
}
|
|
4122
|
+
},
|
|
4123
|
+
{
|
|
4124
|
+
provider: "openai",
|
|
4125
|
+
modelId: "gpt-5-pro",
|
|
4126
|
+
displayName: "GPT-5 Pro",
|
|
4127
|
+
contextWindow: 272e3,
|
|
4128
|
+
maxOutputTokens: 128e3,
|
|
4129
|
+
pricing: {
|
|
4130
|
+
input: 15,
|
|
4131
|
+
output: 120
|
|
4132
|
+
// No cached input pricing for gpt-5-pro
|
|
4133
|
+
},
|
|
4134
|
+
knowledgeCutoff: "2024-09-30",
|
|
4135
|
+
features: {
|
|
4136
|
+
streaming: true,
|
|
4137
|
+
functionCalling: true,
|
|
4138
|
+
vision: true,
|
|
4139
|
+
reasoning: true,
|
|
4140
|
+
structuredOutputs: true
|
|
4141
|
+
},
|
|
4142
|
+
metadata: {
|
|
4143
|
+
family: "GPT-5",
|
|
4144
|
+
notes: "Premium tier with enhanced capabilities. Does not support prompt caching.",
|
|
4145
|
+
supportsTemperature: false
|
|
4146
|
+
}
|
|
4147
|
+
},
|
|
4148
|
+
// GPT-4.1 Family
|
|
4149
|
+
{
|
|
4150
|
+
provider: "openai",
|
|
4151
|
+
modelId: "gpt-4.1",
|
|
4152
|
+
displayName: "GPT-4.1",
|
|
4153
|
+
contextWindow: 128e3,
|
|
4154
|
+
maxOutputTokens: 32768,
|
|
4155
|
+
pricing: {
|
|
4156
|
+
input: 2,
|
|
4157
|
+
output: 8,
|
|
4158
|
+
cachedInput: 0.5
|
|
4159
|
+
},
|
|
4160
|
+
knowledgeCutoff: "2024-04-01",
|
|
4161
|
+
features: {
|
|
4162
|
+
streaming: true,
|
|
4163
|
+
functionCalling: true,
|
|
4164
|
+
vision: true,
|
|
4165
|
+
structuredOutputs: true,
|
|
4166
|
+
fineTuning: true
|
|
4167
|
+
},
|
|
4168
|
+
metadata: {
|
|
4169
|
+
family: "GPT-4.1",
|
|
4170
|
+
notes: "Improved GPT-4 with better instruction following"
|
|
4171
|
+
}
|
|
4172
|
+
},
|
|
4173
|
+
{
|
|
4174
|
+
provider: "openai",
|
|
4175
|
+
modelId: "gpt-4.1-mini",
|
|
4176
|
+
displayName: "GPT-4.1 Mini",
|
|
4177
|
+
contextWindow: 128e3,
|
|
4178
|
+
maxOutputTokens: 32768,
|
|
4179
|
+
pricing: {
|
|
4180
|
+
input: 0.4,
|
|
4181
|
+
output: 1.6,
|
|
4182
|
+
cachedInput: 0.1
|
|
4183
|
+
},
|
|
4184
|
+
knowledgeCutoff: "2024-04-01",
|
|
4185
|
+
features: {
|
|
4186
|
+
streaming: true,
|
|
4187
|
+
functionCalling: true,
|
|
4188
|
+
vision: true,
|
|
4189
|
+
structuredOutputs: true,
|
|
4190
|
+
fineTuning: true
|
|
4191
|
+
},
|
|
4192
|
+
metadata: {
|
|
4193
|
+
family: "GPT-4.1",
|
|
4194
|
+
notes: "Cost-efficient GPT-4.1 variant"
|
|
4195
|
+
}
|
|
4196
|
+
},
|
|
4197
|
+
{
|
|
4198
|
+
provider: "openai",
|
|
4199
|
+
modelId: "gpt-4.1-nano",
|
|
4200
|
+
displayName: "GPT-4.1 Nano",
|
|
4201
|
+
contextWindow: 128e3,
|
|
4202
|
+
maxOutputTokens: 32768,
|
|
4203
|
+
pricing: {
|
|
4204
|
+
input: 0.1,
|
|
4205
|
+
output: 0.4,
|
|
4206
|
+
cachedInput: 0.025
|
|
4207
|
+
},
|
|
4208
|
+
knowledgeCutoff: "2024-04-01",
|
|
4209
|
+
features: {
|
|
4210
|
+
streaming: true,
|
|
4211
|
+
functionCalling: true,
|
|
4212
|
+
vision: true,
|
|
4213
|
+
structuredOutputs: true,
|
|
4214
|
+
fineTuning: true
|
|
4215
|
+
},
|
|
4216
|
+
metadata: {
|
|
4217
|
+
family: "GPT-4.1",
|
|
4218
|
+
notes: "Fastest GPT-4.1 variant for simple tasks"
|
|
4219
|
+
}
|
|
4220
|
+
},
|
|
4221
|
+
// GPT-4o Family
|
|
4222
|
+
{
|
|
4223
|
+
provider: "openai",
|
|
4224
|
+
modelId: "gpt-4o",
|
|
4225
|
+
displayName: "GPT-4o",
|
|
4226
|
+
contextWindow: 128e3,
|
|
4227
|
+
maxOutputTokens: 16384,
|
|
4228
|
+
pricing: {
|
|
4229
|
+
input: 2.5,
|
|
4230
|
+
output: 10,
|
|
4231
|
+
cachedInput: 1.25
|
|
4232
|
+
},
|
|
4233
|
+
knowledgeCutoff: "2024-04-01",
|
|
4234
|
+
features: {
|
|
4235
|
+
streaming: true,
|
|
4236
|
+
functionCalling: true,
|
|
4237
|
+
vision: true,
|
|
4238
|
+
structuredOutputs: true,
|
|
4239
|
+
fineTuning: true
|
|
4240
|
+
},
|
|
4241
|
+
metadata: {
|
|
4242
|
+
family: "GPT-4o",
|
|
4243
|
+
notes: "Multimodal model optimized for speed"
|
|
4244
|
+
}
|
|
4245
|
+
},
|
|
4246
|
+
{
|
|
4247
|
+
provider: "openai",
|
|
4248
|
+
modelId: "gpt-4o-mini",
|
|
4249
|
+
displayName: "GPT-4o Mini",
|
|
4250
|
+
contextWindow: 128e3,
|
|
4251
|
+
maxOutputTokens: 16384,
|
|
4252
|
+
pricing: {
|
|
4253
|
+
input: 0.15,
|
|
4254
|
+
output: 0.6,
|
|
4255
|
+
cachedInput: 0.075
|
|
4256
|
+
},
|
|
4257
|
+
knowledgeCutoff: "2024-04-01",
|
|
4258
|
+
features: {
|
|
4259
|
+
streaming: true,
|
|
4260
|
+
functionCalling: true,
|
|
4261
|
+
vision: true,
|
|
4262
|
+
structuredOutputs: true,
|
|
4263
|
+
fineTuning: true
|
|
4264
|
+
},
|
|
4265
|
+
metadata: {
|
|
4266
|
+
family: "GPT-4o",
|
|
4267
|
+
notes: "Fast and affordable multimodal model"
|
|
4268
|
+
}
|
|
4269
|
+
},
|
|
4270
|
+
// o-series (Reasoning models)
|
|
4271
|
+
{
|
|
4272
|
+
provider: "openai",
|
|
4273
|
+
modelId: "o1",
|
|
4274
|
+
displayName: "o1",
|
|
4275
|
+
contextWindow: 2e5,
|
|
4276
|
+
maxOutputTokens: 1e5,
|
|
4277
|
+
pricing: {
|
|
4278
|
+
input: 15,
|
|
4279
|
+
output: 60,
|
|
4280
|
+
cachedInput: 7.5
|
|
4281
|
+
},
|
|
4282
|
+
knowledgeCutoff: "2024-12-01",
|
|
4283
|
+
features: {
|
|
4284
|
+
streaming: true,
|
|
4285
|
+
functionCalling: true,
|
|
4286
|
+
vision: true,
|
|
4287
|
+
reasoning: true,
|
|
4288
|
+
structuredOutputs: true
|
|
4289
|
+
},
|
|
4290
|
+
metadata: {
|
|
4291
|
+
family: "o-series",
|
|
4292
|
+
notes: "Advanced reasoning model with chain-of-thought",
|
|
4293
|
+
supportsTemperature: false
|
|
4294
|
+
}
|
|
4295
|
+
},
|
|
4296
|
+
{
|
|
4297
|
+
provider: "openai",
|
|
4298
|
+
modelId: "o3",
|
|
4299
|
+
displayName: "o3",
|
|
4300
|
+
contextWindow: 2e5,
|
|
4301
|
+
maxOutputTokens: 1e5,
|
|
4302
|
+
pricing: {
|
|
4303
|
+
input: 2,
|
|
4304
|
+
output: 8,
|
|
4305
|
+
cachedInput: 0.5
|
|
4306
|
+
},
|
|
4307
|
+
knowledgeCutoff: "2025-01-01",
|
|
4308
|
+
features: {
|
|
4309
|
+
streaming: true,
|
|
4310
|
+
functionCalling: true,
|
|
4311
|
+
vision: true,
|
|
4312
|
+
reasoning: true,
|
|
4313
|
+
structuredOutputs: true
|
|
4314
|
+
},
|
|
4315
|
+
metadata: {
|
|
4316
|
+
family: "o-series",
|
|
4317
|
+
notes: "Next-gen reasoning model, more efficient than o1",
|
|
4318
|
+
supportsTemperature: false
|
|
4319
|
+
}
|
|
4320
|
+
},
|
|
4321
|
+
{
|
|
4322
|
+
provider: "openai",
|
|
4323
|
+
modelId: "o4-mini",
|
|
4324
|
+
displayName: "o4 Mini",
|
|
4325
|
+
contextWindow: 2e5,
|
|
4326
|
+
maxOutputTokens: 1e5,
|
|
4327
|
+
pricing: {
|
|
4328
|
+
input: 1.1,
|
|
4329
|
+
output: 4.4,
|
|
4330
|
+
cachedInput: 0.275
|
|
4331
|
+
},
|
|
4332
|
+
knowledgeCutoff: "2025-04-01",
|
|
4333
|
+
features: {
|
|
4334
|
+
streaming: true,
|
|
4335
|
+
functionCalling: true,
|
|
4336
|
+
vision: true,
|
|
4337
|
+
reasoning: true,
|
|
4338
|
+
structuredOutputs: true,
|
|
4339
|
+
fineTuning: true
|
|
4340
|
+
},
|
|
4341
|
+
metadata: {
|
|
4342
|
+
family: "o-series",
|
|
4343
|
+
notes: "Cost-efficient reasoning model",
|
|
4344
|
+
supportsTemperature: false
|
|
4345
|
+
}
|
|
4346
|
+
},
|
|
4347
|
+
{
|
|
4348
|
+
provider: "openai",
|
|
4349
|
+
modelId: "o3-mini",
|
|
4350
|
+
displayName: "o3 Mini",
|
|
4351
|
+
contextWindow: 2e5,
|
|
4352
|
+
maxOutputTokens: 1e5,
|
|
4353
|
+
pricing: {
|
|
4354
|
+
input: 1.1,
|
|
4355
|
+
output: 4.4,
|
|
4356
|
+
cachedInput: 0.55
|
|
4357
|
+
},
|
|
4358
|
+
knowledgeCutoff: "2025-01-01",
|
|
4359
|
+
features: {
|
|
4360
|
+
streaming: true,
|
|
4361
|
+
functionCalling: true,
|
|
4362
|
+
vision: true,
|
|
4363
|
+
reasoning: true,
|
|
4364
|
+
structuredOutputs: true
|
|
4365
|
+
},
|
|
4366
|
+
metadata: {
|
|
4367
|
+
family: "o-series",
|
|
4368
|
+
notes: "Compact reasoning model for cost-sensitive applications",
|
|
4369
|
+
supportsTemperature: false
|
|
4370
|
+
}
|
|
4048
4371
|
}
|
|
4049
4372
|
];
|
|
4050
4373
|
}
|
|
@@ -4125,7 +4448,8 @@ var init_openai = __esm({
|
|
|
4125
4448
|
const usage = chunk.usage ? {
|
|
4126
4449
|
inputTokens: chunk.usage.prompt_tokens,
|
|
4127
4450
|
outputTokens: chunk.usage.completion_tokens,
|
|
4128
|
-
totalTokens: chunk.usage.total_tokens
|
|
4451
|
+
totalTokens: chunk.usage.total_tokens,
|
|
4452
|
+
cachedInputTokens: chunk.usage.prompt_tokens_details?.cached_tokens ?? 0
|
|
4129
4453
|
} : void 0;
|
|
4130
4454
|
if (finishReason || usage) {
|
|
4131
4455
|
yield { text: "", finishReason, usage, rawEvent: chunk };
|
|
@@ -4342,20 +4666,28 @@ var init_model_registry = __esm({
|
|
|
4342
4666
|
/**
|
|
4343
4667
|
* Estimate API cost for a given model and token usage
|
|
4344
4668
|
* @param modelId - Full model identifier
|
|
4345
|
-
* @param inputTokens - Number of input tokens
|
|
4669
|
+
* @param inputTokens - Number of input tokens (total, including cached and cache creation)
|
|
4346
4670
|
* @param outputTokens - Number of output tokens
|
|
4347
|
-
* @param
|
|
4671
|
+
* @param cachedInputTokens - Number of cached input tokens (subset of inputTokens)
|
|
4672
|
+
* @param cacheCreationInputTokens - Number of cache creation tokens (subset of inputTokens, Anthropic only)
|
|
4348
4673
|
* @returns CostEstimate if model found, undefined otherwise
|
|
4349
4674
|
*/
|
|
4350
|
-
estimateCost(modelId, inputTokens, outputTokens,
|
|
4675
|
+
estimateCost(modelId, inputTokens, outputTokens, cachedInputTokens = 0, cacheCreationInputTokens = 0) {
|
|
4351
4676
|
const spec = this.getModelSpec(modelId);
|
|
4352
4677
|
if (!spec) return void 0;
|
|
4353
|
-
const
|
|
4354
|
-
const
|
|
4678
|
+
const cachedRate = spec.pricing.cachedInput ?? spec.pricing.input;
|
|
4679
|
+
const cacheWriteRate = spec.pricing.cacheWriteInput ?? spec.pricing.input;
|
|
4680
|
+
const uncachedInputTokens = inputTokens - cachedInputTokens - cacheCreationInputTokens;
|
|
4681
|
+
const uncachedInputCost = uncachedInputTokens / 1e6 * spec.pricing.input;
|
|
4682
|
+
const cachedInputCost = cachedInputTokens / 1e6 * cachedRate;
|
|
4683
|
+
const cacheCreationCost = cacheCreationInputTokens / 1e6 * cacheWriteRate;
|
|
4684
|
+
const inputCost = uncachedInputCost + cachedInputCost + cacheCreationCost;
|
|
4355
4685
|
const outputCost = outputTokens / 1e6 * spec.pricing.output;
|
|
4356
4686
|
const totalCost = inputCost + outputCost;
|
|
4357
4687
|
return {
|
|
4358
4688
|
inputCost,
|
|
4689
|
+
cachedInputCost,
|
|
4690
|
+
cacheCreationCost,
|
|
4359
4691
|
outputCost,
|
|
4360
4692
|
totalCost,
|
|
4361
4693
|
currency: "USD"
|
|
@@ -4736,6 +5068,7 @@ var AgentBuilder;
|
|
|
4736
5068
|
var init_builder = __esm({
|
|
4737
5069
|
"src/agent/builder.ts"() {
|
|
4738
5070
|
"use strict";
|
|
5071
|
+
init_constants();
|
|
4739
5072
|
init_model_shortcuts();
|
|
4740
5073
|
init_registry();
|
|
4741
5074
|
init_agent();
|
|
@@ -4757,6 +5090,7 @@ var init_builder = __esm({
|
|
|
4757
5090
|
gadgetStartPrefix;
|
|
4758
5091
|
gadgetEndPrefix;
|
|
4759
5092
|
textOnlyHandler;
|
|
5093
|
+
textWithGadgetsHandler;
|
|
4760
5094
|
stopOnGadgetError;
|
|
4761
5095
|
shouldContinueAfterError;
|
|
4762
5096
|
defaultGadgetTimeoutMs;
|
|
@@ -5019,6 +5353,30 @@ var init_builder = __esm({
|
|
|
5019
5353
|
this.textOnlyHandler = handler;
|
|
5020
5354
|
return this;
|
|
5021
5355
|
}
|
|
5356
|
+
/**
|
|
5357
|
+
* Set the handler for text content that appears alongside gadget calls.
|
|
5358
|
+
*
|
|
5359
|
+
* When set, text accompanying gadget responses will be wrapped as a
|
|
5360
|
+
* synthetic gadget call before the actual gadget results in the
|
|
5361
|
+
* conversation history.
|
|
5362
|
+
*
|
|
5363
|
+
* @param handler - Configuration for wrapping text
|
|
5364
|
+
* @returns This builder for chaining
|
|
5365
|
+
*
|
|
5366
|
+
* @example
|
|
5367
|
+
* ```typescript
|
|
5368
|
+
* // Wrap text as TellUser gadget
|
|
5369
|
+
* .withTextWithGadgetsHandler({
|
|
5370
|
+
* gadgetName: "TellUser",
|
|
5371
|
+
* parameterMapping: (text) => ({ message: text, done: false, type: "info" }),
|
|
5372
|
+
* resultMapping: (text) => `ℹ️ ${text}`,
|
|
5373
|
+
* })
|
|
5374
|
+
* ```
|
|
5375
|
+
*/
|
|
5376
|
+
withTextWithGadgetsHandler(handler) {
|
|
5377
|
+
this.textWithGadgetsHandler = handler;
|
|
5378
|
+
return this;
|
|
5379
|
+
}
|
|
5022
5380
|
/**
|
|
5023
5381
|
* Set whether to stop gadget execution on first error.
|
|
5024
5382
|
*
|
|
@@ -5133,6 +5491,69 @@ var init_builder = __esm({
|
|
|
5133
5491
|
this.gadgetOutputLimitPercent = percent;
|
|
5134
5492
|
return this;
|
|
5135
5493
|
}
|
|
5494
|
+
/**
|
|
5495
|
+
* Add a synthetic gadget call to the conversation history.
|
|
5496
|
+
*
|
|
5497
|
+
* This is useful for in-context learning - showing the LLM what "past self"
|
|
5498
|
+
* did correctly so it mimics the pattern. The call is formatted with proper
|
|
5499
|
+
* markers and parameter format.
|
|
5500
|
+
*
|
|
5501
|
+
* @param gadgetName - Name of the gadget
|
|
5502
|
+
* @param parameters - Parameters passed to the gadget
|
|
5503
|
+
* @param result - Result returned by the gadget
|
|
5504
|
+
* @returns This builder for chaining
|
|
5505
|
+
*
|
|
5506
|
+
* @example
|
|
5507
|
+
* ```typescript
|
|
5508
|
+
* .withSyntheticGadgetCall(
|
|
5509
|
+
* 'TellUser',
|
|
5510
|
+
* {
|
|
5511
|
+
* message: '👋 Hello!\n\nHere\'s what I can do:\n- Analyze code\n- Run commands',
|
|
5512
|
+
* done: false,
|
|
5513
|
+
* type: 'info'
|
|
5514
|
+
* },
|
|
5515
|
+
* 'ℹ️ 👋 Hello!\n\nHere\'s what I can do:\n- Analyze code\n- Run commands'
|
|
5516
|
+
* )
|
|
5517
|
+
* ```
|
|
5518
|
+
*/
|
|
5519
|
+
withSyntheticGadgetCall(gadgetName, parameters, result) {
|
|
5520
|
+
const startPrefix = this.gadgetStartPrefix ?? GADGET_START_PREFIX;
|
|
5521
|
+
const endPrefix = this.gadgetEndPrefix ?? GADGET_END_PREFIX;
|
|
5522
|
+
const format = this.parameterFormat ?? "yaml";
|
|
5523
|
+
const paramStr = this.formatSyntheticParameters(parameters, format);
|
|
5524
|
+
this.initialMessages.push({
|
|
5525
|
+
role: "assistant",
|
|
5526
|
+
content: `${startPrefix}${gadgetName}
|
|
5527
|
+
${paramStr}
|
|
5528
|
+
${endPrefix}`
|
|
5529
|
+
});
|
|
5530
|
+
this.initialMessages.push({
|
|
5531
|
+
role: "user",
|
|
5532
|
+
content: `Result: ${result}`
|
|
5533
|
+
});
|
|
5534
|
+
return this;
|
|
5535
|
+
}
|
|
5536
|
+
/**
|
|
5537
|
+
* Format parameters for synthetic gadget calls.
|
|
5538
|
+
* Uses heredoc for multiline string values.
|
|
5539
|
+
*/
|
|
5540
|
+
formatSyntheticParameters(parameters, format) {
|
|
5541
|
+
if (format === "json" || format === "auto") {
|
|
5542
|
+
return JSON.stringify(parameters);
|
|
5543
|
+
}
|
|
5544
|
+
return Object.entries(parameters).map(([key, value]) => {
|
|
5545
|
+
if (typeof value === "string" && value.includes("\n")) {
|
|
5546
|
+
const separator = format === "yaml" ? ":" : " =";
|
|
5547
|
+
return `${key}${separator} <<<EOF
|
|
5548
|
+
${value}
|
|
5549
|
+
EOF`;
|
|
5550
|
+
}
|
|
5551
|
+
if (format === "yaml") {
|
|
5552
|
+
return typeof value === "string" ? `${key}: ${value}` : `${key}: ${JSON.stringify(value)}`;
|
|
5553
|
+
}
|
|
5554
|
+
return `${key} = ${JSON.stringify(value)}`;
|
|
5555
|
+
}).join("\n");
|
|
5556
|
+
}
|
|
5136
5557
|
/**
|
|
5137
5558
|
* Build and create the agent with the given user prompt.
|
|
5138
5559
|
* Returns the Agent instance ready to run.
|
|
@@ -5175,6 +5596,7 @@ var init_builder = __esm({
|
|
|
5175
5596
|
gadgetStartPrefix: this.gadgetStartPrefix,
|
|
5176
5597
|
gadgetEndPrefix: this.gadgetEndPrefix,
|
|
5177
5598
|
textOnlyHandler: this.textOnlyHandler,
|
|
5599
|
+
textWithGadgetsHandler: this.textWithGadgetsHandler,
|
|
5178
5600
|
stopOnGadgetError: this.stopOnGadgetError,
|
|
5179
5601
|
shouldContinueAfterError: this.shouldContinueAfterError,
|
|
5180
5602
|
defaultGadgetTimeoutMs: this.defaultGadgetTimeoutMs,
|
|
@@ -5276,6 +5698,7 @@ var init_builder = __esm({
|
|
|
5276
5698
|
gadgetStartPrefix: this.gadgetStartPrefix,
|
|
5277
5699
|
gadgetEndPrefix: this.gadgetEndPrefix,
|
|
5278
5700
|
textOnlyHandler: this.textOnlyHandler,
|
|
5701
|
+
textWithGadgetsHandler: this.textWithGadgetsHandler,
|
|
5279
5702
|
stopOnGadgetError: this.stopOnGadgetError,
|
|
5280
5703
|
shouldContinueAfterError: this.shouldContinueAfterError,
|
|
5281
5704
|
defaultGadgetTimeoutMs: this.defaultGadgetTimeoutMs,
|