llmist 0.6.2 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-TSR25DAY.js → chunk-4IMGADVY.js} +2 -2
- package/dist/{chunk-DVK6ZQOV.js → chunk-62M4TDAK.js} +501 -78
- package/dist/chunk-62M4TDAK.js.map +1 -0
- package/dist/cli.cjs +946 -197
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.js +436 -110
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +511 -88
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +6 -9
- package/dist/index.d.ts +6 -9
- package/dist/index.js +2 -2
- package/dist/{mock-stream-B5R6XPif.d.cts → mock-stream-CjmvWDc3.d.cts} +91 -20
- package/dist/{mock-stream-B5R6XPif.d.ts → mock-stream-CjmvWDc3.d.ts} +91 -20
- package/dist/testing/index.cjs +497 -74
- package/dist/testing/index.cjs.map +1 -1
- package/dist/testing/index.d.cts +2 -2
- package/dist/testing/index.d.ts +2 -2
- package/dist/testing/index.js +2 -2
- package/package.json +2 -1
- package/dist/chunk-DVK6ZQOV.js.map +0 -1
- /package/dist/{chunk-TSR25DAY.js.map → chunk-4IMGADVY.js.map} +0 -0
package/dist/testing/index.cjs
CHANGED
|
@@ -864,7 +864,7 @@ function findSafeDelimiter(content) {
|
|
|
864
864
|
}
|
|
865
865
|
let counter = 1;
|
|
866
866
|
while (counter < 1e3) {
|
|
867
|
-
const delimiter = `
|
|
867
|
+
const delimiter = `__GADGET_PARAM_${counter}__`;
|
|
868
868
|
const regex = new RegExp(`^${delimiter}\\s*$`);
|
|
869
869
|
const isUsed = lines.some((line) => regex.test(line));
|
|
870
870
|
if (!isUsed) {
|
|
@@ -922,6 +922,10 @@ function formatParamsAsYaml(params) {
|
|
|
922
922
|
}
|
|
923
923
|
return lines.join("\n");
|
|
924
924
|
}
|
|
925
|
+
function formatTomlInlineTable(obj) {
|
|
926
|
+
const entries = Object.entries(obj).map(([k, v]) => `${k} = ${formatTomlValue(v)}`);
|
|
927
|
+
return `{ ${entries.join(", ")} }`;
|
|
928
|
+
}
|
|
925
929
|
function formatTomlValue(value) {
|
|
926
930
|
if (typeof value === "string") {
|
|
927
931
|
if (value.includes("\n")) {
|
|
@@ -939,10 +943,17 @@ ${delimiter}`;
|
|
|
939
943
|
return '""';
|
|
940
944
|
}
|
|
941
945
|
if (Array.isArray(value)) {
|
|
942
|
-
|
|
946
|
+
if (value.length === 0) return "[]";
|
|
947
|
+
const items = value.map((item) => {
|
|
948
|
+
if (typeof item === "object" && item !== null && !Array.isArray(item)) {
|
|
949
|
+
return formatTomlInlineTable(item);
|
|
950
|
+
}
|
|
951
|
+
return formatTomlValue(item);
|
|
952
|
+
});
|
|
953
|
+
return `[${items.join(", ")}]`;
|
|
943
954
|
}
|
|
944
955
|
if (typeof value === "object") {
|
|
945
|
-
return
|
|
956
|
+
return formatTomlInlineTable(value);
|
|
946
957
|
}
|
|
947
958
|
return JSON.stringify(value);
|
|
948
959
|
}
|
|
@@ -960,7 +971,16 @@ var init_gadget = __esm({
|
|
|
960
971
|
yaml = __toESM(require("js-yaml"), 1);
|
|
961
972
|
init_schema_to_json();
|
|
962
973
|
init_schema_validator();
|
|
963
|
-
HEREDOC_DELIMITERS = [
|
|
974
|
+
HEREDOC_DELIMITERS = [
|
|
975
|
+
"__GADGET_PARAM_EOF__",
|
|
976
|
+
"__GADGET_PARAM_END__",
|
|
977
|
+
"__GADGET_PARAM_DOC__",
|
|
978
|
+
"__GADGET_PARAM_CONTENT__",
|
|
979
|
+
"__GADGET_PARAM_TEXT__",
|
|
980
|
+
"__GADGET_PARAM_HEREDOC__",
|
|
981
|
+
"__GADGET_PARAM_DATA__",
|
|
982
|
+
"__GADGET_PARAM_BLOCK__"
|
|
983
|
+
];
|
|
964
984
|
BaseGadget = class {
|
|
965
985
|
/**
|
|
966
986
|
* The name of the gadget. Used for identification when LLM calls it.
|
|
@@ -1958,6 +1978,14 @@ function preprocessTomlHeredoc(tomlStr) {
|
|
|
1958
1978
|
}
|
|
1959
1979
|
return result.join("\n");
|
|
1960
1980
|
}
|
|
1981
|
+
function stripMarkdownFences(content) {
|
|
1982
|
+
let cleaned = content.trim();
|
|
1983
|
+
const openingFence = /^```(?:toml|yaml|json)?\s*\n/i;
|
|
1984
|
+
const closingFence = /\n?```\s*$/;
|
|
1985
|
+
cleaned = cleaned.replace(openingFence, "");
|
|
1986
|
+
cleaned = cleaned.replace(closingFence, "");
|
|
1987
|
+
return cleaned.trim();
|
|
1988
|
+
}
|
|
1961
1989
|
var yaml2, import_js_toml, globalInvocationCounter, StreamParser;
|
|
1962
1990
|
var init_parser = __esm({
|
|
1963
1991
|
"src/gadgets/parser.ts"() {
|
|
@@ -2013,35 +2041,36 @@ var init_parser = __esm({
|
|
|
2013
2041
|
* Parse parameter string according to configured format
|
|
2014
2042
|
*/
|
|
2015
2043
|
parseParameters(raw) {
|
|
2044
|
+
const cleaned = stripMarkdownFences(raw);
|
|
2016
2045
|
if (this.parameterFormat === "json") {
|
|
2017
2046
|
try {
|
|
2018
|
-
return { parameters: JSON.parse(
|
|
2047
|
+
return { parameters: JSON.parse(cleaned) };
|
|
2019
2048
|
} catch (error) {
|
|
2020
2049
|
return { parseError: this.truncateParseError(error, "JSON") };
|
|
2021
2050
|
}
|
|
2022
2051
|
}
|
|
2023
2052
|
if (this.parameterFormat === "yaml") {
|
|
2024
2053
|
try {
|
|
2025
|
-
return { parameters: yaml2.load(preprocessYaml(
|
|
2054
|
+
return { parameters: yaml2.load(preprocessYaml(cleaned)) };
|
|
2026
2055
|
} catch (error) {
|
|
2027
2056
|
return { parseError: this.truncateParseError(error, "YAML") };
|
|
2028
2057
|
}
|
|
2029
2058
|
}
|
|
2030
2059
|
if (this.parameterFormat === "toml") {
|
|
2031
2060
|
try {
|
|
2032
|
-
return { parameters: (0, import_js_toml.load)(preprocessTomlHeredoc(
|
|
2061
|
+
return { parameters: (0, import_js_toml.load)(preprocessTomlHeredoc(cleaned)) };
|
|
2033
2062
|
} catch (error) {
|
|
2034
2063
|
return { parseError: this.truncateParseError(error, "TOML") };
|
|
2035
2064
|
}
|
|
2036
2065
|
}
|
|
2037
2066
|
try {
|
|
2038
|
-
return { parameters: JSON.parse(
|
|
2067
|
+
return { parameters: JSON.parse(cleaned) };
|
|
2039
2068
|
} catch {
|
|
2040
2069
|
try {
|
|
2041
|
-
return { parameters: (0, import_js_toml.load)(preprocessTomlHeredoc(
|
|
2070
|
+
return { parameters: (0, import_js_toml.load)(preprocessTomlHeredoc(cleaned)) };
|
|
2042
2071
|
} catch {
|
|
2043
2072
|
try {
|
|
2044
|
-
return { parameters: yaml2.load(preprocessYaml(
|
|
2073
|
+
return { parameters: yaml2.load(preprocessYaml(cleaned)) };
|
|
2045
2074
|
} catch (error) {
|
|
2046
2075
|
return { parseError: this.truncateParseError(error, "auto") };
|
|
2047
2076
|
}
|
|
@@ -2587,6 +2616,7 @@ var init_agent = __esm({
|
|
|
2587
2616
|
gadgetEndPrefix;
|
|
2588
2617
|
onHumanInputRequired;
|
|
2589
2618
|
textOnlyHandler;
|
|
2619
|
+
textWithGadgetsHandler;
|
|
2590
2620
|
stopOnGadgetError;
|
|
2591
2621
|
shouldContinueAfterError;
|
|
2592
2622
|
defaultGadgetTimeoutMs;
|
|
@@ -2617,6 +2647,7 @@ var init_agent = __esm({
|
|
|
2617
2647
|
this.gadgetEndPrefix = options.gadgetEndPrefix;
|
|
2618
2648
|
this.onHumanInputRequired = options.onHumanInputRequired;
|
|
2619
2649
|
this.textOnlyHandler = options.textOnlyHandler ?? "terminate";
|
|
2650
|
+
this.textWithGadgetsHandler = options.textWithGadgetsHandler;
|
|
2620
2651
|
this.stopOnGadgetError = options.stopOnGadgetError ?? true;
|
|
2621
2652
|
this.shouldContinueAfterError = options.shouldContinueAfterError;
|
|
2622
2653
|
this.defaultGadgetTimeoutMs = options.defaultGadgetTimeoutMs;
|
|
@@ -2804,6 +2835,17 @@ var init_agent = __esm({
|
|
|
2804
2835
|
}
|
|
2805
2836
|
}
|
|
2806
2837
|
if (result.didExecuteGadgets) {
|
|
2838
|
+
if (this.textWithGadgetsHandler) {
|
|
2839
|
+
const textContent = result.outputs.filter((output) => output.type === "text").map((output) => output.content).join("");
|
|
2840
|
+
if (textContent.trim()) {
|
|
2841
|
+
const { gadgetName, parameterMapping, resultMapping } = this.textWithGadgetsHandler;
|
|
2842
|
+
this.conversation.addGadgetCall(
|
|
2843
|
+
gadgetName,
|
|
2844
|
+
parameterMapping(textContent),
|
|
2845
|
+
resultMapping ? resultMapping(textContent) : textContent
|
|
2846
|
+
);
|
|
2847
|
+
}
|
|
2848
|
+
}
|
|
2807
2849
|
for (const output of result.outputs) {
|
|
2808
2850
|
if (output.type === "gadget_result") {
|
|
2809
2851
|
const gadgetResult = output.result;
|
|
@@ -2815,7 +2857,13 @@ var init_agent = __esm({
|
|
|
2815
2857
|
}
|
|
2816
2858
|
}
|
|
2817
2859
|
} else {
|
|
2818
|
-
|
|
2860
|
+
if (finalMessage.trim()) {
|
|
2861
|
+
this.conversation.addGadgetCall(
|
|
2862
|
+
"TellUser",
|
|
2863
|
+
{ message: finalMessage, done: false, type: "info" },
|
|
2864
|
+
`\u2139\uFE0F ${finalMessage}`
|
|
2865
|
+
);
|
|
2866
|
+
}
|
|
2819
2867
|
const shouldBreak = await this.handleTextOnlyResponse(finalMessage);
|
|
2820
2868
|
if (shouldBreak) {
|
|
2821
2869
|
break;
|
|
@@ -3000,6 +3048,7 @@ var AgentBuilder;
|
|
|
3000
3048
|
var init_builder = __esm({
|
|
3001
3049
|
"src/agent/builder.ts"() {
|
|
3002
3050
|
"use strict";
|
|
3051
|
+
init_constants();
|
|
3003
3052
|
init_model_shortcuts();
|
|
3004
3053
|
init_registry();
|
|
3005
3054
|
init_agent();
|
|
@@ -3021,6 +3070,7 @@ var init_builder = __esm({
|
|
|
3021
3070
|
gadgetStartPrefix;
|
|
3022
3071
|
gadgetEndPrefix;
|
|
3023
3072
|
textOnlyHandler;
|
|
3073
|
+
textWithGadgetsHandler;
|
|
3024
3074
|
stopOnGadgetError;
|
|
3025
3075
|
shouldContinueAfterError;
|
|
3026
3076
|
defaultGadgetTimeoutMs;
|
|
@@ -3283,6 +3333,30 @@ var init_builder = __esm({
|
|
|
3283
3333
|
this.textOnlyHandler = handler;
|
|
3284
3334
|
return this;
|
|
3285
3335
|
}
|
|
3336
|
+
/**
|
|
3337
|
+
* Set the handler for text content that appears alongside gadget calls.
|
|
3338
|
+
*
|
|
3339
|
+
* When set, text accompanying gadget responses will be wrapped as a
|
|
3340
|
+
* synthetic gadget call before the actual gadget results in the
|
|
3341
|
+
* conversation history.
|
|
3342
|
+
*
|
|
3343
|
+
* @param handler - Configuration for wrapping text
|
|
3344
|
+
* @returns This builder for chaining
|
|
3345
|
+
*
|
|
3346
|
+
* @example
|
|
3347
|
+
* ```typescript
|
|
3348
|
+
* // Wrap text as TellUser gadget
|
|
3349
|
+
* .withTextWithGadgetsHandler({
|
|
3350
|
+
* gadgetName: "TellUser",
|
|
3351
|
+
* parameterMapping: (text) => ({ message: text, done: false, type: "info" }),
|
|
3352
|
+
* resultMapping: (text) => `ℹ️ ${text}`,
|
|
3353
|
+
* })
|
|
3354
|
+
* ```
|
|
3355
|
+
*/
|
|
3356
|
+
withTextWithGadgetsHandler(handler) {
|
|
3357
|
+
this.textWithGadgetsHandler = handler;
|
|
3358
|
+
return this;
|
|
3359
|
+
}
|
|
3286
3360
|
/**
|
|
3287
3361
|
* Set whether to stop gadget execution on first error.
|
|
3288
3362
|
*
|
|
@@ -3397,6 +3471,69 @@ var init_builder = __esm({
|
|
|
3397
3471
|
this.gadgetOutputLimitPercent = percent;
|
|
3398
3472
|
return this;
|
|
3399
3473
|
}
|
|
3474
|
+
/**
|
|
3475
|
+
* Add a synthetic gadget call to the conversation history.
|
|
3476
|
+
*
|
|
3477
|
+
* This is useful for in-context learning - showing the LLM what "past self"
|
|
3478
|
+
* did correctly so it mimics the pattern. The call is formatted with proper
|
|
3479
|
+
* markers and parameter format.
|
|
3480
|
+
*
|
|
3481
|
+
* @param gadgetName - Name of the gadget
|
|
3482
|
+
* @param parameters - Parameters passed to the gadget
|
|
3483
|
+
* @param result - Result returned by the gadget
|
|
3484
|
+
* @returns This builder for chaining
|
|
3485
|
+
*
|
|
3486
|
+
* @example
|
|
3487
|
+
* ```typescript
|
|
3488
|
+
* .withSyntheticGadgetCall(
|
|
3489
|
+
* 'TellUser',
|
|
3490
|
+
* {
|
|
3491
|
+
* message: '👋 Hello!\n\nHere\'s what I can do:\n- Analyze code\n- Run commands',
|
|
3492
|
+
* done: false,
|
|
3493
|
+
* type: 'info'
|
|
3494
|
+
* },
|
|
3495
|
+
* 'ℹ️ 👋 Hello!\n\nHere\'s what I can do:\n- Analyze code\n- Run commands'
|
|
3496
|
+
* )
|
|
3497
|
+
* ```
|
|
3498
|
+
*/
|
|
3499
|
+
withSyntheticGadgetCall(gadgetName, parameters, result) {
|
|
3500
|
+
const startPrefix = this.gadgetStartPrefix ?? GADGET_START_PREFIX;
|
|
3501
|
+
const endPrefix = this.gadgetEndPrefix ?? GADGET_END_PREFIX;
|
|
3502
|
+
const format = this.parameterFormat ?? "yaml";
|
|
3503
|
+
const paramStr = this.formatSyntheticParameters(parameters, format);
|
|
3504
|
+
this.initialMessages.push({
|
|
3505
|
+
role: "assistant",
|
|
3506
|
+
content: `${startPrefix}${gadgetName}
|
|
3507
|
+
${paramStr}
|
|
3508
|
+
${endPrefix}`
|
|
3509
|
+
});
|
|
3510
|
+
this.initialMessages.push({
|
|
3511
|
+
role: "user",
|
|
3512
|
+
content: `Result: ${result}`
|
|
3513
|
+
});
|
|
3514
|
+
return this;
|
|
3515
|
+
}
|
|
3516
|
+
/**
|
|
3517
|
+
* Format parameters for synthetic gadget calls.
|
|
3518
|
+
* Uses heredoc for multiline string values.
|
|
3519
|
+
*/
|
|
3520
|
+
formatSyntheticParameters(parameters, format) {
|
|
3521
|
+
if (format === "json" || format === "auto") {
|
|
3522
|
+
return JSON.stringify(parameters);
|
|
3523
|
+
}
|
|
3524
|
+
return Object.entries(parameters).map(([key, value]) => {
|
|
3525
|
+
if (typeof value === "string" && value.includes("\n")) {
|
|
3526
|
+
const separator = format === "yaml" ? ":" : " =";
|
|
3527
|
+
return `${key}${separator} <<<EOF
|
|
3528
|
+
${value}
|
|
3529
|
+
EOF`;
|
|
3530
|
+
}
|
|
3531
|
+
if (format === "yaml") {
|
|
3532
|
+
return typeof value === "string" ? `${key}: ${value}` : `${key}: ${JSON.stringify(value)}`;
|
|
3533
|
+
}
|
|
3534
|
+
return `${key} = ${JSON.stringify(value)}`;
|
|
3535
|
+
}).join("\n");
|
|
3536
|
+
}
|
|
3400
3537
|
/**
|
|
3401
3538
|
* Build and create the agent with the given user prompt.
|
|
3402
3539
|
* Returns the Agent instance ready to run.
|
|
@@ -3439,6 +3576,7 @@ var init_builder = __esm({
|
|
|
3439
3576
|
gadgetStartPrefix: this.gadgetStartPrefix,
|
|
3440
3577
|
gadgetEndPrefix: this.gadgetEndPrefix,
|
|
3441
3578
|
textOnlyHandler: this.textOnlyHandler,
|
|
3579
|
+
textWithGadgetsHandler: this.textWithGadgetsHandler,
|
|
3442
3580
|
stopOnGadgetError: this.stopOnGadgetError,
|
|
3443
3581
|
shouldContinueAfterError: this.shouldContinueAfterError,
|
|
3444
3582
|
defaultGadgetTimeoutMs: this.defaultGadgetTimeoutMs,
|
|
@@ -3540,6 +3678,7 @@ var init_builder = __esm({
|
|
|
3540
3678
|
gadgetStartPrefix: this.gadgetStartPrefix,
|
|
3541
3679
|
gadgetEndPrefix: this.gadgetEndPrefix,
|
|
3542
3680
|
textOnlyHandler: this.textOnlyHandler,
|
|
3681
|
+
textWithGadgetsHandler: this.textWithGadgetsHandler,
|
|
3543
3682
|
stopOnGadgetError: this.stopOnGadgetError,
|
|
3544
3683
|
shouldContinueAfterError: this.shouldContinueAfterError,
|
|
3545
3684
|
defaultGadgetTimeoutMs: this.defaultGadgetTimeoutMs,
|
|
@@ -3567,7 +3706,8 @@ var init_anthropic_models = __esm({
|
|
|
3567
3706
|
pricing: {
|
|
3568
3707
|
input: 3,
|
|
3569
3708
|
output: 15,
|
|
3570
|
-
cachedInput: 0.3
|
|
3709
|
+
cachedInput: 0.3,
|
|
3710
|
+
cacheWriteInput: 3.75
|
|
3571
3711
|
},
|
|
3572
3712
|
knowledgeCutoff: "2025-01",
|
|
3573
3713
|
features: {
|
|
@@ -3591,7 +3731,8 @@ var init_anthropic_models = __esm({
|
|
|
3591
3731
|
pricing: {
|
|
3592
3732
|
input: 1,
|
|
3593
3733
|
output: 5,
|
|
3594
|
-
cachedInput: 0.1
|
|
3734
|
+
cachedInput: 0.1,
|
|
3735
|
+
cacheWriteInput: 1.25
|
|
3595
3736
|
},
|
|
3596
3737
|
knowledgeCutoff: "2025-02",
|
|
3597
3738
|
features: {
|
|
@@ -3615,7 +3756,8 @@ var init_anthropic_models = __esm({
|
|
|
3615
3756
|
pricing: {
|
|
3616
3757
|
input: 3,
|
|
3617
3758
|
output: 15,
|
|
3618
|
-
cachedInput: 0.3
|
|
3759
|
+
cachedInput: 0.3,
|
|
3760
|
+
cacheWriteInput: 3.75
|
|
3619
3761
|
},
|
|
3620
3762
|
knowledgeCutoff: "2025-03",
|
|
3621
3763
|
features: {
|
|
@@ -3639,7 +3781,8 @@ var init_anthropic_models = __esm({
|
|
|
3639
3781
|
pricing: {
|
|
3640
3782
|
input: 3,
|
|
3641
3783
|
output: 15,
|
|
3642
|
-
cachedInput: 0.3
|
|
3784
|
+
cachedInput: 0.3,
|
|
3785
|
+
cacheWriteInput: 3.75
|
|
3643
3786
|
},
|
|
3644
3787
|
knowledgeCutoff: "2024-11",
|
|
3645
3788
|
features: {
|
|
@@ -3663,7 +3806,8 @@ var init_anthropic_models = __esm({
|
|
|
3663
3806
|
pricing: {
|
|
3664
3807
|
input: 15,
|
|
3665
3808
|
output: 75,
|
|
3666
|
-
cachedInput: 1.5
|
|
3809
|
+
cachedInput: 1.5,
|
|
3810
|
+
cacheWriteInput: 18.75
|
|
3667
3811
|
},
|
|
3668
3812
|
knowledgeCutoff: "2025-01",
|
|
3669
3813
|
features: {
|
|
@@ -3687,7 +3831,8 @@ var init_anthropic_models = __esm({
|
|
|
3687
3831
|
pricing: {
|
|
3688
3832
|
input: 15,
|
|
3689
3833
|
output: 75,
|
|
3690
|
-
cachedInput: 1.5
|
|
3834
|
+
cachedInput: 1.5,
|
|
3835
|
+
cacheWriteInput: 18.75
|
|
3691
3836
|
},
|
|
3692
3837
|
knowledgeCutoff: "2025-03",
|
|
3693
3838
|
features: {
|
|
@@ -3710,7 +3855,8 @@ var init_anthropic_models = __esm({
|
|
|
3710
3855
|
pricing: {
|
|
3711
3856
|
input: 0.8,
|
|
3712
3857
|
output: 4,
|
|
3713
|
-
cachedInput: 0.08
|
|
3858
|
+
cachedInput: 0.08,
|
|
3859
|
+
cacheWriteInput: 1
|
|
3714
3860
|
},
|
|
3715
3861
|
knowledgeCutoff: "2024-07",
|
|
3716
3862
|
features: {
|
|
@@ -3733,7 +3879,8 @@ var init_anthropic_models = __esm({
|
|
|
3733
3879
|
pricing: {
|
|
3734
3880
|
input: 0.25,
|
|
3735
3881
|
output: 1.25,
|
|
3736
|
-
cachedInput: 0.025
|
|
3882
|
+
cachedInput: 0.025,
|
|
3883
|
+
cacheWriteInput: 0.3125
|
|
3737
3884
|
},
|
|
3738
3885
|
knowledgeCutoff: "2023-08",
|
|
3739
3886
|
features: {
|
|
@@ -3757,7 +3904,8 @@ var init_anthropic_models = __esm({
|
|
|
3757
3904
|
pricing: {
|
|
3758
3905
|
input: 1,
|
|
3759
3906
|
output: 5,
|
|
3760
|
-
cachedInput: 0.1
|
|
3907
|
+
cachedInput: 0.1,
|
|
3908
|
+
cacheWriteInput: 1.25
|
|
3761
3909
|
},
|
|
3762
3910
|
knowledgeCutoff: "2025-02",
|
|
3763
3911
|
features: {
|
|
@@ -3781,7 +3929,8 @@ var init_anthropic_models = __esm({
|
|
|
3781
3929
|
pricing: {
|
|
3782
3930
|
input: 3,
|
|
3783
3931
|
output: 15,
|
|
3784
|
-
cachedInput: 0.3
|
|
3932
|
+
cachedInput: 0.3,
|
|
3933
|
+
cacheWriteInput: 3.75
|
|
3785
3934
|
},
|
|
3786
3935
|
knowledgeCutoff: "2025-01",
|
|
3787
3936
|
features: {
|
|
@@ -3805,7 +3954,8 @@ var init_anthropic_models = __esm({
|
|
|
3805
3954
|
pricing: {
|
|
3806
3955
|
input: 5,
|
|
3807
3956
|
output: 25,
|
|
3808
|
-
cachedInput: 0.5
|
|
3957
|
+
cachedInput: 0.5,
|
|
3958
|
+
cacheWriteInput: 6.25
|
|
3809
3959
|
},
|
|
3810
3960
|
knowledgeCutoff: "2025-03",
|
|
3811
3961
|
features: {
|
|
@@ -3920,15 +4070,27 @@ var init_anthropic = __esm({
|
|
|
3920
4070
|
}
|
|
3921
4071
|
buildRequestPayload(options, descriptor, spec, messages) {
|
|
3922
4072
|
const systemMessages = messages.filter((message) => message.role === "system");
|
|
3923
|
-
const system = systemMessages.length > 0 ? systemMessages.map((m) =>
|
|
3924
|
-
|
|
4073
|
+
const system = systemMessages.length > 0 ? systemMessages.map((m, index) => ({
|
|
4074
|
+
type: "text",
|
|
4075
|
+
text: m.content,
|
|
4076
|
+
// Add cache_control to the LAST system message block
|
|
4077
|
+
...index === systemMessages.length - 1 ? { cache_control: { type: "ephemeral" } } : {}
|
|
4078
|
+
})) : void 0;
|
|
4079
|
+
const nonSystemMessages = messages.filter(
|
|
3925
4080
|
(message) => message.role !== "system"
|
|
3926
|
-
)
|
|
4081
|
+
);
|
|
4082
|
+
const lastUserIndex = nonSystemMessages.reduce(
|
|
4083
|
+
(lastIdx, msg, idx) => msg.role === "user" ? idx : lastIdx,
|
|
4084
|
+
-1
|
|
4085
|
+
);
|
|
4086
|
+
const conversation = nonSystemMessages.map((message, index) => ({
|
|
3927
4087
|
role: message.role,
|
|
3928
4088
|
content: [
|
|
3929
4089
|
{
|
|
3930
4090
|
type: "text",
|
|
3931
|
-
text: message.content
|
|
4091
|
+
text: message.content,
|
|
4092
|
+
// Add cache_control to the LAST user message
|
|
4093
|
+
...message.role === "user" && index === lastUserIndex ? { cache_control: { type: "ephemeral" } } : {}
|
|
3932
4094
|
}
|
|
3933
4095
|
]
|
|
3934
4096
|
}));
|
|
@@ -3954,15 +4116,22 @@ var init_anthropic = __esm({
|
|
|
3954
4116
|
async *wrapStream(iterable) {
|
|
3955
4117
|
const stream2 = iterable;
|
|
3956
4118
|
let inputTokens = 0;
|
|
4119
|
+
let cachedInputTokens = 0;
|
|
4120
|
+
let cacheCreationInputTokens = 0;
|
|
3957
4121
|
for await (const event of stream2) {
|
|
3958
4122
|
if (event.type === "message_start") {
|
|
3959
|
-
|
|
4123
|
+
const usage = event.message.usage;
|
|
4124
|
+
cachedInputTokens = usage.cache_read_input_tokens ?? 0;
|
|
4125
|
+
cacheCreationInputTokens = usage.cache_creation_input_tokens ?? 0;
|
|
4126
|
+
inputTokens = usage.input_tokens + cachedInputTokens + cacheCreationInputTokens;
|
|
3960
4127
|
yield {
|
|
3961
4128
|
text: "",
|
|
3962
4129
|
usage: {
|
|
3963
4130
|
inputTokens,
|
|
3964
4131
|
outputTokens: 0,
|
|
3965
|
-
totalTokens: inputTokens
|
|
4132
|
+
totalTokens: inputTokens,
|
|
4133
|
+
cachedInputTokens,
|
|
4134
|
+
cacheCreationInputTokens
|
|
3966
4135
|
},
|
|
3967
4136
|
rawEvent: event
|
|
3968
4137
|
};
|
|
@@ -3976,7 +4145,9 @@ var init_anthropic = __esm({
|
|
|
3976
4145
|
const usage = event.usage ? {
|
|
3977
4146
|
inputTokens,
|
|
3978
4147
|
outputTokens: event.usage.output_tokens,
|
|
3979
|
-
totalTokens: inputTokens + event.usage.output_tokens
|
|
4148
|
+
totalTokens: inputTokens + event.usage.output_tokens,
|
|
4149
|
+
cachedInputTokens,
|
|
4150
|
+
cacheCreationInputTokens
|
|
3980
4151
|
} : void 0;
|
|
3981
4152
|
if (event.delta.stop_reason || usage) {
|
|
3982
4153
|
yield {
|
|
@@ -4057,6 +4228,7 @@ var init_gemini_models = __esm({
|
|
|
4057
4228
|
"src/providers/gemini-models.ts"() {
|
|
4058
4229
|
"use strict";
|
|
4059
4230
|
GEMINI_MODELS = [
|
|
4231
|
+
// Gemini 3 Pro (Preview)
|
|
4060
4232
|
{
|
|
4061
4233
|
provider: "gemini",
|
|
4062
4234
|
modelId: "gemini-3-pro-preview",
|
|
@@ -4065,8 +4237,11 @@ var init_gemini_models = __esm({
|
|
|
4065
4237
|
maxOutputTokens: 65536,
|
|
4066
4238
|
pricing: {
|
|
4067
4239
|
input: 2,
|
|
4240
|
+
// $2.00 for prompts <= 200k, $4.00 for > 200k (using lower tier)
|
|
4068
4241
|
output: 12,
|
|
4242
|
+
// $12.00 for prompts <= 200k, $18.00 for > 200k
|
|
4069
4243
|
cachedInput: 0.2
|
|
4244
|
+
// $0.20 for prompts <= 200k
|
|
4070
4245
|
},
|
|
4071
4246
|
knowledgeCutoff: "2025-01",
|
|
4072
4247
|
features: {
|
|
@@ -4079,9 +4254,10 @@ var init_gemini_models = __esm({
|
|
|
4079
4254
|
metadata: {
|
|
4080
4255
|
family: "Gemini 3",
|
|
4081
4256
|
releaseDate: "2025-11-18",
|
|
4082
|
-
notes: "
|
|
4257
|
+
notes: "Best model for multimodal understanding, agentic and vibe-coding. Deep Think mode available."
|
|
4083
4258
|
}
|
|
4084
4259
|
},
|
|
4260
|
+
// Gemini 2.5 Pro
|
|
4085
4261
|
{
|
|
4086
4262
|
provider: "gemini",
|
|
4087
4263
|
modelId: "gemini-2.5-pro",
|
|
@@ -4090,8 +4266,11 @@ var init_gemini_models = __esm({
|
|
|
4090
4266
|
maxOutputTokens: 65536,
|
|
4091
4267
|
pricing: {
|
|
4092
4268
|
input: 1.25,
|
|
4269
|
+
// $1.25 for prompts <= 200k, $2.50 for > 200k
|
|
4093
4270
|
output: 10,
|
|
4271
|
+
// $10.00 for prompts <= 200k, $15.00 for > 200k
|
|
4094
4272
|
cachedInput: 0.125
|
|
4273
|
+
// $0.125 for prompts <= 200k
|
|
4095
4274
|
},
|
|
4096
4275
|
knowledgeCutoff: "2025-01",
|
|
4097
4276
|
features: {
|
|
@@ -4104,9 +4283,10 @@ var init_gemini_models = __esm({
|
|
|
4104
4283
|
metadata: {
|
|
4105
4284
|
family: "Gemini 2.5",
|
|
4106
4285
|
releaseDate: "2025-06",
|
|
4107
|
-
notes: "
|
|
4286
|
+
notes: "State-of-the-art multipurpose model. Excels at coding and complex reasoning."
|
|
4108
4287
|
}
|
|
4109
4288
|
},
|
|
4289
|
+
// Gemini 2.5 Flash
|
|
4110
4290
|
{
|
|
4111
4291
|
provider: "gemini",
|
|
4112
4292
|
modelId: "gemini-2.5-flash",
|
|
@@ -4115,8 +4295,10 @@ var init_gemini_models = __esm({
|
|
|
4115
4295
|
maxOutputTokens: 65536,
|
|
4116
4296
|
pricing: {
|
|
4117
4297
|
input: 0.3,
|
|
4298
|
+
// $0.30 for text/image/video, $1.00 for audio
|
|
4118
4299
|
output: 2.5,
|
|
4119
4300
|
cachedInput: 0.03
|
|
4301
|
+
// $0.03 for text/image/video
|
|
4120
4302
|
},
|
|
4121
4303
|
knowledgeCutoff: "2025-01",
|
|
4122
4304
|
features: {
|
|
@@ -4129,9 +4311,10 @@ var init_gemini_models = __esm({
|
|
|
4129
4311
|
metadata: {
|
|
4130
4312
|
family: "Gemini 2.5",
|
|
4131
4313
|
releaseDate: "2025-06",
|
|
4132
|
-
notes: "
|
|
4314
|
+
notes: "First hybrid reasoning model with 1M context and thinking budgets."
|
|
4133
4315
|
}
|
|
4134
4316
|
},
|
|
4317
|
+
// Gemini 2.5 Flash-Lite
|
|
4135
4318
|
{
|
|
4136
4319
|
provider: "gemini",
|
|
4137
4320
|
modelId: "gemini-2.5-flash-lite",
|
|
@@ -4140,8 +4323,10 @@ var init_gemini_models = __esm({
|
|
|
4140
4323
|
maxOutputTokens: 65536,
|
|
4141
4324
|
pricing: {
|
|
4142
4325
|
input: 0.1,
|
|
4326
|
+
// $0.10 for text/image/video, $0.30 for audio
|
|
4143
4327
|
output: 0.4,
|
|
4144
4328
|
cachedInput: 0.01
|
|
4329
|
+
// $0.01 for text/image/video
|
|
4145
4330
|
},
|
|
4146
4331
|
knowledgeCutoff: "2025-01",
|
|
4147
4332
|
features: {
|
|
@@ -4153,9 +4338,10 @@ var init_gemini_models = __esm({
|
|
|
4153
4338
|
metadata: {
|
|
4154
4339
|
family: "Gemini 2.5",
|
|
4155
4340
|
releaseDate: "2025-06",
|
|
4156
|
-
notes: "
|
|
4341
|
+
notes: "Smallest and most cost effective model, built for at scale usage."
|
|
4157
4342
|
}
|
|
4158
4343
|
},
|
|
4344
|
+
// Gemini 2.0 Flash
|
|
4159
4345
|
{
|
|
4160
4346
|
provider: "gemini",
|
|
4161
4347
|
modelId: "gemini-2.0-flash",
|
|
@@ -4164,8 +4350,10 @@ var init_gemini_models = __esm({
|
|
|
4164
4350
|
maxOutputTokens: 8192,
|
|
4165
4351
|
pricing: {
|
|
4166
4352
|
input: 0.1,
|
|
4353
|
+
// $0.10 for text/image/video, $0.70 for audio
|
|
4167
4354
|
output: 0.4,
|
|
4168
|
-
cachedInput: 0.
|
|
4355
|
+
cachedInput: 0.025
|
|
4356
|
+
// $0.025 for text/image/video
|
|
4169
4357
|
},
|
|
4170
4358
|
knowledgeCutoff: "2024-08",
|
|
4171
4359
|
features: {
|
|
@@ -4176,9 +4364,10 @@ var init_gemini_models = __esm({
|
|
|
4176
4364
|
},
|
|
4177
4365
|
metadata: {
|
|
4178
4366
|
family: "Gemini 2.0",
|
|
4179
|
-
notes: "
|
|
4367
|
+
notes: "Balanced multimodal model with 1M context, built for the era of Agents."
|
|
4180
4368
|
}
|
|
4181
4369
|
},
|
|
4370
|
+
// Gemini 2.0 Flash-Lite
|
|
4182
4371
|
{
|
|
4183
4372
|
provider: "gemini",
|
|
4184
4373
|
modelId: "gemini-2.0-flash-lite",
|
|
@@ -4187,8 +4376,8 @@ var init_gemini_models = __esm({
|
|
|
4187
4376
|
maxOutputTokens: 8192,
|
|
4188
4377
|
pricing: {
|
|
4189
4378
|
input: 0.075,
|
|
4190
|
-
output: 0.3
|
|
4191
|
-
|
|
4379
|
+
output: 0.3
|
|
4380
|
+
// No context caching available for 2.0-flash-lite
|
|
4192
4381
|
},
|
|
4193
4382
|
knowledgeCutoff: "2024-08",
|
|
4194
4383
|
features: {
|
|
@@ -4199,7 +4388,7 @@ var init_gemini_models = __esm({
|
|
|
4199
4388
|
},
|
|
4200
4389
|
metadata: {
|
|
4201
4390
|
family: "Gemini 2.0",
|
|
4202
|
-
notes: "
|
|
4391
|
+
notes: "Smallest and most cost effective 2.0 model for at scale usage."
|
|
4203
4392
|
}
|
|
4204
4393
|
}
|
|
4205
4394
|
];
|
|
@@ -4369,7 +4558,9 @@ var init_gemini = __esm({
|
|
|
4369
4558
|
return {
|
|
4370
4559
|
inputTokens: usageMetadata.promptTokenCount ?? 0,
|
|
4371
4560
|
outputTokens: usageMetadata.candidatesTokenCount ?? 0,
|
|
4372
|
-
totalTokens: usageMetadata.totalTokenCount ?? 0
|
|
4561
|
+
totalTokens: usageMetadata.totalTokenCount ?? 0,
|
|
4562
|
+
// Gemini returns cached token count in cachedContentTokenCount
|
|
4563
|
+
cachedInputTokens: usageMetadata.cachedContentTokenCount ?? 0
|
|
4373
4564
|
};
|
|
4374
4565
|
}
|
|
4375
4566
|
/**
|
|
@@ -4425,10 +4616,11 @@ var init_openai_models = __esm({
|
|
|
4425
4616
|
"src/providers/openai-models.ts"() {
|
|
4426
4617
|
"use strict";
|
|
4427
4618
|
OPENAI_MODELS = [
|
|
4619
|
+
// GPT-5 Family
|
|
4428
4620
|
{
|
|
4429
4621
|
provider: "openai",
|
|
4430
4622
|
modelId: "gpt-5.1",
|
|
4431
|
-
displayName: "GPT-5.1
|
|
4623
|
+
displayName: "GPT-5.1",
|
|
4432
4624
|
contextWindow: 128e3,
|
|
4433
4625
|
maxOutputTokens: 32768,
|
|
4434
4626
|
pricing: {
|
|
@@ -4448,34 +4640,7 @@ var init_openai_models = __esm({
|
|
|
4448
4640
|
metadata: {
|
|
4449
4641
|
family: "GPT-5",
|
|
4450
4642
|
releaseDate: "2025-11-12",
|
|
4451
|
-
notes: "
|
|
4452
|
-
supportsTemperature: false
|
|
4453
|
-
}
|
|
4454
|
-
},
|
|
4455
|
-
{
|
|
4456
|
-
provider: "openai",
|
|
4457
|
-
modelId: "gpt-5.1-thinking",
|
|
4458
|
-
displayName: "GPT-5.1 Thinking",
|
|
4459
|
-
contextWindow: 196e3,
|
|
4460
|
-
maxOutputTokens: 32768,
|
|
4461
|
-
pricing: {
|
|
4462
|
-
input: 1.25,
|
|
4463
|
-
output: 10,
|
|
4464
|
-
cachedInput: 0.125
|
|
4465
|
-
},
|
|
4466
|
-
knowledgeCutoff: "2024-09-30",
|
|
4467
|
-
features: {
|
|
4468
|
-
streaming: true,
|
|
4469
|
-
functionCalling: true,
|
|
4470
|
-
vision: true,
|
|
4471
|
-
reasoning: true,
|
|
4472
|
-
structuredOutputs: true,
|
|
4473
|
-
fineTuning: true
|
|
4474
|
-
},
|
|
4475
|
-
metadata: {
|
|
4476
|
-
family: "GPT-5",
|
|
4477
|
-
releaseDate: "2025-11-12",
|
|
4478
|
-
notes: "Advanced reasoning with thinking levels: Light, Standard, Extended, Heavy. Best for complex tasks.",
|
|
4643
|
+
notes: "Latest GPT-5 with improved instruction following. 2-3x faster than GPT-5.",
|
|
4479
4644
|
supportsTemperature: false
|
|
4480
4645
|
}
|
|
4481
4646
|
},
|
|
@@ -4555,6 +4720,255 @@ var init_openai_models = __esm({
|
|
|
4555
4720
|
notes: "Fastest, most cost-efficient version for well-defined tasks",
|
|
4556
4721
|
supportsTemperature: false
|
|
4557
4722
|
}
|
|
4723
|
+
},
|
|
4724
|
+
{
|
|
4725
|
+
provider: "openai",
|
|
4726
|
+
modelId: "gpt-5-pro",
|
|
4727
|
+
displayName: "GPT-5 Pro",
|
|
4728
|
+
contextWindow: 272e3,
|
|
4729
|
+
maxOutputTokens: 128e3,
|
|
4730
|
+
pricing: {
|
|
4731
|
+
input: 15,
|
|
4732
|
+
output: 120
|
|
4733
|
+
// No cached input pricing for gpt-5-pro
|
|
4734
|
+
},
|
|
4735
|
+
knowledgeCutoff: "2024-09-30",
|
|
4736
|
+
features: {
|
|
4737
|
+
streaming: true,
|
|
4738
|
+
functionCalling: true,
|
|
4739
|
+
vision: true,
|
|
4740
|
+
reasoning: true,
|
|
4741
|
+
structuredOutputs: true
|
|
4742
|
+
},
|
|
4743
|
+
metadata: {
|
|
4744
|
+
family: "GPT-5",
|
|
4745
|
+
notes: "Premium tier with enhanced capabilities. Does not support prompt caching.",
|
|
4746
|
+
supportsTemperature: false
|
|
4747
|
+
}
|
|
4748
|
+
},
|
|
4749
|
+
// GPT-4.1 Family
|
|
4750
|
+
{
|
|
4751
|
+
provider: "openai",
|
|
4752
|
+
modelId: "gpt-4.1",
|
|
4753
|
+
displayName: "GPT-4.1",
|
|
4754
|
+
contextWindow: 128e3,
|
|
4755
|
+
maxOutputTokens: 32768,
|
|
4756
|
+
pricing: {
|
|
4757
|
+
input: 2,
|
|
4758
|
+
output: 8,
|
|
4759
|
+
cachedInput: 0.5
|
|
4760
|
+
},
|
|
4761
|
+
knowledgeCutoff: "2024-04-01",
|
|
4762
|
+
features: {
|
|
4763
|
+
streaming: true,
|
|
4764
|
+
functionCalling: true,
|
|
4765
|
+
vision: true,
|
|
4766
|
+
structuredOutputs: true,
|
|
4767
|
+
fineTuning: true
|
|
4768
|
+
},
|
|
4769
|
+
metadata: {
|
|
4770
|
+
family: "GPT-4.1",
|
|
4771
|
+
notes: "Improved GPT-4 with better instruction following"
|
|
4772
|
+
}
|
|
4773
|
+
},
|
|
4774
|
+
{
|
|
4775
|
+
provider: "openai",
|
|
4776
|
+
modelId: "gpt-4.1-mini",
|
|
4777
|
+
displayName: "GPT-4.1 Mini",
|
|
4778
|
+
contextWindow: 128e3,
|
|
4779
|
+
maxOutputTokens: 32768,
|
|
4780
|
+
pricing: {
|
|
4781
|
+
input: 0.4,
|
|
4782
|
+
output: 1.6,
|
|
4783
|
+
cachedInput: 0.1
|
|
4784
|
+
},
|
|
4785
|
+
knowledgeCutoff: "2024-04-01",
|
|
4786
|
+
features: {
|
|
4787
|
+
streaming: true,
|
|
4788
|
+
functionCalling: true,
|
|
4789
|
+
vision: true,
|
|
4790
|
+
structuredOutputs: true,
|
|
4791
|
+
fineTuning: true
|
|
4792
|
+
},
|
|
4793
|
+
metadata: {
|
|
4794
|
+
family: "GPT-4.1",
|
|
4795
|
+
notes: "Cost-efficient GPT-4.1 variant"
|
|
4796
|
+
}
|
|
4797
|
+
},
|
|
4798
|
+
{
|
|
4799
|
+
provider: "openai",
|
|
4800
|
+
modelId: "gpt-4.1-nano",
|
|
4801
|
+
displayName: "GPT-4.1 Nano",
|
|
4802
|
+
contextWindow: 128e3,
|
|
4803
|
+
maxOutputTokens: 32768,
|
|
4804
|
+
pricing: {
|
|
4805
|
+
input: 0.1,
|
|
4806
|
+
output: 0.4,
|
|
4807
|
+
cachedInput: 0.025
|
|
4808
|
+
},
|
|
4809
|
+
knowledgeCutoff: "2024-04-01",
|
|
4810
|
+
features: {
|
|
4811
|
+
streaming: true,
|
|
4812
|
+
functionCalling: true,
|
|
4813
|
+
vision: true,
|
|
4814
|
+
structuredOutputs: true,
|
|
4815
|
+
fineTuning: true
|
|
4816
|
+
},
|
|
4817
|
+
metadata: {
|
|
4818
|
+
family: "GPT-4.1",
|
|
4819
|
+
notes: "Fastest GPT-4.1 variant for simple tasks"
|
|
4820
|
+
}
|
|
4821
|
+
},
|
|
4822
|
+
// GPT-4o Family
|
|
4823
|
+
{
|
|
4824
|
+
provider: "openai",
|
|
4825
|
+
modelId: "gpt-4o",
|
|
4826
|
+
displayName: "GPT-4o",
|
|
4827
|
+
contextWindow: 128e3,
|
|
4828
|
+
maxOutputTokens: 16384,
|
|
4829
|
+
pricing: {
|
|
4830
|
+
input: 2.5,
|
|
4831
|
+
output: 10,
|
|
4832
|
+
cachedInput: 1.25
|
|
4833
|
+
},
|
|
4834
|
+
knowledgeCutoff: "2024-04-01",
|
|
4835
|
+
features: {
|
|
4836
|
+
streaming: true,
|
|
4837
|
+
functionCalling: true,
|
|
4838
|
+
vision: true,
|
|
4839
|
+
structuredOutputs: true,
|
|
4840
|
+
fineTuning: true
|
|
4841
|
+
},
|
|
4842
|
+
metadata: {
|
|
4843
|
+
family: "GPT-4o",
|
|
4844
|
+
notes: "Multimodal model optimized for speed"
|
|
4845
|
+
}
|
|
4846
|
+
},
|
|
4847
|
+
{
|
|
4848
|
+
provider: "openai",
|
|
4849
|
+
modelId: "gpt-4o-mini",
|
|
4850
|
+
displayName: "GPT-4o Mini",
|
|
4851
|
+
contextWindow: 128e3,
|
|
4852
|
+
maxOutputTokens: 16384,
|
|
4853
|
+
pricing: {
|
|
4854
|
+
input: 0.15,
|
|
4855
|
+
output: 0.6,
|
|
4856
|
+
cachedInput: 0.075
|
|
4857
|
+
},
|
|
4858
|
+
knowledgeCutoff: "2024-04-01",
|
|
4859
|
+
features: {
|
|
4860
|
+
streaming: true,
|
|
4861
|
+
functionCalling: true,
|
|
4862
|
+
vision: true,
|
|
4863
|
+
structuredOutputs: true,
|
|
4864
|
+
fineTuning: true
|
|
4865
|
+
},
|
|
4866
|
+
metadata: {
|
|
4867
|
+
family: "GPT-4o",
|
|
4868
|
+
notes: "Fast and affordable multimodal model"
|
|
4869
|
+
}
|
|
4870
|
+
},
|
|
4871
|
+
// o-series (Reasoning models)
|
|
4872
|
+
{
|
|
4873
|
+
provider: "openai",
|
|
4874
|
+
modelId: "o1",
|
|
4875
|
+
displayName: "o1",
|
|
4876
|
+
contextWindow: 2e5,
|
|
4877
|
+
maxOutputTokens: 1e5,
|
|
4878
|
+
pricing: {
|
|
4879
|
+
input: 15,
|
|
4880
|
+
output: 60,
|
|
4881
|
+
cachedInput: 7.5
|
|
4882
|
+
},
|
|
4883
|
+
knowledgeCutoff: "2024-12-01",
|
|
4884
|
+
features: {
|
|
4885
|
+
streaming: true,
|
|
4886
|
+
functionCalling: true,
|
|
4887
|
+
vision: true,
|
|
4888
|
+
reasoning: true,
|
|
4889
|
+
structuredOutputs: true
|
|
4890
|
+
},
|
|
4891
|
+
metadata: {
|
|
4892
|
+
family: "o-series",
|
|
4893
|
+
notes: "Advanced reasoning model with chain-of-thought",
|
|
4894
|
+
supportsTemperature: false
|
|
4895
|
+
}
|
|
4896
|
+
},
|
|
4897
|
+
{
|
|
4898
|
+
provider: "openai",
|
|
4899
|
+
modelId: "o3",
|
|
4900
|
+
displayName: "o3",
|
|
4901
|
+
contextWindow: 2e5,
|
|
4902
|
+
maxOutputTokens: 1e5,
|
|
4903
|
+
pricing: {
|
|
4904
|
+
input: 2,
|
|
4905
|
+
output: 8,
|
|
4906
|
+
cachedInput: 0.5
|
|
4907
|
+
},
|
|
4908
|
+
knowledgeCutoff: "2025-01-01",
|
|
4909
|
+
features: {
|
|
4910
|
+
streaming: true,
|
|
4911
|
+
functionCalling: true,
|
|
4912
|
+
vision: true,
|
|
4913
|
+
reasoning: true,
|
|
4914
|
+
structuredOutputs: true
|
|
4915
|
+
},
|
|
4916
|
+
metadata: {
|
|
4917
|
+
family: "o-series",
|
|
4918
|
+
notes: "Next-gen reasoning model, more efficient than o1",
|
|
4919
|
+
supportsTemperature: false
|
|
4920
|
+
}
|
|
4921
|
+
},
|
|
4922
|
+
{
|
|
4923
|
+
provider: "openai",
|
|
4924
|
+
modelId: "o4-mini",
|
|
4925
|
+
displayName: "o4 Mini",
|
|
4926
|
+
contextWindow: 2e5,
|
|
4927
|
+
maxOutputTokens: 1e5,
|
|
4928
|
+
pricing: {
|
|
4929
|
+
input: 1.1,
|
|
4930
|
+
output: 4.4,
|
|
4931
|
+
cachedInput: 0.275
|
|
4932
|
+
},
|
|
4933
|
+
knowledgeCutoff: "2025-04-01",
|
|
4934
|
+
features: {
|
|
4935
|
+
streaming: true,
|
|
4936
|
+
functionCalling: true,
|
|
4937
|
+
vision: true,
|
|
4938
|
+
reasoning: true,
|
|
4939
|
+
structuredOutputs: true,
|
|
4940
|
+
fineTuning: true
|
|
4941
|
+
},
|
|
4942
|
+
metadata: {
|
|
4943
|
+
family: "o-series",
|
|
4944
|
+
notes: "Cost-efficient reasoning model",
|
|
4945
|
+
supportsTemperature: false
|
|
4946
|
+
}
|
|
4947
|
+
},
|
|
4948
|
+
{
|
|
4949
|
+
provider: "openai",
|
|
4950
|
+
modelId: "o3-mini",
|
|
4951
|
+
displayName: "o3 Mini",
|
|
4952
|
+
contextWindow: 2e5,
|
|
4953
|
+
maxOutputTokens: 1e5,
|
|
4954
|
+
pricing: {
|
|
4955
|
+
input: 1.1,
|
|
4956
|
+
output: 4.4,
|
|
4957
|
+
cachedInput: 0.55
|
|
4958
|
+
},
|
|
4959
|
+
knowledgeCutoff: "2025-01-01",
|
|
4960
|
+
features: {
|
|
4961
|
+
streaming: true,
|
|
4962
|
+
functionCalling: true,
|
|
4963
|
+
vision: true,
|
|
4964
|
+
reasoning: true,
|
|
4965
|
+
structuredOutputs: true
|
|
4966
|
+
},
|
|
4967
|
+
metadata: {
|
|
4968
|
+
family: "o-series",
|
|
4969
|
+
notes: "Compact reasoning model for cost-sensitive applications",
|
|
4970
|
+
supportsTemperature: false
|
|
4971
|
+
}
|
|
4558
4972
|
}
|
|
4559
4973
|
];
|
|
4560
4974
|
}
|
|
@@ -4635,7 +5049,8 @@ var init_openai = __esm({
|
|
|
4635
5049
|
const usage = chunk.usage ? {
|
|
4636
5050
|
inputTokens: chunk.usage.prompt_tokens,
|
|
4637
5051
|
outputTokens: chunk.usage.completion_tokens,
|
|
4638
|
-
totalTokens: chunk.usage.total_tokens
|
|
5052
|
+
totalTokens: chunk.usage.total_tokens,
|
|
5053
|
+
cachedInputTokens: chunk.usage.prompt_tokens_details?.cached_tokens ?? 0
|
|
4639
5054
|
} : void 0;
|
|
4640
5055
|
if (finishReason || usage) {
|
|
4641
5056
|
yield { text: "", finishReason, usage, rawEvent: chunk };
|
|
@@ -4852,20 +5267,28 @@ var init_model_registry = __esm({
|
|
|
4852
5267
|
/**
|
|
4853
5268
|
* Estimate API cost for a given model and token usage
|
|
4854
5269
|
* @param modelId - Full model identifier
|
|
4855
|
-
* @param inputTokens - Number of input tokens
|
|
5270
|
+
* @param inputTokens - Number of input tokens (total, including cached and cache creation)
|
|
4856
5271
|
* @param outputTokens - Number of output tokens
|
|
4857
|
-
* @param
|
|
5272
|
+
* @param cachedInputTokens - Number of cached input tokens (subset of inputTokens)
|
|
5273
|
+
* @param cacheCreationInputTokens - Number of cache creation tokens (subset of inputTokens, Anthropic only)
|
|
4858
5274
|
* @returns CostEstimate if model found, undefined otherwise
|
|
4859
5275
|
*/
|
|
4860
|
-
estimateCost(modelId, inputTokens, outputTokens,
|
|
5276
|
+
estimateCost(modelId, inputTokens, outputTokens, cachedInputTokens = 0, cacheCreationInputTokens = 0) {
|
|
4861
5277
|
const spec = this.getModelSpec(modelId);
|
|
4862
5278
|
if (!spec) return void 0;
|
|
4863
|
-
const
|
|
4864
|
-
const
|
|
5279
|
+
const cachedRate = spec.pricing.cachedInput ?? spec.pricing.input;
|
|
5280
|
+
const cacheWriteRate = spec.pricing.cacheWriteInput ?? spec.pricing.input;
|
|
5281
|
+
const uncachedInputTokens = inputTokens - cachedInputTokens - cacheCreationInputTokens;
|
|
5282
|
+
const uncachedInputCost = uncachedInputTokens / 1e6 * spec.pricing.input;
|
|
5283
|
+
const cachedInputCost = cachedInputTokens / 1e6 * cachedRate;
|
|
5284
|
+
const cacheCreationCost = cacheCreationInputTokens / 1e6 * cacheWriteRate;
|
|
5285
|
+
const inputCost = uncachedInputCost + cachedInputCost + cacheCreationCost;
|
|
4865
5286
|
const outputCost = outputTokens / 1e6 * spec.pricing.output;
|
|
4866
5287
|
const totalCost = inputCost + outputCost;
|
|
4867
5288
|
return {
|
|
4868
5289
|
inputCost,
|
|
5290
|
+
cachedInputCost,
|
|
5291
|
+
cacheCreationCost,
|
|
4869
5292
|
outputCost,
|
|
4870
5293
|
totalCost,
|
|
4871
5294
|
currency: "USD"
|