@ax-llm/ax 11.0.35 → 11.0.37

package/index.cjs CHANGED
@@ -114,6 +114,7 @@ __export(index_exports, {
  AxStringUtil: () => AxStringUtil,
  AxTestPrompt: () => AxTestPrompt,
  axAIAnthropicDefaultConfig: () => axAIAnthropicDefaultConfig,
+ axAIAnthropicVertexDefaultConfig: () => axAIAnthropicVertexDefaultConfig,
  axAIAzureOpenAIBestConfig: () => axAIAzureOpenAIBestConfig,
  axAIAzureOpenAICreativeConfig: () => axAIAzureOpenAICreativeConfig,
  axAIAzureOpenAIDefaultConfig: () => axAIAzureOpenAIDefaultConfig,
@@ -885,7 +886,7 @@ var AxBaseAI = class {
  this.models = models;
  this.id = crypto.randomUUID();
  const model = this.getModel(defaults.model) ?? defaults.model;
- const embedModel = this.getEmbedModel(defaults.embedModel);
+ const embedModel = this.getEmbedModel(defaults.embedModel) ?? defaults.embedModel;
  this.defaults = { model, embedModel };
  if (!defaults.model || typeof defaults.model !== "string" || defaults.model === "") {
  throw new Error("No model defined");
@@ -987,12 +988,6 @@ var AxBaseAI = class {
  }
  return models;
  }
- getDefaultModels() {
- return {
- model: this.defaults.model,
- embedModel: this.defaults.embedModel
- };
- }
  getName() {
  return this.name;
  }
@@ -1149,13 +1144,18 @@ var AxBaseAI = class {
  if (!this.aiImpl.createChatStreamResp) {
  throw new Error("generateChatResp not implemented");
  }
- const respFn = this.aiImpl.createChatStreamResp;
+ const respFn = this.aiImpl.createChatStreamResp.bind(this);
  const wrappedRespFn = (state) => (resp) => {
  const res2 = respFn(resp, state);
  res2.sessionId = options?.sessionId;
- if (res2.modelUsage) {
- this.modelUsage = res2.modelUsage;
+ if (!res2.modelUsage) {
+ res2.modelUsage = {
+ ai: this.name,
+ model,
+ tokens: this.aiImpl.getTokenUsage()
+ };
  }
+ this.modelUsage = res2.modelUsage;
  if (span?.isRecording()) {
  setResponseAttr(res2, span);
  }
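
Note: this hunk changes how streaming chat responses report usage. Instead of only copying a provider-supplied modelUsage, AxBaseAI now backfills one on every chunk from the implementation's getTokenUsage(). A minimal sketch of the resulting shape in TypeScript (these type names are illustrative, not the package's exported names; the fields mirror the object literal above):

// Illustrative types only; fields mirror what AxBaseAI now attaches
// to every chat and embed response.
interface TokenUsage {
  promptTokens: number;
  completionTokens: number;
  totalTokens: number;
}

interface ModelUsage {
  ai: string;          // service name, e.g. "openai" or "anthropic"
  model: string;       // the model that served this request
  tokens?: TokenUsage; // last counts the provider impl reported
}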
@@ -1261,17 +1261,19 @@ var AxBaseAI = class {
  );
  return res2;
  };
- const resValue = this.rt ? await this.rt(fn, { embedModelUsage: this.embedModelUsage }) : await fn();
+ const resValue = this.rt ? await this.rt(fn, { modelUsage: this.embedModelUsage }) : await fn();
  const res = this.aiImpl.createEmbedResp(resValue);
  res.sessionId = options?.sessionId;
+ if (!res.modelUsage) {
+ res.modelUsage = {
+ ai: this.name,
+ model: embedModel,
+ tokens: this.aiImpl.getTokenUsage()
+ };
+ }
+ this.embedModelUsage = res.modelUsage;
  if (span?.isRecording()) {
- if (res.modelUsage) {
- this.embedModelUsage = res.modelUsage;
- span.setAttributes({
- [axSpanAttributes.LLM_USAGE_COMPLETION_TOKENS]: res.modelUsage.completionTokens ?? 0,
- [axSpanAttributes.LLM_USAGE_PROMPT_TOKENS]: res.modelUsage.promptTokens
- });
- }
+ setResponseAttr(res, span);
  }
  span?.end();
  return res;
@@ -1298,8 +1300,8 @@ var AxBaseAI = class {
  function setResponseAttr(res, span) {
  if (res.modelUsage) {
  span.setAttributes({
- [axSpanAttributes.LLM_USAGE_COMPLETION_TOKENS]: res.modelUsage.completionTokens ?? 0,
- [axSpanAttributes.LLM_USAGE_PROMPT_TOKENS]: res.modelUsage.promptTokens
+ [axSpanAttributes.LLM_USAGE_COMPLETION_TOKENS]: res.modelUsage.tokens?.completionTokens ?? 0,
+ [axSpanAttributes.LLM_USAGE_PROMPT_TOKENS]: res.modelUsage.tokens?.promptTokens
  });
  }
  }
@@ -1354,14 +1356,14 @@ var AxAIAnthropicModel = /* @__PURE__ */ ((AxAIAnthropicModel2) => {
  AxAIAnthropicModel2["ClaudeInstant12"] = "claude-instant-1.2";
  return AxAIAnthropicModel2;
  })(AxAIAnthropicModel || {});
- var AxAIAnthropicVertexModel = /* @__PURE__ */ ((AxAIAnthropicVertexModel3) => {
- AxAIAnthropicVertexModel3["Claude37Sonnet"] = "claude-3-7-sonnet";
- AxAIAnthropicVertexModel3["Claude35Haiku"] = "claude-3-5-haiku";
- AxAIAnthropicVertexModel3["Claude35Sonnet"] = "claude-3-5-sonnet";
- AxAIAnthropicVertexModel3["Claude35SonnetV2"] = "claude-3-5-sonnet-v2";
- AxAIAnthropicVertexModel3["Claude3Haiku"] = "claude-3-haiku";
- AxAIAnthropicVertexModel3["Claude3Opus"] = "claude-3-opus";
- return AxAIAnthropicVertexModel3;
+ var AxAIAnthropicVertexModel = /* @__PURE__ */ ((AxAIAnthropicVertexModel2) => {
+ AxAIAnthropicVertexModel2["Claude37Sonnet"] = "claude-3-7-sonnet";
+ AxAIAnthropicVertexModel2["Claude35Haiku"] = "claude-3-5-haiku";
+ AxAIAnthropicVertexModel2["Claude35Sonnet"] = "claude-3-5-sonnet";
+ AxAIAnthropicVertexModel2["Claude35SonnetV2"] = "claude-3-5-sonnet-v2";
+ AxAIAnthropicVertexModel2["Claude3Haiku"] = "claude-3-haiku";
+ AxAIAnthropicVertexModel2["Claude3Opus"] = "claude-3-opus";
+ return AxAIAnthropicVertexModel2;
  })(AxAIAnthropicVertexModel || {});
 
  // ai/anthropic/info.ts
@@ -1415,7 +1417,11 @@ var axModelInfoAnthropic = [
 
  // ai/anthropic/api.ts
  var axAIAnthropicDefaultConfig = () => structuredClone({
- model: "claude-3-5-sonnet-latest" /* Claude35Sonnet */,
+ model: "claude-3-7-sonnet-latest" /* Claude37Sonnet */,
+ ...axBaseAIDefaultConfig()
+ });
+ var axAIAnthropicVertexDefaultConfig = () => structuredClone({
+ model: "claude-3-7-sonnet" /* Claude37Sonnet */,
  ...axBaseAIDefaultConfig()
  });
  var AxAIAnthropicImpl = class {
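
Note: axAIAnthropicVertexDefaultConfig is a new export (see the __export hunk at the top of this diff). The model ids differ on purpose: the Anthropic API uses "claude-3-7-sonnet-latest" while Vertex uses the bare "claude-3-7-sonnet". A usage sketch, assuming the usual spread-and-override pattern the config factories themselves use:

// Sketch: start from the new Vertex defaults and override the model.
import { axAIAnthropicVertexDefaultConfig } from '@ax-llm/ax';

const vertexConfig = {
  ...axAIAnthropicVertexDefaultConfig(),
  model: 'claude-3-5-haiku', // any AxAIAnthropicVertexModel value
};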
@@ -1423,6 +1429,10 @@ var AxAIAnthropicImpl = class {
  this.config = config;
  this.isVertex = isVertex;
  }
+ tokensUsed;
+ getTokenUsage() {
+ return this.tokensUsed;
+ }
  getModelConfig() {
  const { config } = this;
  return {
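
Note: every provider implementation gains the same two members in this release: a tokensUsed field and a getTokenUsage() accessor (the identical hunk repeats below for OpenAI, Cohere, Gemini, HuggingFace, and Reka). The contract, sketched in TypeScript with illustrative names:

// Illustrative contract: each impl stashes the latest provider-reported
// counts in tokensUsed; AxBaseAI pulls them via getTokenUsage() when a
// response arrives without its own usage data.
interface TokenUsageSource {
  getTokenUsage(): TokenUsage | undefined; // TokenUsage per the sketch above
}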
@@ -1532,16 +1542,12 @@ var AxAIAnthropicImpl = class {
  finishReason
  };
  });
- const modelUsage = {
+ this.tokensUsed = {
  promptTokens: resp.usage.input_tokens,
  completionTokens: resp.usage.output_tokens,
  totalTokens: resp.usage.input_tokens + resp.usage.output_tokens
  };
- return {
- results,
- modelUsage,
- remoteId: resp.id
- };
+ return { results, remoteId: resp.id };
  };
  createChatStreamResp = (resp, state) => {
  if (!("type" in resp)) {
@@ -1558,15 +1564,12 @@ var AxAIAnthropicImpl = class {
  if (resp.type === "message_start") {
  const { message } = resp;
  const results = [{ content: "", id: message.id }];
- const modelUsage = {
+ this.tokensUsed = {
  promptTokens: message.usage?.input_tokens ?? 0,
  completionTokens: message.usage?.output_tokens ?? 0,
  totalTokens: (message.usage?.input_tokens ?? 0) + (message.usage?.output_tokens ?? 0)
  };
- return {
- results,
- modelUsage
- };
+ return { results };
  }
  if (resp.type === "content_block_start") {
  const { content_block: contentBlock } = resp;
@@ -1623,19 +1626,15 @@ var AxAIAnthropicImpl = class {
  }
  if (resp.type === "message_delta") {
  const { delta, usage } = resp;
- return {
- results: [
- {
- content: "",
- finishReason: mapFinishReason(delta.stop_reason)
- }
- ],
- modelUsage: {
- promptTokens: 0,
- completionTokens: usage.output_tokens,
- totalTokens: usage.output_tokens
- }
+ this.tokensUsed = {
+ promptTokens: 0,
+ completionTokens: usage.output_tokens,
+ totalTokens: usage.output_tokens
  };
+ const results = [
+ { content: "", finishReason: mapFinishReason(delta.stop_reason) }
+ ];
+ return { results };
  }
  return {
  results: [{ content: "" }]
@@ -1826,6 +1825,7 @@ var AxAIOpenAIModel = /* @__PURE__ */ ((AxAIOpenAIModel2) => {
  AxAIOpenAIModel2["O4Mini"] = "o4-mini";
  AxAIOpenAIModel2["GPT4"] = "gpt-4";
  AxAIOpenAIModel2["GPT41"] = "gpt-4.1";
+ AxAIOpenAIModel2["GPT41Mini"] = "gpt-4.1-mini";
  AxAIOpenAIModel2["GPT4O"] = "gpt-4o";
  AxAIOpenAIModel2["GPT4OMini"] = "gpt-4o-mini";
  AxAIOpenAIModel2["GPT4ChatGPT4O"] = "chatgpt-4o-latest";
@@ -1882,6 +1882,12 @@ var axModelInfoOpenAI = [
  promptTokenCostPer1M: 2,
  completionTokenCostPer1M: 8
  },
+ {
+ name: "gpt-4.1-mini" /* GPT41Mini */,
+ currency: "usd",
+ promptTokenCostPer1M: 0.4,
+ completionTokenCostPer1M: 1.6
+ },
  {
  name: "gpt-4o" /* GPT4O */,
  currency: "usd",
@@ -1934,28 +1940,32 @@ var axModelInfoOpenAI = [
 
  // ai/openai/api.ts
  var axAIOpenAIDefaultConfig = () => structuredClone({
- model: "gpt-4o" /* GPT4O */,
+ model: "gpt-4.1" /* GPT41 */,
  embedModel: "text-embedding-3-small" /* TextEmbedding3Small */,
  ...axBaseAIDefaultConfig()
  });
  var axAIOpenAIBestConfig = () => structuredClone({
  ...axAIOpenAIDefaultConfig(),
- model: "gpt-4o" /* GPT4O */
+ model: "gpt-4.1" /* GPT41 */
  });
  var axAIOpenAICreativeConfig = () => structuredClone({
- model: "gpt-4o" /* GPT4O */,
+ model: "gpt-4.1" /* GPT41 */,
  embedModel: "text-embedding-3-small" /* TextEmbedding3Small */,
  ...axBaseAIDefaultCreativeConfig()
  });
  var axAIOpenAIFastConfig = () => ({
  ...axAIOpenAIDefaultConfig(),
- model: "gpt-4o-mini" /* GPT4OMini */
+ model: "gpt-4.1-mini" /* GPT41Mini */
  });
  var AxAIOpenAIImpl = class {
  constructor(config, streamingUsage) {
  this.config = config;
  this.streamingUsage = streamingUsage;
  }
+ tokensUsed;
+ getTokenUsage() {
+ return this.tokensUsed;
+ }
  getModelConfig() {
  const { config } = this;
  return {
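
Note: the OpenAI presets move off the gpt-4o family. After this change the four factories resolve as follows (fully determined by the hunk above):

axAIOpenAIDefaultConfig().model;  // "gpt-4.1"
axAIOpenAIBestConfig().model;     // "gpt-4.1"
axAIOpenAICreativeConfig().model; // "gpt-4.1"
axAIOpenAIFastConfig().model;     // "gpt-4.1-mini"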
@@ -2035,7 +2045,7 @@ var AxAIOpenAIImpl = class {
  if (error) {
  throw error;
  }
- const modelUsage = usage ? {
+ this.tokensUsed = usage ? {
  promptTokens: usage.prompt_tokens,
  completionTokens: usage.completion_tokens,
  totalTokens: usage.total_tokens
@@ -2057,14 +2067,13 @@ var AxAIOpenAIImpl = class {
  };
  });
  return {
- modelUsage,
  results,
  remoteId: id
  };
  }
  createChatStreamResp(resp, state) {
  const { id, usage, choices } = resp;
- const modelUsage = usage ? {
+ this.tokensUsed = usage ? {
  promptTokens: usage.prompt_tokens,
  completionTokens: usage.completion_tokens,
  totalTokens: usage.total_tokens
@@ -2102,22 +2111,16 @@ var AxAIOpenAIImpl = class {
  };
  }
  );
- return {
- results,
- modelUsage
- };
+ return { results };
  }
  createEmbedResp(resp) {
  const { data, usage } = resp;
- const modelUsage = usage ? {
+ this.tokensUsed = usage ? {
  promptTokens: usage.prompt_tokens,
  completionTokens: usage.completion_tokens,
  totalTokens: usage.total_tokens
  } : void 0;
- return {
- embeddings: data.map((v) => v.embedding),
- modelUsage
- };
+ return { embeddings: data.map((v) => v.embedding) };
  }
  };
  var mapFinishReason2 = (finishReason) => {
@@ -2382,6 +2385,10 @@ var AxAICohereImpl = class {
  constructor(config) {
  this.config = config;
  }
+ tokensUsed;
+ getTokenUsage() {
+ return this.tokensUsed;
+ }
  getModelConfig() {
  const { config } = this;
  return {
@@ -2473,7 +2480,7 @@ var AxAICohereImpl = class {
  return [apiConfig, reqValue];
  };
  createChatResp = (resp) => {
- const modelUsage = resp.meta.billed_units ? {
+ this.tokensUsed = resp.meta.billed_units ? {
  promptTokens: resp.meta.billed_units.input_tokens,
  completionTokens: resp.meta.billed_units.output_tokens,
  totalTokens: resp.meta.billed_units.input_tokens + resp.meta.billed_units.output_tokens
@@ -2516,17 +2523,18 @@ var AxAICohereImpl = class {
  finishReason
  }
  ];
- return {
- results,
- modelUsage,
- remoteId: resp.response_id
- };
+ return { results, remoteId: resp.response_id };
  };
  createChatStreamResp = (resp, state) => {
  const ss = state;
  if (resp.event_type === "stream-start") {
  ss.generation_id = resp.generation_id;
  }
+ this.tokensUsed = {
+ promptTokens: 0,
+ completionTokens: resp.meta.billed_units?.output_tokens ?? 0,
+ totalTokens: resp.meta.billed_units?.output_tokens ?? 0
+ };
  const { results } = this.createChatResp(resp);
  const result = results[0];
  if (!result) {
@@ -2820,13 +2828,13 @@ var safetySettings = [
  ];
  var axAIGoogleGeminiDefaultConfig = () => structuredClone({
  model: "gemini-2.0-flash" /* Gemini20Flash */,
- embedModel: "text-embedding-004" /* TextEmbedding004 */,
+ embedModel: "text-embedding-005" /* TextEmbedding005 */,
  safetySettings,
  ...axBaseAIDefaultConfig()
  });
  var axAIGoogleGeminiDefaultCreativeConfig = () => structuredClone({
  model: "gemini-2.0-flash" /* Gemini20Flash */,
- embedModel: "text-embedding-004" /* TextEmbedding004 */,
+ embedModel: "text-embedding-005" /* TextEmbedding005 */,
  safetySettings,
  ...axBaseAIDefaultCreativeConfig()
  });
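
Note: both Gemini config factories switch the default embedder from text-embedding-004 to text-embedding-005. Callers pinned to the old embedder can override the default; a minimal sketch using the same spread pattern:

// Sketch: keep the old embedder while picking up the other new defaults.
const geminiConfig = {
  ...axAIGoogleGeminiDefaultConfig(),
  embedModel: 'text-embedding-004',
};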
@@ -2841,6 +2849,10 @@ var AxAIGoogleGeminiImpl = class {
  throw new Error("Auto truncate is not supported for GoogleGemini");
  }
  }
+ tokensUsed;
+ getTokenUsage() {
+ return this.tokensUsed;
+ }
  getModelConfig() {
  const { config } = this;
  return {
@@ -3103,18 +3115,14 @@ var AxAIGoogleGeminiImpl = class {
  return result;
  }
  );
- let modelUsage;
  if (resp.usageMetadata) {
- modelUsage = {
+ this.tokensUsed = {
  totalTokens: resp.usageMetadata.totalTokenCount,
  promptTokens: resp.usageMetadata.promptTokenCount,
  completionTokens: resp.usageMetadata.candidatesTokenCount
  };
  }
- return {
- results,
- modelUsage
- };
+ return { results };
  };
  createChatStreamResp = (resp) => {
  return this.createChatResp(resp);
@@ -3332,7 +3340,7 @@ var AxAIGroq = class extends AxAIOpenAIBase {
  debug: options?.debug
  });
  const rtFunc = async (func, info) => {
- const totalTokens = info.modelUsage?.totalTokens || 0;
+ const totalTokens = info.modelUsage?.tokens?.totalTokens || 0;
  await rt.acquire(totalTokens);
  return await func();
  };
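
Note: the Groq rate-limiter callback follows the usage-shape change; total tokens now live under info.modelUsage.tokens. A sketch of an equivalent custom limiter (the rateLimiter object is assumed; info carries the usage of the previous call, so new requests are throttled by what was last consumed):

// Sketch of a token-aware rate limiter matching rtFunc above.
const rtFunc = async (
  func: () => Promise<unknown>,
  info: { modelUsage?: ModelUsage } // ModelUsage per the sketch near the top
) => {
  const totalTokens = info.modelUsage?.tokens?.totalTokens ?? 0;
  await rateLimiter.acquire(totalTokens); // assumed limiter with acquire()
  return await func();
};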
@@ -3362,6 +3370,10 @@ var AxAIHuggingFaceImpl = class {
  constructor(config) {
  this.config = config;
  }
+ tokensUsed;
+ getTokenUsage() {
+ return this.tokensUsed;
+ }
  getModelConfig() {
  const { config } = this;
  return {
@@ -3655,6 +3667,10 @@ var AxAIRekaImpl = class {
  constructor(config) {
  this.config = config;
  }
+ tokensUsed;
+ getTokenUsage() {
+ return this.tokensUsed;
+ }
  getModelConfig() {
  const { config } = this;
  return {
@@ -3695,7 +3711,7 @@ var AxAIRekaImpl = class {
  };
  createChatResp = (resp) => {
  const { id, usage, responses } = resp;
- const modelUsage = usage ? {
+ this.tokensUsed = usage ? {
  promptTokens: usage.input_tokens,
  completionTokens: usage.output_tokens,
  totalTokens: usage.input_tokens + usage.output_tokens
@@ -3714,15 +3730,11 @@ var AxAIRekaImpl = class {
  finishReason
  };
  });
- return {
- modelUsage,
- results,
- remoteId: id
- };
+ return { results, remoteId: id };
  };
  createChatStreamResp = (resp) => {
  const { id, usage, responses } = resp;
- const modelUsage = usage ? {
+ this.tokensUsed = usage ? {
  promptTokens: usage.input_tokens,
  completionTokens: usage.output_tokens,
  totalTokens: usage.input_tokens + usage.output_tokens
@@ -3741,10 +3753,7 @@ var AxAIRekaImpl = class {
  finishReason
  };
  });
- return {
- results,
- modelUsage
- };
+ return { results };
  };
  };
  var mapFinishReason3 = (finishReason) => {
@@ -3930,9 +3939,6 @@ var AxAI = class {
  getModelList() {
  return this.ai.getModelList();
  }
- getDefaultModels() {
- return this.ai.getDefaultModels();
- }
  getMetrics() {
  return this.ai.getMetrics();
  }
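
Note: getDefaultModels() is removed across the board in this release: from AxBaseAI and the AxAI facade (above), and from AxBalancer, AxMockAIService, and AxMultiServiceRouter (hunks below). The resolved model now rides on each response's modelUsage instead; a hedged sketch (the chat() call shape is an assumption):

// Sketch: read the service name and resolved model off the response,
// rather than asking the service for its defaults up front.
const res = await ai.chat(req);     // ai and req are assumed to exist
console.log(res.modelUsage?.ai);    // service name
console.log(res.modelUsage?.model); // model that served this call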
@@ -4869,9 +4875,15 @@ function mergeProgramUsage(usages) {
  }
  const currentUsage = usageMap[key];
  if (currentUsage) {
- currentUsage.promptTokens += usage.promptTokens;
- currentUsage.completionTokens += usage.completionTokens;
- currentUsage.totalTokens += usage.totalTokens;
+ const tokens = currentUsage.tokens ?? {
+ promptTokens: 0,
+ completionTokens: 0,
+ totalTokens: 0
+ };
+ tokens.promptTokens += usage?.tokens?.promptTokens ?? 0;
+ tokens.completionTokens += usage?.tokens?.completionTokens ?? 0;
+ tokens.totalTokens += usage?.tokens?.totalTokens ?? 0;
+ currentUsage.tokens = tokens;
  }
  }
  return Object.values(usageMap);
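
Note: mergeProgramUsage now accumulates into the nested tokens object, initializing it to zeros when an existing entry predates the new shape. An illustrative input/output pair for the merge (entries keyed by the same ai + model combination):

// Two usage records for the same ai/model pair...
const usages: ModelUsage[] = [
  { ai: 'openai', model: 'gpt-4.1',
    tokens: { promptTokens: 10, completionTokens: 5, totalTokens: 15 } },
  { ai: 'openai', model: 'gpt-4.1',
    tokens: { promptTokens: 20, completionTokens: 10, totalTokens: 30 } },
];
// ...merge to a single entry with
// tokens = { promptTokens: 30, completionTokens: 15, totalTokens: 45 }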
@@ -6391,13 +6403,9 @@ var AxGen = class extends AxProgramWithSignature {
  mem,
  options
  }) {
- const { sessionId, traceId, model, functions: _functions } = options ?? {};
+ const { sessionId, traceId, functions: _functions } = options ?? {};
  const fastFail = options?.fastFail ?? this.options?.fastFail;
- const modelName = model ?? ai.getDefaultModels().model;
- const usageInfo = {
- ai: ai.getName(),
- model: modelName
- };
+ const model = options.model;
  const functions = _functions?.map((f) => "toFunction" in f ? f.toFunction() : f)?.flat();
  const res = await this.forwardSendRequest({
  ai,
@@ -6409,7 +6417,6 @@ var AxGen = class extends AxProgramWithSignature {
  ai,
  model,
  res,
- usageInfo,
  mem,
  traceId,
  sessionId,
@@ -6421,7 +6428,6 @@ var AxGen = class extends AxProgramWithSignature {
  ai,
  model,
  res,
- usageInfo,
  mem,
  traceId,
  sessionId,
@@ -6433,14 +6439,13 @@ var AxGen = class extends AxProgramWithSignature {
  ai,
  model,
  res,
- usageInfo,
  mem,
  sessionId,
  traceId,
  functions,
  fastFail
  }) {
- const streamingValidation = fastFail ?? ai.getFeatures().functionCot !== true;
+ const streamingValidation = fastFail ?? ai.getFeatures(model).functionCot !== true;
  const functionCalls = [];
  const values = {};
  const xstate = {
@@ -6455,7 +6460,7 @@ var AxGen = class extends AxProgramWithSignature {
  continue;
  }
  if (v.modelUsage) {
- this.usage.push({ ...usageInfo, ...v.modelUsage });
+ this.usage.push(v.modelUsage);
  }
  if (result.functionCalls) {
  mergeFunctionCalls(functionCalls, result.functionCalls);
@@ -6556,7 +6561,6 @@ var AxGen = class extends AxProgramWithSignature {
  async processResponse({
  ai,
  res,
- usageInfo,
  mem,
  sessionId,
  traceId,
@@ -6569,7 +6573,7 @@ var AxGen = class extends AxProgramWithSignature {
  }
  for (const result of results) {
  if (res.modelUsage) {
- this.usage.push({ ...usageInfo, ...res.modelUsage });
+ this.usage.push(res.modelUsage);
  }
  mem.addResult(result, sessionId);
  if (result.functionCalls?.length) {
@@ -7161,9 +7165,6 @@ var AxBalancer = class _AxBalancer {
  getModelList() {
  return this.currentService.getModelList();
  }
- getDefaultModels() {
- return this.currentService.getDefaultModels();
- }
  getNextService() {
  const cs = this.services[++this.currentServiceIndex];
  if (cs === void 0) {
@@ -9315,12 +9316,6 @@ var AxMockAIService = class {
  getModelList() {
  return this.config.models;
  }
- getDefaultModels() {
- return {
- model: this.config.modelInfo?.name ?? "mock-model",
- embedModel: this.config.embedModelInfo?.name
- };
- }
  getMetrics() {
  return this.metrics;
  }
@@ -9343,9 +9338,13 @@ var AxMockAIService = class {
  }
  ],
  modelUsage: {
- promptTokens: 10,
- completionTokens: 5,
- totalTokens: 15
+ ai: this.getName(),
+ model: "mock-model",
+ tokens: {
+ promptTokens: 10,
+ completionTokens: 5,
+ totalTokens: 15
+ }
  }
  };
  }
@@ -9363,9 +9362,13 @@ var AxMockAIService = class {
  return this.config.embedResponse ?? {
  embeddings: [[0.1, 0.2, 0.3]],
  modelUsage: {
- promptTokens: 5,
- completionTokens: 0,
- totalTokens: 5
+ ai: this.getName(),
+ model: "mock-model",
+ tokens: {
+ promptTokens: 5,
+ completionTokens: 0,
+ totalTokens: 5
+ }
  }
  };
  }
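
Note: the mock service's canned chat and embed responses adopt the same { ai, model, tokens } usage shape, so tests asserting on usage exercise the same code path as real services. A sketch of a test-side assertion (the mock setup and chat() call shape are assumed):

// Sketch: asserting on the new usage shape in a test.
import assert from 'node:assert';

const chatRes = await mockAi.chat(req); // mockAi and req are assumed
assert.deepEqual(chatRes.modelUsage?.tokens, {
  promptTokens: 10,
  completionTokens: 5,
  totalTokens: 15,
});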
@@ -11329,11 +11332,6 @@ var AxMultiServiceRouter = class {
  }
  });
  }
- getDefaultModels() {
- throw new Error(
- "getDefaultModels is not supported for multi-service router."
- );
- }
  /**
  * If a model key is provided, delegate to the corresponding service's features.
  * Otherwise, returns a default feature set.
@@ -11496,6 +11494,7 @@ var AxRAG = class extends AxChainOfThought {
  AxStringUtil,
  AxTestPrompt,
  axAIAnthropicDefaultConfig,
+ axAIAnthropicVertexDefaultConfig,
  axAIAzureOpenAIBestConfig,
  axAIAzureOpenAICreativeConfig,
  axAIAzureOpenAIDefaultConfig,