@adaptic/lumic-utils 1.0.20 → 1.0.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (23)
  1. package/dist/{apollo-client.client-DVsbR05r.js → apollo-client.client-ByADDB46.js} +3 -3
  2. package/dist/{apollo-client.client-DVsbR05r.js.map → apollo-client.client-ByADDB46.js.map} +1 -1
  3. package/dist/{apollo-client.client-DRk6kygw.js → apollo-client.client-CUIakkzs.js} +4 -4
  4. package/dist/{apollo-client.client-DRk6kygw.js.map → apollo-client.client-CUIakkzs.js.map} +1 -1
  5. package/dist/{apollo-client.server-Djh4v__C.js → apollo-client.server-BnZhh39o.js} +3 -3
  6. package/dist/{apollo-client.server-Djh4v__C.js.map → apollo-client.server-BnZhh39o.js.map} +1 -1
  7. package/dist/{apollo-client.server-L8JR2ko_.js → apollo-client.server-JucuAyrj.js} +3 -3
  8. package/dist/{apollo-client.server-L8JR2ko_.js.map → apollo-client.server-JucuAyrj.js.map} +1 -1
  9. package/dist/{index-eU6Q74W8.js → index-BLXN1stF.js} +2 -2
  10. package/dist/{index-eU6Q74W8.js.map → index-BLXN1stF.js.map} +1 -1
  11. package/dist/{index-BVl0tRmx.js → index-Ca3x8X5U.js} +34 -5
  12. package/dist/{index-BVl0tRmx.js.map → index-Ca3x8X5U.js.map} +1 -1
  13. package/dist/{index-Cs56Fq24.js → index-DT0dXUtn.js} +2 -2
  14. package/dist/{index-Cs56Fq24.js.map → index-DT0dXUtn.js.map} +1 -1
  15. package/dist/{index-CSOg0U0R.js → index-DYehXKUX.js} +34 -5
  16. package/dist/{index-CSOg0U0R.js.map → index-DYehXKUX.js.map} +1 -1
  17. package/dist/index.cjs +1 -1
  18. package/dist/index.mjs +1 -1
  19. package/dist/test.cjs +1 -1
  20. package/dist/test.mjs +1 -1
  21. package/dist/types/functions/llm-config.d.ts +9 -1
  22. package/dist/types/utils/llm-cost-tracker.d.ts +3 -0
  23. package/package.json +1 -1
@@ -768,58 +768,79 @@ const DEFAULT_DEVELOPER_PROMPT = `
768
768
  Present complete, high-confidence, final answers only. Do not rephrase to be more brief or omit parts of answers.
769
769
  Respond only with final content (e.g. code, a json or yaml object, a formatted string, or a markdown document) and nothing else. Do not reply with a preamble, introduction, or conclusion.
770
770
  `;
771
- /** Token costs in USD per token. Last updated Mar 2026. */
771
+ /**
772
+ * Token costs in USD per token. Last updated Apr 2026.
773
+ *
774
+ * `cacheHitCost` reflects OpenAI's cached-input billing rate (~50% of the
775
+ * standard input rate per OpenAI's prompt caching documentation). When set,
776
+ * `calculateCost` splits prompt tokens into cached vs non-cached buckets and
777
+ * applies the discount; when omitted, cached tokens are billed at full input
778
+ * rate (a silent ~50% cost overstatement for cache-friendly workloads).
779
+ */
772
780
  const openAiModelCosts = {
773
781
  'gpt-5.4': {
774
782
  inputCost: 2.5 / 1_000_000,
783
+ cacheHitCost: 1.25 / 1_000_000,
775
784
  outputCost: 15 / 1_000_000,
776
785
  },
777
786
  'gpt-5.4-mini': {
778
787
  inputCost: 0.75 / 1_000_000,
788
+ cacheHitCost: 0.375 / 1_000_000,
779
789
  outputCost: 4.5 / 1_000_000,
780
790
  },
781
791
  'gpt-5.4-nano': {
782
792
  inputCost: 0.2 / 1_000_000,
793
+ cacheHitCost: 0.1 / 1_000_000,
783
794
  outputCost: 1.25 / 1_000_000,
784
795
  },
785
796
  'gpt-5': {
786
797
  inputCost: 2.5 / 1_000_000,
798
+ cacheHitCost: 1.25 / 1_000_000,
787
799
  outputCost: 10 / 1_000_000,
788
800
  },
789
801
  'gpt-5-mini': {
790
802
  inputCost: 0.15 / 1_000_000,
803
+ cacheHitCost: 0.075 / 1_000_000,
791
804
  outputCost: 0.6 / 1_000_000,
792
805
  },
793
806
  'o1-mini': {
794
807
  inputCost: 1.1 / 1_000_000,
808
+ cacheHitCost: 0.55 / 1_000_000,
795
809
  outputCost: 4.4 / 1_000_000,
796
810
  },
797
811
  'o1': {
798
812
  inputCost: 15 / 1_000_000,
813
+ cacheHitCost: 7.5 / 1_000_000,
799
814
  outputCost: 60 / 1_000_000,
800
815
  },
801
816
  'o3-mini': {
802
817
  inputCost: 1.1 / 1_000_000,
818
+ cacheHitCost: 0.55 / 1_000_000,
803
819
  outputCost: 4.4 / 1_000_000,
804
820
  },
805
821
  'o3': {
806
822
  inputCost: 2 / 1_000_000,
823
+ cacheHitCost: 1 / 1_000_000,
807
824
  outputCost: 8 / 1_000_000,
808
825
  },
809
826
  'gpt-4.1': {
810
827
  inputCost: 2 / 1_000_000,
828
+ cacheHitCost: 1 / 1_000_000,
811
829
  outputCost: 8 / 1_000_000,
812
830
  },
813
831
  'gpt-4.1-mini': {
814
832
  inputCost: 0.4 / 1_000_000,
833
+ cacheHitCost: 0.2 / 1_000_000,
815
834
  outputCost: 1.6 / 1_000_000,
816
835
  },
817
836
  'gpt-4.1-nano': {
818
837
  inputCost: 0.1 / 1_000_000,
838
+ cacheHitCost: 0.05 / 1_000_000,
819
839
  outputCost: 0.4 / 1_000_000,
820
840
  },
821
841
  'o4-mini': {
822
842
  inputCost: 1.1 / 1_000_000,
843
+ cacheHitCost: 0.55 / 1_000_000,
823
844
  outputCost: 4.4 / 1_000_000,
824
845
  },
825
846
  };
@@ -1894,7 +1915,10 @@ class LLMCostTracker {
1894
1915
  timestamp: Date.now(),
1895
1916
  };
1896
1917
  this.usageRecords.push(record);
1897
- getLumicLogger().info(`LLM cost tracked: ${provider}/${model} - $${cost.toFixed(6)}`, { provider, model, inputTokens, outputTokens, cost });
1918
+ // Emit cachedTokens and reasoningTokens explicitly so operators can
1919
+ // verify cache effectiveness from logs alone (the prior log shape only
1920
+ // surfaced inputTokens and outputTokens, hiding the cache discount).
1921
+ getLumicLogger().info(`LLM cost tracked: ${provider}/${model} - $${cost.toFixed(6)}`, { provider, model, inputTokens, cachedTokens: cacheHitTokens, outputTokens, reasoningTokens, cost });
1898
1922
  }
1899
1923
  /**
1900
1924
  * Records usage from an image generation call.
@@ -1975,11 +1999,13 @@ class LLMCostTracker {
1975
1999
  const images = this.getImageCosts();
1976
2000
  let totalCost = 0;
1977
2001
  let totalInputTokens = 0;
2002
+ let totalCacheHitTokens = 0;
1978
2003
  let totalOutputTokens = 0;
1979
2004
  let totalReasoningTokens = 0;
1980
2005
  for (const summary of Object.values(byModel)) {
1981
2006
  totalCost += summary.totalCost;
1982
2007
  totalInputTokens += summary.totalInputTokens;
2008
+ totalCacheHitTokens += summary.totalCacheHitTokens;
1983
2009
  totalOutputTokens += summary.totalOutputTokens;
1984
2010
  totalReasoningTokens += summary.totalReasoningTokens;
1985
2011
  }
@@ -1996,6 +2022,7 @@ class LLMCostTracker {
1996
2022
  totalCost,
1997
2023
  totalCalls: this.usageRecords.length + this.imageRecords.length,
1998
2024
  totalInputTokens,
2025
+ totalCacheHitTokens,
1999
2026
  totalOutputTokens,
2000
2027
  totalReasoningTokens,
2001
2028
  byModel,
@@ -2018,7 +2045,9 @@ class LLMCostTracker {
2018
2045
  cost: `$${m.totalCost.toFixed(6)}`,
2019
2046
  calls: m.callCount,
2020
2047
  inputTokens: m.totalInputTokens,
2048
+ cachedTokens: m.totalCacheHitTokens,
2021
2049
  outputTokens: m.totalOutputTokens,
2050
+ reasoningTokens: m.totalReasoningTokens,
2022
2051
  }));
2023
2052
  const images = Object.values(summary.images).map((img) => ({
2024
2053
  model: img.model,
@@ -22821,11 +22850,11 @@ let poolConfig = DEFAULT_POOL_CONFIG;
22821
22850
  async function loadApolloModules() {
22822
22851
  if (typeof window === "undefined" || process.env.AWS_EXECUTION_ENV) {
22823
22852
  // Server-side (or Lambda): load the CommonJS‑based implementation.
22824
- return (await Promise.resolve().then(function () { return require('./apollo-client.server-Djh4v__C.js'); }));
22853
+ return (await Promise.resolve().then(function () { return require('./apollo-client.server-BnZhh39o.js'); }));
22825
22854
  }
22826
22855
  else {
22827
22856
  // Client-side: load the ESM‑based implementation.
22828
- return (await Promise.resolve().then(function () { return require('./apollo-client.client-DVsbR05r.js'); }));
22857
+ return (await Promise.resolve().then(function () { return require('./apollo-client.client-ByADDB46.js'); }));
22829
22858
  }
22830
22859
  }
22831
22860
  /**
@@ -81536,4 +81565,4 @@ exports.withCorrelationId = withCorrelationId;
81536
81565
  exports.withMetrics = withMetrics;
81537
81566
  exports.withRateLimit = withRateLimit;
81538
81567
  exports.withRetry = withRetry;
81539
- //# sourceMappingURL=index-BVl0tRmx.js.map
81568
+ //# sourceMappingURL=index-Ca3x8X5U.js.map