@reaatech/llm-cost-telemetry-providers 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +205 -0
- package/dist/index.cjs +467 -0
- package/dist/index.d.cts +193 -0
- package/dist/index.d.ts +193 -0
- package/dist/index.js +434 -0
- package/package.json +63 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 llm-cost-telemetry contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
# @reaatech/llm-cost-telemetry-providers
|
|
2
|
+
|
|
3
|
+
[npm package](https://www.npmjs.com/package/@reaatech/llm-cost-telemetry-providers)
|
|
4
|
+
[MIT License](https://github.com/reaatech/llm-cost-telemetry/blob/main/LICENSE)
|
|
5
|
+
[CI status](https://github.com/reaatech/llm-cost-telemetry/actions/workflows/ci.yml)
|
|
6
|
+
|
|
7
|
+
> **Status:** Pre-1.0 — APIs may change in minor versions. Pin to a specific version in production.
|
|
8
|
+
|
|
9
|
+
LLM provider SDK wrappers for automatic cost telemetry. Wraps the official OpenAI, Anthropic, and Google Generative AI SDKs to capture token usage, timing, and telemetry context from every API call without changing your application code.
|
|
10
|
+
|
|
11
|
+
## Installation
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
npm install @reaatech/llm-cost-telemetry-providers
|
|
15
|
+
# or
|
|
16
|
+
pnpm add @reaatech/llm-cost-telemetry-providers
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
Provider SDKs are peer dependencies — install only the ones you use:
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
pnpm add openai # for wrapOpenAI
|
|
23
|
+
pnpm add @anthropic-ai/sdk # for wrapAnthropic
|
|
24
|
+
pnpm add @google/generative-ai # for wrapGoogleGenerativeAI
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
## Feature Overview
|
|
28
|
+
|
|
29
|
+
- **OpenAI wrapper** — intercepts `chat.completions.create` and `completions.create`
|
|
30
|
+
- **Anthropic wrapper** — intercepts `messages.create` with cache token awareness
|
|
31
|
+
- **Google wrapper** — intercepts `generateContent` and `generateContentStream` with streaming support
|
|
32
|
+
- **Telemetry context injection** — attach tenant, feature, and route metadata to each call
|
|
33
|
+
- **Cost span emission** — every intercepted call produces a `CostSpan` with token counts and timing
|
|
34
|
+
- **Pluggable span handler** — register a callback to forward spans to aggregators, exporters, or your own pipeline
|
|
35
|
+
|
|
36
|
+
## Quick Start
|
|
37
|
+
|
|
38
|
+
```typescript
|
|
39
|
+
import { wrapOpenAI } from "@reaatech/llm-cost-telemetry-providers";
|
|
40
|
+
import OpenAI from "openai";
|
|
41
|
+
|
|
42
|
+
const client = wrapOpenAI(new OpenAI({ apiKey: process.env.OPENAI_API_KEY }));
|
|
43
|
+
|
|
44
|
+
const response = await client.chat.completions.create({
|
|
45
|
+
model: "gpt-4",
|
|
46
|
+
messages: [{ role: "user", content: "Hello!" }],
|
|
47
|
+
telemetry: {
|
|
48
|
+
tenant: "acme-corp",
|
|
49
|
+
feature: "chat-support",
|
|
50
|
+
route: "/api/chat",
|
|
51
|
+
},
|
|
52
|
+
});
|
|
53
|
+
|
|
54
|
+
// A CostSpan was automatically emitted with:
|
|
55
|
+
// provider: "openai", model: "gpt-4"
|
|
56
|
+
// inputTokens: response.usage.prompt_tokens
|
|
57
|
+
// outputTokens: response.usage.completion_tokens
|
|
58
|
+
// timing: request duration
|
|
59
|
+
// telemetry: { tenant, feature, route }
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
## API Reference
|
|
63
|
+
|
|
64
|
+
### `wrapOpenAI(client: OpenAI): WrappedOpenAI`
|
|
65
|
+
|
|
66
|
+
Wraps an OpenAI client instance. The returned object preserves the full OpenAI API surface. Intercepted methods:
|
|
67
|
+
|
|
68
|
+
| Method | Telemetry Added |
|
|
69
|
+
|--------|----------------|
|
|
70
|
+
| `chat.completions.create(params)` | `telemetry?` on `params` |
|
|
71
|
+
| `completions.create(params)` | `telemetry?` on `params` |
|
|
72
|
+
|
|
73
|
+
```typescript
|
|
74
|
+
import { wrapOpenAI, type WrappedOpenAI } from "@reaatech/llm-cost-telemetry-providers";
|
|
75
|
+
|
|
76
|
+
const client: WrappedOpenAI = wrapOpenAI(new OpenAI());
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
### `wrapAnthropic(client: Anthropic): WrappedAnthropic`
|
|
80
|
+
|
|
81
|
+
Wraps an Anthropic client. Automatically captures cache read and cache creation tokens:
|
|
82
|
+
|
|
83
|
+
```typescript
|
|
84
|
+
import { wrapAnthropic, type WrappedAnthropic } from "@reaatech/llm-cost-telemetry-providers";
|
|
85
|
+
import Anthropic from "@anthropic-ai/sdk";
|
|
86
|
+
|
|
87
|
+
const client: WrappedAnthropic = wrapAnthropic(new Anthropic());
|
|
88
|
+
|
|
89
|
+
const response = await client.messages.create({
|
|
90
|
+
model: "claude-3-sonnet-20240229",
|
|
91
|
+
max_tokens: 1024,
|
|
92
|
+
system: "You are a helpful assistant.", // eligible for prompt caching
|
|
93
|
+
messages: [{ role: "user", content: "Hello!" }],
|
|
94
|
+
telemetry: { tenant: "acme-corp" },
|
|
95
|
+
});
|
|
96
|
+
|
|
97
|
+
// CostSpan includes: cacheReadTokens, cacheCreationTokens from response.usage
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
### `wrapGoogleGenerativeAI(client: GoogleGenerativeAI): WrappedGoogleGenerativeAI`
|
|
101
|
+
|
|
102
|
+
Wraps a Google Generative AI client. Supports both streaming and non-streaming:
|
|
103
|
+
|
|
104
|
+
```typescript
|
|
105
|
+
import {
|
|
106
|
+
wrapGoogleGenerativeAI,
|
|
107
|
+
type WrappedGoogleGenerativeAI,
|
|
108
|
+
type WrappedGenerativeModel,
|
|
109
|
+
} from "@reaatech/llm-cost-telemetry-providers";
|
|
110
|
+
import { GoogleGenerativeAI } from "@google/generative-ai";
|
|
111
|
+
|
|
112
|
+
const genAI: WrappedGoogleGenerativeAI = wrapGoogleGenerativeAI(
|
|
113
|
+
new GoogleGenerativeAI(process.env.GOOGLE_API_KEY)
|
|
114
|
+
);
|
|
115
|
+
|
|
116
|
+
const model: WrappedGenerativeModel = genAI.getGenerativeModel({ model: "gemini-pro" });
|
|
117
|
+
|
|
118
|
+
// Non-streaming
|
|
119
|
+
const result = await model.generateContent("Hello!", {
|
|
120
|
+
telemetry: { tenant: "acme-corp" },
|
|
121
|
+
});
|
|
122
|
+
|
|
123
|
+
// Streaming — span emitted when the stream completes
|
|
124
|
+
const stream = await model.generateContentStream("Count to 10", {
|
|
125
|
+
telemetry: { tenant: "acme-corp" },
|
|
126
|
+
});
|
|
127
|
+
for await (const chunk of stream.stream) {
|
|
128
|
+
console.log(chunk.text());
|
|
129
|
+
}
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
### `BaseProviderWrapper<TClient>`
|
|
133
|
+
|
|
134
|
+
Abstract base class for building custom provider wrappers:
|
|
135
|
+
|
|
136
|
+
```typescript
|
|
137
|
+
import { BaseProviderWrapper } from "@reaatech/llm-cost-telemetry-providers";
|
|
138
|
+
import type { RequestMetadata, ResponseMetadata, SpanCallback } from "@reaatech/llm-cost-telemetry-providers";
|
|
139
|
+
|
|
140
|
+
class MyProviderWrapper extends BaseProviderWrapper<MyClient> {
|
|
141
|
+
get provider(): Provider { return "openai"; }
|
|
142
|
+
}
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
### Span Callback
|
|
146
|
+
|
|
147
|
+
Register a callback to receive emitted `CostSpan` objects:
|
|
148
|
+
|
|
149
|
+
```typescript
|
|
150
|
+
import { OpenAIWrapper, type SpanCallback } from "@reaatech/llm-cost-telemetry-providers";
|
|
151
|
+
|
|
152
|
+
const onSpan: SpanCallback = (span) => {
|
|
153
|
+
console.log(`Cost: $${span.costUsd}`);
|
|
154
|
+
// Forward to aggregator, exporter, or your own pipeline
|
|
155
|
+
};
|
|
156
|
+
|
|
157
|
+
const wrapper = new OpenAIWrapper(new OpenAI());
|
|
158
|
+
wrapper.onSpan(onSpan); // register the callback on the wrapper before making calls
|
|
159
|
+
const client = wrapper.wrap();
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
## Usage Patterns
|
|
163
|
+
|
|
164
|
+
### Multi-Tenant Cost Tracking
|
|
165
|
+
|
|
166
|
+
```typescript
|
|
167
|
+
const client = wrapOpenAI(new OpenAI());
|
|
168
|
+
|
|
169
|
+
// Different tenants attach different telemetry
|
|
170
|
+
const acmeResp = await client.chat.completions.create({
|
|
171
|
+
model: "gpt-3.5-turbo",
|
|
172
|
+
messages: [{ role: "user", content: "Hi" }],
|
|
173
|
+
telemetry: { tenant: "acme-corp", feature: "support" },
|
|
174
|
+
});
|
|
175
|
+
|
|
176
|
+
const startupResp = await client.chat.completions.create({
|
|
177
|
+
model: "gpt-3.5-turbo",
|
|
178
|
+
messages: [{ role: "user", content: "Hi" }],
|
|
179
|
+
telemetry: { tenant: "startup-inc", feature: "onboarding" },
|
|
180
|
+
});
|
|
181
|
+
|
|
182
|
+
// Each span is tagged with the correct tenant
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
### Wrapping Multiple Providers
|
|
186
|
+
|
|
187
|
+
```typescript
|
|
188
|
+
import { wrapOpenAI, wrapAnthropic, wrapGoogleGenerativeAI } from "@reaatech/llm-cost-telemetry-providers";
|
|
189
|
+
|
|
190
|
+
const openai = wrapOpenAI(new OpenAI({ apiKey: process.env.OPENAI_API_KEY }));
|
|
191
|
+
const anthropic = wrapAnthropic(new Anthropic({ apiKey: process.env.ANTHROPIC_API_KEY }));
|
|
192
|
+
const google = wrapGoogleGenerativeAI(new GoogleGenerativeAI(process.env.GOOGLE_API_KEY));
|
|
193
|
+
|
|
194
|
+
// Use each as normal — telemetry is transparent
|
|
195
|
+
```
|
|
196
|
+
|
|
197
|
+
## Related Packages
|
|
198
|
+
|
|
199
|
+
- [@reaatech/llm-cost-telemetry](https://www.npmjs.com/package/@reaatech/llm-cost-telemetry) — Core types and utilities
|
|
200
|
+
- [@reaatech/llm-cost-telemetry-calculator](https://www.npmjs.com/package/@reaatech/llm-cost-telemetry-calculator) — Cost calculation engine
|
|
201
|
+
- [@reaatech/llm-cost-telemetry-aggregation](https://www.npmjs.com/package/@reaatech/llm-cost-telemetry-aggregation) — Span collection and aggregation
|
|
202
|
+
|
|
203
|
+
## License
|
|
204
|
+
|
|
205
|
+
[MIT](https://github.com/reaatech/llm-cost-telemetry/blob/main/LICENSE)
|
package/dist/index.cjs
ADDED
|
@@ -0,0 +1,467 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __defProp = Object.defineProperty;
|
|
3
|
+
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
4
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
5
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
6
|
+
var __export = (target, all) => {
|
|
7
|
+
for (var name in all)
|
|
8
|
+
__defProp(target, name, { get: all[name], enumerable: true });
|
|
9
|
+
};
|
|
10
|
+
var __copyProps = (to, from, except, desc) => {
|
|
11
|
+
if (from && typeof from === "object" || typeof from === "function") {
|
|
12
|
+
for (let key of __getOwnPropNames(from))
|
|
13
|
+
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
14
|
+
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
15
|
+
}
|
|
16
|
+
return to;
|
|
17
|
+
};
|
|
18
|
+
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
19
|
+
|
|
20
|
+
// src/index.ts
|
|
21
|
+
// Public export table for the bundle. `__export` installs each entry as an
// enumerable getter, so the names below are resolved when read and may refer
// to classes and functions declared further down in this file.
var index_exports = {};
__export(index_exports, {
  AnthropicWrapper: () => AnthropicWrapper,
  BaseProviderWrapper: () => BaseProviderWrapper,
  GoogleGenerativeAIWrapper: () => GoogleGenerativeAIWrapper,
  OpenAIWrapper: () => OpenAIWrapper,
  wrapAnthropic: () => wrapAnthropic,
  wrapGoogleGenerativeAI: () => wrapGoogleGenerativeAI,
  wrapOpenAI: () => wrapOpenAI
});
// Publish the table as the CommonJS exports, tagged with `__esModule`.
module.exports = __toCommonJS(index_exports);
|
|
32
|
+
|
|
33
|
+
// src/base.ts
|
|
34
|
+
var import_llm_cost_telemetry = require("@reaatech/llm-cost-telemetry");
|
|
35
|
+
var BaseProviderWrapper = class {
|
|
36
|
+
/** The wrapped client */
|
|
37
|
+
client;
|
|
38
|
+
/** Callback for cost spans */
|
|
39
|
+
onSpanCallback = null;
|
|
40
|
+
/** Default telemetry context */
|
|
41
|
+
defaultContext = {};
|
|
42
|
+
/**
|
|
43
|
+
* Create a new provider wrapper
|
|
44
|
+
*/
|
|
45
|
+
constructor(client) {
|
|
46
|
+
this.client = client;
|
|
47
|
+
}
|
|
48
|
+
/**
|
|
49
|
+
* Set the callback for cost spans
|
|
50
|
+
*/
|
|
51
|
+
onSpan(callback) {
|
|
52
|
+
this.onSpanCallback = callback;
|
|
53
|
+
}
|
|
54
|
+
/**
|
|
55
|
+
* Set default telemetry context
|
|
56
|
+
*/
|
|
57
|
+
setDefaultContext(context) {
|
|
58
|
+
this.defaultContext = context;
|
|
59
|
+
}
|
|
60
|
+
/**
|
|
61
|
+
* Create a cost span from request and response metadata
|
|
62
|
+
*/
|
|
63
|
+
createSpan(request, response) {
|
|
64
|
+
const duration = response.endTime.getTime() - request.startTime.getTime();
|
|
65
|
+
return {
|
|
66
|
+
id: (0, import_llm_cost_telemetry.generateId)(),
|
|
67
|
+
provider: this.provider,
|
|
68
|
+
model: request.model,
|
|
69
|
+
inputTokens: response.inputTokens,
|
|
70
|
+
outputTokens: response.outputTokens,
|
|
71
|
+
totalTokens: response.inputTokens + response.outputTokens,
|
|
72
|
+
costUsd: 0,
|
|
73
|
+
// Will be calculated by the cost calculator
|
|
74
|
+
startTime: request.startTime,
|
|
75
|
+
endTime: response.endTime,
|
|
76
|
+
durationMs: Math.max(0, duration),
|
|
77
|
+
cacheReadTokens: response.cacheReadTokens,
|
|
78
|
+
cacheCreationTokens: response.cacheCreationTokens,
|
|
79
|
+
telemetry: {
|
|
80
|
+
...this.defaultContext,
|
|
81
|
+
...request.telemetry
|
|
82
|
+
},
|
|
83
|
+
metadata: {
|
|
84
|
+
estimated: false
|
|
85
|
+
}
|
|
86
|
+
};
|
|
87
|
+
}
|
|
88
|
+
/**
|
|
89
|
+
* Emit a cost span
|
|
90
|
+
*/
|
|
91
|
+
emitSpan(span) {
|
|
92
|
+
if (this.onSpanCallback) {
|
|
93
|
+
try {
|
|
94
|
+
this.onSpanCallback(span);
|
|
95
|
+
} catch {
|
|
96
|
+
}
|
|
97
|
+
}
|
|
98
|
+
}
|
|
99
|
+
/**
|
|
100
|
+
* Extract telemetry context from request options
|
|
101
|
+
*/
|
|
102
|
+
extractTelemetryContext(options) {
|
|
103
|
+
const telemetry = options.telemetry;
|
|
104
|
+
if (telemetry && typeof telemetry === "object") {
|
|
105
|
+
const ctx = {};
|
|
106
|
+
if ("tenant" in telemetry && typeof telemetry.tenant === "string") {
|
|
107
|
+
ctx.tenant = telemetry.tenant;
|
|
108
|
+
}
|
|
109
|
+
if ("feature" in telemetry && typeof telemetry.feature === "string") {
|
|
110
|
+
ctx.feature = telemetry.feature;
|
|
111
|
+
}
|
|
112
|
+
if ("route" in telemetry && typeof telemetry.route === "string") {
|
|
113
|
+
ctx.route = telemetry.route;
|
|
114
|
+
}
|
|
115
|
+
return Object.keys(ctx).length > 0 ? ctx : void 0;
|
|
116
|
+
}
|
|
117
|
+
return void 0;
|
|
118
|
+
}
|
|
119
|
+
/**
|
|
120
|
+
* Dispose of the wrapper and release resources
|
|
121
|
+
*/
|
|
122
|
+
dispose() {
|
|
123
|
+
this.onSpanCallback = null;
|
|
124
|
+
this.defaultContext = {};
|
|
125
|
+
}
|
|
126
|
+
/**
|
|
127
|
+
* Get the underlying client
|
|
128
|
+
*/
|
|
129
|
+
unwrap() {
|
|
130
|
+
return this.client;
|
|
131
|
+
}
|
|
132
|
+
};
|
|
133
|
+
|
|
134
|
+
// src/openai.ts
|
|
135
|
+
var import_llm_cost_telemetry2 = require("@reaatech/llm-cost-telemetry");
|
|
136
|
+
/**
 * Cost-telemetry wrapper for an OpenAI client.
 */
var OpenAIWrapper = class extends BaseProviderWrapper {
  /**
   * Provider name stamped on every span created by this wrapper.
   */
  get provider() {
    return "openai";
  }
  /**
   * Instrument the OpenAI client and return it.
   *
   * Replaces `chat.completions.create` and `completions.create` on the
   * wrapped client IN PLACE; the returned object is the same client instance.
   * Each replacement:
   *   1. reads the optional `telemetry` field from the request options and
   *      removes it before forwarding the options to the SDK,
   *   2. awaits the original method,
   *   3. emits one span: token counts from `response.usage` on success
   *      (0 when the resolved value has no `usage`), or zero tokens plus the
   *      error on failure, after which the error is re-thrown unchanged.
   *
   * Telemetry is kept out of the request's own try/catch: a failure while
   * building or emitting a span never changes the outcome of the API call.
   *
   * NOTE(review): the replacements are `async` functions, so they return a
   * plain Promise. Any extra members the SDK attaches to the promise it
   * returns are not preserved. Confirm whether callers rely on them.
   */
  wrap() {
    const client = this.client;
    // Swap `resource.create` for an instrumented version that delegates to the original.
    const instrument = (resource) => {
      const originalCreate = resource.create.bind(resource);
      resource.create = (async (options, ...rest) => {
        const startTime = (0, import_llm_cost_telemetry2.now)();
        const telemetry = this.extractTelemetryContext(options);
        const model = options.model;
        // `telemetry` is this library's own field; the SDK must not see it.
        const { telemetry: _ignored, ...cleanOptions } = options;
        // Build and emit the span for this call; `failure` is `{ error }` on the error path.
        const record = (usage, failure) => {
          try {
            const endTime = (0, import_llm_cost_telemetry2.now)();
            const requestMetadata = { model, params: cleanOptions, telemetry, startTime };
            const responseMetadata = {
              inputTokens: usage?.prompt_tokens ?? 0,
              outputTokens: usage?.completion_tokens ?? 0,
              endTime,
              ...(failure ? { error: failure.error } : {})
            };
            this.emitSpan(this.createSpan(requestMetadata, responseMetadata));
          } catch {
            // Best-effort: telemetry problems must not affect the API call.
          }
        };
        let response;
        try {
          response = await originalCreate(cleanOptions, ...rest);
        } catch (error) {
          record(void 0, { error });
          throw error;
        }
        record(response?.usage);
        return response;
      });
    };
    instrument(client.chat.completions);
    instrument(client.completions);
    return client;
  }
};
|
|
243
|
+
function wrapOpenAI(client) {
|
|
244
|
+
const wrapper = new OpenAIWrapper(client);
|
|
245
|
+
return wrapper.wrap();
|
|
246
|
+
}
|
|
247
|
+
|
|
248
|
+
// src/anthropic.ts
|
|
249
|
+
var import_llm_cost_telemetry3 = require("@reaatech/llm-cost-telemetry");
|
|
250
|
+
/**
 * Cost-telemetry wrapper for an Anthropic client.
 */
var AnthropicWrapper = class extends BaseProviderWrapper {
  /**
   * Provider name stamped on every span created by this wrapper.
   */
  get provider() {
    return "anthropic";
  }
  /**
   * Instrument the Anthropic client and return it.
   *
   * Replaces `messages.create` on the wrapped client IN PLACE; the returned
   * object is the same client instance. The replacement:
   *   1. reads the optional `telemetry` field from the request options and
   *      removes it before forwarding the options to the SDK,
   *   2. awaits the original method,
   *   3. emits one span: `input_tokens`, `output_tokens` and the two cache
   *      counters from `response.usage` on success, or zero tokens plus the
   *      error on failure, after which the error is re-thrown unchanged.
   *
   * A resolved value without `usage` is reported with zero tokens instead of
   * throwing. Previously the unguarded `response.usage.input_tokens` read
   * raised a TypeError in that case and turned a successful request into a
   * failure. (Presumably this is what a `stream: true` call resolves to;
   * confirm against the SDK.)
   *
   * Telemetry is kept out of the request's own try/catch: a failure while
   * building or emitting a span never changes the outcome of the API call.
   */
  wrap() {
    const client = this.client;
    const messages = client.messages;
    const originalCreate = messages.create.bind(messages);
    messages.create = (async (options, ...rest) => {
      const startTime = (0, import_llm_cost_telemetry3.now)();
      const telemetry = this.extractTelemetryContext(options);
      const model = options.model;
      // `telemetry` is this library's own field; the SDK must not see it.
      const { telemetry: _ignored, ...cleanOptions } = options;
      // Build and emit the span for this call; `failure` is `{ error }` on the error path.
      const record = (usage, failure) => {
        try {
          const endTime = (0, import_llm_cost_telemetry3.now)();
          const requestMetadata = { model, params: cleanOptions, telemetry, startTime };
          const responseMetadata = {
            inputTokens: usage?.input_tokens ?? 0,
            outputTokens: usage?.output_tokens ?? 0,
            // Cache counters stay undefined when the response does not report them.
            cacheReadTokens: usage?.cache_read_input_tokens ?? void 0,
            cacheCreationTokens: usage?.cache_creation_input_tokens ?? void 0,
            endTime,
            ...(failure ? { error: failure.error } : {})
          };
          this.emitSpan(this.createSpan(requestMetadata, responseMetadata));
        } catch {
          // Best-effort: telemetry problems must not affect the API call.
        }
      };
      let response;
      try {
        response = await originalCreate(cleanOptions, ...rest);
      } catch (error) {
        record(void 0, { error });
        throw error;
      }
      record(response?.usage);
      return response;
    });
    return client;
  }
};
|
|
325
|
+
function wrapAnthropic(client) {
|
|
326
|
+
const wrapper = new AnthropicWrapper(client);
|
|
327
|
+
return wrapper.wrap();
|
|
328
|
+
}
|
|
329
|
+
|
|
330
|
+
// src/google.ts
|
|
331
|
+
var import_llm_cost_telemetry4 = require("@reaatech/llm-cost-telemetry");
|
|
332
|
+
/**
 * Cost-telemetry wrapper for a Google Generative AI client.
 */
var GoogleGenerativeAIWrapper = class extends BaseProviderWrapper {
  /**
   * Provider name stamped on every span created by this wrapper.
   */
  get provider() {
    return "google";
  }
  /**
   * Instrument the GoogleGenerativeAI client and return it.
   *
   * Replaces `getGenerativeModel` on the wrapped client IN PLACE; the
   * returned object is the same client instance. Every model obtained
   * through it gets instrumented `generateContent` and
   * `generateContentStream` methods:
   *
   * - The optional `telemetry` field is read from the second (`options`)
   *   argument and removed before the options are forwarded to the SDK.
   *   (The previous code passed `options.telemetry` to
   *   `extractTelemetryContext`, which itself looks up `.telemetry`, so the
   *   context was always lost.)
   * - `generateContent` emits one span per call. Usage is read from
   *   `result.response.usageMetadata`, which is where the SDK's result object
   *   carries it; the top-level `result.usageMetadata` the previous code
   *   read is kept as a fallback.
   * - `generateContentStream` returns `{ stream, response }` where `stream`
   *   is a `ReadableStream` that re-publishes the SDK's chunks. The span is
   *   emitted once the SDK stream has been fully read, using the last
   *   `usageMetadata` seen on a chunk. A failure while reading emits a
   *   zero-token error span and errors the stream. A failure to start the
   *   stream also emits an error span before re-throwing.
   *
   * Telemetry is best-effort: a failure while building or emitting a span
   * never changes the outcome of the API call.
   */
  wrap() {
    const wrapper = this;
    const client = this.client;
    const originalGetModel = client.getGenerativeModel.bind(client);
    client.getGenerativeModel = ((modelParams, ...rest) => {
      const model = originalGetModel(modelParams, ...rest);
      // Capture per-call state and return the SDK options plus a span recorder.
      const beginCall = (request, options) => {
        const startTime = (0, import_llm_cost_telemetry4.now)();
        const telemetry = wrapper.extractTelemetryContext(options);
        const modelId = modelParams.model;
        const params = typeof request === "string" ? { prompt: request } : request;
        let sdkOptions = options;
        if (options && typeof options === "object" && "telemetry" in options) {
          // `telemetry` is this library's own field; the SDK must not see it.
          const { telemetry: _ignored, ...forwarded } = options;
          sdkOptions = forwarded;
        }
        // `failure` is `{ error }` on the error path.
        const record = (usage, failure) => {
          try {
            const endTime = (0, import_llm_cost_telemetry4.now)();
            const requestMetadata = { model: modelId, params, telemetry, startTime };
            const responseMetadata = {
              inputTokens: usage?.promptTokenCount ?? 0,
              outputTokens: usage?.candidatesTokenCount ?? 0,
              endTime,
              ...(failure ? { error: failure.error } : {})
            };
            wrapper.emitSpan(wrapper.createSpan(requestMetadata, responseMetadata));
          } catch {
            // Best-effort: telemetry problems must not affect the API call.
          }
        };
        return { sdkOptions, record };
      };
      const originalGenerate = model.generateContent.bind(model);
      model.generateContent = (async (request, options) => {
        const call = beginCall(request, options);
        let result;
        try {
          result = await originalGenerate(request, call.sdkOptions);
        } catch (error) {
          call.record(void 0, { error });
          throw error;
        }
        call.record(result?.response?.usageMetadata ?? result?.usageMetadata);
        return result;
      });
      const originalGenerateStream = model.generateContentStream.bind(model);
      model.generateContentStream = (async (request, options) => {
        const call = beginCall(request, options);
        let responseStream;
        try {
          responseStream = await originalGenerateStream(request, call.sdkOptions);
        } catch (error) {
          call.record(void 0, { error });
          throw error;
        }
        const originalStream = responseStream.stream;
        let totalInputTokens = 0;
        let totalOutputTokens = 0;
        const wrappedStream = new ReadableStream({
          async start(controller) {
            try {
              for await (const chunk of originalStream) {
                if (chunk.usageMetadata) {
                  // Keep the most recent non-null counts reported by the stream.
                  totalInputTokens = chunk.usageMetadata.promptTokenCount ?? totalInputTokens;
                  totalOutputTokens = chunk.usageMetadata.candidatesTokenCount ?? totalOutputTokens;
                }
                controller.enqueue(chunk);
              }
            } catch (error) {
              call.record(void 0, { error });
              controller.error(error);
              return;
            }
            call.record({
              promptTokenCount: totalInputTokens,
              candidatesTokenCount: totalOutputTokens
            });
            controller.close();
          }
        });
        return {
          stream: wrappedStream,
          response: responseStream.response
        };
      });
      return model;
    });
    return client;
  }
};
|
|
454
|
+
function wrapGoogleGenerativeAI(client) {
|
|
455
|
+
const wrapper = new GoogleGenerativeAIWrapper(client);
|
|
456
|
+
return wrapper.wrap();
|
|
457
|
+
}
|
|
458
|
+
// Annotate the CommonJS export names for ESM import in node:
// The `0 &&` guard short-circuits, so this assignment never runs. The object
// literal is here only so the export names appear literally in the file
// (presumably for static detection of named exports; do not reformat or remove).
0 && (module.exports = {
  AnthropicWrapper,
  BaseProviderWrapper,
  GoogleGenerativeAIWrapper,
  OpenAIWrapper,
  wrapAnthropic,
  wrapGoogleGenerativeAI,
  wrapOpenAI
});
|