@aispendguard/sdk 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +288 -0
- package/dist/anthropic.d.ts +49 -0
- package/dist/anthropic.js +27 -0
- package/dist/client.d.ts +14 -0
- package/dist/client.js +92 -0
- package/dist/gemini.d.ts +40 -0
- package/dist/gemini.js +22 -0
- package/dist/index.d.ts +18 -0
- package/dist/index.js +36 -0
- package/dist/langchain.d.ts +61 -0
- package/dist/langchain.js +197 -0
- package/dist/openai.d.ts +46 -0
- package/dist/openai.js +58 -0
- package/dist/types.d.ts +95 -0
- package/dist/types.js +2 -0
- package/dist/validate.d.ts +2 -0
- package/dist/validate.js +188 -0
- package/dist/wrap-anthropic.d.ts +15 -0
- package/dist/wrap-anthropic.js +78 -0
- package/dist/wrap-gemini.d.ts +16 -0
- package/dist/wrap-gemini.js +82 -0
- package/dist/wrap-openai.d.ts +15 -0
- package/dist/wrap-openai.js +81 -0
- package/package.json +49 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 AISpendGuard
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,288 @@
|
|
|
1
|
+
# AISpendGuard SDK
|
|
2
|
+
|
|
3
|
+
Tags-only SDK for sending AI usage events to AISpendGuard.
|
|
4
|
+
|
|
5
|
+
## What it enforces
|
|
6
|
+
- No prompt/output/content fields
|
|
7
|
+
- Strict event validation
|
|
8
|
+
- Required tags: `task_type`, `feature`, `route`
|
|
9
|
+
- Custom tags allowed (lowercase snake_case keys), for example: `team`, `project_code`, `region`
|
|
10
|
+
- Custom tag values can be either string values or array values (`string[]`)
|
|
11
|
+
- API key auth via `x-api-key`
|
|
12
|
+
|
|
13
|
+
## Install
|
|
14
|
+
```bash
|
|
15
|
+
npm install @aispendguard/sdk
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
## Quick start
|
|
19
|
+
```ts
|
|
20
|
+
import { init, trackUsage } from "@aispendguard/sdk";
|
|
21
|
+
|
|
22
|
+
init({
|
|
23
|
+
apiKey: process.env.AISPENDGUARD_API_KEY!,
|
|
24
|
+
endpoint: "https://www.aispendguard.com/api/ingest",
|
|
25
|
+
});
|
|
26
|
+
|
|
27
|
+
await trackUsage({
|
|
28
|
+
provider: "openai",
|
|
29
|
+
model: "gpt-4o-mini",
|
|
30
|
+
inputTokens: 120,
|
|
31
|
+
outputTokens: 12,
|
|
32
|
+
latencyMs: 840,
|
|
33
|
+
costUsd: 0.0021,
|
|
34
|
+
timestamp: new Date(),
|
|
35
|
+
tags: {
|
|
36
|
+
task_type: "classify",
|
|
37
|
+
feature: "lead_classifier",
|
|
38
|
+
route: "POST /api/ai/classify",
|
|
39
|
+
environment: "prod",
|
|
40
|
+
customer_plan: "free"
|
|
41
|
+
}
|
|
42
|
+
});
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
## OpenAI helper
|
|
46
|
+
```ts
|
|
47
|
+
import { init, trackUsage, createOpenAIUsageEvent } from "@aispendguard/sdk";
|
|
48
|
+
|
|
49
|
+
init({
|
|
50
|
+
apiKey: process.env.AISPENDGUARD_API_KEY!,
|
|
51
|
+
endpoint: "https://www.aispendguard.com/api/ingest",
|
|
52
|
+
});
|
|
53
|
+
|
|
54
|
+
const startedAt = Date.now();
|
|
55
|
+
const response = await openai.responses.create({
|
|
56
|
+
model: "gpt-4o-mini",
|
|
57
|
+
input: "Classify this lead"
|
|
58
|
+
});
|
|
59
|
+
|
|
60
|
+
const event = createOpenAIUsageEvent({
|
|
61
|
+
model: "gpt-4o-mini",
|
|
62
|
+
resolvedModel: response.model, // "gpt-4o-mini-2024-07-18" — pinned version
|
|
63
|
+
usage: response.usage, // auto-extracts tokens, cache hits, reasoning tokens
|
|
64
|
+
latencyMs: Date.now() - startedAt,
|
|
65
|
+
tags: {
|
|
66
|
+
task_type: "classify",
|
|
67
|
+
feature: "lead_classifier",
|
|
68
|
+
route: "POST /api/ai/classify"
|
|
69
|
+
}
|
|
70
|
+
});
|
|
71
|
+
|
|
72
|
+
await trackUsage(event);
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
## Anthropic helper
|
|
76
|
+
```ts
|
|
77
|
+
import { init, trackUsage, createAnthropicUsageEvent } from "@aispendguard/sdk";
|
|
78
|
+
|
|
79
|
+
init({
|
|
80
|
+
apiKey: process.env.AISPENDGUARD_API_KEY!,
|
|
81
|
+
endpoint: "https://www.aispendguard.com/api/ingest"
|
|
82
|
+
});
|
|
83
|
+
|
|
84
|
+
const startedAt = Date.now();
|
|
85
|
+
const message = await anthropic.messages.create({
|
|
86
|
+
model: "claude-3-5-sonnet-latest",
|
|
87
|
+
max_tokens: 200,
|
|
88
|
+
messages: [{ role: "user", content: "Summarize this thread." }]
|
|
89
|
+
});
|
|
90
|
+
|
|
91
|
+
const event = createAnthropicUsageEvent({
|
|
92
|
+
model: "claude-3-5-sonnet-latest",
|
|
93
|
+
resolvedModel: message.model, // "claude-3-5-sonnet-20241022" — pinned version
|
|
94
|
+
usage: message.usage, // auto-extracts tokens, cache_read, cache_creation
|
|
95
|
+
latencyMs: Date.now() - startedAt,
|
|
96
|
+
tags: {
|
|
97
|
+
task_type: "summarize",
|
|
98
|
+
feature: "support_summary",
|
|
99
|
+
route: "POST /api/support/summary"
|
|
100
|
+
}
|
|
101
|
+
});
|
|
102
|
+
|
|
103
|
+
await trackUsage(event);
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
## Gemini helper
|
|
107
|
+
```ts
|
|
108
|
+
import { init, trackUsage, createGeminiUsageEvent } from "@aispendguard/sdk";
|
|
109
|
+
|
|
110
|
+
init({
|
|
111
|
+
apiKey: process.env.AISPENDGUARD_API_KEY!,
|
|
112
|
+
endpoint: "https://www.aispendguard.com/api/ingest"
|
|
113
|
+
});
|
|
114
|
+
|
|
115
|
+
const startedAt = Date.now();
|
|
116
|
+
const response = await gemini.models.generateContent({
|
|
117
|
+
model: "gemini-2.0-flash",
|
|
118
|
+
contents: [{ role: "user", parts: [{ text: "Translate this to French." }] }]
|
|
119
|
+
});
|
|
120
|
+
|
|
121
|
+
const event = createGeminiUsageEvent({
|
|
122
|
+
model: "gemini-2.0-flash",
|
|
123
|
+
resolvedModel: response.modelVersion, // "gemini-2.0-flash-001" — pinned version
|
|
124
|
+
usage: response.usageMetadata, // auto-extracts tokens, cachedContent, thoughts
|
|
125
|
+
latencyMs: Date.now() - startedAt,
|
|
126
|
+
tags: {
|
|
127
|
+
task_type: "translate",
|
|
128
|
+
feature: "ui_i18n",
|
|
129
|
+
route: "POST /api/translate"
|
|
130
|
+
}
|
|
131
|
+
});
|
|
132
|
+
|
|
133
|
+
await trackUsage(event);
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
## API
|
|
137
|
+
- `init(config)`
|
|
138
|
+
- `trackUsage(event | event[])`
|
|
139
|
+
- `createOpenAIUsageEvent(params)` — OpenAI Chat Completions + Responses API
|
|
140
|
+
- `createAnthropicUsageEvent(params)` — Anthropic Messages API
|
|
141
|
+
- `createGeminiUsageEvent(params)` — Google Gemini generateContent API
|
|
142
|
+
- `new AISpendGuardClient(config).trackUsage(...)` — direct client usage (used by OpenClaw plugin)
|
|
143
|
+
|
|
144
|
+
## Config
|
|
145
|
+
- `apiKey` (required)
|
|
146
|
+
- `endpoint` (default: `https://www.aispendguard.com/api/ingest`)
|
|
147
|
+
- `timeoutMs` (default: `5000`)
|
|
148
|
+
- `maxRetries` (default: `2`)
|
|
149
|
+
- `strict` (default: `false`, if `true` throws on errors)
|
|
150
|
+
|
|
151
|
+
## Notes
|
|
152
|
+
- Non-strict mode logs and returns `{ ok: false, error }`.
|
|
153
|
+
- Strict mode throws on validation/network/ingest errors.
|
|
154
|
+
|
|
155
|
+
## Validation Limits
|
|
156
|
+
- Required tags: `task_type`, `feature`, `route` (must be non-empty strings)
|
|
157
|
+
- Known optional tags: `customer_plan`, `customer_id`, `provider`, `model`, `environment`, `agent_name`
|
|
158
|
+
- Custom tag keys: lowercase snake_case only, regex `^[a-z][a-z0-9_]{1,63}$`
|
|
159
|
+
- Custom tag values: `string` or `string[]`
|
|
160
|
+
- Max tags per event: `24`
|
|
161
|
+
- Max values in a single array tag: `16`
|
|
162
|
+
- Max length per string value: `120`
|
|
163
|
+
- Forbidden keys (blocked): prompt/content/output/message/attachment-like fields
|
|
164
|
+
|
|
165
|
+
## Extended token fields (optional)
|
|
166
|
+
|
|
167
|
+
These optional fields give AISpendGuard the data it needs for accurate cost calculation and cost-spike detection. The provider helpers extract them automatically from `response.usage`.
|
|
168
|
+
|
|
169
|
+
| Field | Type | What it is | Provider |
|
|
170
|
+
|-------|------|-----------|----------|
|
|
171
|
+
| `resolvedModel` | `string` | Pinned model version from response (e.g. `gpt-4o-mini-2024-07-18`) | All |
|
|
172
|
+
| `inputTokensCached` | `number` | Cache read tokens — already in `inputTokens`, billed cheaper | OpenAI (0.5×) · Anthropic (0.1×) · Gemini |
|
|
173
|
+
| `inputTokensCacheWrite` | `number` | Cache write tokens — already in `inputTokens`, billed at premium | Anthropic only (1.25×) |
|
|
174
|
+
| `thinkingTokens` | `number` | Reasoning/thinking tokens — already in `outputTokens`, billed at full output rate | OpenAI o1/o3 · Gemini 2.5 |
|
|
175
|
+
|
|
176
|
+
> **Anthropic note:** Extended thinking tokens (`claude-3-7-sonnet` with `thinking: enabled`) are
|
|
177
|
+
> included in `output_tokens` but NOT separately reported in the `usage` object. You can count
|
|
178
|
+
> `content` blocks of type `"thinking"` manually if you need the split.
|
|
179
|
+
|
|
180
|
+
### Why these matter
|
|
181
|
+
|
|
182
|
+
Without them, cost calculations are inaccurate:
|
|
183
|
+
- **Cache read tokens** cost 10–50% of normal — without tracking, you overstate spend on cached calls.
|
|
184
|
+
- **Cache write tokens** (Anthropic) cost 25% more — without tracking, you understate spend when building cache.
|
|
185
|
+
- **Thinking tokens** for o1/o3 can be 3–10× the visible output — without tracking, cost spikes are invisible.
|
|
186
|
+
- **Resolved model** lets AISpendGuard detect silent provider upgrades between versions.
|
|
187
|
+
|
|
188
|
+
### Manual override (no helper)
|
|
189
|
+
|
|
190
|
+
If you aren't using a helper, pass them directly in `trackUsage`:
|
|
191
|
+
```ts
|
|
192
|
+
await trackUsage({
|
|
193
|
+
provider: "openai",
|
|
194
|
+
model: "gpt-4o-mini",
|
|
195
|
+
resolvedModel: response.model,
|
|
196
|
+
inputTokens: 1000,
|
|
197
|
+
outputTokens: 50,
|
|
198
|
+
inputTokensCached: 800, // 800 of the 1000 input tokens were cache hits
|
|
199
|
+
thinkingTokens: 0,
|
|
200
|
+
latencyMs: 320,
|
|
201
|
+
timestamp: new Date(),
|
|
202
|
+
tags: { task_type: "classify", feature: "router", route: "POST /api/route" }
|
|
203
|
+
});
|
|
204
|
+
```
|
|
205
|
+
|
|
206
|
+
## task_type values
|
|
207
|
+
|
|
208
|
+
Pick the value that describes what the model is being asked to **produce**.
|
|
209
|
+
The right `task_type` is what enables AISpendGuard's waste detection rules.
|
|
210
|
+
|
|
211
|
+
| Value | What it does | Output size | Best model tier |
|
|
212
|
+
|-------|-------------|-------------|-----------------|
|
|
213
|
+
| `answer` | Q&A, RAG responses, knowledge retrieval | 100–800 tok | standard |
|
|
214
|
+
| `classify` | Label, categorize, detect intent | **1–10 tok** | **micro** |
|
|
215
|
+
| `extract` | Pull structured fields from text | 50–300 tok | micro |
|
|
216
|
+
| `summarize` | Condense long content, TLDR | 100–500 tok | standard |
|
|
217
|
+
| `generate` | Write/draft new content | 300–2000 tok | standard |
|
|
218
|
+
| `rewrite` | Paraphrase, tone-adjust, edit | ≈ input | standard |
|
|
219
|
+
| `translate` | Language translation | ≈ input | micro |
|
|
220
|
+
| `code` | Generate, review, explain code | 200–1500 tok | premium |
|
|
221
|
+
| `eval` | LLM-as-judge, quality score | **10–50 tok** | **micro** |
|
|
222
|
+
| `embed` | Text embedding / vector | fixed vector | embedding models |
|
|
223
|
+
| `route` | Decide which tool/path/agent | **1–20 tok** | **micro** |
|
|
224
|
+
| `plan` | Decompose tasks, strategy | 100–500 tok | premium |
|
|
225
|
+
| `agent_step` | Single step in agent loop | 50–800 tok | varies |
|
|
226
|
+
| `vision` | Image/PDF/screenshot understanding | 100–600 tok | standard |
|
|
227
|
+
| `chat` | Multi-turn stateful conversation | 100–500 tok | standard |
|
|
228
|
+
| `other` | None of the above (avoid — disables waste detection) | — | — |
|
|
229
|
+
|
|
230
|
+
**Model tiers:**
|
|
231
|
+
- `micro` — haiku / gpt-4o-mini / flash-lite (80–95% cheaper than premium for short-output tasks)
|
|
232
|
+
- `standard` — sonnet / gpt-4o / flash (best quality/cost balance for most workloads)
|
|
233
|
+
- `premium` — opus / o1 / o3 / gpt-4-turbo (complex reasoning, nuanced code, planning)
|
|
234
|
+
- `embedding` — text-embedding-3-small / embed-english-v3 (never use chat models for embeddings)
|
|
235
|
+
|
|
236
|
+
**Waste rule:** if `classify`, `route`, or `eval` uses a `premium` model with avg output < 100 tokens,
|
|
237
|
+
AISpendGuard will flag this and calculate the exact monthly saving from switching to `micro` tier.
|
|
238
|
+
|
|
239
|
+
## OpenClaw plugin
|
|
240
|
+
|
|
241
|
+
Track every LLM call made by an [OpenClaw](https://openclaw.ai/) AI agent automatically — no code changes in the agent itself.
|
|
242
|
+
|
|
243
|
+
The `@aispendguard/openclaw-plugin` hooks into OpenClaw's `llm_output` lifecycle event and forwards token-usage data to AISpendGuard.
|
|
244
|
+
|
|
245
|
+
### What gets tracked per LLM call
|
|
246
|
+
|
|
247
|
+
| Field | Source |
|
|
248
|
+
|---|---|
|
|
249
|
+
| `provider` | hook — openai, anthropic, google, deepseek |
|
|
250
|
+
| `model` | hook — e.g. claude-sonnet-4-20250514, gpt-4o |
|
|
251
|
+
| `input_tokens` | `usage.input` |
|
|
252
|
+
| `output_tokens` | `usage.output` |
|
|
253
|
+
| `input_tokens_cached` | `usage.cacheRead` (when cache is used) |
|
|
254
|
+
| `input_tokens_cache_write` | `usage.cacheWrite` (when cache is written) |
|
|
255
|
+
| `cache_ttl` | plugin config — `"5m"` (1.25×) or `"1h"` (2.0×) |
|
|
256
|
+
| `agent_name` | `ctx.agentId` |
|
|
257
|
+
| `session_id` | `ctx.sessionId` |
|
|
258
|
+
|
|
259
|
+
### Setup
|
|
260
|
+
|
|
261
|
+
```bash
|
|
262
|
+
# Set env vars for the OpenClaw plugin
|
|
263
|
+
AISG_ENDPOINT=https://www.aispendguard.com/api/ingest
|
|
264
|
+
AISG_API_KEY=asg_xxxxxxxxxxxxxxxx
|
|
265
|
+
AISG_CACHE_TTL=5m # "5m" (default) or "1h" for extended Anthropic caching
|
|
266
|
+
```
|
|
267
|
+
|
|
268
|
+
Install the plugin into OpenClaw:
|
|
269
|
+
```bash
|
|
270
|
+
cp -r openclaw-aispendguard-plugin ~/.openclaw/plugins/aispendguard
|
|
271
|
+
cd ~/.openclaw/plugins/aispendguard
|
|
272
|
+
npm install && npm run build
|
|
273
|
+
```
|
|
274
|
+
|
|
275
|
+
See `openclaw-aispendguard-plugin/README.md` for full docs.
|
|
276
|
+
|
|
277
|
+
## Tests
|
|
278
|
+
Run unit-style tests:
|
|
279
|
+
```bash
|
|
280
|
+
npm test
|
|
281
|
+
```
|
|
282
|
+
|
|
283
|
+
Run live ingest integration test (requires local app running and valid key/workspace):
|
|
284
|
+
```bash
|
|
285
|
+
AISPENDGUARD_API_KEY=asg_xxx \
|
|
286
|
+
AISPENDGUARD_ENDPOINT=https://www.aispendguard.com/api/ingest \
|
|
287
|
+
npm test
|
|
288
|
+
```
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
import type { UsageEventInput, UsageTags } from "./types";
/**
 * Anthropic Messages API response.usage shape.
 *
 * Cache tokens (prompt caching feature):
 * - cache_read_input_tokens → cache read hits — billed at 0.1× base input price (very cheap)
 * - cache_creation_input_tokens → cache writes — billed at 1.25× base input price (slightly expensive)
 * Both are already counted in input_tokens; store separately for accurate cost calculation.
 *
 * Extended thinking (claude-3-7-sonnet with thinking: { type: "enabled", budget_tokens: N }):
 * - Thinking tokens are NOT reported separately in the usage object.
 * - They are included in output_tokens along with visible text tokens.
 * - If you need them separately, count content blocks of type "thinking" manually.
 */
type AnthropicUsageLike = {
    input_tokens?: number;
    output_tokens?: number;
    /** Cache read tokens — billed at 0.1× base input price. Included in input_tokens. */
    cache_read_input_tokens?: number;
    /** Cache write tokens — billed at 1.25× base input price. Included in input_tokens. */
    cache_creation_input_tokens?: number;
};
/** Parameters accepted by createAnthropicUsageEvent. */
export type AnthropicEventParams = {
    model: string;
    /**
     * The resolved model name as returned in response.model
     * (e.g. "claude-3-5-sonnet-20241022" when you passed "claude-3-5-sonnet-latest").
     * Pass message.model here for accurate model version tracking.
     */
    resolvedModel?: string;
    /** response.usage from the Messages API; null/undefined degrades token counts to 0. */
    usage: AnthropicUsageLike | null | undefined;
    /** Wall-clock request latency in milliseconds. */
    latencyMs: number;
    /** Event time; defaults to event-creation time when omitted. */
    timestamp?: string | Date;
    /** Pre-computed cost override in USD. */
    costUsd?: number;
    /** Usage tags (task_type, feature, route are required by validation). */
    tags: UsageTags;
    /** Optional caller-supplied event identifier. */
    eventId?: string;
    /** Cache write TTL: "5m" (default, 1.25× input) or "1h" (extended, 2.0× input). */
    cacheTtl?: "5m" | "1h";
    /** Number of web search tool calls in this request. */
    webSearchCount?: number;
    /** Number of web fetch tool calls in this request. */
    webFetchCount?: number;
    /** Whether this request used the Batch API (50% discount). */
    isBatchApi?: boolean;
    /** Whether fast mode was used (6× multiplier). */
    isFastMode?: boolean;
};
export declare function createAnthropicUsageEvent(params: AnthropicEventParams): UsageEventInput;
export {};
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.createAnthropicUsageEvent = createAnthropicUsageEvent;
|
|
4
|
+
/**
 * Maps an Anthropic Messages API `usage` object onto an AISpendGuard usage
 * event. Only token counts and request metadata are read — never message
 * content. Optional cache/tool/batch fields are emitted only when the caller
 * or provider actually supplied them, so absent fields stay absent.
 */
function createAnthropicUsageEvent(params) {
    const { usage } = params;
    // Anthropic already counts both cache figures inside input_tokens; they are
    // forwarded separately so pricing can apply the 0.1x read / 1.25x write rates.
    const hasCacheRead = typeof usage?.cache_read_input_tokens === "number";
    const hasCacheWrite = typeof usage?.cache_creation_input_tokens === "number";
    const hasWebSearch = typeof params.webSearchCount === "number";
    const hasWebFetch = typeof params.webFetchCount === "number";
    return {
        eventId: params.eventId,
        provider: "anthropic",
        model: params.model,
        resolvedModel: params.resolvedModel,
        // Missing usage (e.g. a failed call) degrades to zero token counts.
        inputTokens: usage?.input_tokens ?? 0,
        outputTokens: usage?.output_tokens ?? 0,
        ...(hasCacheRead && { inputTokensCached: usage?.cache_read_input_tokens }),
        ...(hasCacheWrite && { inputTokensCacheWrite: usage?.cache_creation_input_tokens }),
        ...(params.cacheTtl && { cacheTtl: params.cacheTtl }),
        ...(hasWebSearch && { webSearchCount: params.webSearchCount }),
        ...(hasWebFetch && { webFetchCount: params.webFetchCount }),
        ...(params.isBatchApi && { isBatchApi: true }),
        ...(params.isFastMode && { isFastMode: true }),
        latencyMs: params.latencyMs,
        costUsd: params.costUsd,
        // Caller-supplied timestamp wins; otherwise stamp at event-creation time.
        timestamp: params.timestamp ?? new Date(),
        tags: params.tags
    };
}
|
package/dist/client.d.ts
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import type { ClientConfig, TrackResult, UsageEventBatchInput, UsageTags } from "./types";
/**
 * HTTP client for the AISpendGuard ingest endpoint.
 * Normalizes events, POSTs them as JSON with `x-api-key` auth, and retries
 * failed requests with exponential backoff. In non-strict mode errors are
 * logged and reported via the TrackResult instead of being thrown.
 */
export declare class AISpendGuardClient {
    private readonly apiKey;
    private readonly endpoint;
    private readonly timeoutMs;
    private readonly maxRetries;
    private readonly strict;
    /** Tags merged into every event by integrations that read them (e.g. wrappers). */
    readonly defaultTags?: UsageTags;
    private readonly logger;
    constructor(config: ClientConfig);
    /** Sends one event or a batch; throws only when `strict` is enabled. */
    trackUsage(events: UsageEventBatchInput): Promise<TrackResult>;
    private sendWithRetry;
    private send;
}
|
package/dist/client.js
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.AISpendGuardClient = void 0;
|
|
4
|
+
const validate_1 = require("./validate");
|
|
5
|
+
const DEFAULT_ENDPOINT = "https://www.aispendguard.com/api/ingest";
|
|
6
|
+
const DEFAULT_TIMEOUT_MS = 5000;
|
|
7
|
+
const DEFAULT_MAX_RETRIES = 2;
|
|
8
|
+
/** Resolves after `ms` milliseconds; used to pace retry backoff. */
function sleep(ms) {
    return new Promise(function (resolve) {
        setTimeout(resolve, ms);
    });
}
|
|
11
|
+
/**
 * HTTP client for the AISpendGuard ingest endpoint.
 *
 * Sends normalized usage events as a JSON batch, authenticating with the
 * `x-api-key` header. Transient failures are retried with exponential
 * backoff; deterministic HTTP client errors (4xx other than 408/429) are
 * surfaced immediately since retrying them cannot succeed. In non-strict
 * mode every error is caught, logged, and returned as `{ ok: false, error }`.
 */
class AISpendGuardClient {
    /**
     * @param config.apiKey      required workspace API key (trimmed before use)
     * @param config.endpoint    ingest URL; defaults to the hosted endpoint
     * @param config.timeoutMs   per-request timeout, default 5000
     * @param config.maxRetries  retries after the first attempt, default 2
     * @param config.strict      when true, errors are thrown instead of returned
     * @param config.defaultTags tags integrations may merge into events
     * @param config.logger      console-compatible logger, default `console`
     */
    constructor(config) {
        if (!config.apiKey || config.apiKey.trim().length === 0) {
            throw new Error("apiKey is required");
        }
        this.apiKey = config.apiKey.trim();
        this.endpoint = (config.endpoint ?? DEFAULT_ENDPOINT).trim();
        this.timeoutMs = config.timeoutMs ?? DEFAULT_TIMEOUT_MS;
        this.maxRetries = config.maxRetries ?? DEFAULT_MAX_RETRIES;
        this.strict = config.strict ?? false;
        this.defaultTags = config.defaultTags;
        this.logger = config.logger ?? console;
    }
    /**
     * Normalizes and sends one event or an array of events.
     * Resolves to { ok: true, response } on success. On any failure:
     * strict mode rethrows; otherwise the error is logged and returned
     * as { ok: false, error }.
     */
    async trackUsage(events) {
        try {
            const list = Array.isArray(events) ? events : [events];
            if (list.length === 0) {
                throw new Error("at least one event is required");
            }
            const payload = {
                events: list.map((e) => (0, validate_1.normalizeEvent)(e))
            };
            const response = await this.sendWithRetry(payload);
            return { ok: true, response };
        }
        catch (error) {
            const message = error instanceof Error ? error.message : "unknown SDK error";
            if (this.strict) {
                throw error;
            }
            this.logger.warn(`[aispendguard-sdk] ${message}`);
            return { ok: false, error: message };
        }
    }
    /**
     * Sends the payload with exponential backoff (1s, 2s, 4s — capped at 4s).
     * Network errors, 5xx, 408 and 429 are retried; any other 4xx is a
     * deterministic client error and fails immediately without burning
     * backoff time.
     */
    async sendWithRetry(payload) {
        let lastError = null;
        for (let attempt = 0; attempt <= this.maxRetries; attempt += 1) {
            try {
                return await this.send(payload);
            }
            catch (error) {
                lastError = error;
                // `send` attaches the HTTP status on non-2xx responses; a
                // missing status means a network/timeout error (retryable).
                const status = typeof error?.status === "number" ? error.status : undefined;
                const retryable = status === undefined || status >= 500 || status === 408 || status === 429;
                if (!retryable || attempt === this.maxRetries) {
                    break;
                }
                await sleep(Math.min(1000 * 2 ** attempt, 4000));
            }
        }
        throw lastError instanceof Error ? lastError : new Error("request failed");
    }
    /**
     * One POST to the ingest endpoint, aborted after `timeoutMs`.
     * Throws on non-2xx (with `status` attached for retry classification)
     * and on an empty/unparsable success body.
     */
    async send(payload) {
        const controller = new AbortController();
        const timeout = setTimeout(() => controller.abort(), this.timeoutMs);
        try {
            const response = await fetch(this.endpoint, {
                method: "POST",
                headers: {
                    "content-type": "application/json",
                    "x-api-key": this.apiKey
                },
                body: JSON.stringify(payload),
                signal: controller.signal
            });
            const raw = (await response.json().catch(() => null));
            if (!response.ok) {
                const msg = raw?.errors?.join("; ") || `HTTP ${response.status}`;
                // Attach the status so sendWithRetry can skip retries on 4xx.
                throw Object.assign(new Error(`ingest failed: ${msg}`), { status: response.status });
            }
            if (!raw) {
                throw new Error("ingest failed: empty response body");
            }
            return raw;
        }
        finally {
            clearTimeout(timeout);
        }
    }
}
|
|
92
|
+
exports.AISpendGuardClient = AISpendGuardClient;
|
package/dist/gemini.d.ts
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
import type { UsageEventInput, UsageTags } from "./types";
/**
 * Google Gemini generateContent / generateContentStream response.usageMetadata shape.
 *
 * promptTokenCount → input tokens
 * candidatesTokenCount → output tokens (visible text)
 * cachedContentTokenCount → context cache hits — billed at reduced price (~0.1×)
 * thoughtsTokenCount → Gemini 2.5 thinking tokens — billed separately (in addition to output)
 * totalTokenCount → sum of all above
 *
 * Notes:
 * - cachedContentTokenCount is only present when you use the Context Caching API.
 * - thoughtsTokenCount is only present for Gemini 2.5 Flash/Pro with thinking enabled.
 * - candidatesTokenCount does NOT include thoughtsTokenCount (unlike Anthropic).
 *   Gemini bills thinking tokens on top of output tokens.
 */
type GeminiUsageLike = {
    promptTokenCount?: number;
    candidatesTokenCount?: number;
    cachedContentTokenCount?: number;
    thoughtsTokenCount?: number;
    totalTokenCount?: number;
};
/** Parameters accepted by createGeminiUsageEvent. */
export type GeminiEventParams = {
    model: string;
    /**
     * The resolved model version as returned in response.modelVersion
     * (e.g. "gemini-2.0-flash-001" when you passed "gemini-2.0-flash").
     * Pass response.modelVersion here for accurate model version tracking.
     */
    resolvedModel?: string;
    /** response.usageMetadata; null/undefined degrades token counts to 0. */
    usage: GeminiUsageLike | null | undefined;
    /** Wall-clock request latency in milliseconds. */
    latencyMs: number;
    /** Event time; defaults to event-creation time when omitted. */
    timestamp?: string | Date;
    /** Pre-computed cost override in USD. */
    costUsd?: number;
    /** Usage tags (task_type, feature, route are required by validation). */
    tags: UsageTags;
    /** Optional caller-supplied event identifier. */
    eventId?: string;
};
export declare function createGeminiUsageEvent(params: GeminiEventParams): UsageEventInput;
export {};
|
package/dist/gemini.js
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.createGeminiUsageEvent = createGeminiUsageEvent;
|
|
4
|
+
/**
 * Maps a Gemini generateContent `usageMetadata` object onto an AISpendGuard
 * usage event. Only token counts and request metadata are read — never
 * prompt or candidate content. Cache/thinking fields are emitted only when
 * the provider reported them.
 */
function createGeminiUsageEvent(params) {
    const { usage } = params;
    const hasCached = typeof usage?.cachedContentTokenCount === "number";
    const hasThinking = typeof usage?.thoughtsTokenCount === "number";
    return {
        eventId: params.eventId,
        provider: "google",
        model: params.model,
        resolvedModel: params.resolvedModel,
        // Missing usageMetadata (e.g. a failed call) degrades to zero counts.
        inputTokens: usage?.promptTokenCount ?? 0,
        outputTokens: usage?.candidatesTokenCount ?? 0,
        ...(hasCached && { inputTokensCached: usage?.cachedContentTokenCount }),
        ...(hasThinking && { thinkingTokens: usage?.thoughtsTokenCount }),
        latencyMs: params.latencyMs,
        costUsd: params.costUsd,
        // Caller-supplied timestamp wins; otherwise stamp at event-creation time.
        timestamp: params.timestamp ?? new Date(),
        tags: params.tags
    };
}
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import { AISpendGuardClient } from "./client";
import type { ClientConfig, TrackResult, UsageEventBatchInput } from "./types";
/** Creates (or replaces) the SDK's module-level singleton client. */
export declare function init(config: ClientConfig): AISpendGuardClient;
/** Returns the singleton client; throws if init() has not been called. */
export declare function getClient(): AISpendGuardClient;
/** Tracks one event or a batch via the singleton client. */
export declare function trackUsage(events: UsageEventBatchInput): Promise<TrackResult>;
export { AISpendGuardClient };
export { createOpenAIUsageEvent } from "./openai";
export { createAnthropicUsageEvent } from "./anthropic";
export { createGeminiUsageEvent } from "./gemini";
export { AISpendGuardCallbackHandler } from "./langchain";
export { wrapOpenAI } from "./wrap-openai";
export { wrapAnthropic } from "./wrap-anthropic";
export { wrapGemini } from "./wrap-gemini";
export type { LangChainHandlerConfig } from "./langchain";
export type { AnthropicEventParams } from "./anthropic";
export type { OpenAIEventParams } from "./openai";
export type { GeminiEventParams } from "./gemini";
export type { AllowedTagKey, ClientConfig, IngestEventPayload, IngestRequestPayload, IngestResponse, TrackResult, UsageEventBatchInput, UsageEventInput, UsageTags } from "./types";
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.wrapGemini = exports.wrapAnthropic = exports.wrapOpenAI = exports.AISpendGuardCallbackHandler = exports.createGeminiUsageEvent = exports.createAnthropicUsageEvent = exports.createOpenAIUsageEvent = exports.AISpendGuardClient = void 0;
|
|
4
|
+
exports.init = init;
|
|
5
|
+
exports.getClient = getClient;
|
|
6
|
+
exports.trackUsage = trackUsage;
|
|
7
|
+
const client_1 = require("./client");
|
|
8
|
+
Object.defineProperty(exports, "AISpendGuardClient", { enumerable: true, get: function () { return client_1.AISpendGuardClient; } });
|
|
9
|
+
// Module-level singleton; stays null until init() is called.
let defaultClient = null;
/**
 * Initializes the SDK singleton with the given config and returns the
 * underlying client. Calling init() again replaces the singleton.
 */
function init(config) {
    const client = new client_1.AISpendGuardClient(config);
    defaultClient = client;
    return client;
}
/** Returns the singleton client; throws if init() has not been called. */
function getClient() {
    if (defaultClient === null) {
        throw new Error("AISpendGuard SDK is not initialized. Call init(...) first.");
    }
    return defaultClient;
}
/** Tracks one event or a batch through the singleton client. */
async function trackUsage(events) {
    return getClient().trackUsage(events);
}
|
|
23
|
+
var openai_1 = require("./openai");
|
|
24
|
+
Object.defineProperty(exports, "createOpenAIUsageEvent", { enumerable: true, get: function () { return openai_1.createOpenAIUsageEvent; } });
|
|
25
|
+
var anthropic_1 = require("./anthropic");
|
|
26
|
+
Object.defineProperty(exports, "createAnthropicUsageEvent", { enumerable: true, get: function () { return anthropic_1.createAnthropicUsageEvent; } });
|
|
27
|
+
var gemini_1 = require("./gemini");
|
|
28
|
+
Object.defineProperty(exports, "createGeminiUsageEvent", { enumerable: true, get: function () { return gemini_1.createGeminiUsageEvent; } });
|
|
29
|
+
var langchain_1 = require("./langchain");
|
|
30
|
+
Object.defineProperty(exports, "AISpendGuardCallbackHandler", { enumerable: true, get: function () { return langchain_1.AISpendGuardCallbackHandler; } });
|
|
31
|
+
var wrap_openai_1 = require("./wrap-openai");
|
|
32
|
+
Object.defineProperty(exports, "wrapOpenAI", { enumerable: true, get: function () { return wrap_openai_1.wrapOpenAI; } });
|
|
33
|
+
var wrap_anthropic_1 = require("./wrap-anthropic");
|
|
34
|
+
Object.defineProperty(exports, "wrapAnthropic", { enumerable: true, get: function () { return wrap_anthropic_1.wrapAnthropic; } });
|
|
35
|
+
var wrap_gemini_1 = require("./wrap-gemini");
|
|
36
|
+
Object.defineProperty(exports, "wrapGemini", { enumerable: true, get: function () { return wrap_gemini_1.wrapGemini; } });
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
/**
 * LangChain.js callback handler for AISpendGuard.
 *
 * Tracks LLM token usage from LangChain invocations via the SDK singleton.
 * Never reads prompt content or model outputs — only metadata and token counts.
 *
 * Usage:
 *   import { init, AISpendGuardCallbackHandler } from "@aispendguard/sdk";
 *
 *   init({ apiKey: "asg_..." });
 *   const handler = new AISpendGuardCallbackHandler({
 *     defaultTags: { feature: "chatbot", route: "/api/chat" },
 *   });
 *
 *   const llm = new ChatOpenAI({ callbacks: [handler] });
 */
interface Serialized {
    id?: string[];
    name?: string;
    type?: string;
    lc?: number;
    [key: string]: unknown;
}
interface Generation {
    text: string;
    generationInfo?: Record<string, unknown>;
}
interface LLMResult {
    generations: Generation[][];
    llmOutput?: Record<string, unknown>;
}
/**
 * Minimal abstract base we implement. At runtime LangChain will duck-type
 * check the handler; it doesn't require `extends BaseCallbackHandler` from
 * the exact same package version — it only needs the method signatures and
 * the `name` property.
 */
declare abstract class BaseCallbackHandlerCompat {
    abstract name: string;
    lc_serializable: boolean;
    ignoreLLM: boolean;
    ignoreChain: boolean;
    ignoreAgent: boolean;
    ignoreRetriever: boolean;
    ignoreCustomEvent: boolean;
}
export interface LangChainHandlerConfig {
    /** Default tags merged into every event. */
    defaultTags?: Record<string, string>;
}
export declare class AISpendGuardCallbackHandler extends BaseCallbackHandlerCompat {
    name: string;
    private readonly defaultTags;
    /** In-flight run bookkeeping keyed by runId — presumably start times/models; confirm against langchain.js. */
    private readonly runs;
    constructor(config?: LangChainHandlerConfig);
    /** Called by LangChain when a plain LLM invocation starts; records the run. */
    handleLLMStart(serialized: Serialized, _prompts: string[], runId?: string, _parentRunId?: string, _extraParams?: Record<string, unknown>, _tags?: string[], _metadata?: Record<string, unknown>, _name?: string): void;
    /** Called by LangChain when a chat-model invocation starts; records the run. */
    handleChatModelStart(serialized: Serialized, _messages: unknown[][], runId?: string, _parentRunId?: string, _extraParams?: Record<string, unknown>, _tags?: string[], _metadata?: Record<string, unknown>, _name?: string): void;
    /** Called when the invocation finishes; token usage is read from `output.llmOutput`, not generation text. */
    handleLLMEnd(output: LLMResult, runId?: string, _parentRunId?: string, _tags?: string[]): void;
    /** Called when the invocation fails; clears the run's bookkeeping. */
    handleLLMError(_err: Error, runId?: string): void;
}
export {};
|