@aispendguard/sdk 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +288 -0
- package/dist/anthropic.d.ts +49 -0
- package/dist/anthropic.js +27 -0
- package/dist/client.d.ts +14 -0
- package/dist/client.js +92 -0
- package/dist/gemini.d.ts +40 -0
- package/dist/gemini.js +22 -0
- package/dist/index.d.ts +18 -0
- package/dist/index.js +36 -0
- package/dist/langchain.d.ts +61 -0
- package/dist/langchain.js +197 -0
- package/dist/openai.d.ts +46 -0
- package/dist/openai.js +58 -0
- package/dist/types.d.ts +95 -0
- package/dist/types.js +2 -0
- package/dist/validate.d.ts +2 -0
- package/dist/validate.js +188 -0
- package/dist/wrap-anthropic.d.ts +15 -0
- package/dist/wrap-anthropic.js +78 -0
- package/dist/wrap-gemini.d.ts +16 -0
- package/dist/wrap-gemini.js +82 -0
- package/dist/wrap-openai.d.ts +15 -0
- package/dist/wrap-openai.js +81 -0
- package/package.json +49 -0
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* LangChain.js callback handler for AISpendGuard.
|
|
4
|
+
*
|
|
5
|
+
* Tracks LLM token usage from LangChain invocations via the SDK singleton.
|
|
6
|
+
* Never reads prompt content or model outputs — only metadata and token counts.
|
|
7
|
+
*
|
|
8
|
+
* Usage:
|
|
9
|
+
* import { init, AISpendGuardCallbackHandler } from "@aispendguard/sdk";
|
|
10
|
+
*
|
|
11
|
+
* init({ apiKey: "asg_..." });
|
|
12
|
+
* const handler = new AISpendGuardCallbackHandler({
|
|
13
|
+
* defaultTags: { feature: "chatbot", route: "/api/chat" },
|
|
14
|
+
* });
|
|
15
|
+
*
|
|
16
|
+
* const llm = new ChatOpenAI({ callbacks: [handler] });
|
|
17
|
+
*/
|
|
18
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
19
|
+
exports.AISpendGuardCallbackHandler = void 0;
|
|
20
|
+
const index_1 = require("./index");
|
|
21
|
+
/**
 * Minimal stand-in for LangChain's BaseCallbackHandler. At runtime LangChain
 * duck-types callback handlers: it only requires the `name` property and the
 * method signatures, not an instance of `BaseCallbackHandler` from the exact
 * same package version, so this compat base avoids a hard dependency.
 */
class BaseCallbackHandlerCompat {
    constructor() {
        // Only LLM-level events are of interest; chain/agent/retriever and
        // custom events are explicitly ignored.
        Object.assign(this, {
            lc_serializable: false,
            ignoreLLM: false,
            ignoreChain: true,
            ignoreAgent: true,
            ignoreRetriever: true,
            ignoreCustomEvent: true,
        });
    }
}
|
|
37
|
+
// ---------------------------------------------------------------------------
// Provider detection
// ---------------------------------------------------------------------------
// Ordered [pattern, provider] pairs, checked first-match-wins by
// detectProvider(). More specific patterns MUST come before generic ones:
//  - "AzureChatOpenAI" also matches /openai/i, so /azure/i must be first,
//    otherwise azure_openai is unreachable.
//  - Bedrock model ids such as "anthropic.claude-3-..." also match
//    /anthropic|claude/i, so /bedrock/i must precede it.
const PROVIDER_PATTERNS = [
    [/azure/i, "azure_openai"],
    [/bedrock/i, "aws_bedrock"],
    [/openai/i, "openai"],
    [/anthropic|claude/i, "anthropic"],
    [/google|gemini|vertex/i, "google"],
    [/cohere/i, "cohere"],
    [/mistral/i, "mistral"],
    [/ollama/i, "ollama"],
    [/groq/i, "groq"],
    [/together/i, "together"],
    [/fireworks/i, "fireworks"],
];
|
|
53
|
+
/**
 * Best-effort provider detection from LangChain's serialized model info.
 * Checks the serialized id path first (e.g.
 * ["langchain", "chat_models", "openai", "ChatOpenAI"]) and then the
 * serialized name, returning the first matching provider or "unknown".
 */
function detectProvider(serialized) {
    if (!serialized)
        return "unknown";
    const candidates = [
        (serialized.id ?? []).join("/").toLowerCase(),
        serialized.name ?? "",
    ];
    for (const candidate of candidates) {
        const match = PROVIDER_PATTERNS.find(([pattern]) => pattern.test(candidate));
        if (match)
            return match[1];
    }
    return "unknown";
}
|
|
70
|
+
/**
 * Pulls the model name out of run metadata. Prefers llmOutput (authoritative
 * after the call completes) and falls back to the constructor kwargs captured
 * in the serialized representation; "unknown" when neither yields a
 * non-empty string.
 */
function extractModel(serialized, llmOutput) {
    // Shared lookup over the known aliases a model name may hide behind.
    const pick = (source) => {
        if (!source)
            return undefined;
        const candidate = source.model ??
            source.model_name ??
            source.modelName ??
            source.model_id;
        return typeof candidate === "string" && candidate.length > 0
            ? candidate
            : undefined;
    };
    return pick(llmOutput) ?? pick(serialized?.kwargs) ?? "unknown";
}
|
|
96
|
+
/**
 * LangChain callback handler that reports token usage to the AISpendGuard
 * SDK singleton.
 *
 * Privacy: only run metadata and token counts are read — prompt content,
 * messages and model outputs are never inspected.
 */
class AISpendGuardCallbackHandler extends BaseCallbackHandlerCompat {
    /**
     * @param {{ defaultTags?: object }} [config] - Tags merged into every
     *   tracked event (e.g. { feature, route }).
     */
    constructor(config) {
        super();
        this.name = "AISpendGuardCallbackHandler";
        // runId -> { startedAt, serialized }; populated by the *Start
        // callbacks and consumed by handleLLMEnd / handleLLMError.
        this.runs = new Map();
        this.defaultTags = config?.defaultTags ?? {};
    }
    // -- LLM lifecycle callbacks --
    handleLLMStart(serialized, _prompts, runId, _parentRunId, _extraParams, _tags, _metadata, _name) {
        // Record start time. NEVER read prompts.
        // Fallback UUID keeps tracking functional if LangChain omits runId.
        const id = runId ?? crypto.randomUUID();
        this.runs.set(id, { startedAt: Date.now(), serialized });
    }
    handleChatModelStart(serialized, _messages, runId, _parentRunId, _extraParams, _tags, _metadata, _name) {
        // Record start time. NEVER read messages.
        const id = runId ?? crypto.randomUUID();
        this.runs.set(id, { startedAt: Date.now(), serialized });
    }
    /**
     * Builds and submits a usage event from the completed run. Reads only
     * output.llmOutput (token counts + model metadata), never generations.
     */
    handleLLMEnd(output, runId, _parentRunId, _tags) {
        const id = runId ?? "";
        const runInfo = this.runs.get(id);
        // If the start callback was missed, fall back to "now" (latency 0).
        const startedAt = runInfo?.startedAt ?? Date.now();
        const serialized = runInfo?.serialized;
        // Clean up
        if (id)
            this.runs.delete(id);
        const latencyMs = Math.max(0, Date.now() - startedAt);
        // Extract token usage from llmOutput — field names vary by provider
        // integration, hence the alias chains below.
        const llmOutput = output.llmOutput ?? {};
        const tokenUsage = (llmOutput.tokenUsage ??
            llmOutput.token_usage ??
            llmOutput.usage ??
            {});
        const inputTokens = toNumber(tokenUsage.promptTokens ??
            tokenUsage.prompt_tokens ??
            tokenUsage.input_tokens ??
            tokenUsage.totalInputTokens);
        const outputTokens = toNumber(tokenUsage.completionTokens ??
            tokenUsage.completion_tokens ??
            tokenUsage.output_tokens ??
            tokenUsage.totalOutputTokens);
        // Skip if we got no token data at all
        if (inputTokens === 0 && outputTokens === 0)
            return;
        const provider = detectProvider(serialized);
        const model = extractModel(serialized, llmOutput);
        // Build tags: user defaults win for task_type/feature/route, but
        // `source` is always forced to "langchain".
        const tags = {
            ...this.defaultTags,
            source: "langchain",
            task_type: this.defaultTags.task_type ?? "chat",
            feature: this.defaultTags.feature ?? "default",
            route: this.defaultTags.route ?? "default",
        };
        const event = {
            provider,
            model,
            inputTokens,
            outputTokens,
            latencyMs,
            timestamp: new Date(),
            tags,
            // Use run_id as event_id for deduplication
            ...(id ? { eventId: `langchain:${id}` } : {}),
        };
        // Fire and forget — never block the LLM chain
        const client = getClientSafe();
        if (client) {
            client.trackUsage(event).catch((err) => {
                console.warn(`[aispendguard-langchain] Failed to track usage: ${err instanceof Error ? err.message : String(err)}`);
            });
        }
    }
    handleLLMError(_err, runId) {
        // Clean up run tracking; never throw
        const id = runId ?? "";
        if (id)
            this.runs.delete(id);
    }
}
exports.AISpendGuardCallbackHandler = AISpendGuardCallbackHandler;
+
// ---------------------------------------------------------------------------
// Internal helpers
// ---------------------------------------------------------------------------
/**
 * Coerces a numeric or numeric-string value to a number.
 * Anything else (undefined, null, NaN, objects) becomes 0.
 */
function toNumber(value) {
    switch (typeof value) {
        case "number":
            return Number.isNaN(value) ? 0 : value;
        case "string": {
            const parsed = Number(value);
            return Number.isNaN(parsed) ? 0 : parsed;
        }
        default:
            return 0;
    }
}
|
|
189
|
+
/**
 * Returns the SDK singleton, or null (after a one-line warning) when init()
 * has not been called. Never throws — the handler must degrade to a no-op.
 */
function getClientSafe() {
    let client = null;
    try {
        client = (0, index_1.getClient)();
    }
    catch {
        console.warn("[aispendguard-langchain] SDK not initialized. Call init() before using the LangChain handler.");
    }
    return client;
}
|
package/dist/openai.d.ts
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
import type { UsageEventInput, UsageTags } from "./types";
/**
 * Covers both OpenAI Chat Completions and Responses API response.usage shapes.
 *
 * Chat Completions: prompt_tokens / completion_tokens
 * - prompt_tokens_details.cached_tokens → cache read (0.5× base input price)
 * - completion_tokens_details.reasoning_tokens → o1/o3 thinking (billed as output)
 *
 * Responses API: input_tokens / output_tokens
 * - input_tokens_details.cached_tokens → cache read (0.5× base input price)
 * - output_tokens_details.reasoning_tokens → o1/o3 thinking (billed as output)
 */
type OpenAIUsageLike = {
    /** Responses API input token count. */
    input_tokens?: number;
    /** Responses API output token count. */
    output_tokens?: number;
    input_tokens_details?: {
        cached_tokens?: number;
    };
    output_tokens_details?: {
        reasoning_tokens?: number;
    };
    /** Chat Completions input token count. */
    prompt_tokens?: number;
    /** Chat Completions output token count. */
    completion_tokens?: number;
    prompt_tokens_details?: {
        cached_tokens?: number;
    };
    completion_tokens_details?: {
        reasoning_tokens?: number;
    };
};
export type OpenAIEventParams = {
    /** Model name as requested by the caller (e.g. "gpt-4o-mini"). */
    model: string;
    /**
     * The resolved model name as returned in response.model (e.g. "gpt-4o-mini-2024-07-18").
     * Pass response.model here for accurate model version tracking.
     */
    resolvedModel?: string;
    /** response.usage — may be null/undefined (e.g. when the API omits it). */
    usage: OpenAIUsageLike | null | undefined;
    /** Wall-clock request latency in milliseconds. */
    latencyMs: number;
    /** Defaults to event-creation time when omitted. */
    timestamp?: string | Date;
    /** Optional pre-computed cost override in USD. */
    costUsd?: number;
    tags: UsageTags;
    /** Stable id used for server-side deduplication. */
    eventId?: string;
};
export declare function createOpenAIUsageEvent(params: OpenAIEventParams): UsageEventInput;
export {};
|
package/dist/openai.js
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.createOpenAIUsageEvent = createOpenAIUsageEvent;
|
|
4
|
+
/**
 * Input/prompt token count from either usage shape:
 * Responses API `input_tokens` first, then Chat Completions `prompt_tokens`.
 */
function getInputTokens(usage) {
    if (!usage)
        return 0;
    for (const candidate of [usage.input_tokens, usage.prompt_tokens]) {
        if (typeof candidate === "number")
            return candidate;
    }
    return 0;
}
|
|
13
|
+
/**
 * Output/completion token count from either usage shape:
 * Responses API `output_tokens` first, then Chat Completions `completion_tokens`.
 */
function getOutputTokens(usage) {
    if (!usage)
        return 0;
    for (const candidate of [usage.output_tokens, usage.completion_tokens]) {
        if (typeof candidate === "number")
            return candidate;
    }
    return 0;
}
|
|
22
|
+
/** Cache read tokens — billed at 0.25×–0.5× base input price (varies by model family). Already counted in inputTokens. */
function getCachedTokens(usage) {
    if (!usage)
        return undefined;
    const fromResponses = usage.input_tokens_details?.cached_tokens;
    const fromChat = usage.prompt_tokens_details?.cached_tokens;
    const cached = fromResponses ?? fromChat;
    return typeof cached === "number" ? cached : undefined;
}
|
|
30
|
+
/**
 * Reasoning tokens for o1/o3 models — billed as output tokens.
 * Already counted in outputTokens; stored separately for cost spike detection.
 */
function getReasoningTokens(usage) {
    if (!usage)
        return undefined;
    const fromResponses = usage.output_tokens_details?.reasoning_tokens;
    const fromChat = usage.completion_tokens_details?.reasoning_tokens;
    const reasoning = fromResponses ?? fromChat;
    return typeof reasoning === "number" ? reasoning : undefined;
}
|
|
41
|
+
/**
 * Builds a normalized UsageEventInput from an OpenAI call's usage block.
 * Handles both Chat Completions and Responses API shapes; cache-read and
 * reasoning token details are attached only when the provider reported them.
 */
function createOpenAIUsageEvent(params) {
    const cachedTokens = getCachedTokens(params.usage);
    const reasoningTokens = getReasoningTokens(params.usage);
    const event = {
        eventId: params.eventId,
        provider: "openai",
        model: params.model,
        resolvedModel: params.resolvedModel,
        inputTokens: getInputTokens(params.usage),
        outputTokens: getOutputTokens(params.usage),
    };
    if (typeof cachedTokens === "number") {
        event.inputTokensCached = cachedTokens;
    }
    if (typeof reasoningTokens === "number") {
        event.thinkingTokens = reasoningTokens;
    }
    event.latencyMs = params.latencyMs;
    event.costUsd = params.costUsd;
    // Default the timestamp to "now" when the caller did not supply one.
    event.timestamp = params.timestamp ?? new Date();
    event.tags = params.tags;
    return event;
}
|
package/dist/types.d.ts
ADDED
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
/** Tag keys understood natively by the ingest API. */
export type AllowedTagKey = "task_type" | "feature" | "route" | "customer_plan" | "customer_id" | "provider" | "model" | "environment" | "agent_name";
export type TagValue = string | string[];
/** Known tags plus arbitrary lowercase custom keys (string or string[] values). */
export type UsageTags = Partial<Record<AllowedTagKey, string>> & Record<string, TagValue>;
/** One usage event as accepted by the SDK (camelCase, pre-normalization). */
export type UsageEventInput = {
    /** Stable id used for server-side deduplication. */
    eventId?: string;
    provider: string;
    model: string;
    /** Resolved model name from provider response (e.g. "gpt-4o-mini-2024-07-18" vs "gpt-4o-mini"). */
    resolvedModel?: string;
    inputTokens: number;
    outputTokens: number;
    /**
     * Cache read tokens — billed at a reduced rate.
     * OpenAI: 0.5× base input price. Anthropic: 0.1× base input price.
     * Already included in inputTokens; stored separately for accurate cost calculation.
     */
    inputTokensCached?: number;
    /**
     * Cache write tokens — billed at a premium rate (Anthropic only: 1.25× base input price).
     * Already included in inputTokens; stored separately for accurate cost calculation.
     */
    inputTokensCacheWrite?: number;
    /**
     * Reasoning / thinking tokens — billed as output tokens.
     * OpenAI o1/o3: from completion_tokens_details.reasoning_tokens.
     * Google Gemini 2.5: from usageMetadata.thoughtsTokenCount.
     * Anthropic extended thinking: included in output_tokens (not separately reported in usage).
     * Already included in outputTokens; stored separately for cost spike detection.
     */
    thinkingTokens?: number;
    /**
     * Anthropic cache write TTL: "5m" (default, 1.25× input price) or "1h" (extended, 2.0× input price).
     * Only relevant when inputTokensCacheWrite > 0.
     */
    cacheTtl?: "5m" | "1h";
    /** Number of web search tool calls (billed as flat fee per call). */
    webSearchCount?: number;
    /** Number of web fetch tool calls (billed as flat fee per call). */
    webFetchCount?: number;
    /** Whether this request used the Batch API (50% discount on token costs). */
    isBatchApi?: boolean;
    /** Whether fast mode was used (Opus 6× multiplier on token costs). */
    isFastMode?: boolean;
    latencyMs: number;
    /** Optional pre-computed cost; omit to let the backend price the event. */
    costUsd?: number;
    timestamp: string | Date;
    tags: UsageTags;
};
export type UsageEventBatchInput = UsageEventInput | UsageEventInput[];
/** Wire-format (snake_case) event sent to the ingest endpoint. */
export type IngestEventPayload = {
    event_id?: string;
    provider: string;
    model: string;
    resolved_model?: string;
    input_tokens: number;
    output_tokens: number;
    input_tokens_cached?: number;
    input_tokens_cache_write?: number;
    thinking_tokens?: number;
    cache_ttl?: "5m" | "1h";
    web_search_count?: number;
    web_fetch_count?: number;
    is_batch_api?: boolean;
    is_fast_mode?: boolean;
    latency_ms: number;
    cost_usd?: number;
    /** ISO-8601 datetime string. */
    timestamp: string;
    tags: Record<string, TagValue>;
};
export type IngestRequestPayload = {
    events: IngestEventPayload[];
};
/** Per-batch ingest outcome counters returned by the API. */
export type IngestResponse = {
    accepted: number;
    duplicates: number;
    rejected: number;
    errors?: string[];
};
export type ClientConfig = {
    apiKey: string;
    endpoint?: string;
    /** Default tags merged into every auto-wrapped event. */
    defaultTags?: UsageTags;
    timeoutMs?: number;
    maxRetries?: number;
    strict?: boolean;
    /** Custom logger; defaults to the global console. */
    logger?: Pick<Console, "warn" | "error" | "info">;
};
/** Discriminated result of a trackUsage() call. */
export type TrackResult = {
    ok: true;
    response: IngestResponse;
} | {
    ok: false;
    error: string;
};
|
package/dist/types.js
ADDED
package/dist/validate.js
ADDED
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.normalizeEvent = normalizeEvent;
|
|
4
|
+
// Tags every event must carry as non-empty strings (enforced by normalizeTags).
const REQUIRED_TAGS = ["task_type", "feature", "route"];
// Well-known tag keys accepted without matching CUSTOM_TAG_KEY_PATTERN.
const ALLOWED_TAGS = new Set([
    ...REQUIRED_TAGS,
    "customer_plan",
    "customer_id",
    "provider",
    "model",
    "environment",
    "agent_name"
]);
// Custom tag keys: lowercase, start with a letter, 2–64 chars of [a-z0-9_].
const CUSTOM_TAG_KEY_PATTERN = /^[a-z][a-z0-9_]{1,63}$/;
// Size limits applied during normalization.
const MAX_TAGS_PER_EVENT = 24;
const MAX_TAG_VALUE_LENGTH = 120;
const MAX_TAG_ARRAY_ITEMS = 16;
const MAX_STRING_FIELD_LENGTH = 256;
// Keys that could carry prompt/response content. Rejected (case-insensitively)
// on both events and tags so the SDK never transmits user data.
const FORBIDDEN_KEYS = [
    "prompt",
    "prompts",
    "input",
    "inputs",
    "completion",
    "completions",
    "output",
    "outputs",
    "content",
    "message",
    "messages",
    "attachment",
    "attachments"
];
|
|
34
|
+
/** True only for plain non-null objects; arrays and primitives are excluded. */
function isObject(value) {
    if (value === null || Array.isArray(value))
        return false;
    return typeof value === "object";
}
|
|
37
|
+
/**
 * Returns the first FORBIDDEN_KEYS entry that appears (case-insensitively)
 * among the object's keys, or null when none does.
 */
function containsForbiddenKeys(obj) {
    const keySet = new Set(Object.keys(obj).map((key) => key.toLowerCase()));
    for (const forbidden of FORBIDDEN_KEYS) {
        if (keySet.has(forbidden))
            return forbidden;
    }
    return null;
}
|
|
42
|
+
/**
 * Throws unless `value` is a string that is non-empty after trimming and
 * whose trimmed length does not exceed `maxLength`.
 */
function assertNonEmptyString(value, field, maxLength = MAX_STRING_FIELD_LENGTH) {
    if (typeof value !== "string") {
        throw new Error(`${field} must be a non-empty string`);
    }
    const trimmedLength = value.trim().length;
    if (trimmedLength === 0) {
        throw new Error(`${field} must be a non-empty string`);
    }
    if (trimmedLength > maxLength) {
        throw new Error(`${field} exceeds max length ${maxLength}`);
    }
}
|
|
50
|
+
/**
 * Throws unless `value` is a non-negative number; with `integerOnly`
 * (the default) it must also be an integer.
 */
function assertNonNegative(value, field, integerOnly = true) {
    const isValidNumber = typeof value === "number" && !Number.isNaN(value) && value >= 0;
    if (!isValidNumber) {
        throw new Error(`${field} must be a non-negative number`);
    }
    if (integerOnly && !Number.isInteger(value)) {
        throw new Error(`${field} must be an integer`);
    }
}
|
|
58
|
+
/**
 * Converts a Date or date-string to a canonical ISO-8601 string;
 * throws when the value does not parse to a valid datetime.
 */
function normalizeTimestamp(value) {
    const parsed = value instanceof Date ? value : new Date(value);
    if (!Number.isFinite(parsed.getTime())) {
        throw new Error("timestamp must be a valid ISO-8601 datetime");
    }
    return parsed.toISOString();
}
|
|
65
|
+
/**
 * Validates and normalizes a tags object:
 *  - keys must be known tags or lowercase custom keys (never forbidden keys);
 *  - values must be trimmed non-empty strings or non-empty string arrays
 *    within size limits;
 *  - all REQUIRED_TAGS must end up present as non-empty strings.
 * Throws on the first violation; returns a fresh normalized object.
 */
function normalizeTags(tags) {
    if (!isObject(tags)) {
        throw new Error("tags must be an object");
    }
    const forbiddenKey = containsForbiddenKeys(tags);
    if (forbiddenKey) {
        throw new Error(`tags contains forbidden key: ${forbiddenKey}`);
    }
    const result = {};
    for (const [key, rawValue] of Object.entries(tags)) {
        // Accept well-known keys, or custom keys matching the lowercase
        // pattern that are not on the forbidden list.
        const isKnownKey = ALLOWED_TAGS.has(key);
        const isCustomKey = CUSTOM_TAG_KEY_PATTERN.test(key) && !FORBIDDEN_KEYS.includes(key);
        if (!isKnownKey && !isCustomKey) {
            throw new Error(`tags.${key} is not supported (use known tags or lowercase custom keys like team, project_code)`);
        }
        if (typeof rawValue === "string") {
            assertNonEmptyString(rawValue, `tags.${key}`);
            const trimmed = rawValue.trim();
            if (trimmed.length > MAX_TAG_VALUE_LENGTH) {
                throw new Error(`tags.${key} exceeds max length ${MAX_TAG_VALUE_LENGTH}`);
            }
            result[key] = trimmed;
        }
        else if (Array.isArray(rawValue)) {
            if (rawValue.length === 0) {
                throw new Error(`tags.${key} array must not be empty`);
            }
            if (rawValue.length > MAX_TAG_ARRAY_ITEMS) {
                throw new Error(`tags.${key} has too many values (max ${MAX_TAG_ARRAY_ITEMS})`);
            }
            result[key] = rawValue.map((item, idx) => {
                if (typeof item !== "string" || item.trim().length === 0) {
                    throw new Error(`tags.${key}[${idx}] must be a non-empty string`);
                }
                const trimmedItem = item.trim();
                if (trimmedItem.length > MAX_TAG_VALUE_LENGTH) {
                    throw new Error(`tags.${key}[${idx}] exceeds max length ${MAX_TAG_VALUE_LENGTH}`);
                }
                return trimmedItem;
            });
        }
        else {
            throw new Error(`tags.${key} must be a string or string[]`);
        }
    }
    if (Object.keys(result).length > MAX_TAGS_PER_EVENT) {
        throw new Error(`tags has too many keys (max ${MAX_TAGS_PER_EVENT})`);
    }
    for (const requiredKey of REQUIRED_TAGS) {
        const requiredValue = result[requiredKey];
        if (typeof requiredValue !== "string" || requiredValue.length === 0) {
            throw new Error(`tags.${requiredKey} is required`);
        }
    }
    return result;
}
|
|
122
|
+
/**
 * Validates a UsageEventInput and converts it to the snake_case wire payload
 * expected by the ingest API. Throws an Error on the first validation
 * failure; does not mutate the input. Optional fields are included in the
 * payload only when supplied.
 */
function normalizeEvent(event) {
    if (!isObject(event)) {
        throw new Error("event must be an object");
    }
    // Privacy guard: reject events carrying prompt/response-like fields.
    const forbidden = containsForbiddenKeys(event);
    if (forbidden) {
        throw new Error(`event contains forbidden field: ${forbidden}`);
    }
    // Required fields.
    assertNonEmptyString(event.provider, "provider");
    assertNonEmptyString(event.model, "model");
    assertNonNegative(event.inputTokens, "inputTokens", true);
    assertNonNegative(event.outputTokens, "outputTokens", true);
    assertNonNegative(event.latencyMs, "latencyMs", true);
    // Optional fields — validated only when present.
    if (event.costUsd !== undefined) {
        // Cost may be fractional, hence integerOnly = false.
        assertNonNegative(event.costUsd, "costUsd", false);
    }
    if (event.eventId !== undefined) {
        assertNonEmptyString(event.eventId, "eventId");
    }
    if (event.resolvedModel !== undefined) {
        assertNonEmptyString(event.resolvedModel, "resolvedModel");
    }
    if (event.inputTokensCached !== undefined) {
        assertNonNegative(event.inputTokensCached, "inputTokensCached", true);
    }
    if (event.inputTokensCacheWrite !== undefined) {
        assertNonNegative(event.inputTokensCacheWrite, "inputTokensCacheWrite", true);
    }
    if (event.thinkingTokens !== undefined) {
        assertNonNegative(event.thinkingTokens, "thinkingTokens", true);
    }
    if (event.cacheTtl !== undefined && event.cacheTtl !== "5m" && event.cacheTtl !== "1h") {
        throw new Error("cacheTtl must be \"5m\" or \"1h\"");
    }
    if (event.webSearchCount !== undefined) {
        assertNonNegative(event.webSearchCount, "webSearchCount", true);
    }
    if (event.webFetchCount !== undefined) {
        assertNonNegative(event.webFetchCount, "webFetchCount", true);
    }
    if (event.isBatchApi !== undefined && typeof event.isBatchApi !== "boolean") {
        throw new Error("isBatchApi must be a boolean");
    }
    if (event.isFastMode !== undefined && typeof event.isFastMode !== "boolean") {
        throw new Error("isFastMode must be a boolean");
    }
    // Build the snake_case payload; conditional spreads keep optional keys
    // out of the JSON entirely when absent.
    return {
        ...(event.eventId ? { event_id: event.eventId.trim() } : {}),
        provider: event.provider.trim().toLowerCase(),
        model: event.model.trim(),
        ...(event.resolvedModel ? { resolved_model: event.resolvedModel.trim() } : {}),
        input_tokens: event.inputTokens,
        output_tokens: event.outputTokens,
        ...(event.inputTokensCached !== undefined ? { input_tokens_cached: event.inputTokensCached } : {}),
        ...(event.inputTokensCacheWrite !== undefined ? { input_tokens_cache_write: event.inputTokensCacheWrite } : {}),
        ...(event.thinkingTokens !== undefined ? { thinking_tokens: event.thinkingTokens } : {}),
        ...(event.cacheTtl ? { cache_ttl: event.cacheTtl } : {}),
        ...(event.webSearchCount !== undefined ? { web_search_count: event.webSearchCount } : {}),
        ...(event.webFetchCount !== undefined ? { web_fetch_count: event.webFetchCount } : {}),
        ...(event.isBatchApi ? { is_batch_api: true } : {}),
        ...(event.isFastMode ? { is_fast_mode: true } : {}),
        latency_ms: event.latencyMs,
        ...(event.costUsd !== undefined ? { cost_usd: event.costUsd } : {}),
        timestamp: normalizeTimestamp(event.timestamp),
        tags: normalizeTags(event.tags)
    };
}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
/**
 * Wraps an Anthropic client so every messages.create() call is
 * automatically tracked. Returns the original response unchanged.
 *
 * Note: the client is modified in place (messages.create is replaced with a
 * tracking wrapper) and the same instance is returned.
 *
 * Usage:
 *   import { init, wrapAnthropic } from "@aispendguard/sdk";
 *   import Anthropic from "@anthropic-ai/sdk";
 *
 *   init({ apiKey: "asg_...", defaultTags: { feature: "chat", route: "/api/chat" } });
 *   const anthropic = wrapAnthropic(new Anthropic());
 *
 *   // Automatically tracked:
 *   const msg = await anthropic.messages.create({ model: "claude-sonnet-4-20250514", ... });
 */
export declare function wrapAnthropic<T extends object>(client: T): T;
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.wrapAnthropic = wrapAnthropic;
|
|
4
|
+
const anthropic_1 = require("./anthropic");
|
|
5
|
+
const index_1 = require("./index");
|
|
6
|
+
/**
 * Wraps an Anthropic client so every messages.create() call is
 * automatically tracked. Returns the original response unchanged.
 *
 * Usage:
 *   import { init, wrapAnthropic } from "@aispendguard/sdk";
 *   import Anthropic from "@anthropic-ai/sdk";
 *
 *   init({ apiKey: "asg_...", defaultTags: { feature: "chat", route: "/api/chat" } });
 *   const anthropic = wrapAnthropic(new Anthropic());
 *
 *   // Automatically tracked:
 *   const msg = await anthropic.messages.create({ model: "claude-sonnet-4-20250514", ... });
 */
function wrapAnthropic(client) {
    // Bail out (returning the client untouched) when the shape is unexpected.
    const messages = client.messages;
    if (!messages || typeof messages !== "object")
        return client;
    const originalCreate = messages.create;
    if (typeof originalCreate !== "function")
        return client;
    messages.create = async function wrappedCreate(params, ...rest) {
        const startedAt = Date.now();
        const response = await originalCreate.call(this, params, ...rest);
        const latencyMs = Date.now() - startedAt;
        try {
            // `sdk` (not `client`) to avoid shadowing the wrapped Anthropic
            // client; null when init() has not been called.
            const sdk = getClientSafe();
            if (sdk) {
                const model = params.model ?? "unknown";
                const resolvedModel = typeof response.model === "string" ? response.model : undefined;
                const event = (0, anthropic_1.createAnthropicUsageEvent)({
                    model,
                    resolvedModel,
                    usage: response.usage,
                    latencyMs,
                    tags: mergeTags(sdk.defaultTags, params.asgTags, model),
                });
                sdk.trackUsage(event).catch(logError);
            }
        }
        catch {
            // Never break user code
        }
        return response;
    };
    return client;
}
|
|
57
|
+
/**
 * Merge precedence (lowest → highest): built-in fallbacks, client default
 * tags, per-call overrides. `source` and `model` are always forced last.
 */
function mergeTags(defaults, overrides, model) {
    const fallbacks = { task_type: "chat", feature: "default", route: "default" };
    const merged = Object.assign({}, fallbacks, defaults, overrides);
    merged.source = "auto-wrap";
    merged.model = model;
    return merged;
}
|
|
68
|
+
/**
 * Returns the SDK singleton, or null when init() has not been called.
 * Silent (no warning here) so auto-wrap degrades to a no-op.
 */
function getClientSafe() {
    let sdkClient = null;
    try {
        sdkClient = (0, index_1.getClient)();
    }
    catch {
        // Uninitialized SDK: tracking is simply skipped.
    }
    return sdkClient;
}
|
|
76
|
+
/** Logs a tracking failure as a warning; never throws. */
function logError(err) {
    const detail = err instanceof Error ? err.message : String(err);
    console.warn(`[aispendguard-sdk] auto-wrap tracking failed: ${detail}`);
}
|