@spendguard/sdk 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +190 -0
- package/LICENSE_NOTICES.md +127 -0
- package/README.md +151 -0
- package/dist/adapter-D9T3yEEw.d.ts +3441 -0
- package/dist/cache-DOnw8QtJ.d.ts +1164 -0
- package/dist/cache.d.ts +6 -0
- package/dist/cache.js +74 -0
- package/dist/client.d.ts +6 -0
- package/dist/client.js +4815 -0
- package/dist/errors.d.ts +269 -0
- package/dist/errors.js +148 -0
- package/dist/ids.d.ts +69 -0
- package/dist/ids.js +61 -0
- package/dist/index.d.ts +61 -0
- package/dist/index.js +5295 -0
- package/dist/otel.d.ts +118 -0
- package/dist/otel.js +84 -0
- package/dist/pricing/demo.d.ts +26 -0
- package/dist/pricing/demo.js +138 -0
- package/dist/pricing.d.ts +70 -0
- package/dist/pricing.js +92 -0
- package/dist/promptHash.d.ts +23 -0
- package/dist/promptHash.js +25 -0
- package/dist/proto.d.ts +609 -0
- package/dist/proto.js +3055 -0
- package/dist/retry.d.ts +121 -0
- package/dist/retry.js +92 -0
- package/dist/runPlan.d.ts +69 -0
- package/dist/runPlan.js +35 -0
- package/fixtures/cross-language/v1.json +327 -0
- package/package.json +123 -0
package/dist/otel.d.ts
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
import { Span, Tracer } from '@opentelemetry/api';
|
|
2
|
+
import { X as SpanRecord } from './cache-DOnw8QtJ.js';
|
|
3
|
+
import '@grpc/grpc-js';
|
|
4
|
+
import './adapter-D9T3yEEw.js';
|
|
5
|
+
import '@protobuf-ts/runtime-rpc';
|
|
6
|
+
import '@protobuf-ts/runtime';
|
|
7
|
+
|
|
8
|
+
/**
|
|
9
|
+
* Frozen set of OTel attribute keys that `withOtelSpan` honors. The keys
|
|
10
|
+
* match the table in design.md §6.4 verbatim — adapters that change them
|
|
11
|
+
* break the span-attribute contract that observability dashboards consume.
|
|
12
|
+
*
|
|
13
|
+
* The values are intentionally lowercase + dotted to match OpenTelemetry
|
|
14
|
+
* semantic-convention style (`<vendor>.<subsystem>.<field>`); they are
|
|
15
|
+
* NOT prefixed with `attr_` or similar.
|
|
16
|
+
*/
|
|
17
|
+
declare const SPENDGUARD_OTEL_ATTR: {
|
|
18
|
+
readonly TENANT_ID: "spendguard.tenant_id";
|
|
19
|
+
readonly DECISION_ID: "spendguard.decision_id";
|
|
20
|
+
readonly TRIGGER: "spendguard.trigger";
|
|
21
|
+
readonly OUTCOME_DECISION: "spendguard.outcome.decision";
|
|
22
|
+
readonly OUTCOME_REASON_CODES: "spendguard.outcome.reason_codes";
|
|
23
|
+
readonly SDK_VERSION: "spendguard.sdk.version";
|
|
24
|
+
readonly RESERVATION_ID: "spendguard.reservation_id";
|
|
25
|
+
readonly SCOPE_ID: "spendguard.scope_id";
|
|
26
|
+
};
|
|
27
|
+
/**
|
|
28
|
+
* Attribute primitive set OTel accepts on `Span.setAttribute`.
|
|
29
|
+
*
|
|
30
|
+
* Mirrors `SpanAttributeValue` from `@opentelemetry/api` without importing
|
|
31
|
+
* the runtime type (the type-only import path is already covered by the
|
|
32
|
+
* function signature).
|
|
33
|
+
*/
|
|
34
|
+
type OtelAttributeValue = string | number | boolean | string[] | number[] | boolean[];
|
|
35
|
+
/**
|
|
36
|
+
* Span attributes for the `withOtelSpan` wrapper. Keys SHOULD use the
|
|
37
|
+
* `spendguard.*` prefix per design.md §6.4 attribute table; values are
|
|
38
|
+
* standard OTel attribute primitives.
|
|
39
|
+
*/
|
|
40
|
+
type OtelAttributes = Readonly<Record<string, OtelAttributeValue | undefined>>;
|
|
41
|
+
/**
|
|
42
|
+
* Wrap `fn()` in an OTel span named `spendguard.<rpcName>`. When `tracer`
|
|
43
|
+
* is `undefined` and no `onSpan` observer is supplied, returns `await fn()` directly — no span, no attribute
|
|
44
|
+
* encoding, no allocation overhead.
|
|
45
|
+
*
|
|
46
|
+
* Per design.md §6.4 (line 422), `@opentelemetry/api` is a
|
|
47
|
+
* `peerDependenciesMeta.optional` dep; the type-only import above is erased
|
|
48
|
+
* at build time so callers that never enable OTel pay zero install/runtime
|
|
49
|
+
* cost.
|
|
50
|
+
*
|
|
51
|
+
* Span lifecycle:
|
|
52
|
+
* - `tracer.startSpan(name, { attributes })` opens the span.
|
|
53
|
+
* - Exceptions from `fn()` are recorded via `span.recordException(err)` +
|
|
54
|
+
* `span.setStatus({ code: ERROR, message })`, then re-thrown — the
|
|
55
|
+
* RPC failure is observable, the original throw semantics are preserved.
|
|
56
|
+
* - `span.end()` is always invoked from the `finally` block so a thrown
|
|
57
|
+
* exception does NOT leak the span (a leaked span would never appear
|
|
58
|
+
* in the trace export; adapters debugging missing spans waste hours).
|
|
59
|
+
*
|
|
60
|
+
* Span attribute encoding:
|
|
61
|
+
* - Undefined-valued attributes are skipped (no `null` propagation onto the
|
|
62
|
+
* span — OTel treats missing keys identically and omitting them keeps the
|
|
63
|
+
* wire payload tighter).
|
|
64
|
+
*
|
|
65
|
+
* Lightweight `onSpan` observer:
|
|
66
|
+
* - When `onSpan` is provided (and `tracer` is not — the two are mutually
|
|
67
|
+
* exclusive per `SpendGuardClientConfig`), each call emits exactly one
|
|
68
|
+
* `SpanRecord` to `onSpan` in the `finally` block with the wall-clock
|
|
69
|
+
* `startTimeMs` / `durationMs`, the same (scalar) attribute map, and the
|
|
70
|
+
* thrown `error` if `fn()` failed. This is the no-OTel-dep tracing path
|
|
71
|
+
* advertised by `SpendGuardClientConfig.onSpan`.
|
|
72
|
+
* - When BOTH `tracer` and `onSpan` are undefined, `fn()` runs unwrapped
|
|
73
|
+
* with zero timing/allocation overhead (unchanged fast path).
|
|
74
|
+
*
|
|
75
|
+
* @param tracer Optional OTel `Tracer`. When `undefined`, no OTel span is
|
|
76
|
+
* created. When defined, MUST be an `@opentelemetry/api` v1.9+ tracer.
|
|
77
|
+
* @param rpcName Bare RPC name; the span name is `spendguard.<rpcName>`.
|
|
78
|
+
* Caller MUST NOT prefix with `spendguard.` themselves (this function
|
|
79
|
+
* does it).
|
|
80
|
+
* @param attributes Span attributes to set at start-time. Undefined values
|
|
81
|
+
* are filtered out.
|
|
82
|
+
* @param fn The RPC implementation. Its return value is the function's
|
|
83
|
+
* return value; its throws are recorded and re-thrown.
|
|
84
|
+
* @param onSpan Optional `SpanRecord` observer (the lighter-weight tracing
|
|
85
|
+
* path for adapters that don't pull `@opentelemetry/api`). Invoked once per
|
|
86
|
+
* call in `finally`. Mutually exclusive with `tracer` at the config layer.
|
|
87
|
+
*
|
|
88
|
+
* @returns The result of `fn()`. Rethrows whatever `fn()` throws.
|
|
89
|
+
*
|
|
90
|
+
* @example
|
|
91
|
+
* await withOtelSpan(cfg.otelTracer, "reserve", {
|
|
92
|
+
* [SPENDGUARD_OTEL_ATTR.TENANT_ID]: cfg.tenantId,
|
|
93
|
+
* [SPENDGUARD_OTEL_ATTR.DECISION_ID]: req.decisionId,
|
|
94
|
+
* }, async () => {
|
|
95
|
+
* return await client.requestDecision(...);
|
|
96
|
+
* }, cfg.onSpan);
|
|
97
|
+
*/
|
|
98
|
+
declare function withOtelSpan<T>(tracer: Tracer | undefined, rpcName: string, attributes: OtelAttributes, fn: () => Promise<T>, onSpan?: (span: SpanRecord) => void): Promise<T>;
|
|
99
|
+
/**
|
|
100
|
+
* Set additional attributes on the active span after start-time. Used for
|
|
101
|
+
* outcome-side attributes (`spendguard.outcome.decision` /
|
|
102
|
+
* `spendguard.outcome.reason_codes`) that are only known after the RPC
|
|
103
|
+
* returns. Silently no-ops when `span` is undefined.
|
|
104
|
+
*
|
|
105
|
+
* The active span lookup is intentionally NOT done via `trace.getActiveSpan()`
|
|
106
|
+
* — that requires importing the OTel runtime, defeating the peer-optional
|
|
107
|
+
* dep cost guarantee. Instead, callers thread the span explicitly via the
|
|
108
|
+
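* `withOtelSpan` callback (the span is in scope inside the callback closure). NOTE(review): `withOtelSpan` invokes `fn` with no arguments, so the span is not handed to the callback by this function — confirm how callers obtain it.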
|
|
109
|
+
*
|
|
110
|
+
* SLICE 9 may add a thread-local span variant if the egress proxy / projector
|
|
111
|
+
* surface needs it; for v0.1.x, `withOtelSpan` is the only surface.
|
|
112
|
+
*
|
|
113
|
+
* @param span The active span. When `undefined`, this function is a no-op.
|
|
114
|
+
* @param attributes Attributes to add. Undefined values are skipped.
|
|
115
|
+
*/
|
|
116
|
+
declare function setOtelSpanAttributes(span: Span | undefined, attributes: OtelAttributes): void;
|
|
117
|
+
|
|
118
|
+
export { type OtelAttributeValue, type OtelAttributes, SPENDGUARD_OTEL_ATTR, setOtelSpanAttributes, withOtelSpan };
|
package/dist/otel.js
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
// src/otel.ts
|
|
2
|
+
var SPENDGUARD_OTEL_ATTR = {
|
|
3
|
+
TENANT_ID: "spendguard.tenant_id",
|
|
4
|
+
DECISION_ID: "spendguard.decision_id",
|
|
5
|
+
TRIGGER: "spendguard.trigger",
|
|
6
|
+
OUTCOME_DECISION: "spendguard.outcome.decision",
|
|
7
|
+
OUTCOME_REASON_CODES: "spendguard.outcome.reason_codes",
|
|
8
|
+
SDK_VERSION: "spendguard.sdk.version",
|
|
9
|
+
RESERVATION_ID: "spendguard.reservation_id",
|
|
10
|
+
SCOPE_ID: "spendguard.scope_id"
|
|
11
|
+
};
|
|
12
|
+
var SPAN_STATUS_ERROR = 2;
|
|
13
|
+
async function withOtelSpan(tracer, rpcName, attributes, fn, onSpan) {
|
|
14
|
+
if (tracer === void 0 && onSpan === void 0) return await fn();
|
|
15
|
+
const spanName = `spendguard.${rpcName}`;
|
|
16
|
+
const filtered = {};
|
|
17
|
+
for (const [k, v] of Object.entries(attributes)) {
|
|
18
|
+
if (v !== void 0) filtered[k] = v;
|
|
19
|
+
}
|
|
20
|
+
if (tracer === void 0) {
|
|
21
|
+
const startTimeMs2 = Date.now();
|
|
22
|
+
let error;
|
|
23
|
+
try {
|
|
24
|
+
return await fn();
|
|
25
|
+
} catch (err) {
|
|
26
|
+
error = err instanceof Error ? err : new Error(String(err));
|
|
27
|
+
throw err;
|
|
28
|
+
} finally {
|
|
29
|
+
emitSpanRecord(onSpan, spanName, startTimeMs2, filtered, error);
|
|
30
|
+
}
|
|
31
|
+
}
|
|
32
|
+
const startTimeMs = Date.now();
|
|
33
|
+
let observerError;
|
|
34
|
+
const span = tracer.startSpan(spanName, { attributes: filtered });
|
|
35
|
+
try {
|
|
36
|
+
const result = await fn();
|
|
37
|
+
return result;
|
|
38
|
+
} catch (err) {
|
|
39
|
+
if (err instanceof Error) {
|
|
40
|
+
observerError = err;
|
|
41
|
+
span.recordException(err);
|
|
42
|
+
span.setStatus({ code: SPAN_STATUS_ERROR, message: err.message });
|
|
43
|
+
} else {
|
|
44
|
+
const message = typeof err === "string" ? err : String(err);
|
|
45
|
+
observerError = new Error(message);
|
|
46
|
+
span.recordException({ name: "SpendGuardError", message });
|
|
47
|
+
span.setStatus({ code: SPAN_STATUS_ERROR, message });
|
|
48
|
+
}
|
|
49
|
+
throw err;
|
|
50
|
+
} finally {
|
|
51
|
+
span.end();
|
|
52
|
+
emitSpanRecord(onSpan, spanName, startTimeMs, filtered, observerError);
|
|
53
|
+
}
|
|
54
|
+
}
|
|
55
|
+
function emitSpanRecord(onSpan, name, startTimeMs, filtered, error) {
|
|
56
|
+
if (onSpan === void 0) return;
|
|
57
|
+
const attributes = {};
|
|
58
|
+
for (const [k, v] of Object.entries(filtered)) {
|
|
59
|
+
if (typeof v === "string" || typeof v === "number" || typeof v === "boolean") {
|
|
60
|
+
attributes[k] = v;
|
|
61
|
+
}
|
|
62
|
+
}
|
|
63
|
+
const record = {
|
|
64
|
+
name,
|
|
65
|
+
startTimeMs,
|
|
66
|
+
durationMs: Date.now() - startTimeMs,
|
|
67
|
+
attributes,
|
|
68
|
+
...error !== void 0 ? { error } : {}
|
|
69
|
+
};
|
|
70
|
+
try {
|
|
71
|
+
onSpan(record);
|
|
72
|
+
} catch {
|
|
73
|
+
}
|
|
74
|
+
}
|
|
75
|
+
function setOtelSpanAttributes(span, attributes) {
|
|
76
|
+
if (span === void 0) return;
|
|
77
|
+
for (const [k, v] of Object.entries(attributes)) {
|
|
78
|
+
if (v !== void 0) span.setAttribute(k, v);
|
|
79
|
+
}
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
export { SPENDGUARD_OTEL_ATTR, setOtelSpanAttributes, withOtelSpan };
|
|
83
|
+
//# sourceMappingURL=otel.js.map
|
|
84
|
+
//# sourceMappingURL=otel.js.map
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import { PricingLookup } from '../pricing.js';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Snapshot version pinned from `deploy/demo/init/pricing/seed.yaml`
|
|
5
|
+
* `pricing_version` field. Adapters that mint receipts with `pricingVersion`
|
|
6
|
+
* SHOULD prefer this constant so the demo wire matches the snapshot exactly.
|
|
7
|
+
*/
|
|
8
|
+
declare const DEMO_PRICING_VERSION = "v2026.05.09-1";
|
|
9
|
+
/**
|
|
10
|
+
* Demo `PricingLookup` instance — ready to call.
|
|
11
|
+
*
|
|
12
|
+
* Use in dev / examples to compute USD-micros without wiring a control-plane
|
|
13
|
+
* pricing fetch. Source of truth: `deploy/demo/init/pricing/seed.yaml` at
|
|
14
|
+
* pricing_version `v2026.05.09-1`.
|
|
15
|
+
*
|
|
16
|
+
* @example
|
|
17
|
+
* import { DEMO_PRICING } from "@spendguard/sdk/pricing/demo";
|
|
18
|
+
* const micros = DEMO_PRICING.usdMicrosForCall({
|
|
19
|
+
* provider: "openai", model: "gpt-4o-mini",
|
|
20
|
+
* inputTokens: 1000, outputTokens: 500,
|
|
21
|
+
* });
|
|
22
|
+
* // 150 + 300 = 450 µUSD
|
|
23
|
+
*/
|
|
24
|
+
declare const DEMO_PRICING: PricingLookup;
|
|
25
|
+
|
|
26
|
+
export { DEMO_PRICING, DEMO_PRICING_VERSION };
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
// src/errors.ts
|
|
2
|
+
var SpendGuardError = class extends Error {
|
|
3
|
+
name = "SpendGuardError";
|
|
4
|
+
constructor(message, opts) {
|
|
5
|
+
super(message);
|
|
6
|
+
if (opts?.cause !== void 0) {
|
|
7
|
+
this.cause = opts.cause;
|
|
8
|
+
}
|
|
9
|
+
Object.defineProperty(this, "name", {
|
|
10
|
+
value: this.name,
|
|
11
|
+
enumerable: true,
|
|
12
|
+
configurable: true,
|
|
13
|
+
writable: true
|
|
14
|
+
});
|
|
15
|
+
}
|
|
16
|
+
};
|
|
17
|
+
var PricingMissingError = class extends SpendGuardError {
|
|
18
|
+
name = "PricingMissingError";
|
|
19
|
+
provider;
|
|
20
|
+
model;
|
|
21
|
+
tokenKind;
|
|
22
|
+
constructor(args, opts) {
|
|
23
|
+
super(
|
|
24
|
+
`no price configured for provider=${JSON.stringify(args.provider)} model=${JSON.stringify(args.model)} tokenKind=${JSON.stringify(args.tokenKind)} (neither the specific kind nor the default kind has a price); refusing to charge $0 \u2014 supply a price for this model or handle PricingMissingError`,
|
|
25
|
+
opts
|
|
26
|
+
);
|
|
27
|
+
this.provider = args.provider;
|
|
28
|
+
this.model = args.model;
|
|
29
|
+
this.tokenKind = args.tokenKind;
|
|
30
|
+
}
|
|
31
|
+
};
|
|
32
|
+
|
|
33
|
+
// src/pricing.ts
|
|
34
|
+
var USD_MICROS_PER_USD = 1e6;
|
|
35
|
+
var PricingLookup = class {
|
|
36
|
+
table;
|
|
37
|
+
defaultKind;
|
|
38
|
+
constructor(table, opts) {
|
|
39
|
+
this.table = table;
|
|
40
|
+
this.defaultKind = opts?.defaultKind ?? "output";
|
|
41
|
+
}
|
|
42
|
+
/** Return $/1M-tokens or `null` if `(provider, model, kind)` is missing. */
|
|
43
|
+
pricePerMillion(provider, model, tokenKind) {
|
|
44
|
+
const v = this.table.get(`${provider}|${model}|${tokenKind}`);
|
|
45
|
+
return v === void 0 ? null : v;
|
|
46
|
+
}
|
|
47
|
+
/**
|
|
48
|
+
* Compute the µUSD cost of a single LLM call.
|
|
49
|
+
*
|
|
50
|
+
* Charges per-kind when prices are available; falls back to the default
|
|
51
|
+
* kind (typically `"output"`) for any token bucket without a configured
|
|
52
|
+
* price. Result is rounded UP to the nearest µUSD (fail-safe for the
|
|
53
|
+
* customer — never under-charge due to FP truncation). The minimum
|
|
54
|
+
* returned value is `1` (we never claim zero cost on a non-zero token
|
|
55
|
+
* count) whenever the summed cost is positive; a summed cost of 0 returns `0`.
|
|
56
|
+
*
|
|
57
|
+
* Fail-closed: when a token bucket has a non-zero count but NEITHER the
|
|
58
|
+
* specific kind NOR the default kind has a configured price, this throws
|
|
59
|
+
* {@link PricingMissingError} instead of silently charging $0. Coercing a
|
|
60
|
+
* missing price to 0 would under-count the budget — the exact under-charge
|
|
61
|
+
* the guardrail exists to prevent — and unknown/new models are precisely
|
|
62
|
+
* the ones most likely to be mispriced. Buckets with a zero count never
|
|
63
|
+
* trigger this (no charge is attributed, so a missing price is irrelevant).
|
|
64
|
+
*
|
|
65
|
+
* @throws {@link PricingMissingError} when a non-zero token bucket has no
|
|
66
|
+
* resolvable price.
|
|
67
|
+
*/
|
|
68
|
+
usdMicrosForCall(args) {
|
|
69
|
+
let usd = 0;
|
|
70
|
+
const charge = (kind, count) => {
|
|
71
|
+
if (count <= 0) return;
|
|
72
|
+
const p = this.pricePerMillion(args.provider, args.model, kind) ?? this.pricePerMillion(args.provider, args.model, this.defaultKind);
|
|
73
|
+
if (p === null) {
|
|
74
|
+
throw new PricingMissingError({
|
|
75
|
+
provider: args.provider,
|
|
76
|
+
model: args.model,
|
|
77
|
+
tokenKind: kind
|
|
78
|
+
});
|
|
79
|
+
}
|
|
80
|
+
usd += count * p / 1e6;
|
|
81
|
+
};
|
|
82
|
+
charge("input", args.inputTokens ?? 0);
|
|
83
|
+
charge("output", args.outputTokens ?? 0);
|
|
84
|
+
charge("cached_input", args.cachedInputTokens ?? 0);
|
|
85
|
+
if (usd <= 0) return 0;
|
|
86
|
+
return Math.max(1, Math.ceil(usd * USD_MICROS_PER_USD));
|
|
87
|
+
}
|
|
88
|
+
};
|
|
89
|
+
|
|
90
|
+
// src/pricing/demo.ts
|
|
91
|
+
var DEMO_PRICING_VERSION = "v2026.05.09-1";
|
|
92
|
+
var DEMO_PRICE_ENTRIES = Object.freeze([
|
|
93
|
+
// ============== OpenAI ==============
|
|
94
|
+
["openai|gpt-4o-mini|input", 0.15],
|
|
95
|
+
["openai|gpt-4o-mini|cached_input", 0.075],
|
|
96
|
+
["openai|gpt-4o-mini|output", 0.6],
|
|
97
|
+
["openai|gpt-4o|input", 2.5],
|
|
98
|
+
["openai|gpt-4o|cached_input", 1.25],
|
|
99
|
+
["openai|gpt-4o|output", 10],
|
|
100
|
+
["openai|o1|input", 15],
|
|
101
|
+
["openai|o1|cached_input", 7.5],
|
|
102
|
+
["openai|o1|output", 60],
|
|
103
|
+
["openai|o1|reasoning", 60],
|
|
104
|
+
["openai|o3-mini|input", 1.1],
|
|
105
|
+
["openai|o3-mini|cached_input", 0.55],
|
|
106
|
+
["openai|o3-mini|output", 4.4],
|
|
107
|
+
["openai|o3-mini|reasoning", 4.4],
|
|
108
|
+
// ============== Anthropic ==============
|
|
109
|
+
["anthropic|claude-haiku-4-5-20251001|input", 1],
|
|
110
|
+
["anthropic|claude-haiku-4-5-20251001|cached_input", 0.1],
|
|
111
|
+
["anthropic|claude-haiku-4-5-20251001|output", 5],
|
|
112
|
+
["anthropic|claude-sonnet-4-5-20250929|input", 3],
|
|
113
|
+
["anthropic|claude-sonnet-4-5-20250929|cached_input", 0.3],
|
|
114
|
+
["anthropic|claude-sonnet-4-5-20250929|output", 15],
|
|
115
|
+
["anthropic|claude-opus-4-7|input", 15],
|
|
116
|
+
["anthropic|claude-opus-4-7|cached_input", 1.5],
|
|
117
|
+
["anthropic|claude-opus-4-7|output", 75],
|
|
118
|
+
// ============== Azure OpenAI ==============
|
|
119
|
+
["azure_openai|gpt-4o-mini|input", 0.15],
|
|
120
|
+
["azure_openai|gpt-4o-mini|cached_input", 0.075],
|
|
121
|
+
["azure_openai|gpt-4o-mini|output", 0.6],
|
|
122
|
+
["azure_openai|gpt-4o|input", 2.5],
|
|
123
|
+
["azure_openai|gpt-4o|output", 10],
|
|
124
|
+
// ============== AWS Bedrock ==============
|
|
125
|
+
["bedrock|anthropic.claude-haiku-4-5|input", 1],
|
|
126
|
+
["bedrock|anthropic.claude-haiku-4-5|output", 5],
|
|
127
|
+
["bedrock|anthropic.claude-sonnet-4-5|input", 3],
|
|
128
|
+
["bedrock|anthropic.claude-sonnet-4-5|output", 15],
|
|
129
|
+
// ============== Google Gemini ==============
|
|
130
|
+
["gemini|gemini-2.0-flash|input", 0.1],
|
|
131
|
+
["gemini|gemini-2.0-flash|cached_input", 0.025],
|
|
132
|
+
["gemini|gemini-2.0-flash|output", 0.4]
|
|
133
|
+
]);
|
|
134
|
+
var DEMO_PRICING = new PricingLookup(new Map(DEMO_PRICE_ENTRIES));
|
|
135
|
+
|
|
136
|
+
export { DEMO_PRICING, DEMO_PRICING_VERSION };
|
|
137
|
+
//# sourceMappingURL=demo.js.map
|
|
138
|
+
//# sourceMappingURL=demo.js.map
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
/** USD micros per USD — 1 USD = 1,000,000 µUSD. */
|
|
2
|
+
declare const USD_MICROS_PER_USD = 1000000;
|
|
3
|
+
/**
|
|
4
|
+
* Composite lookup key for the pricing table.
|
|
5
|
+
*
|
|
6
|
+
* `(provider, model, tokenKind)` triples key the `PriceTable`. `tokenKind`
|
|
7
|
+
* is one of `"input"`, `"output"`, `"cached_input"`, `"vision_input"`,
|
|
8
|
+
* `"audio_input"`, `"reasoning"`.
|
|
9
|
+
*
|
|
10
|
+
* The `PriceTable` Map serializes the triple to a `${provider}|${model}|${kind}`
|
|
11
|
+
* string — JS Map keys do not support tuple identity, so we flatten.
|
|
12
|
+
*/
|
|
13
|
+
type PriceKey = readonly [provider: string, model: string, tokenKind: string];
|
|
14
|
+
/**
|
|
15
|
+
* Frozen-at-construction pricing table.
|
|
16
|
+
*
|
|
17
|
+
* Keys: `${provider}|${model}|${tokenKind}` strings.
|
|
18
|
+
* Values: USD price per million tokens (e.g. `0.15` = $0.15 / 1M tokens).
|
|
19
|
+
*/
|
|
20
|
+
type PriceTable = ReadonlyMap<string, number>;
|
|
21
|
+
/**
|
|
22
|
+
* Frozen pricing table → USD-micros computation.
|
|
23
|
+
*
|
|
24
|
+
* The lookup is not side-effecting — there is no DB query, no network call.
|
|
25
|
+
* Callers fetch + cache pricing once (e.g., at handshake time) and pass it in.
|
|
26
|
+
* Mirrors Python `PricingLookup` semantics including:
|
|
27
|
+
* - default kind fallback (typically `"output"`) when a token kind has no
|
|
28
|
+
* configured price.
|
|
29
|
+
* - per-kind charging for input / output / cached_input buckets.
|
|
30
|
+
* - round-up to the nearest µUSD so the customer is never under-charged.
|
|
31
|
+
*/
|
|
32
|
+
declare class PricingLookup {
|
|
33
|
+
private readonly table;
|
|
34
|
+
private readonly defaultKind;
|
|
35
|
+
constructor(table: PriceTable, opts?: {
|
|
36
|
+
defaultKind?: string;
|
|
37
|
+
});
|
|
38
|
+
/** Return $/1M-tokens or `null` if `(provider, model, kind)` is missing. */
|
|
39
|
+
pricePerMillion(provider: string, model: string, tokenKind: string): number | null;
|
|
40
|
+
/**
|
|
41
|
+
* Compute the µUSD cost of a single LLM call.
|
|
42
|
+
*
|
|
43
|
+
* Charges per-kind when prices are available; falls back to the default
|
|
44
|
+
* kind (typically `"output"`) for any token bucket without a configured
|
|
45
|
+
* price. Result is rounded UP to the nearest µUSD (fail-safe for the
|
|
46
|
+
* customer — never under-charge due to FP truncation). The minimum
|
|
47
|
+
* returned value is `1` (we never claim zero cost on a non-zero token
|
|
48
|
+
* count) whenever the summed cost is positive; a summed cost of 0 returns `0`.
|
|
49
|
+
*
|
|
50
|
+
* Fail-closed: when a token bucket has a non-zero count but NEITHER the
|
|
51
|
+
* specific kind NOR the default kind has a configured price, this throws
|
|
52
|
+
* {@link PricingMissingError} instead of silently charging $0. Coercing a
|
|
53
|
+
* missing price to 0 would under-count the budget — the exact under-charge
|
|
54
|
+
* the guardrail exists to prevent — and unknown/new models are precisely
|
|
55
|
+
* the ones most likely to be mispriced. Buckets with a zero count never
|
|
56
|
+
* trigger this (no charge is attributed, so a missing price is irrelevant).
|
|
57
|
+
*
|
|
58
|
+
* @throws {@link PricingMissingError} when a non-zero token bucket has no
|
|
59
|
+
* resolvable price.
|
|
60
|
+
*/
|
|
61
|
+
usdMicrosForCall(args: {
|
|
62
|
+
provider: string;
|
|
63
|
+
model: string;
|
|
64
|
+
inputTokens?: number;
|
|
65
|
+
outputTokens?: number;
|
|
66
|
+
cachedInputTokens?: number;
|
|
67
|
+
}): number;
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
export { type PriceKey, type PriceTable, PricingLookup, USD_MICROS_PER_USD };
|
package/dist/pricing.js
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
// src/errors.ts
|
|
2
|
+
var SpendGuardError = class extends Error {
|
|
3
|
+
name = "SpendGuardError";
|
|
4
|
+
constructor(message, opts) {
|
|
5
|
+
super(message);
|
|
6
|
+
if (opts?.cause !== void 0) {
|
|
7
|
+
this.cause = opts.cause;
|
|
8
|
+
}
|
|
9
|
+
Object.defineProperty(this, "name", {
|
|
10
|
+
value: this.name,
|
|
11
|
+
enumerable: true,
|
|
12
|
+
configurable: true,
|
|
13
|
+
writable: true
|
|
14
|
+
});
|
|
15
|
+
}
|
|
16
|
+
};
|
|
17
|
+
var PricingMissingError = class extends SpendGuardError {
|
|
18
|
+
name = "PricingMissingError";
|
|
19
|
+
provider;
|
|
20
|
+
model;
|
|
21
|
+
tokenKind;
|
|
22
|
+
constructor(args, opts) {
|
|
23
|
+
super(
|
|
24
|
+
`no price configured for provider=${JSON.stringify(args.provider)} model=${JSON.stringify(args.model)} tokenKind=${JSON.stringify(args.tokenKind)} (neither the specific kind nor the default kind has a price); refusing to charge $0 \u2014 supply a price for this model or handle PricingMissingError`,
|
|
25
|
+
opts
|
|
26
|
+
);
|
|
27
|
+
this.provider = args.provider;
|
|
28
|
+
this.model = args.model;
|
|
29
|
+
this.tokenKind = args.tokenKind;
|
|
30
|
+
}
|
|
31
|
+
};
|
|
32
|
+
|
|
33
|
+
// src/pricing.ts
|
|
34
|
+
var USD_MICROS_PER_USD = 1e6;
|
|
35
|
+
var PricingLookup = class {
|
|
36
|
+
table;
|
|
37
|
+
defaultKind;
|
|
38
|
+
constructor(table, opts) {
|
|
39
|
+
this.table = table;
|
|
40
|
+
this.defaultKind = opts?.defaultKind ?? "output";
|
|
41
|
+
}
|
|
42
|
+
/** Return $/1M-tokens or `null` if `(provider, model, kind)` is missing. */
|
|
43
|
+
pricePerMillion(provider, model, tokenKind) {
|
|
44
|
+
const v = this.table.get(`${provider}|${model}|${tokenKind}`);
|
|
45
|
+
return v === void 0 ? null : v;
|
|
46
|
+
}
|
|
47
|
+
/**
|
|
48
|
+
* Compute the µUSD cost of a single LLM call.
|
|
49
|
+
*
|
|
50
|
+
* Charges per-kind when prices are available; falls back to the default
|
|
51
|
+
* kind (typically `"output"`) for any token bucket without a configured
|
|
52
|
+
* price. Result is rounded UP to the nearest µUSD (fail-safe for the
|
|
53
|
+
* customer — never under-charge due to FP truncation). The minimum
|
|
54
|
+
* returned value is `1` (we never claim zero cost on a non-zero token
|
|
55
|
+
* count) whenever the summed cost is positive; a summed cost of 0 returns `0`.
|
|
56
|
+
*
|
|
57
|
+
* Fail-closed: when a token bucket has a non-zero count but NEITHER the
|
|
58
|
+
* specific kind NOR the default kind has a configured price, this throws
|
|
59
|
+
* {@link PricingMissingError} instead of silently charging $0. Coercing a
|
|
60
|
+
* missing price to 0 would under-count the budget — the exact under-charge
|
|
61
|
+
* the guardrail exists to prevent — and unknown/new models are precisely
|
|
62
|
+
* the ones most likely to be mispriced. Buckets with a zero count never
|
|
63
|
+
* trigger this (no charge is attributed, so a missing price is irrelevant).
|
|
64
|
+
*
|
|
65
|
+
* @throws {@link PricingMissingError} when a non-zero token bucket has no
|
|
66
|
+
* resolvable price.
|
|
67
|
+
*/
|
|
68
|
+
usdMicrosForCall(args) {
|
|
69
|
+
let usd = 0;
|
|
70
|
+
const charge = (kind, count) => {
|
|
71
|
+
if (count <= 0) return;
|
|
72
|
+
const p = this.pricePerMillion(args.provider, args.model, kind) ?? this.pricePerMillion(args.provider, args.model, this.defaultKind);
|
|
73
|
+
if (p === null) {
|
|
74
|
+
throw new PricingMissingError({
|
|
75
|
+
provider: args.provider,
|
|
76
|
+
model: args.model,
|
|
77
|
+
tokenKind: kind
|
|
78
|
+
});
|
|
79
|
+
}
|
|
80
|
+
usd += count * p / 1e6;
|
|
81
|
+
};
|
|
82
|
+
charge("input", args.inputTokens ?? 0);
|
|
83
|
+
charge("output", args.outputTokens ?? 0);
|
|
84
|
+
charge("cached_input", args.cachedInputTokens ?? 0);
|
|
85
|
+
if (usd <= 0) return 0;
|
|
86
|
+
return Math.max(1, Math.ceil(usd * USD_MICROS_PER_USD));
|
|
87
|
+
}
|
|
88
|
+
};
|
|
89
|
+
|
|
90
|
+
export { PricingLookup, USD_MICROS_PER_USD };
|
|
91
|
+
//# sourceMappingURL=pricing.js.map
|
|
92
|
+
//# sourceMappingURL=pricing.js.map
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Compute the lowercase hex HMAC-SHA256 of a normalized prompt with the
|
|
3
|
+
* tenant key.
|
|
4
|
+
*
|
|
5
|
+
* Determinism: `computePromptHash(s, t) === computePromptHash(s, t)` for any
|
|
6
|
+
* (s, t).
|
|
7
|
+
*
|
|
8
|
+
* Cross-language: byte-for-byte identical to:
|
|
9
|
+
* - Python `spendguard.prompt_hash.compute(s, t)`
|
|
10
|
+
* - Rust `services::sidecar::prompt_hash::compute(s, t)`
|
|
11
|
+
*
|
|
12
|
+
* Cross-tenant: two tenants asking the same prompt produce different
|
|
13
|
+
* hashes.
|
|
14
|
+
*
|
|
15
|
+
* @param promptText The raw prompt text. Leading/trailing ASCII whitespace
|
|
16
|
+
* is stripped before hashing.
|
|
17
|
+
* @param tenantId The tenant identifier. If a canonical UUID, lowercased
|
|
18
|
+
* before keying the HMAC; otherwise passed through verbatim.
|
|
19
|
+
* @returns 64-char lowercase hex string.
|
|
20
|
+
*/
|
|
21
|
+
declare function computePromptHash(promptText: string, tenantId: string): string;
|
|
22
|
+
|
|
23
|
+
export { computePromptHash };
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import { createHmac } from 'crypto';
|
|
2
|
+
|
|
3
|
+
// src/promptHash.ts
|
|
4
|
+
// Characters stripped from both ends of a prompt before hashing: space,
// horizontal tab, line feed, form feed and carriage return. The tab is written
// as an explicit `\t` escape so whitespace-normalising tooling cannot silently
// turn it into a second space.
var ASCII_WHITESPACE = /* @__PURE__ */ new Set([" ", "\t", "\n", "\f", "\r"]);

// Canonical 8-4-4-4-12 hexadecimal UUID layout, either letter case.
var UUID_RE = /^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$/;

/**
 * Normalise a tenant identifier for use as the HMAC key.
 *
 * @param tenantId Raw tenant identifier.
 * @returns The lowercased identifier when it is UUID-shaped, otherwise the
 *   identifier unchanged.
 */
function canonicalizeTenant(tenantId) {
  return UUID_RE.test(tenantId) ? tenantId.toLowerCase() : tenantId;
}

/**
 * Remove leading and trailing ASCII whitespace (see `ASCII_WHITESPACE`).
 * Unlike `String.prototype.trim`, other whitespace such as U+00A0 or
 * vertical tab is left in place.
 *
 * @param s Text to trim.
 * @returns `s` without its ASCII-whitespace prefix and suffix.
 */
function stripAscii(s) {
  let start = 0;
  let end = s.length;
  while (start < end && ASCII_WHITESPACE.has(s.charAt(start))) start += 1;
  while (end > start && ASCII_WHITESPACE.has(s.charAt(end - 1))) end -= 1;
  return s.slice(start, end);
}

/**
 * Compute the lowercase hex HMAC-SHA256 of the trimmed prompt, keyed by the
 * canonicalised tenant identifier.
 *
 * @param promptText Raw prompt; leading/trailing ASCII whitespace is ignored.
 * @param tenantId Tenant identifier; UUID-shaped values are lowercased first.
 * @returns 64-character lowercase hex digest.
 */
function computePromptHash(promptText, tenantId) {
  const hmac = createHmac("sha256", canonicalizeTenant(tenantId));
  hmac.update(stripAscii(promptText), "utf8");
  return hmac.digest("hex");
}
|
|
22
|
+
|
|
23
|
+
export { computePromptHash };
|
|
24
|
+
//# sourceMappingURL=promptHash.js.map
|
|
25
|
+
//# sourceMappingURL=promptHash.js.map
|