@takk/racs 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +70 -0
- package/LICENSE +190 -0
- package/NOTICE +40 -0
- package/README.md +381 -0
- package/SECURITY.md +57 -0
- package/dist/cli/index.js +3016 -0
- package/dist/cli/index.js.map +1 -0
- package/dist/edge/index.cjs +2000 -0
- package/dist/edge/index.cjs.map +1 -0
- package/dist/edge/index.d.cts +598 -0
- package/dist/edge/index.d.ts +598 -0
- package/dist/edge/index.js +1987 -0
- package/dist/edge/index.js.map +1 -0
- package/dist/index.cjs +2071 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.cts +39 -0
- package/dist/index.d.ts +39 -0
- package/dist/index.js +2057 -0
- package/dist/index.js.map +1 -0
- package/dist/integrations/index.cjs +123 -0
- package/dist/integrations/index.cjs.map +1 -0
- package/dist/integrations/index.d.cts +285 -0
- package/dist/integrations/index.d.ts +285 -0
- package/dist/integrations/index.js +117 -0
- package/dist/integrations/index.js.map +1 -0
- package/dist/otel/index.cjs +93 -0
- package/dist/otel/index.cjs.map +1 -0
- package/dist/otel/index.d.cts +105 -0
- package/dist/otel/index.d.ts +105 -0
- package/dist/otel/index.js +91 -0
- package/dist/otel/index.js.map +1 -0
- package/dist/types-DQ7-9sk3.d.cts +758 -0
- package/dist/types-DQ7-9sk3.d.ts +758 -0
- package/dist/vercel/index.cjs +209 -0
- package/dist/vercel/index.cjs.map +1 -0
- package/dist/vercel/index.d.cts +210 -0
- package/dist/vercel/index.d.ts +210 -0
- package/dist/vercel/index.js +206 -0
- package/dist/vercel/index.js.map +1 -0
- package/dist/web/index.cjs +2000 -0
- package/dist/web/index.cjs.map +1 -0
- package/dist/web/index.d.cts +2 -0
- package/dist/web/index.d.ts +2 -0
- package/dist/web/index.js +1987 -0
- package/dist/web/index.js.map +1 -0
- package/package.json +189 -0
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
// src/integrations/index.ts
|
|
2
|
+
/**
 * Binds the two module-level tuning functions of a noeticos-style module to one runtime,
 * producing the `{ freeze, release }` object that `noeticosBridge` consumes.
 *
 * @param noeticosModule - Object exposing `freezeTuning(runtime, agentId, reason)` and
 *   `releaseTuning(runtime, agentId)`.
 * @param runtime - Runtime handle, forwarded untouched as the first argument of both calls.
 * @returns An object whose `freeze(agentId, reason)` and `release(agentId)` delegate to
 *   the module functions and hand back whatever those return.
 */
function noeticosAdapter(noeticosModule, runtime) {
  const freeze = (agentId, reason) => noeticosModule.freezeTuning(runtime, agentId, reason);
  const release = (agentId) => noeticosModule.releaseTuning(runtime, agentId);
  return { freeze, release };
}
|
|
8
|
+
/**
 * Subscribes to RACS telemetry and freezes an agent's tuning while its prompt prefix is
 * unsettled.
 *
 * - On `"prefix.drifted"` with an `agentId` in the report: calls `noeticos.freeze` with a
 *   reason naming the changed segments and invalidated token count, then (re)starts the
 *   stable-plan count for that agent against the report's `prefixKey`.
 * - On `"plan.created"`: finds the first frozen agent whose recorded prefix key equals the
 *   plan's. The first such plan after a drift is skipped (it is the drifting plan's own
 *   event); every later one counts, and once the count reaches the threshold the agent is
 *   dropped from the bookkeeping and `noeticos.release` is called.
 * - Every other event type is ignored.
 *
 * @param racs - Engine exposing `on(listener)`.
 * @param noeticos - Object with `freeze(agentId, reason)` and `release(agentId)`.
 * @param options - Optional; `releaseAfterStablePlans` is the release threshold (default 3).
 * @returns Whatever `racs.on` returns for the registered listener.
 */
function noeticosBridge(racs, noeticos, options) {
  const releaseAfter = options?.releaseAfterStablePlans ?? 3;
  // agentId -> { prefixKey, stablePlans, skipNext }
  const frozen = new Map();

  const handleDrift = (report) => {
    const agentId = report.agentId;
    if (agentId === undefined) {
      return;
    }
    const segments = report.changedSegmentIds.join(", ");
    try {
      const reason = `RACS prefix drift: changed segments [${segments}], ${report.invalidatedTokens} cached prefix tokens invalidated.`;
      noeticos.freeze(agentId, reason);
    } catch {
      // Deliberately contained: a failing freeze call must not escape the listener.
    }
    // A fresh drift always restarts the count, even when the agent was already frozen.
    frozen.set(agentId, {
      prefixKey: report.prefixKey,
      stablePlans: 0,
      skipNext: true
    });
  };

  const handleMatchingPlan = (agentId, state) => {
    if (state.skipNext) {
      // The plan that drifted emits its own "plan.created"; it must not count as stable.
      state.skipNext = false;
      return;
    }
    state.stablePlans += 1;
    if (state.stablePlans >= releaseAfter) {
      frozen.delete(agentId);
      try {
        noeticos.release(agentId);
      } catch {
        // Deliberately contained, same as the freeze call.
      }
    }
  };

  return racs.on((event) => {
    if (event.type === "prefix.drifted") {
      handleDrift(event.report);
      return;
    }
    if (event.type !== "plan.created") {
      return;
    }
    for (const [agentId, state] of frozen) {
      if (state.prefixKey === event.plan.prefixKey) {
        // Only the first matching entry is touched per plan.
        handleMatchingPlan(agentId, state);
        break;
      }
    }
  });
}
|
|
55
|
+
/**
 * Computes the cache-write spend of one usage record in USD.
 *
 * @param usage - Usage record; reads `model`, `cacheWriteTokens5m`, `cacheWriteTokens1h`
 *   (missing counts are treated as 0).
 * @param pricing - Optional table keyed by model; each card may carry
 *   `cacheWrite5mPerMTok` and `cacheWrite1hPerMTok` (USD per million tokens).
 * @returns The spend in USD, or `undefined` when the table has no card for the model or
 *   the card lacks the rate of a tier the record actually wrote to.
 */
function writeCostUsd(usage, pricing) {
  const card = pricing?.[usage.model];
  if (card === undefined) {
    return undefined;
  }
  const tokens5m = usage.cacheWriteTokens5m ?? 0;
  const tokens1h = usage.cacheWriteTokens1h ?? 0;
  const rate5m = card.cacheWrite5mPerMTok;
  const rate1h = card.cacheWrite1hPerMTok;
  // A tier with tokens but no rate makes the whole record unpriceable.
  const unpriced = (tokens5m > 0 && rate5m === undefined) || (tokens1h > 0 && rate1h === undefined);
  if (unpriced) {
    return undefined;
  }
  // Rates are per million tokens, hence the final division.
  return (tokens5m * (rate5m ?? 0) + tokens1h * (rate1h ?? 0)) / 1e6;
}
|
|
71
|
+
/**
 * Subscribes to RACS telemetry and reports every `"usage.recorded"` event to a
 * behavioral engine as one synthetic turn.
 *
 * The turn carries, in this key order: `agentId`, `error: false`, `metadata`
 * (`prefixKey` only when the usage record has one, then `hit` as `"true"`/`"false"`),
 * and `costUsd` only when `writeCostUsd` could price the record.
 *
 * @param racs - Engine exposing `on(listener)`.
 * @param behavioral - Object with `observe(turn)`; its return value is ignored.
 * @param options - Optional; `agentId` (default `"racs-cache"`) and `pricing`, the table
 *   handed to `writeCostUsd`.
 * @returns Whatever `racs.on` returns for the registered listener.
 */
function behavioralaiBridge(racs, behavioral, options) {
  const agentId = options?.agentId ?? "racs-cache";
  const pricing = options?.pricing;

  const toTurn = (event) => {
    const costUsd = writeCostUsd(event.usage, pricing);
    const prefixKey = event.usage.prefixKey;
    const hit = event.hit ? "true" : "false";
    const metadata = prefixKey !== undefined ? { prefixKey, hit } : { hit };
    const base = { agentId, error: false, metadata };
    return costUsd !== undefined ? { ...base, costUsd } : base;
  };

  return racs.on((event) => {
    if (event.type !== "usage.recorded") {
      return;
    }
    const turn = toTurn(event);
    try {
      behavioral.observe(turn);
    } catch {
      // Deliberately contained: a failing observe call must not escape the listener.
    }
  });
}
|
|
94
|
+
/**
 * Builds a stateless per-model planner on top of `racs.plan`.
 *
 * @param racs - Engine exposing `plan(input)`.
 * @returns An object with `planForModel(base, modelId)`, which plans a shallow copy of
 *   `base` with `model` set to `modelId` and returns the result of `racs.plan`.
 */
function modelchainBridge(racs) {
  const planForModel = (base, modelId) => {
    // Copy first so the caller's base input is never mutated.
    const input = { ...base, model: modelId };
    return racs.plan(input);
  };
  return { planForModel };
}
|
|
101
|
+
/**
 * Invalidates provider-scoped RACS state whenever the keymesh client signals
 * `"key.rotated"` or `"circuit.open"`.
 *
 * @param racs - Engine exposing `invalidate({ provider })`.
 * @param keymesh - Client exposing `on(event, handler)` and `off(event, handler)`.
 * @param options - `providers`: the providers to invalidate, read at each event.
 * @returns A disposer that unsubscribes the handler from both events.
 */
function keymeshBridge(racs, keymesh, options) {
  const watchedEvents = ["key.rotated", "circuit.open"];

  // One shared handler so `off` can remove exactly what `on` registered.
  const onCredentialChange = () => {
    for (const provider of options.providers) {
      racs.invalidate({ provider });
    }
  };

  for (const eventName of watchedEvents) {
    keymesh.on(eventName, onCredentialChange);
  }
  return () => {
    for (const eventName of watchedEvents) {
      keymesh.off(eventName, onCredentialChange);
    }
  };
}
|
|
114
|
+
|
|
115
|
+
export { behavioralaiBridge, keymeshBridge, modelchainBridge, noeticosAdapter, noeticosBridge };
|
|
116
|
+
//# sourceMappingURL=index.js.map
|
|
117
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../../src/integrations/index.ts"],"names":[],"mappings":";AAsEO,SAAS,eAAA,CACd,gBACA,OAAA,EACc;AACd,EAAA,OAAO;AAAA,IACL,MAAA,EAAQ,CAAC,OAAA,EAAS,MAAA,KAAiB,eAAe,YAAA,CAAa,OAAA,EAAS,SAAS,MAAM,CAAA;AAAA,IACvF,SAAS,CAAC,OAAA,KAAkB,cAAA,CAAe,aAAA,CAAc,SAAS,OAAO;AAAA,GAC3E;AACF;AAmDO,SAAS,cAAA,CACd,IAAA,EACA,QAAA,EACA,OAAA,EACY;AACZ,EAAA,MAAM,YAAA,GAAe,SAAS,uBAAA,IAA2B,CAAA;AACzD,EAAA,MAAM,MAAA,uBAAa,GAAA,EAAyB;AAC5C,EAAA,OAAO,IAAA,CAAK,EAAA,CAAG,CAAC,KAAA,KAAU;AACxB,IAAA,IAAI,KAAA,CAAM,SAAS,gBAAA,EAAkB;AACnC,MAAA,MAAM,OAAA,GAAU,MAAM,MAAA,CAAO,OAAA;AAC7B,MAAA,IAAI,YAAY,MAAA,EAAW;AACzB,QAAA;AAAA,MACF;AACA,MAAA,MAAM,QAAA,GAAW,KAAA,CAAM,MAAA,CAAO,iBAAA,CAAkB,KAAK,IAAI,CAAA;AACzD,MAAA,IAAI;AACF,QAAA,QAAA,CAAS,MAAA;AAAA,UACP,OAAA;AAAA,UACA,CAAA,qCAAA,EAAwC,QAAQ,CAAA,GAAA,EAC3C,KAAA,CAAM,OAAO,iBAAiB,CAAA,kCAAA;AAAA,SACrC;AAAA,MACF,CAAA,CAAA,MAAQ;AAAA,MAGR;AACA,MAAA,MAAA,CAAO,IAAI,OAAA,EAAS;AAAA,QAClB,SAAA,EAAW,MAAM,MAAA,CAAO,SAAA;AAAA,QACxB,WAAA,EAAa,CAAA;AAAA,QACb,QAAA,EAAU;AAAA,OACX,CAAA;AACD,MAAA;AAAA,IACF;AACA,IAAA,IAAI,KAAA,CAAM,SAAS,cAAA,EAAgB;AACjC,MAAA;AAAA,IACF;AAEA,IAAA,KAAA,MAAW,CAAC,OAAA,EAAS,KAAK,CAAA,IAAK,MAAA,EAAQ;AACrC,MAAA,IAAI,KAAA,CAAM,SAAA,KAAc,KAAA,CAAM,IAAA,CAAK,SAAA,EAAW;AAC5C,QAAA;AAAA,MACF;AACA,MAAA,IAAI,MAAM,QAAA,EAAU;AAGlB,QAAA,KAAA,CAAM,QAAA,GAAW,KAAA;AACjB,QAAA;AAAA,MACF;AACA,MAAA,KAAA,CAAM,WAAA,IAAe,CAAA;AACrB,MAAA,IAAI,KAAA,CAAM,eAAe,YAAA,EAAc;AACrC,QAAA,MAAA,CAAO,OAAO,OAAO,CAAA;AACrB,QAAA,IAAI;AACF,UAAA,QAAA,CAAS,QAAQ,OAAO,CAAA;AAAA,QAC1B,CAAA,CAAA,MAAQ;AAAA,QAER;AAAA,MACF;AACA,MAAA;AAAA,IACF;AAAA,EACF,CAAC,CAAA;AACH;AAmCA,SAAS,YAAA,CAAa,OAAmB,OAAA,EAAuD;AAC9F,EAAA,MAAM,IAAA,GAAO,OAAA,GAAU,KAAA,CAAM,KAAK,CAAA;AAClC,EAAA,IAAI,SAAS,MAAA,EAAW;AACtB,IAAA,OAAO,MAAA;AAAA,EACT;AACA,EAAA,MAAM,OAAA,GAAU,MAAM,kBAAA,IAAsB,CAAA;AAC5C,EAAA,MAAM,OAAA,GAAU,MAAM,kBAAA,IAAsB,CAAA;AAC5C,EAAA,IAAI,OAAA,GAAU,CAAA,IAAK,IAAA,CAAK,mBAAA,KAAwB,MAAA,EAAW;AACzD,IAAA,OAAO,MAAA;AAAA,EACT;AACA,EAAA,IAAI,OAAA,GAAU,CAAA,IAAK,IAAA,CAAK,mBAAA,KAAwB,MAAA
,EAAW;AACzD,IAAA,OAAO,MAAA;AAAA,EACT;AACA,EAAA,MAAM,QACJ,OAAA,IAAW,IAAA,CAAK,uBAAuB,CAAA,CAAA,GAAK,OAAA,IAAW,KAAK,mBAAA,IAAuB,CAAA,CAAA;AACrF,EAAA,OAAO,KAAA,GAAQ,GAAA;AACjB;AAoDO,SAAS,kBAAA,CACd,IAAA,EACA,UAAA,EACA,OAAA,EACY;AACZ,EAAA,MAAM,OAAA,GAAU,SAAS,OAAA,IAAW,YAAA;AACpC,EAAA,MAAM,UAAU,OAAA,EAAS,OAAA;AACzB,EAAA,OAAO,IAAA,CAAK,EAAA,CAAG,CAAC,KAAA,KAAU;AACxB,IAAA,IAAI,KAAA,CAAM,SAAS,gBAAA,EAAkB;AACnC,MAAA;AAAA,IACF;AACA,IAAA,MAAM,OAAA,GAAU,YAAA,CAAa,KAAA,CAAM,KAAA,EAAO,OAAO,CAAA;AACjD,IAAA,MAAM,IAAA,GAA6B;AAAA,MACjC,OAAA;AAAA,MACA,KAAA,EAAO,KAAA;AAAA,MACP,QAAA,EAAU;AAAA,QACR,GAAI,KAAA,CAAM,KAAA,CAAM,SAAA,KAAc,MAAA,GAAY,EAAE,SAAA,EAAW,KAAA,CAAM,KAAA,CAAM,SAAA,EAAU,GAAI,EAAC;AAAA,QAClF,GAAA,EAAK,KAAA,CAAM,GAAA,GAAM,MAAA,GAAS;AAAA,OAC5B;AAAA,MACA,GAAI,OAAA,KAAY,MAAA,GAAY,EAAE,OAAA,KAAY;AAAC,KAC7C;AACA,IAAA,IAAI;AACF,MAAA,UAAA,CAAW,QAAQ,IAAI,CAAA;AAAA,IACzB,CAAA,CAAA,MAAQ;AAAA,IAGR;AAAA,EACF,CAAC,CAAA;AACH;AA4CO,SAAS,iBAAiB,IAAA,EAAoC;AACnE,EAAA,OAAO;AAAA,IACL,YAAA,CAAa,MAAgC,OAAA,EAA4B;AACvE,MAAA,OAAO,KAAK,IAAA,CAAK,EAAE,GAAG,IAAA,EAAM,KAAA,EAAO,SAAS,CAAA;AAAA,IAC9C;AAAA,GACF;AACF;AAmDO,SAAS,aAAA,CACd,IAAA,EACA,OAAA,EACA,OAAA,EACY;AACZ,EAAA,MAAM,qBAAqB,MAAY;AACrC,IAAA,KAAA,MAAW,QAAA,IAAY,QAAQ,SAAA,EAAW;AACxC,MAAA,IAAA,CAAK,UAAA,CAAW,EAAE,QAAA,EAAU,CAAA;AAAA,IAC9B;AAAA,EACF,CAAA;AACA,EAAA,OAAA,CAAQ,EAAA,CAAG,eAAe,kBAAkB,CAAA;AAC5C,EAAA,OAAA,CAAQ,EAAA,CAAG,gBAAgB,kBAAkB,CAAA;AAC7C,EAAA,OAAO,MAAY;AACjB,IAAA,OAAA,CAAQ,GAAA,CAAI,eAAe,kBAAkB,CAAA;AAC7C,IAAA,OAAA,CAAQ,GAAA,CAAI,gBAAgB,kBAAkB,CAAA;AAAA,EAChD,CAAA;AACF","file":"index.js","sourcesContent":["/**\n * Sibling-package bridges of RACS (Remote Agent Context Store): four adapters wiring a\n * running engine to the rest of the @takk family, @takk/noeticos parameter tuning,\n * @takk/behavioralai behavioral observability, @takk/modelchain model routing, and\n * @takk/keymesh credential rotation.\n *\n * Optional-peer pattern: every sibling shape in this module is a LOCAL structural\n * interface. 
Nothing here imports a sibling package at runtime or at the type level, so\n * the siblings stay optional peer dependencies, the published real objects satisfy these\n * shapes structurally, and the zero-runtime-dependency invariant of the package survives\n * intact. Hosts pass ready instances in, exactly as they do with `KvLike` stores.\n *\n * Privacy posture, shared by every bridge: only prefix keys (hashes), token counts, USD\n * figures derived from counts, agent identifiers, and hit flags ever cross a bridge.\n * Prompt content never does, RACS never holds it in the first place.\n *\n * @packageDocumentation\n */\n\nimport type { CachePlan, CacheUsage, PlanInput, PricingTable, ProviderId, RACS } from '../types.js';\n\n/**\n * Structural freeze surface of a @takk/noeticos runtime, as {@link noeticosBridge}\n * consumes it.\n *\n * The published package exposes freezing as the module-level functions\n * `freezeTuning(runtime, agentId, reason)` and `releaseTuning(runtime, agentId)` next to\n * the `NoeticOS` runtime interface; {@link noeticosAdapter} folds that pair into this\n * object in one line. Any other tuning runtime can satisfy the same two methods directly.\n */\nexport interface NoeticOSLike {\n /** Pauses parameter tuning for the agent, recording the reason in the audit trail. */\n freeze(agentId: string, reason: string): void;\n /** Resumes parameter tuning for the agent. Releasing a non-frozen agent is a no-op. */\n release(agentId: string): void;\n}\n\n/**\n * Module shape of the real @takk/noeticos package, structurally: the two module-level\n * tuning functions the bridge needs. Members are method-style on purpose, method\n * parameters are checked bivariantly, so the published functions, whose first parameter\n * is the concrete `NoeticOS` runtime, satisfy `unknown` here without this module ever\n * naming the sibling type.\n */\nexport interface NoeticosModuleLike {\n /** The published `freezeTuning(runtime, agentId, reason)`. 
*/\n freezeTuning(runtime: unknown, agentId: string, reason: string): void;\n /** The published `releaseTuning(runtime, agentId)`. */\n releaseTuning(runtime: unknown, agentId: string): void;\n}\n\n/**\n * Folds the real @takk/noeticos module surface into a {@link NoeticOSLike} bound to one\n * runtime, in one line per method.\n *\n * @param noeticosModule - The imported module, or any object carrying `freezeTuning` and\n * `releaseTuning` with the published signatures.\n * @param runtime - The `NoeticOS` runtime the functions act on, opaque to this package.\n * @returns A {@link NoeticOSLike} bound to that runtime.\n *\n * @example\n * ```ts\n * import * as noeticos from '@takk/noeticos';\n * import { noeticosAdapter } from '@takk/racs/integrations';\n *\n * const runtime = noeticos.createNoeticOS();\n * const like = noeticosAdapter(noeticos, runtime);\n * like.freeze('support-agent', 'manual maintenance window');\n * ```\n */\nexport function noeticosAdapter(\n noeticosModule: NoeticosModuleLike,\n runtime: unknown,\n): NoeticOSLike {\n return {\n freeze: (agentId, reason): void => noeticosModule.freezeTuning(runtime, agentId, reason),\n release: (agentId): void => noeticosModule.releaseTuning(runtime, agentId),\n };\n}\n\n/** Per-agent freeze bookkeeping of {@link noeticosBridge}. */\ninterface FreezeState {\n /** Prefix key the lineage drifted to, the baseline stable plans are counted against. */\n prefixKey: string;\n /** Consecutive zero-drift plans observed since the latest drift. */\n stablePlans: number;\n /** The drifting plan's own `'plan.created'` event is still pending and must not count. */\n skipNext: boolean;\n}\n\n/**\n * Freezes @takk/noeticos parameter tuning across prompt-prefix discontinuities.\n *\n * Rationale: parameter tuning must not learn across a prefix discontinuity, the reward\n * landscape moved. 
A drifted prefix changes hit ratio, latency, and cost all at once, so\n * reward samples taken right after the drift would be credited to parameter choices that\n * had nothing to do with them.\n *\n * Behavior: on every `'prefix.drifted'` event carrying an agentId (it flows in from\n * {@link PlanInput.agentId}), the bridge freezes that agent with a reason naming the\n * changed segments, then counts subsequent `'plan.created'` events for the agent with\n * zero drift and releases after `releaseAfterStablePlans` of them (default 3). A new\n * drift during the count re-freezes, which refreshes the audit trail, and restarts the\n * count.\n *\n * Disposal: the returned function only unsubscribes from telemetry. Agents frozen at\n * that moment stay frozen, releasing tuning silently would be a policy decision only the\n * host can take.\n *\n * @example\n * ```ts\n * import * as noeticos from '@takk/noeticos';\n * import { createRACS } from '@takk/racs';\n * import { noeticosAdapter, noeticosBridge } from '@takk/racs/integrations';\n *\n * const racs = createRACS();\n * const runtime = noeticos.createNoeticOS();\n * const dispose = noeticosBridge(racs, noeticosAdapter(noeticos, runtime), {\n * releaseAfterStablePlans: 3,\n * });\n * // Plans carrying an agentId now freeze that agent's tuning on prefix drift:\n * racs.plan({\n * agentId: 'support-agent',\n * provider: 'anthropic',\n * model: 'claude-sonnet-4-5',\n * segments: [{ id: 'system', role: 'system', stability: 'stable', content: SYSTEM }],\n * });\n * ```\n */\nexport function noeticosBridge(\n racs: RACS,\n noeticos: NoeticOSLike,\n options?: { readonly releaseAfterStablePlans?: number },\n): () => void {\n const releaseAfter = options?.releaseAfterStablePlans ?? 
3;\n const frozen = new Map<string, FreezeState>();\n return racs.on((event) => {\n if (event.type === 'prefix.drifted') {\n const agentId = event.report.agentId;\n if (agentId === undefined) {\n return;\n }\n const segments = event.report.changedSegmentIds.join(', ');\n try {\n noeticos.freeze(\n agentId,\n `RACS prefix drift: changed segments [${segments}], ` +\n `${event.report.invalidatedTokens} cached prefix tokens invalidated.`,\n );\n } catch {\n // Telemetry listeners must not throw, see TelemetryListener; a failing sibling\n // call is contained here instead of leaning on the engine's safety net.\n }\n frozen.set(agentId, {\n prefixKey: event.report.prefixKey,\n stablePlans: 0,\n skipNext: true,\n });\n return;\n }\n if (event.type !== 'plan.created') {\n return;\n }\n // The prefix key embeds the agent identity, so matching on it is matching the agent.\n for (const [agentId, state] of frozen) {\n if (state.prefixKey !== event.plan.prefixKey) {\n continue;\n }\n if (state.skipNext) {\n // The drifting plan emits 'prefix.drifted' first and 'plan.created' second; the\n // skip flag keeps that plan from counting toward its own release.\n state.skipNext = false;\n break;\n }\n state.stablePlans += 1;\n if (state.stablePlans >= releaseAfter) {\n frozen.delete(agentId);\n try {\n noeticos.release(agentId);\n } catch {\n // Same containment as freeze above.\n }\n }\n break;\n }\n });\n}\n\n/**\n * The synthetic turn {@link behavioralaiBridge} reports, a narrow structural slice of\n * the sibling's `TurnObservation`, field names verbatim from the published\n * @takk/behavioralai types.\n */\nexport interface CacheTurnObservation {\n /** Behavioral profile the turn belongs to, the bridge's `options.agentId`. */\n readonly agentId: string;\n /** End-to-end latency in milliseconds. Declared for shape parity, never populated. */\n readonly latencyMs?: number;\n /** Cache-write spend of the call in USD, present when pricing covers the model. 
*/\n readonly costUsd?: number;\n /** Always false, a recorded usage is a completed call. */\n readonly error?: boolean;\n /** Keys and counts only: the prefix key (a hash) and the hit flag. */\n readonly metadata?: Readonly<Record<string, string>>;\n}\n\n/**\n * Structural observation surface of a @takk/behavioralai engine. The real\n * `BehavioralAI.observe(turn)` returns a drift report; the bridge has no use for it, so\n * the return type stays `unknown`.\n */\nexport interface BehavioralAILike {\n /** Ingests one observed turn into the behavioral fingerprint. */\n observe(turn: CacheTurnObservation): unknown;\n}\n\n/**\n * Cache-write spend of one usage record in USD, `undefined` when the table does not\n * cover the model or misses a TTL tier the call wrote to: an unpriceable turn is omitted\n * entirely rather than underreported.\n */\nfunction writeCostUsd(usage: CacheUsage, pricing: PricingTable | undefined): number | undefined {\n const card = pricing?.[usage.model];\n if (card === undefined) {\n return undefined;\n }\n const write5m = usage.cacheWriteTokens5m ?? 0;\n const write1h = usage.cacheWriteTokens1h ?? 0;\n if (write5m > 0 && card.cacheWrite5mPerMTok === undefined) {\n return undefined;\n }\n if (write1h > 0 && card.cacheWrite1hPerMTok === undefined) {\n return undefined;\n }\n const spend =\n write5m * (card.cacheWrite5mPerMTok ?? 0) + write1h * (card.cacheWrite1hPerMTok ?? 0);\n return spend / 1_000_000;\n}\n\n/**\n * Turns the cache itself into a behaviorally observed agent of @takk/behavioralai.\n *\n * Behavior: on every `'usage.recorded'` event the bridge reports one synthetic turn\n * under `options.agentId` (default `'racs-cache'`): `error` false, `metadata` carrying\n * the prefix key (when the usage was linked to a plan) and the hit flag as\n * `'true' | 'false'`, and `costUsd` set to the call's cache-write spend when pricing\n * covers the model. 
A healthy cache fingerprints as near-zero write cost and hit\n * `'true'` almost always, so a hit-ratio collapse, a burst of misses paying write\n * premiums, shifts the fingerprint and surfaces as behavioral drift in the sibling.\n *\n * Pricing design, the simplest honest one: the per-turn write cost is computed from the\n * usage record's own write token counts and the table passed in `options.pricing` (same\n * shape as `RACSOptions.pricing`). Reading `racs.stats` instead would only offer\n * cumulative aggregates, and deriving per-turn deltas from those would need shadow state\n * and would misattribute under interleaved recording. Without pricing coverage `costUsd`\n * is omitted, never guessed.\n *\n * Privacy posture: keys and counts only. The bridge forwards the prefix key (a hash), a\n * hit flag, and a USD figure derived from token counts. Prompt content never crosses.\n *\n * @example\n * ```ts\n * import { createBehavioralAI } from '@takk/behavioralai';\n * import { createRACS } from '@takk/racs';\n * import { behavioralaiBridge } from '@takk/racs/integrations';\n *\n * const racs = createRACS();\n * const behavioral = createBehavioralAI();\n * const dispose = behavioralaiBridge(racs, behavioral, {\n * pricing: {\n * 'claude-sonnet-4-5': {\n * inputPerMTok: 3,\n * cacheReadPerMTok: 0.3,\n * cacheWrite5mPerMTok: 3.75,\n * cacheWrite1hPerMTok: 6,\n * },\n * },\n * });\n * racs.record({\n * provider: 'anthropic',\n * model: 'claude-sonnet-4-5',\n * prefixKey: plan.prefixKey,\n * inputTokens: 5000,\n * cacheReadTokens: 4200,\n * });\n * // -> behavioral.observe({ agentId: 'racs-cache', costUsd: 0, error: false,\n * // metadata: { prefixKey: plan.prefixKey, hit: 'true' } })\n * ```\n */\nexport function behavioralaiBridge(\n racs: RACS,\n behavioral: BehavioralAILike,\n options?: { readonly agentId?: string; readonly pricing?: PricingTable },\n): () => void {\n const agentId = options?.agentId ?? 
'racs-cache';\n const pricing = options?.pricing;\n return racs.on((event) => {\n if (event.type !== 'usage.recorded') {\n return;\n }\n const costUsd = writeCostUsd(event.usage, pricing);\n const turn: CacheTurnObservation = {\n agentId,\n error: false,\n metadata: {\n ...(event.usage.prefixKey !== undefined ? { prefixKey: event.usage.prefixKey } : {}),\n hit: event.hit ? 'true' : 'false',\n },\n ...(costUsd !== undefined ? { costUsd } : {}),\n };\n try {\n behavioral.observe(turn);\n } catch {\n // Telemetry listeners must not throw, see TelemetryListener; a failing sibling\n // call is contained here instead of leaning on the engine's safety net.\n }\n });\n}\n\n/** Per-model cache planning surface returned by {@link modelchainBridge}. */\nexport interface ModelchainCachePlanner {\n /**\n * Plans the cache for one routed model: `base` is the model-agnostic plan input,\n * `modelId` is the id the router actually picked. Because the deterministic prefix key\n * includes the model, every routed model gets its own prefix key, fingerprint lineage,\n * and keep-warm schedule, which is exactly right: provider caches are per-model, a\n * prefix cached for one model is cold for every other.\n */\n planForModel(base: Omit<PlanInput, 'model'>, modelId: string): CachePlan;\n}\n\n/**\n * Pure helper for @takk/modelchain routed traffic: per-model cache plans from one shared\n * base input. 
No subscriptions, no state, just {@link RACS.plan} with the routed model\n * spliced in.\n *\n * @example\n * ```ts\n * import { createModelchain } from '@takk/modelchain';\n * import { createRACS } from '@takk/racs';\n * import { modelchainBridge } from '@takk/racs/integrations';\n *\n * const racs = createRACS();\n * const planner = modelchainBridge(racs);\n * const router = createModelchain({ models });\n *\n * const response = await router.complete({ prompt: userTurn, system: SYSTEM });\n * // CompletionResponse.modelId names the model the router picked; plan its cache:\n * const plan = planner.planForModel(\n * {\n * provider: 'openai',\n * segments: [\n * { id: 'system', role: 'system', stability: 'stable', content: SYSTEM },\n * { id: 'turn', role: 'dynamic', stability: 'volatile', content: userTurn },\n * ],\n * reuse: { intervalSeconds: 45 },\n * },\n * response.modelId,\n * );\n * ```\n */\nexport function modelchainBridge(racs: RACS): ModelchainCachePlanner {\n return {\n planForModel(base: Omit<PlanInput, 'model'>, modelId: string): CachePlan {\n return racs.plan({ ...base, model: modelId });\n },\n };\n}\n\n/**\n * The keymesh telemetry events that signal credentials in flux, names verbatim from the\n * published @takk/keymesh event union. The bridge subscribes to `'key.rotated'` and\n * `'circuit.open'`; `'all.exhausted'` is part of the structural surface so hosts can\n * hang their own handlers off the same {@link KeymeshLike} object.\n */\nexport type KeymeshCredentialEventName = 'key.rotated' | 'circuit.open' | 'all.exhausted';\n\n/**\n * Structural on/off pair of a keymesh client, the `KeymeshExtras` surface every\n * `createKeymesh` client carries. The real methods are generic over the full event\n * union; this narrowed method pair is satisfied by them structurally.\n */\nexport interface KeymeshLike {\n /** Subscribes a handler to one telemetry event. 
*/\n on(event: KeymeshCredentialEventName, handler: (event: unknown) => void): void;\n /** Unsubscribes a previously subscribed handler. */\n off(event: KeymeshCredentialEventName, handler: (event: unknown) => void): void;\n}\n\n/**\n * Invalidates provider-scoped cache state when @takk/keymesh signals credentials in\n * flux: on `'key.rotated'` and on `'circuit.open'` the bridge calls\n * `racs.invalidate({ provider })` once per provider in `options.providers`, clearing\n * fingerprints, keep-warm schedules, and resource registry entries, with one\n * `'resource.action'` delete event per dropped resource for the host to mirror.\n *\n * Why rotation invalidates: cache entries and cachedContent handles may be scoped to the\n * credential or workspace that created them. Gemini `cachedContents` especially, a\n * resource created under a rotated or disabled key may be unreachable or orphaned, and\n * silently still billing storage. Routing-key and breakpoint caches can land in a\n * different account-side namespace under the new credential. Re-planning from scratch\n * costs one write premium; planning against a dead handle costs failed calls. 
The same\n * logic covers `'circuit.open'`: a credential in cooldown leaves its provider-side\n * resources unrefreshable, so they expire mid-schedule anyway.\n *\n * @example\n * ```ts\n * import { createKeymesh } from '@takk/keymesh';\n * import { createRACS } from '@takk/racs';\n * import { keymeshBridge } from '@takk/racs/integrations';\n *\n * const racs = createRACS();\n * const gemini = createKeymesh({ provider: geminiAdapter, keys: geminiKeys });\n * const dispose = keymeshBridge(racs, gemini, { providers: ['google'] });\n * // From here a rotation or an opened circuit clears every google-attributed prefix\n * // and the host re-plans, recreating provider resources under the new credential.\n * ```\n */\nexport function keymeshBridge(\n racs: RACS,\n keymesh: KeymeshLike,\n options: { readonly providers: readonly ProviderId[] },\n): () => void {\n const onCredentialChange = (): void => {\n for (const provider of options.providers) {\n racs.invalidate({ provider });\n }\n };\n keymesh.on('key.rotated', onCredentialChange);\n keymesh.on('circuit.open', onCredentialChange);\n return (): void => {\n keymesh.off('key.rotated', onCredentialChange);\n keymesh.off('circuit.open', onCredentialChange);\n };\n}\n"]}
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
// src/otel/index.ts
|
|
4
|
+
// Maps a lower-cased `gen_ai.system` span attribute value to the provider id used in the
// usage record. Several spellings map to the same provider.
var GEN_AI_SYSTEM_TO_PROVIDER = {
  "anthropic": "anthropic",
  "openai": "openai",
  "gcp.gemini": "google",
  "gemini": "google",
  "google": "google",
  "aws.bedrock": "bedrock",
  "groq": "groq",
  "deepseek": "deepseek",
  "mistral_ai": "mistral",
  "mistral": "mistral",
  "xai": "xai"
};

// The one cache-read attribute whose count usageFromSpan adds on top of
// `gen_ai.usage.input_tokens`; counts from the other read attributes are not added.
var EXCLUSIVE_READ_ATTRIBUTE = "gen_ai.usage.cache_read_input_tokens";

// Span attributes that may carry the cache-read token count, in lookup priority order.
var CACHE_READ_ATTRIBUTES = [
  EXCLUSIVE_READ_ATTRIBUTE,
  "gen_ai.usage.cached_tokens",
  "gen_ai.usage.input_cached_tokens"
];

// Span attributes that may carry the cache-write token count, in lookup priority order.
var CACHE_WRITE_ATTRIBUTES = [
  "gen_ai.usage.cache_creation_input_tokens",
  "gen_ai.usage.cache_write_input_tokens"
];
|
|
27
|
+
/**
 * Tells whether `value` is a non-null object (arrays included), i.e. safe to index by key.
 *
 * @param value - Anything.
 * @returns `true` for objects other than `null`, `false` otherwise.
 */
function isRecord(value) {
  if (value === null) {
    return false;
  }
  return typeof value === "object";
}
|
|
30
|
+
/**
 * Normalizes a span attribute value into a token count.
 *
 * Accepts finite, non-negative numbers and strings made only of decimal digits. A digit
 * string too long to fit a finite number is rejected, so the string branch can never
 * return `Infinity` — the same guarantee the numeric branch already gives.
 *
 * @param value - Raw attribute value.
 * @returns The count, or `undefined` when the value is not a usable count.
 */
function countOf(value) {
  if (typeof value === "number") {
    return Number.isFinite(value) && value >= 0 ? value : undefined;
  }
  if (typeof value === "string" && /^\d+$/.test(value)) {
    const parsed = Number(value);
    // Number("999…9") overflows to Infinity past ~309 digits; treat that as unusable.
    return Number.isFinite(parsed) ? parsed : undefined;
  }
  return undefined;
}
|
|
39
|
+
/**
 * Returns `value` when it is a non-empty string.
 *
 * @param value - Raw attribute value.
 * @returns The string itself, or `undefined` for empty strings and non-strings.
 */
function stringOf(value) {
  if (typeof value !== "string" || value === "") {
    return undefined;
  }
  return value;
}
|
|
42
|
+
/**
 * Converts a span end time in nanoseconds to whole milliseconds.
 *
 * Accepts a finite positive number or a string of decimal digits (divided with BigInt so
 * large nanosecond values keep their precision). Both branches now share one rule: a
 * result that truncates to 0 ms, or that is not finite, counts as "no timestamp".
 * Previously the numeric branch returned `0` for sub-millisecond inputs while the string
 * branch returned `undefined`.
 *
 * @param value - Raw `endTimeUnixNano` value.
 * @returns Milliseconds (a positive integer), or `undefined` when no usable time exists.
 */
function endMillisOf(value) {
  let millis;
  if (typeof value === "number" && Number.isFinite(value) && value > 0) {
    millis = Math.floor(value / 1e6);
  } else if (typeof value === "string" && /^\d+$/.test(value)) {
    millis = Number(BigInt(value) / 1000000n);
  } else {
    return undefined;
  }
  return millis > 0 && Number.isFinite(millis) ? millis : undefined;
}
|
|
52
|
+
/**
 * Finds the first attribute, in the order given by `names`, whose value `countOf`
 * accepts as a count.
 *
 * @param attributes - Span attributes, indexed by name.
 * @param names - Candidate attribute names in priority order.
 * @returns `{ name, count }` for the first usable attribute, or `undefined` when none is.
 */
function firstCount(attributes, names) {
  let found;
  for (const name of names) {
    const count = countOf(attributes[name]);
    if (count !== undefined) {
      found = { name, count };
      break;
    }
  }
  return found;
}
|
|
61
|
+
/**
 * Derives a cache usage record from a span's `gen_ai.*` attributes.
 *
 * Provider: the lower-cased `gen_ai.system` value looked up in
 * `GEN_AI_SYSTEM_TO_PROVIDER`, else `fallback.provider`. The lookup checks own
 * properties only, so a value such as `"constructor"` or `"__proto__"` no longer
 * resolves to an inherited `Object.prototype` member and is treated as unmapped.
 * Model: `gen_ai.request.model`, else `gen_ai.response.model`, else `fallback.model`.
 *
 * @param span - Object with optional `attributes` and `endTimeUnixNano`.
 * @param fallback - Optional `{ provider, model }` used when the span names neither.
 * @returns `{ provider, model, inputTokens, cacheReadTokens }` plus `cacheWriteTokens5m`
 *   and `timestamp` when available, or `undefined` when provider or model is unknown.
 */
function usageFromSpan(span, fallback) {
  const attributes = isRecord(span.attributes) ? span.attributes : {};

  const system = stringOf(attributes["gen_ai.system"]);
  const systemKey = system === undefined ? undefined : system.toLowerCase();
  const mapped =
    systemKey !== undefined &&
    Object.prototype.hasOwnProperty.call(GEN_AI_SYSTEM_TO_PROVIDER, systemKey)
      ? GEN_AI_SYSTEM_TO_PROVIDER[systemKey]
      : undefined;
  const provider = mapped ?? fallback?.provider;
  if (provider === undefined) {
    return undefined;
  }

  const model =
    stringOf(attributes["gen_ai.request.model"]) ??
    stringOf(attributes["gen_ai.response.model"]) ??
    fallback?.model;
  if (model === undefined) {
    return undefined;
  }

  const rawInputTokens = countOf(attributes["gen_ai.usage.input_tokens"]) ?? 0;
  const read = firstCount(attributes, CACHE_READ_ATTRIBUTES);
  const write = firstCount(attributes, CACHE_WRITE_ATTRIBUTES);
  const timestamp = endMillisOf(span.endTimeUnixNano);

  // Only a read count taken from EXCLUSIVE_READ_ATTRIBUTE is added to the input total;
  // the write count is always added. NOTE(review): presumably the other read attributes
  // are already included in `gen_ai.usage.input_tokens` — confirm against the emitters.
  const exclusiveRead = read !== undefined && read.name === EXCLUSIVE_READ_ATTRIBUTE;
  const inputTokens = rawInputTokens + (exclusiveRead ? read.count : 0) + (write?.count ?? 0);
  const cacheReadTokens = read?.count ?? 0;
  // The span attributes carry a single write count; it is reported under the 5m field.
  const cacheWriteTokens5m = write?.count;

  return {
    provider,
    model,
    inputTokens,
    cacheReadTokens,
    ...(cacheWriteTokens5m !== undefined ? { cacheWriteTokens5m } : {}),
    ...(timestamp !== undefined ? { timestamp } : {})
  };
}
|
|
90
|
+
|
|
91
|
+
exports.usageFromSpan = usageFromSpan;
|
|
92
|
+
//# sourceMappingURL=index.cjs.map
|
|
93
|
+
//# sourceMappingURL=index.cjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../../src/otel/index.ts"],"names":[],"mappings":";;;AA4CA,IAAM,yBAAA,GAAkE;AAAA,EACtE,SAAA,EAAW,WAAA;AAAA,EACX,MAAA,EAAQ,QAAA;AAAA,EACR,YAAA,EAAc,QAAA;AAAA,EACd,MAAA,EAAQ,QAAA;AAAA,EACR,MAAA,EAAQ,QAAA;AAAA,EACR,aAAA,EAAe,SAAA;AAAA,EACf,IAAA,EAAM,MAAA;AAAA,EACN,QAAA,EAAU,UAAA;AAAA,EACV,UAAA,EAAY,SAAA;AAAA,EACZ,OAAA,EAAS,SAAA;AAAA,EACT,GAAA,EAAK;AACP,CAAA;AAqBA,IAAM,qBAAA,GAA2C;AAAA,EAC/C,sCAAA;AAAA,EACA,4BAAA;AAAA,EACA;AACF,CAAA;AAMA,IAAM,wBAAA,GAA2B,sCAAA;AAoBjC,IAAM,sBAAA,GAA4C;AAAA,EAChD,0CAAA;AAAA,EACA;AACF,CAAA;AAGA,SAAS,SAAS,KAAA,EAAkD;AAClE,EAAA,OAAO,OAAO,KAAA,KAAU,QAAA,IAAY,KAAA,KAAU,IAAA;AAChD;AAMA,SAAS,QAAQ,KAAA,EAAoC;AACnD,EAAA,IAAI,OAAO,UAAU,QAAA,IAAY,MAAA,CAAO,SAAS,KAAK,CAAA,IAAK,SAAS,CAAA,EAAG;AACrE,IAAA,OAAO,KAAA;AAAA,EACT;AACA,EAAA,IAAI,OAAO,KAAA,KAAU,QAAA,IAAY,OAAA,CAAQ,IAAA,CAAK,KAAK,CAAA,EAAG;AACpD,IAAA,OAAO,OAAO,KAAK,CAAA;AAAA,EACrB;AACA,EAAA,OAAO,MAAA;AACT;AAGA,SAAS,SAAS,KAAA,EAAoC;AACpD,EAAA,OAAO,OAAO,KAAA,KAAU,QAAA,IAAY,KAAA,KAAU,KAAK,KAAA,GAAQ,MAAA;AAC7D;AAQA,SAAS,YAAY,KAAA,EAAwD;AAC3E,EAAA,IAAI,OAAO,UAAU,QAAA,IAAY,MAAA,CAAO,SAAS,KAAK,CAAA,IAAK,QAAQ,CAAA,EAAG;AACpE,IAAA,OAAO,IAAA,CAAK,KAAA,CAAM,KAAA,GAAQ,GAAS,CAAA;AAAA,EACrC;AACA,EAAA,IAAI,OAAO,KAAA,KAAU,QAAA,IAAY,OAAA,CAAQ,IAAA,CAAK,KAAK,CAAA,EAAG;AACpD,IAAA,MAAM,MAAA,GAAS,MAAA,CAAO,MAAA,CAAO,KAAK,IAAI,QAAU,CAAA;AAChD,IAAA,OAAO,MAAA,GAAS,IAAI,MAAA,GAAS,MAAA;AAAA,EAC/B;AACA,EAAA,OAAO,MAAA;AACT;AAOA,SAAS,UAAA,CACP,YACA,KAAA,EAC+D;AAC/D,EAAA,KAAA,MAAW,QAAQ,KAAA,EAAO;AACxB,IAAA,MAAM,KAAA,GAAQ,OAAA,CAAQ,UAAA,CAAW,IAAI,CAAC,CAAA;AACtC,IAAA,IAAI,UAAU,MAAA,EAAW;AACvB,MAAA,OAAO,EAAE,MAAM,KAAA,EAAM;AAAA,IACvB;AAAA,EACF;AACA,EAAA,OAAO,MAAA;AACT;AAgEO,SAAS,aAAA,CACd,MACA,QAAA,EACwB;AACxB,EAAA,MAAM,aAAgD,QAAA,CAAS,IAAA,CAAK,UAAU,CAAA,GAC1E,IAAA,CAAK,aACL,EAAC;AAEL,EAAA,MAAM,MAAA,GAAS,QAAA,CAAS,UAAA,CAAW,eAAe,CAAC,CAAA;AACnD,EAAA,MAAM,SAAS,MAAA,KAAW,MAAA,GAAY,SAAY,yBAAA,CAA0B,MAAA,CAAO,aAAa,CAAA;AAChG,EAAA,MAAM,QAAA,GAAW,UAAU,QAAA,EAAU,QAAA;AACrC,EAAA,IAAI,aAAa,MAAA,EAAW;AAC1B,IAAA
,OAAO,MAAA;AAAA,EACT;AAEA,EAAA,MAAM,KAAA,GACJ,QAAA,CAAS,UAAA,CAAW,sBAAsB,CAAC,CAAA,IAC3C,QAAA,CAAS,UAAA,CAAW,uBAAuB,CAAC,CAAA,IAC5C,QAAA,EAAU,KAAA;AACZ,EAAA,IAAI,UAAU,MAAA,EAAW;AACvB,IAAA,OAAO,MAAA;AAAA,EACT;AAEA,EAAA,MAAM,cAAA,GAAiB,OAAA,CAAQ,UAAA,CAAW,2BAA2B,CAAC,CAAA,IAAK,CAAA;AAC3E,EAAA,MAAM,IAAA,GAAO,UAAA,CAAW,UAAA,EAAY,qBAAqB,CAAA;AACzD,EAAA,MAAM,KAAA,GAAQ,UAAA,CAAW,UAAA,EAAY,sBAAsB,CAAA;AAC3D,EAAA,MAAM,SAAA,GAAY,WAAA,CAAY,IAAA,CAAK,eAAe,CAAA;AAOlD,EAAA,MAAM,aAAA,GAAgB,IAAA,KAAS,MAAA,IAAa,IAAA,CAAK,IAAA,KAAS,wBAAA;AAC1D,EAAA,MAAM,cAAc,cAAA,IAAkB,aAAA,GAAgB,KAAK,KAAA,GAAQ,CAAA,CAAA,IAAM,OAAO,KAAA,IAAS,CAAA,CAAA;AACzF,EAAA,MAAM,eAAA,GAAkB,MAAM,KAAA,IAAS,CAAA;AACvC,EAAA,MAAM,qBAAqB,KAAA,EAAO,KAAA;AAElC,EAAA,OAAO;AAAA,IACL,QAAA;AAAA,IACA,KAAA;AAAA,IACA,WAAA;AAAA,IACA,eAAA;AAAA,IACA,GAAI,kBAAA,KAAuB,MAAA,GAAY,EAAE,kBAAA,KAAuB,EAAC;AAAA,IACjE,GAAI,SAAA,KAAc,MAAA,GAAY,EAAE,SAAA,KAAc;AAAC,GACjD;AACF","file":"index.cjs","sourcesContent":["/**\n * OpenTelemetry GenAI ingestion for RACS (Remote Agent Context Store): turns one finished\n * GenAI span into one normalized {@link CacheUsage} record for cache analytics.\n *\n * Structural by design. This module declares its own {@link GenAISpanLike} shape instead\n * of importing OpenTelemetry types, so the package keeps its zero-runtime-dependency\n * invariant: any span object that structurally matches works, whether it comes from the\n * OpenTelemetry JS SDK, an OTLP JSON payload, a collector processor, hermes-otel spans, or\n * the spans Vercel AI SDK telemetry emits with `experimental_telemetry` enabled.\n *\n * Privacy contract: {@link usageFromSpan} reads only the provider attribute, the model\n * attributes, the usage counters, and the span end time. 
It never reads prompt or\n * completion content attributes (`gen_ai.prompt`, `gen_ai.completion`, event bodies, or\n * any other content-bearing field), so wiring it into a span pipeline leaks nothing.\n *\n * @packageDocumentation\n */\n\nimport type { CacheUsage, ProviderId } from '../types.js';\n\n/**\n * Minimal structural shape of one GenAI span, the subset {@link usageFromSpan} reads.\n *\n * Matches both live SDK spans (numeric attribute values) and OTLP JSON spans, where int64\n * values arrive as decimal strings, which is why the time fields and the numeric attribute\n * reads accept strings.\n */\nexport interface GenAISpanLike {\n /** Span name, unused by ingestion, present so real spans match without widening. */\n readonly name?: string;\n /** Flat attribute bag, GenAI semantic-convention keys, values of any wire type. */\n readonly attributes?: Readonly<Record<string, unknown>>;\n /** Span start in nanoseconds since the Unix epoch, number or OTLP decimal string. */\n readonly startTimeUnixNano?: number | string;\n /** Span end in nanoseconds since the Unix epoch, number or OTLP decimal string. */\n readonly endTimeUnixNano?: number | string;\n}\n\n/**\n * Mapping from `gen_ai.system` attribute values to RACS provider ids, per the\n * OpenTelemetry GenAI semantic conventions as of June 2026. Instrumentations disagree on\n * the Google and Mistral spellings, so every spelling seen in the wild maps here.\n * Lookup is case-insensitive, values are lowercased before the table is consulted.\n */\nconst GEN_AI_SYSTEM_TO_PROVIDER: Readonly<Record<string, ProviderId>> = {\n anthropic: 'anthropic',\n openai: 'openai',\n 'gcp.gemini': 'google',\n gemini: 'google',\n google: 'google',\n 'aws.bedrock': 'bedrock',\n groq: 'groq',\n deepseek: 'deepseek',\n mistral_ai: 'mistral',\n mistral: 'mistral',\n xai: 'xai',\n};\n\n/**\n * Token-count attribute names for cached reads, in lookup order. Instrumentations\n * disagree, so the first numeric value among them wins. 
Each spelling also pins the\n * input-token convention of its source, which decides the all-in normalization in\n * {@link usageFromSpan}:\n *\n * - `'gen_ai.usage.cache_read_input_tokens'`: Anthropic-flavored instrumentations,\n * including hermes-otel, mirror the raw `usage.cache_read_input_tokens` response field\n * under this name. EXCLUSIVE convention: the matching `gen_ai.usage.input_tokens`\n * mirrors raw Anthropic `input_tokens`, which excludes cache reads and cache writes, so\n * the read count is ADDED to reach the all-in total.\n * - `'gen_ai.usage.cached_tokens'`: OpenAI-flavored instrumentations in the OpenLLMetry\n * lineage mirror `prompt_tokens_details.cached_tokens` under this name. INCLUSIVE\n * convention: OpenAI `cached_tokens` is a subset of `prompt_tokens`, so nothing is\n * added.\n * - `'gen_ai.usage.input_cached_tokens'`: the newer semantic-convention draft naming some\n * collectors and SDK instrumentations have adopted, descending from the OpenAI detail\n * shape. INCLUSIVE convention, nothing is added.\n */\nconst CACHE_READ_ATTRIBUTES: readonly string[] = [\n 'gen_ai.usage.cache_read_input_tokens',\n 'gen_ai.usage.cached_tokens',\n 'gen_ai.usage.input_cached_tokens',\n];\n\n/**\n * The one read spelling whose source reports EXCLUSIVE input counts, see\n * {@link CACHE_READ_ATTRIBUTES} for the per-attribute convention catalog.\n */\nconst EXCLUSIVE_READ_ATTRIBUTE = 'gen_ai.usage.cache_read_input_tokens';\n\n/**\n * Token-count attribute names for cache writes, in lookup order:\n *\n * - `'gen_ai.usage.cache_creation_input_tokens'`: Anthropic-flavored instrumentations\n * mirror the raw `usage.cache_creation_input_tokens` response field. EXCLUSIVE\n * convention: raw Anthropic `input_tokens` excludes written tokens, so the write count\n * is ADDED to reach the all-in input total.\n * - `'gen_ai.usage.cache_write_input_tokens'`: the generic spelling other\n * instrumentations use for the same quantity. 
Write counters exist only on\n * breakpoint-family providers whose raw usage excludes them from the input count, so\n * this spelling follows the same EXCLUSIVE convention and is ADDED likewise.\n *\n * TTL attribution convention: the GenAI semantic conventions carry no TTL split, a write\n * is just a write, so every written token is attributed to {@link CacheUsage.cacheWriteTokens5m},\n * the 5-minute tier, the conservative default because it is the cheaper write premium.\n * Hosts that know a span wrote a 1-hour span should move the count to\n * `cacheWriteTokens1h` before recording.\n */\nconst CACHE_WRITE_ATTRIBUTES: readonly string[] = [\n 'gen_ai.usage.cache_creation_input_tokens',\n 'gen_ai.usage.cache_write_input_tokens',\n];\n\n/** True for any non-null object, the first gate of every structural check. */\nfunction isRecord(value: unknown): value is Record<string, unknown> {\n return typeof value === 'object' && value !== null;\n}\n\n/**\n * Reads one attribute as a finite non-negative token count. Accepts plain numbers and\n * OTLP JSON int64 values, which arrive as decimal strings, anything else is absent.\n */\nfunction countOf(value: unknown): number | undefined {\n if (typeof value === 'number' && Number.isFinite(value) && value >= 0) {\n return value;\n }\n if (typeof value === 'string' && /^\\d+$/.test(value)) {\n return Number(value);\n }\n return undefined;\n}\n\n/** Reads one attribute as a non-empty string, anything else is absent. */\nfunction stringOf(value: unknown): string | undefined {\n return typeof value === 'string' && value !== '' ? value : undefined;\n}\n\n/**\n * Converts span end nanoseconds to milliseconds since the Unix epoch. OTLP decimal\n * strings go through BigInt so values above 2^53 nanoseconds, every realistic wall-clock\n * time, lose nothing before the division. 
Zero and negatives mean \"not set\" in OTLP and\n * come back absent.\n */\nfunction endMillisOf(value: number | string | undefined): number | undefined {\n if (typeof value === 'number' && Number.isFinite(value) && value > 0) {\n return Math.floor(value / 1_000_000);\n }\n if (typeof value === 'string' && /^\\d+$/.test(value)) {\n const millis = Number(BigInt(value) / 1_000_000n);\n return millis > 0 ? millis : undefined;\n }\n return undefined;\n}\n\n/**\n * First numeric value among the named attributes, in order, with the attribute name that\n * supplied it, absent when none is numeric. The name matters: it pins which input-token\n * convention the source follows, see {@link CACHE_READ_ATTRIBUTES}.\n */\nfunction firstCount(\n attributes: Readonly<Record<string, unknown>>,\n names: readonly string[],\n): { readonly name: string; readonly count: number } | undefined {\n for (const name of names) {\n const count = countOf(attributes[name]);\n if (count !== undefined) {\n return { name, count };\n }\n }\n return undefined;\n}\n\n/**\n * Normalizes one finished GenAI span into one {@link CacheUsage} record, or `undefined`\n * when the span carries too little identity to account for.\n *\n * Field provenance, all reads structural and defensive:\n * - `provider`: inferred from the `gen_ai.system` attribute through the spelling table\n * ({@link GEN_AI_SYSTEM_TO_PROVIDER}, case-insensitive). Unknown or missing spellings\n * fall back to `fallback.provider`; with neither, the function returns `undefined`.\n * - `model`: `gen_ai.request.model`, then `gen_ai.response.model`, then `fallback.model`.\n * A model is required for ledger attribution, without one the function returns\n * `undefined`.\n * - `inputTokens`: `gen_ai.usage.input_tokens` (zero when absent), normalized to the\n * ALL-IN convention {@link CacheUsage.inputTokens} documents. 
The matched cache\n * attribute spellings decide the adjustment: an Anthropic-flavored read count\n * (`cache_read_input_tokens`, EXCLUSIVE) is added, an OpenAI-flavored read count\n * (`cached_tokens`, `input_cached_tokens`, INCLUSIVE subsets of `input_tokens`) is not,\n * and any write count is added because write counters only exist on providers whose raw\n * input count excludes them. Per-attribute conventions are documented on\n * {@link CACHE_READ_ATTRIBUTES} and {@link CACHE_WRITE_ATTRIBUTES}.\n * - `cacheReadTokens`: the first numeric among the read spellings, see\n * {@link CACHE_READ_ATTRIBUTES} for why three names exist. Zero when none is present.\n * - `cacheWriteTokens5m`: the first numeric among the write spellings, see\n * {@link CACHE_WRITE_ATTRIBUTES} for the 5-minute attribution convention. Omitted when\n * absent.\n * - `timestamp`: the span end time converted from nanoseconds, omitted when the span\n * carries none, in which case {@link RACS.record} stamps it with the engine clock.\n *\n * `prefixKey` is never set here: the GenAI conventions carry no prefix identity, so spans\n * aggregate into ledger totals. Hosts that track the plan per call can spread one in\n * before recording: `racs.record({ ...usage, prefixKey: plan.prefixKey })`.\n *\n * Privacy: only provider, model, usage counters, and the end timestamp are read, never\n * `gen_ai.prompt`, `gen_ai.completion`, or any other content attribute.\n *\n * @param span - Any structurally matching span, see {@link GenAISpanLike}.\n * @param fallback - Provider and model to use when the span attributes lack them, the\n * usual case for telemetry pipelines that already know which client they instrument.\n * @returns A normalized usage record ready for {@link RACS.record}, or `undefined` when\n * neither the span nor the fallback yields a provider and a model.\n *\n * @example\n * Wiring span ingestion into a RACS engine. 
The same hook shape works for hermes-otel\n * spans and for the spans Vercel AI SDK telemetry emits with `experimental_telemetry`:\n * ```ts\n * import { createRACS } from '@takk/racs';\n * import { usageFromSpan, type GenAISpanLike } from '@takk/racs/otel';\n *\n * const racs = createRACS();\n *\n * // Inside a span processor's onEnd, an OTLP collector hook, or wherever finished\n * // spans surface in the host:\n * function onSpanEnd(span: GenAISpanLike): void {\n * const usage = usageFromSpan(span, { provider: 'anthropic' });\n * if (usage !== undefined) {\n * racs.record(usage);\n * }\n * }\n *\n * // Later, the same analytics as any other ingestion path:\n * const { hitRatio, savedUsd } = racs.stats();\n * ```\n */\nexport function usageFromSpan(\n span: GenAISpanLike,\n fallback?: { provider?: ProviderId; model?: string },\n): CacheUsage | undefined {\n const attributes: Readonly<Record<string, unknown>> = isRecord(span.attributes)\n ? span.attributes\n : {};\n\n const system = stringOf(attributes['gen_ai.system']);\n const mapped = system === undefined ? undefined : GEN_AI_SYSTEM_TO_PROVIDER[system.toLowerCase()];\n const provider = mapped ?? fallback?.provider;\n if (provider === undefined) {\n return undefined;\n }\n\n const model =\n stringOf(attributes['gen_ai.request.model']) ??\n stringOf(attributes['gen_ai.response.model']) ??\n fallback?.model;\n if (model === undefined) {\n return undefined;\n }\n\n const rawInputTokens = countOf(attributes['gen_ai.usage.input_tokens']) ?? 0;\n const read = firstCount(attributes, CACHE_READ_ATTRIBUTES);\n const write = firstCount(attributes, CACHE_WRITE_ATTRIBUTES);\n const timestamp = endMillisOf(span.endTimeUnixNano);\n\n // All-in normalization, see CacheUsage.inputTokens: the Anthropic-flavored read\n // spelling marks an exclusive source whose input count omits cached reads, so the read\n // count is added back; the OpenAI-flavored spellings are inclusive subsets and add\n // nothing. 
Write counts are always added, both write spellings mirror raw counts that\n // their source excludes from the input count.\n const exclusiveRead = read !== undefined && read.name === EXCLUSIVE_READ_ATTRIBUTE;\n const inputTokens = rawInputTokens + (exclusiveRead ? read.count : 0) + (write?.count ?? 0);\n const cacheReadTokens = read?.count ?? 0;\n const cacheWriteTokens5m = write?.count;\n\n return {\n provider,\n model,\n inputTokens,\n cacheReadTokens,\n ...(cacheWriteTokens5m !== undefined ? { cacheWriteTokens5m } : {}),\n ...(timestamp !== undefined ? { timestamp } : {}),\n };\n}\n"]}
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
import { j as ProviderId, c as CacheUsage } from '../types-DQ7-9sk3.cjs';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* OpenTelemetry GenAI ingestion for RACS (Remote Agent Context Store): turns one finished
|
|
5
|
+
* GenAI span into one normalized {@link CacheUsage} record for cache analytics.
|
|
6
|
+
*
|
|
7
|
+
* Structural by design. This module declares its own {@link GenAISpanLike} shape instead
|
|
8
|
+
* of importing OpenTelemetry types, so the package keeps its zero-runtime-dependency
|
|
9
|
+
* invariant: any span object that structurally matches works, whether it comes from the
|
|
10
|
+
* OpenTelemetry JS SDK, an OTLP JSON payload, a collector processor, hermes-otel spans, or
|
|
11
|
+
* the spans Vercel AI SDK telemetry emits with `experimental_telemetry` enabled.
|
|
12
|
+
*
|
|
13
|
+
* Privacy contract: {@link usageFromSpan} reads only the provider attribute, the model
|
|
14
|
+
* attributes, the usage counters, and the span end time. It never reads prompt or
|
|
15
|
+
* completion content attributes (`gen_ai.prompt`, `gen_ai.completion`, event bodies, or
|
|
16
|
+
* any other content-bearing field), so wiring it into a span pipeline leaks nothing.
|
|
17
|
+
*
|
|
18
|
+
* @packageDocumentation
|
|
19
|
+
*/
|
|
20
|
+
|
|
21
|
+
/**
|
|
22
|
+
* Minimal structural shape of one GenAI span, the subset {@link usageFromSpan} reads.
|
|
23
|
+
*
|
|
24
|
+
* Matches both live SDK spans (numeric attribute values) and OTLP JSON spans, where int64
|
|
25
|
+
* values arrive as decimal strings, which is why the time fields and the numeric attribute
|
|
26
|
+
* reads accept strings.
|
|
27
|
+
*/
|
|
28
|
+
interface GenAISpanLike {
    /** Span name, unused by ingestion, present so real spans match without widening. */
    readonly name?: string;
    /**
     * Flat attribute bag, GenAI semantic-convention keys, values of any wire type.
     * When this is missing or not an object, ingestion proceeds as if the bag were empty.
     */
    readonly attributes?: Readonly<Record<string, unknown>>;
    /**
     * Span start in nanoseconds since the Unix epoch, number or OTLP decimal string.
     * Declared for structural compatibility; ingestion derives the timestamp from the
     * end time only.
     */
    readonly startTimeUnixNano?: number | string;
    /**
     * Span end in nanoseconds since the Unix epoch, number or OTLP decimal string.
     * Converted to the record's millisecond `timestamp`; a value that is absent, zero,
     * negative, or not a number / all-digit string leaves `timestamp` unset.
     */
    readonly endTimeUnixNano?: number | string;
}
|
|
38
|
+
/**
|
|
39
|
+
* Normalizes one finished GenAI span into one {@link CacheUsage} record, or `undefined`
|
|
40
|
+
* when the span carries too little identity to account for.
|
|
41
|
+
*
|
|
42
|
+
* Field provenance, all reads structural and defensive:
|
|
43
|
+
* - `provider`: inferred from the `gen_ai.system` attribute through the spelling table
|
|
44
|
+
* ({@link GEN_AI_SYSTEM_TO_PROVIDER}, case-insensitive). Unknown or missing spellings
|
|
45
|
+
* fall back to `fallback.provider`; with neither, the function returns `undefined`.
|
|
46
|
+
* - `model`: `gen_ai.request.model`, then `gen_ai.response.model`, then `fallback.model`.
|
|
47
|
+
* A model is required for ledger attribution, without one the function returns
|
|
48
|
+
* `undefined`.
|
|
49
|
+
* - `inputTokens`: `gen_ai.usage.input_tokens` (zero when absent), normalized to the
|
|
50
|
+
* ALL-IN convention {@link CacheUsage.inputTokens} documents. The matched cache
|
|
51
|
+
* attribute spellings decide the adjustment: an Anthropic-flavored read count
|
|
52
|
+
* (`cache_read_input_tokens`, EXCLUSIVE) is added, an OpenAI-flavored read count
|
|
53
|
+
* (`cached_tokens`, `input_cached_tokens`, INCLUSIVE subsets of `input_tokens`) is not,
|
|
54
|
+
* and any write count is added because write counters only exist on providers whose raw
|
|
55
|
+
* input count excludes them. Per-attribute conventions are documented on
|
|
56
|
+
* {@link CACHE_READ_ATTRIBUTES} and {@link CACHE_WRITE_ATTRIBUTES}.
|
|
57
|
+
* - `cacheReadTokens`: the first numeric among the read spellings, see
|
|
58
|
+
* {@link CACHE_READ_ATTRIBUTES} for why three names exist. Zero when none is present.
|
|
59
|
+
* - `cacheWriteTokens5m`: the first numeric among the write spellings, see
|
|
60
|
+
* {@link CACHE_WRITE_ATTRIBUTES} for the 5-minute attribution convention. Omitted when
|
|
61
|
+
* absent.
|
|
62
|
+
* - `timestamp`: the span end time converted from nanoseconds, omitted when the span
|
|
63
|
+
* carries none, in which case {@link RACS.record} stamps it with the engine clock.
|
|
64
|
+
*
|
|
65
|
+
* `prefixKey` is never set here: the GenAI conventions carry no prefix identity, so spans
|
|
66
|
+
* aggregate into ledger totals. Hosts that track the plan per call can spread one in
|
|
67
|
+
* before recording: `racs.record({ ...usage, prefixKey: plan.prefixKey })`.
|
|
68
|
+
*
|
|
69
|
+
* Privacy: only provider, model, usage counters, and the end timestamp are read, never
|
|
70
|
+
* `gen_ai.prompt`, `gen_ai.completion`, or any other content attribute.
|
|
71
|
+
*
|
|
72
|
+
* @param span - Any structurally matching span, see {@link GenAISpanLike}.
|
|
73
|
+
* @param fallback - Provider and model to use when the span attributes lack them, the
|
|
74
|
+
* usual case for telemetry pipelines that already know which client they instrument.
|
|
75
|
+
* @returns A normalized usage record ready for {@link RACS.record}, or `undefined` when
|
|
76
|
+
* neither the span nor the fallback yields a provider and a model.
|
|
77
|
+
*
|
|
78
|
+
* @example
|
|
79
|
+
* Wiring span ingestion into a RACS engine. The same hook shape works for hermes-otel
|
|
80
|
+
* spans and for the spans Vercel AI SDK telemetry emits with `experimental_telemetry`:
|
|
81
|
+
* ```ts
|
|
82
|
+
* import { createRACS } from '@takk/racs';
|
|
83
|
+
* import { usageFromSpan, type GenAISpanLike } from '@takk/racs/otel';
|
|
84
|
+
*
|
|
85
|
+
* const racs = createRACS();
|
|
86
|
+
*
|
|
87
|
+
* // Inside a span processor's onEnd, an OTLP collector hook, or wherever finished
|
|
88
|
+
* // spans surface in the host:
|
|
89
|
+
* function onSpanEnd(span: GenAISpanLike): void {
|
|
90
|
+
* const usage = usageFromSpan(span, { provider: 'anthropic' });
|
|
91
|
+
* if (usage !== undefined) {
|
|
92
|
+
* racs.record(usage);
|
|
93
|
+
* }
|
|
94
|
+
* }
|
|
95
|
+
*
|
|
96
|
+
* // Later, the same analytics as any other ingestion path:
|
|
97
|
+
* const { hitRatio, savedUsd } = racs.stats();
|
|
98
|
+
* ```
|
|
99
|
+
*/
|
|
100
|
+
declare function usageFromSpan(span: GenAISpanLike, fallback?: {
    /** Used only when `gen_ai.system` is absent, empty, or not in the provider spelling table. */
    provider?: ProviderId;
    /** Used only when neither `gen_ai.request.model` nor `gen_ai.response.model` is a non-empty string. */
    model?: string;
}): CacheUsage | undefined;
|
|
104
|
+
|
|
105
|
+
export { type GenAISpanLike, usageFromSpan };
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
import { j as ProviderId, c as CacheUsage } from '../types-DQ7-9sk3.js';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* OpenTelemetry GenAI ingestion for RACS (Remote Agent Context Store): turns one finished
|
|
5
|
+
* GenAI span into one normalized {@link CacheUsage} record for cache analytics.
|
|
6
|
+
*
|
|
7
|
+
* Structural by design. This module declares its own {@link GenAISpanLike} shape instead
|
|
8
|
+
* of importing OpenTelemetry types, so the package keeps its zero-runtime-dependency
|
|
9
|
+
* invariant: any span object that structurally matches works, whether it comes from the
|
|
10
|
+
* OpenTelemetry JS SDK, an OTLP JSON payload, a collector processor, hermes-otel spans, or
|
|
11
|
+
* the spans Vercel AI SDK telemetry emits with `experimental_telemetry` enabled.
|
|
12
|
+
*
|
|
13
|
+
* Privacy contract: {@link usageFromSpan} reads only the provider attribute, the model
|
|
14
|
+
* attributes, the usage counters, and the span end time. It never reads prompt or
|
|
15
|
+
* completion content attributes (`gen_ai.prompt`, `gen_ai.completion`, event bodies, or
|
|
16
|
+
* any other content-bearing field), so wiring it into a span pipeline leaks nothing.
|
|
17
|
+
*
|
|
18
|
+
* @packageDocumentation
|
|
19
|
+
*/
|
|
20
|
+
|
|
21
|
+
/**
|
|
22
|
+
* Minimal structural shape of one GenAI span, the subset {@link usageFromSpan} reads.
|
|
23
|
+
*
|
|
24
|
+
* Matches both live SDK spans (numeric attribute values) and OTLP JSON spans, where int64
|
|
25
|
+
* values arrive as decimal strings, which is why the time fields and the numeric attribute
|
|
26
|
+
* reads accept strings.
|
|
27
|
+
*/
|
|
28
|
+
interface GenAISpanLike {
    /** Span name, unused by ingestion, present so real spans match without widening. */
    readonly name?: string;
    /**
     * Flat attribute bag, GenAI semantic-convention keys, values of any wire type.
     * When this is missing or not an object, ingestion proceeds as if the bag were empty.
     */
    readonly attributes?: Readonly<Record<string, unknown>>;
    /**
     * Span start in nanoseconds since the Unix epoch, number or OTLP decimal string.
     * Declared for structural compatibility; ingestion derives the timestamp from the
     * end time only.
     */
    readonly startTimeUnixNano?: number | string;
    /**
     * Span end in nanoseconds since the Unix epoch, number or OTLP decimal string.
     * Converted to the record's millisecond `timestamp`; a value that is absent, zero,
     * negative, or not a number / all-digit string leaves `timestamp` unset.
     */
    readonly endTimeUnixNano?: number | string;
}
|
|
38
|
+
/**
|
|
39
|
+
* Normalizes one finished GenAI span into one {@link CacheUsage} record, or `undefined`
|
|
40
|
+
* when the span carries too little identity to account for.
|
|
41
|
+
*
|
|
42
|
+
* Field provenance, all reads structural and defensive:
|
|
43
|
+
* - `provider`: inferred from the `gen_ai.system` attribute through the spelling table
|
|
44
|
+
* ({@link GEN_AI_SYSTEM_TO_PROVIDER}, case-insensitive). Unknown or missing spellings
|
|
45
|
+
* fall back to `fallback.provider`; with neither, the function returns `undefined`.
|
|
46
|
+
* - `model`: `gen_ai.request.model`, then `gen_ai.response.model`, then `fallback.model`.
|
|
47
|
+
* A model is required for ledger attribution, without one the function returns
|
|
48
|
+
* `undefined`.
|
|
49
|
+
* - `inputTokens`: `gen_ai.usage.input_tokens` (zero when absent), normalized to the
|
|
50
|
+
* ALL-IN convention {@link CacheUsage.inputTokens} documents. The matched cache
|
|
51
|
+
* attribute spellings decide the adjustment: an Anthropic-flavored read count
|
|
52
|
+
* (`cache_read_input_tokens`, EXCLUSIVE) is added, an OpenAI-flavored read count
|
|
53
|
+
* (`cached_tokens`, `input_cached_tokens`, INCLUSIVE subsets of `input_tokens`) is not,
|
|
54
|
+
* and any write count is added because write counters only exist on providers whose raw
|
|
55
|
+
* input count excludes them. Per-attribute conventions are documented on
|
|
56
|
+
* {@link CACHE_READ_ATTRIBUTES} and {@link CACHE_WRITE_ATTRIBUTES}.
|
|
57
|
+
* - `cacheReadTokens`: the first numeric among the read spellings, see
|
|
58
|
+
* {@link CACHE_READ_ATTRIBUTES} for why three names exist. Zero when none is present.
|
|
59
|
+
* - `cacheWriteTokens5m`: the first numeric among the write spellings, see
|
|
60
|
+
* {@link CACHE_WRITE_ATTRIBUTES} for the 5-minute attribution convention. Omitted when
|
|
61
|
+
* absent.
|
|
62
|
+
* - `timestamp`: the span end time converted from nanoseconds, omitted when the span
|
|
63
|
+
* carries none, in which case {@link RACS.record} stamps it with the engine clock.
|
|
64
|
+
*
|
|
65
|
+
* `prefixKey` is never set here: the GenAI conventions carry no prefix identity, so spans
|
|
66
|
+
* aggregate into ledger totals. Hosts that track the plan per call can spread one in
|
|
67
|
+
* before recording: `racs.record({ ...usage, prefixKey: plan.prefixKey })`.
|
|
68
|
+
*
|
|
69
|
+
* Privacy: only provider, model, usage counters, and the end timestamp are read, never
|
|
70
|
+
* `gen_ai.prompt`, `gen_ai.completion`, or any other content attribute.
|
|
71
|
+
*
|
|
72
|
+
* @param span - Any structurally matching span, see {@link GenAISpanLike}.
|
|
73
|
+
* @param fallback - Provider and model to use when the span attributes lack them, the
|
|
74
|
+
* usual case for telemetry pipelines that already know which client they instrument.
|
|
75
|
+
* @returns A normalized usage record ready for {@link RACS.record}, or `undefined` when
|
|
76
|
+
* neither the span nor the fallback yields a provider and a model.
|
|
77
|
+
*
|
|
78
|
+
* @example
|
|
79
|
+
* Wiring span ingestion into a RACS engine. The same hook shape works for hermes-otel
|
|
80
|
+
* spans and for the spans Vercel AI SDK telemetry emits with `experimental_telemetry`:
|
|
81
|
+
* ```ts
|
|
82
|
+
* import { createRACS } from '@takk/racs';
|
|
83
|
+
* import { usageFromSpan, type GenAISpanLike } from '@takk/racs/otel';
|
|
84
|
+
*
|
|
85
|
+
* const racs = createRACS();
|
|
86
|
+
*
|
|
87
|
+
* // Inside a span processor's onEnd, an OTLP collector hook, or wherever finished
|
|
88
|
+
* // spans surface in the host:
|
|
89
|
+
* function onSpanEnd(span: GenAISpanLike): void {
|
|
90
|
+
* const usage = usageFromSpan(span, { provider: 'anthropic' });
|
|
91
|
+
* if (usage !== undefined) {
|
|
92
|
+
* racs.record(usage);
|
|
93
|
+
* }
|
|
94
|
+
* }
|
|
95
|
+
*
|
|
96
|
+
* // Later, the same analytics as any other ingestion path:
|
|
97
|
+
* const { hitRatio, savedUsd } = racs.stats();
|
|
98
|
+
* ```
|
|
99
|
+
*/
|
|
100
|
+
declare function usageFromSpan(span: GenAISpanLike, fallback?: {
    /** Used only when `gen_ai.system` is absent, empty, or not in the provider spelling table. */
    provider?: ProviderId;
    /** Used only when neither `gen_ai.request.model` nor `gen_ai.response.model` is a non-empty string. */
    model?: string;
}): CacheUsage | undefined;
|
|
104
|
+
|
|
105
|
+
export { type GenAISpanLike, usageFromSpan };
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
// src/otel/index.ts
|
|
2
|
+
/**
 * Lookup table from lowercased `gen_ai.system` attribute values to provider ids.
 * Several spellings map to the same provider (three for Google, two for Mistral).
 *
 * The table has a null prototype and is frozen: keys come from span attributes, so a
 * plain object literal would also answer for inherited names such as "constructor" or
 * "toString" and return a function instead of `undefined`.
 */
var GEN_AI_SYSTEM_TO_PROVIDER = Object.freeze(
  Object.assign(Object.create(null), {
    anthropic: "anthropic",
    openai: "openai",
    "gcp.gemini": "google",
    gemini: "google",
    google: "google",
    "aws.bedrock": "bedrock",
    groq: "groq",
    deepseek: "deepseek",
    mistral_ai: "mistral",
    mistral: "mistral",
    xai: "xai"
  })
);
|
|
15
|
+
/**
 * The one cached-read spelling whose count `usageFromSpan` adds on top of
 * `gen_ai.usage.input_tokens` when computing the input total.
 */
var EXCLUSIVE_READ_ATTRIBUTE = "gen_ai.usage.cache_read_input_tokens";

/**
 * Attribute names that may carry the cached-read token count, in lookup order; the
 * first one holding a usable count wins.
 */
var CACHE_READ_ATTRIBUTES = [
  EXCLUSIVE_READ_ATTRIBUTE,
  "gen_ai.usage.cached_tokens",
  "gen_ai.usage.input_cached_tokens"
];

/**
 * Attribute names that may carry the cache-write token count, in lookup order; the
 * first one holding a usable count wins.
 */
var CACHE_WRITE_ATTRIBUTES = [
  "gen_ai.usage.cache_creation_input_tokens",
  "gen_ai.usage.cache_write_input_tokens"
];
|
|
25
|
+
/**
 * Reports whether `value` is a non-null object (arrays included).
 *
 * @param value - Anything.
 * @returns `true` when `typeof value` is "object" and the value is not `null`.
 */
function isRecord(value) {
  if (value === null) {
    return false;
  }
  return typeof value === "object";
}
|
|
28
|
+
/**
 * Reads one attribute value as a finite, non-negative token count.
 *
 * Accepts plain numbers and all-digit strings (the form int64 values take in OTLP
 * JSON). Everything else, including negatives, NaN, infinities, and strings with
 * signs, decimals or whitespace, is treated as absent.
 *
 * @param value - Raw attribute value of any type.
 * @returns The count as a number, or `undefined` when the value is not a usable count.
 */
function countOf(value) {
  if (typeof value === "number") {
    return Number.isFinite(value) && value >= 0 ? value : void 0;
  }
  if (typeof value === "string" && /^\d+$/.test(value)) {
    const parsed = Number(value);
    // A very long digit string overflows to Infinity; apply the same finiteness gate
    // the number branch uses so the result never poisons downstream sums.
    return Number.isFinite(parsed) ? parsed : void 0;
  }
  return void 0;
}
|
|
37
|
+
/**
 * Reads one attribute value as a non-empty string.
 *
 * @param value - Raw attribute value of any type.
 * @returns The string unchanged, or `undefined` when the value is not a string or is
 *   the empty string.
 */
function stringOf(value) {
  if (typeof value !== "string" || value.length === 0) {
    return void 0;
  }
  return value;
}
|
|
40
|
+
/**
 * Converts a span end time in nanoseconds since the Unix epoch to whole milliseconds.
 *
 * Numbers are divided directly; all-digit strings go through BigInt so the division
 * happens before any precision is lost. Both branches share one acceptance rule: the
 * result must be finite and strictly positive, otherwise the time counts as "not set".
 *
 * @param value - Nanosecond timestamp as a number or all-digit string, or anything else.
 * @returns Milliseconds since the epoch, or `undefined` when the input is absent,
 *   malformed, non-positive, below one millisecond, or too large to represent.
 */
function endMillisOf(value) {
  let millis;
  if (typeof value === "number" && Number.isFinite(value)) {
    millis = Math.floor(value / 1e6);
  } else if (typeof value === "string" && /^\d+$/.test(value)) {
    millis = Number(BigInt(value) / 1000000n);
  } else {
    return void 0;
  }
  // Single gate for both input forms: a sub-millisecond number used to come back as 0
  // while the same instant as a string came back absent, and an oversized digit string
  // could come back as Infinity.
  return Number.isFinite(millis) && millis > 0 ? millis : void 0;
}
|
|
50
|
+
/**
 * Finds the first attribute, in the order given by `names`, whose value parses as a
 * token count via `countOf`.
 *
 * @param attributes - Flat attribute bag to read from.
 * @param names - Candidate attribute names, highest priority first.
 * @returns `{ name, count }` for the first name that yields a count (the name is kept
 *   because callers branch on which spelling matched), or `undefined` when none does.
 */
function firstCount(attributes, names) {
  for (let index = 0; index < names.length; index += 1) {
    const name = names[index];
    const count = countOf(attributes[name]);
    if (count === void 0) {
      continue;
    }
    return { name, count };
  }
  return void 0;
}
|
|
59
|
+
/**
 * Normalizes one finished GenAI span into a cache-usage record.
 *
 * Only `span.attributes` and `span.endTimeUnixNano` are read. A missing or non-object
 * attribute bag is treated as empty.
 *
 * @param span - Span-like object carrying GenAI semantic-convention attributes.
 * @param fallback - Optional `{ provider, model }` used when the attributes do not
 *   yield a known provider or a non-empty model name.
 * @returns `{ provider, model, inputTokens, cacheReadTokens }`, plus
 *   `cacheWriteTokens5m` when a write counter is present and `timestamp` (milliseconds)
 *   when the end time is usable; `undefined` when no provider or no model can be
 *   determined.
 */
function usageFromSpan(span, fallback) {
  const attributes = isRecord(span.attributes) ? span.attributes : {};

  const system = stringOf(attributes["gen_ai.system"]);
  const systemKey = system === void 0 ? void 0 : system.toLowerCase();
  // Own-property lookup only: the attribute value is external input, and a plain
  // bracket read would resolve names such as "constructor" or "__proto__" through the
  // prototype chain and hand back a function/object as the provider.
  const mapped =
    systemKey !== void 0 && Object.prototype.hasOwnProperty.call(GEN_AI_SYSTEM_TO_PROVIDER, systemKey)
      ? GEN_AI_SYSTEM_TO_PROVIDER[systemKey]
      : void 0;
  const provider = mapped ?? fallback?.provider;
  if (provider === void 0) {
    return void 0;
  }

  // Request model wins over response model; the fallback is consulted last.
  const model =
    stringOf(attributes["gen_ai.request.model"]) ??
    stringOf(attributes["gen_ai.response.model"]) ??
    fallback?.model;
  if (model === void 0) {
    return void 0;
  }

  const rawInputTokens = countOf(attributes["gen_ai.usage.input_tokens"]) ?? 0;
  const read = firstCount(attributes, CACHE_READ_ATTRIBUTES);
  const write = firstCount(attributes, CACHE_WRITE_ATTRIBUTES);
  const timestamp = endMillisOf(span.endTimeUnixNano);

  // The read count is added to the input total only when it came from the exclusive
  // spelling; any write count is always added.
  const exclusiveRead = read !== void 0 && read.name === EXCLUSIVE_READ_ATTRIBUTE;
  const inputTokens = rawInputTokens + (exclusiveRead ? read.count : 0) + (write?.count ?? 0);
  const cacheReadTokens = read?.count ?? 0;
  const cacheWriteTokens5m = write?.count;

  return {
    provider,
    model,
    inputTokens,
    cacheReadTokens,
    // Optional fields are omitted entirely rather than set to undefined.
    ...(cacheWriteTokens5m !== void 0 ? { cacheWriteTokens5m } : {}),
    ...(timestamp !== void 0 ? { timestamp } : {})
  };
}
|
|
88
|
+
|
|
89
|
+
export { usageFromSpan };
|
|
90
|
+
//# sourceMappingURL=index.js.map
|
|
91
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../../src/otel/index.ts"],"names":[],"mappings":";AA4CA,IAAM,yBAAA,GAAkE;AAAA,EACtE,SAAA,EAAW,WAAA;AAAA,EACX,MAAA,EAAQ,QAAA;AAAA,EACR,YAAA,EAAc,QAAA;AAAA,EACd,MAAA,EAAQ,QAAA;AAAA,EACR,MAAA,EAAQ,QAAA;AAAA,EACR,aAAA,EAAe,SAAA;AAAA,EACf,IAAA,EAAM,MAAA;AAAA,EACN,QAAA,EAAU,UAAA;AAAA,EACV,UAAA,EAAY,SAAA;AAAA,EACZ,OAAA,EAAS,SAAA;AAAA,EACT,GAAA,EAAK;AACP,CAAA;AAqBA,IAAM,qBAAA,GAA2C;AAAA,EAC/C,sCAAA;AAAA,EACA,4BAAA;AAAA,EACA;AACF,CAAA;AAMA,IAAM,wBAAA,GAA2B,sCAAA;AAoBjC,IAAM,sBAAA,GAA4C;AAAA,EAChD,0CAAA;AAAA,EACA;AACF,CAAA;AAGA,SAAS,SAAS,KAAA,EAAkD;AAClE,EAAA,OAAO,OAAO,KAAA,KAAU,QAAA,IAAY,KAAA,KAAU,IAAA;AAChD;AAMA,SAAS,QAAQ,KAAA,EAAoC;AACnD,EAAA,IAAI,OAAO,UAAU,QAAA,IAAY,MAAA,CAAO,SAAS,KAAK,CAAA,IAAK,SAAS,CAAA,EAAG;AACrE,IAAA,OAAO,KAAA;AAAA,EACT;AACA,EAAA,IAAI,OAAO,KAAA,KAAU,QAAA,IAAY,OAAA,CAAQ,IAAA,CAAK,KAAK,CAAA,EAAG;AACpD,IAAA,OAAO,OAAO,KAAK,CAAA;AAAA,EACrB;AACA,EAAA,OAAO,MAAA;AACT;AAGA,SAAS,SAAS,KAAA,EAAoC;AACpD,EAAA,OAAO,OAAO,KAAA,KAAU,QAAA,IAAY,KAAA,KAAU,KAAK,KAAA,GAAQ,MAAA;AAC7D;AAQA,SAAS,YAAY,KAAA,EAAwD;AAC3E,EAAA,IAAI,OAAO,UAAU,QAAA,IAAY,MAAA,CAAO,SAAS,KAAK,CAAA,IAAK,QAAQ,CAAA,EAAG;AACpE,IAAA,OAAO,IAAA,CAAK,KAAA,CAAM,KAAA,GAAQ,GAAS,CAAA;AAAA,EACrC;AACA,EAAA,IAAI,OAAO,KAAA,KAAU,QAAA,IAAY,OAAA,CAAQ,IAAA,CAAK,KAAK,CAAA,EAAG;AACpD,IAAA,MAAM,MAAA,GAAS,MAAA,CAAO,MAAA,CAAO,KAAK,IAAI,QAAU,CAAA;AAChD,IAAA,OAAO,MAAA,GAAS,IAAI,MAAA,GAAS,MAAA;AAAA,EAC/B;AACA,EAAA,OAAO,MAAA;AACT;AAOA,SAAS,UAAA,CACP,YACA,KAAA,EAC+D;AAC/D,EAAA,KAAA,MAAW,QAAQ,KAAA,EAAO;AACxB,IAAA,MAAM,KAAA,GAAQ,OAAA,CAAQ,UAAA,CAAW,IAAI,CAAC,CAAA;AACtC,IAAA,IAAI,UAAU,MAAA,EAAW;AACvB,MAAA,OAAO,EAAE,MAAM,KAAA,EAAM;AAAA,IACvB;AAAA,EACF;AACA,EAAA,OAAO,MAAA;AACT;AAgEO,SAAS,aAAA,CACd,MACA,QAAA,EACwB;AACxB,EAAA,MAAM,aAAgD,QAAA,CAAS,IAAA,CAAK,UAAU,CAAA,GAC1E,IAAA,CAAK,aACL,EAAC;AAEL,EAAA,MAAM,MAAA,GAAS,QAAA,CAAS,UAAA,CAAW,eAAe,CAAC,CAAA;AACnD,EAAA,MAAM,SAAS,MAAA,KAAW,MAAA,GAAY,SAAY,yBAAA,CAA0B,MAAA,CAAO,aAAa,CAAA;AAChG,EAAA,MAAM,QAAA,GAAW,UAAU,QAAA,EAAU,QAAA;AACrC,EAAA,IAAI,aAAa,MAAA,EAAW;AAC1B,IAAA,O
AAO,MAAA;AAAA,EACT;AAEA,EAAA,MAAM,KAAA,GACJ,QAAA,CAAS,UAAA,CAAW,sBAAsB,CAAC,CAAA,IAC3C,QAAA,CAAS,UAAA,CAAW,uBAAuB,CAAC,CAAA,IAC5C,QAAA,EAAU,KAAA;AACZ,EAAA,IAAI,UAAU,MAAA,EAAW;AACvB,IAAA,OAAO,MAAA;AAAA,EACT;AAEA,EAAA,MAAM,cAAA,GAAiB,OAAA,CAAQ,UAAA,CAAW,2BAA2B,CAAC,CAAA,IAAK,CAAA;AAC3E,EAAA,MAAM,IAAA,GAAO,UAAA,CAAW,UAAA,EAAY,qBAAqB,CAAA;AACzD,EAAA,MAAM,KAAA,GAAQ,UAAA,CAAW,UAAA,EAAY,sBAAsB,CAAA;AAC3D,EAAA,MAAM,SAAA,GAAY,WAAA,CAAY,IAAA,CAAK,eAAe,CAAA;AAOlD,EAAA,MAAM,aAAA,GAAgB,IAAA,KAAS,MAAA,IAAa,IAAA,CAAK,IAAA,KAAS,wBAAA;AAC1D,EAAA,MAAM,cAAc,cAAA,IAAkB,aAAA,GAAgB,KAAK,KAAA,GAAQ,CAAA,CAAA,IAAM,OAAO,KAAA,IAAS,CAAA,CAAA;AACzF,EAAA,MAAM,eAAA,GAAkB,MAAM,KAAA,IAAS,CAAA;AACvC,EAAA,MAAM,qBAAqB,KAAA,EAAO,KAAA;AAElC,EAAA,OAAO;AAAA,IACL,QAAA;AAAA,IACA,KAAA;AAAA,IACA,WAAA;AAAA,IACA,eAAA;AAAA,IACA,GAAI,kBAAA,KAAuB,MAAA,GAAY,EAAE,kBAAA,KAAuB,EAAC;AAAA,IACjE,GAAI,SAAA,KAAc,MAAA,GAAY,EAAE,SAAA,KAAc;AAAC,GACjD;AACF","file":"index.js","sourcesContent":["/**\n * OpenTelemetry GenAI ingestion for RACS (Remote Agent Context Store): turns one finished\n * GenAI span into one normalized {@link CacheUsage} record for cache analytics.\n *\n * Structural by design. This module declares its own {@link GenAISpanLike} shape instead\n * of importing OpenTelemetry types, so the package keeps its zero-runtime-dependency\n * invariant: any span object that structurally matches works, whether it comes from the\n * OpenTelemetry JS SDK, an OTLP JSON payload, a collector processor, hermes-otel spans, or\n * the spans Vercel AI SDK telemetry emits with `experimental_telemetry` enabled.\n *\n * Privacy contract: {@link usageFromSpan} reads only the provider attribute, the model\n * attributes, the usage counters, and the span end time. 
It never reads prompt or\n * completion content attributes (`gen_ai.prompt`, `gen_ai.completion`, event bodies, or\n * any other content-bearing field), so wiring it into a span pipeline leaks nothing.\n *\n * @packageDocumentation\n */\n\nimport type { CacheUsage, ProviderId } from '../types.js';\n\n/**\n * Minimal structural shape of one GenAI span, the subset {@link usageFromSpan} reads.\n *\n * Matches both live SDK spans (numeric attribute values) and OTLP JSON spans, where int64\n * values arrive as decimal strings, which is why the time fields and the numeric attribute\n * reads accept strings.\n */\nexport interface GenAISpanLike {\n /** Span name, unused by ingestion, present so real spans match without widening. */\n readonly name?: string;\n /** Flat attribute bag, GenAI semantic-convention keys, values of any wire type. */\n readonly attributes?: Readonly<Record<string, unknown>>;\n /** Span start in nanoseconds since the Unix epoch, number or OTLP decimal string. */\n readonly startTimeUnixNano?: number | string;\n /** Span end in nanoseconds since the Unix epoch, number or OTLP decimal string. */\n readonly endTimeUnixNano?: number | string;\n}\n\n/**\n * Mapping from `gen_ai.system` attribute values to RACS provider ids, per the\n * OpenTelemetry GenAI semantic conventions as of June 2026. Instrumentations disagree on\n * the Google and Mistral spellings, so every spelling seen in the wild maps here.\n * Lookup is case-insensitive, values are lowercased before the table is consulted.\n */\nconst GEN_AI_SYSTEM_TO_PROVIDER: Readonly<Record<string, ProviderId>> = {\n anthropic: 'anthropic',\n openai: 'openai',\n 'gcp.gemini': 'google',\n gemini: 'google',\n google: 'google',\n 'aws.bedrock': 'bedrock',\n groq: 'groq',\n deepseek: 'deepseek',\n mistral_ai: 'mistral',\n mistral: 'mistral',\n xai: 'xai',\n};\n\n/**\n * Token-count attribute names for cached reads, in lookup order. Instrumentations\n * disagree, so the first numeric value among them wins. 
Each spelling also pins the\n * input-token convention of its source, which decides the all-in normalization in\n * {@link usageFromSpan}:\n *\n * - `'gen_ai.usage.cache_read_input_tokens'`: Anthropic-flavored instrumentations,\n * including hermes-otel, mirror the raw `usage.cache_read_input_tokens` response field\n * under this name. EXCLUSIVE convention: the matching `gen_ai.usage.input_tokens`\n * mirrors raw Anthropic `input_tokens`, which excludes cache reads and cache writes, so\n * the read count is ADDED to reach the all-in total.\n * - `'gen_ai.usage.cached_tokens'`: OpenAI-flavored instrumentations in the OpenLLMetry\n * lineage mirror `prompt_tokens_details.cached_tokens` under this name. INCLUSIVE\n * convention: OpenAI `cached_tokens` is a subset of `prompt_tokens`, so nothing is\n * added.\n * - `'gen_ai.usage.input_cached_tokens'`: the newer semantic-convention draft naming some\n * collectors and SDK instrumentations have adopted, descending from the OpenAI detail\n * shape. INCLUSIVE convention, nothing is added.\n */\nconst CACHE_READ_ATTRIBUTES: readonly string[] = [\n 'gen_ai.usage.cache_read_input_tokens',\n 'gen_ai.usage.cached_tokens',\n 'gen_ai.usage.input_cached_tokens',\n];\n\n/**\n * The one read spelling whose source reports EXCLUSIVE input counts, see\n * {@link CACHE_READ_ATTRIBUTES} for the per-attribute convention catalog.\n */\nconst EXCLUSIVE_READ_ATTRIBUTE = 'gen_ai.usage.cache_read_input_tokens';\n\n/**\n * Token-count attribute names for cache writes, in lookup order:\n *\n * - `'gen_ai.usage.cache_creation_input_tokens'`: Anthropic-flavored instrumentations\n * mirror the raw `usage.cache_creation_input_tokens` response field. EXCLUSIVE\n * convention: raw Anthropic `input_tokens` excludes written tokens, so the write count\n * is ADDED to reach the all-in input total.\n * - `'gen_ai.usage.cache_write_input_tokens'`: the generic spelling other\n * instrumentations use for the same quantity. 
Write counters exist only on\n * breakpoint-family providers whose raw usage excludes them from the input count, so\n * this spelling follows the same EXCLUSIVE convention and is ADDED likewise.\n *\n * TTL attribution convention: the GenAI semantic conventions carry no TTL split, a write\n * is just a write, so every written token is attributed to {@link CacheUsage.cacheWriteTokens5m},\n * the 5-minute tier, the conservative default because it is the cheaper write premium.\n * Hosts that know a span wrote a 1-hour span should move the count to\n * `cacheWriteTokens1h` before recording.\n */\nconst CACHE_WRITE_ATTRIBUTES: readonly string[] = [\n 'gen_ai.usage.cache_creation_input_tokens',\n 'gen_ai.usage.cache_write_input_tokens',\n];\n\n/** True for any non-null object, the first gate of every structural check. */\nfunction isRecord(value: unknown): value is Record<string, unknown> {\n return typeof value === 'object' && value !== null;\n}\n\n/**\n * Reads one attribute as a finite non-negative token count. Accepts plain numbers and\n * OTLP JSON int64 values, which arrive as decimal strings, anything else is absent.\n */\nfunction countOf(value: unknown): number | undefined {\n if (typeof value === 'number' && Number.isFinite(value) && value >= 0) {\n return value;\n }\n if (typeof value === 'string' && /^\\d+$/.test(value)) {\n return Number(value);\n }\n return undefined;\n}\n\n/** Reads one attribute as a non-empty string, anything else is absent. */\nfunction stringOf(value: unknown): string | undefined {\n return typeof value === 'string' && value !== '' ? value : undefined;\n}\n\n/**\n * Converts span end nanoseconds to milliseconds since the Unix epoch. OTLP decimal\n * strings go through BigInt so values above 2^53 nanoseconds, every realistic wall-clock\n * time, lose nothing before the division. 
Zero and negatives mean \"not set\" in OTLP and\n * come back absent.\n */\nfunction endMillisOf(value: number | string | undefined): number | undefined {\n if (typeof value === 'number' && Number.isFinite(value) && value > 0) {\n return Math.floor(value / 1_000_000);\n }\n if (typeof value === 'string' && /^\\d+$/.test(value)) {\n const millis = Number(BigInt(value) / 1_000_000n);\n return millis > 0 ? millis : undefined;\n }\n return undefined;\n}\n\n/**\n * First numeric value among the named attributes, in order, with the attribute name that\n * supplied it, absent when none is numeric. The name matters: it pins which input-token\n * convention the source follows, see {@link CACHE_READ_ATTRIBUTES}.\n */\nfunction firstCount(\n attributes: Readonly<Record<string, unknown>>,\n names: readonly string[],\n): { readonly name: string; readonly count: number } | undefined {\n for (const name of names) {\n const count = countOf(attributes[name]);\n if (count !== undefined) {\n return { name, count };\n }\n }\n return undefined;\n}\n\n/**\n * Normalizes one finished GenAI span into one {@link CacheUsage} record, or `undefined`\n * when the span carries too little identity to account for.\n *\n * Field provenance, all reads structural and defensive:\n * - `provider`: inferred from the `gen_ai.system` attribute through the spelling table\n * ({@link GEN_AI_SYSTEM_TO_PROVIDER}, case-insensitive). Unknown or missing spellings\n * fall back to `fallback.provider`; with neither, the function returns `undefined`.\n * - `model`: `gen_ai.request.model`, then `gen_ai.response.model`, then `fallback.model`.\n * A model is required for ledger attribution, without one the function returns\n * `undefined`.\n * - `inputTokens`: `gen_ai.usage.input_tokens` (zero when absent), normalized to the\n * ALL-IN convention {@link CacheUsage.inputTokens} documents. 
The matched cache\n * attribute spellings decide the adjustment: an Anthropic-flavored read count\n * (`cache_read_input_tokens`, EXCLUSIVE) is added, an OpenAI-flavored read count\n * (`cached_tokens`, `input_cached_tokens`, INCLUSIVE subsets of `input_tokens`) is not,\n * and any write count is added because write counters only exist on providers whose raw\n * input count excludes them. Per-attribute conventions are documented on\n * {@link CACHE_READ_ATTRIBUTES} and {@link CACHE_WRITE_ATTRIBUTES}.\n * - `cacheReadTokens`: the first numeric among the read spellings, see\n * {@link CACHE_READ_ATTRIBUTES} for why three names exist. Zero when none is present.\n * - `cacheWriteTokens5m`: the first numeric among the write spellings, see\n * {@link CACHE_WRITE_ATTRIBUTES} for the 5-minute attribution convention. Omitted when\n * absent.\n * - `timestamp`: the span end time converted from nanoseconds, omitted when the span\n * carries none, in which case {@link RACS.record} stamps it with the engine clock.\n *\n * `prefixKey` is never set here: the GenAI conventions carry no prefix identity, so spans\n * aggregate into ledger totals. Hosts that track the plan per call can spread one in\n * before recording: `racs.record({ ...usage, prefixKey: plan.prefixKey })`.\n *\n * Privacy: only provider, model, usage counters, and the end timestamp are read, never\n * `gen_ai.prompt`, `gen_ai.completion`, or any other content attribute.\n *\n * @param span - Any structurally matching span, see {@link GenAISpanLike}.\n * @param fallback - Provider and model to use when the span attributes lack them, the\n * usual case for telemetry pipelines that already know which client they instrument.\n * @returns A normalized usage record ready for {@link RACS.record}, or `undefined` when\n * neither the span nor the fallback yields a provider and a model.\n *\n * @example\n * Wiring span ingestion into a RACS engine. 
The same hook shape works for hermes-otel\n * spans and for the spans Vercel AI SDK telemetry emits with `experimental_telemetry`:\n * ```ts\n * import { createRACS } from '@takk/racs';\n * import { usageFromSpan, type GenAISpanLike } from '@takk/racs/otel';\n *\n * const racs = createRACS();\n *\n * // Inside a span processor's onEnd, an OTLP collector hook, or wherever finished\n * // spans surface in the host:\n * function onSpanEnd(span: GenAISpanLike): void {\n * const usage = usageFromSpan(span, { provider: 'anthropic' });\n * if (usage !== undefined) {\n * racs.record(usage);\n * }\n * }\n *\n * // Later, the same analytics as any other ingestion path:\n * const { hitRatio, savedUsd } = racs.stats();\n * ```\n */\nexport function usageFromSpan(\n span: GenAISpanLike,\n fallback?: { provider?: ProviderId; model?: string },\n): CacheUsage | undefined {\n const attributes: Readonly<Record<string, unknown>> = isRecord(span.attributes)\n ? span.attributes\n : {};\n\n const system = stringOf(attributes['gen_ai.system']);\n const mapped = system === undefined ? undefined : GEN_AI_SYSTEM_TO_PROVIDER[system.toLowerCase()];\n const provider = mapped ?? fallback?.provider;\n if (provider === undefined) {\n return undefined;\n }\n\n const model =\n stringOf(attributes['gen_ai.request.model']) ??\n stringOf(attributes['gen_ai.response.model']) ??\n fallback?.model;\n if (model === undefined) {\n return undefined;\n }\n\n const rawInputTokens = countOf(attributes['gen_ai.usage.input_tokens']) ?? 0;\n const read = firstCount(attributes, CACHE_READ_ATTRIBUTES);\n const write = firstCount(attributes, CACHE_WRITE_ATTRIBUTES);\n const timestamp = endMillisOf(span.endTimeUnixNano);\n\n // All-in normalization, see CacheUsage.inputTokens: the Anthropic-flavored read\n // spelling marks an exclusive source whose input count omits cached reads, so the read\n // count is added back; the OpenAI-flavored spellings are inclusive subsets and add\n // nothing. 
Write counts are always added, both write spellings mirror raw counts that\n // their source excludes from the input count.\n const exclusiveRead = read !== undefined && read.name === EXCLUSIVE_READ_ATTRIBUTE;\n const inputTokens = rawInputTokens + (exclusiveRead ? read.count : 0) + (write?.count ?? 0);\n const cacheReadTokens = read?.count ?? 0;\n const cacheWriteTokens5m = write?.count;\n\n return {\n provider,\n model,\n inputTokens,\n cacheReadTokens,\n ...(cacheWriteTokens5m !== undefined ? { cacheWriteTokens5m } : {}),\n ...(timestamp !== undefined ? { timestamp } : {}),\n };\n}\n"]}
|