@inkeep/agents-core 0.74.1 → 0.74.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/auth/auth-schema.d.ts +163 -163
- package/dist/auth/auth-validation-schemas.d.ts +137 -137
- package/dist/auth/permissions.d.ts +9 -9
- package/dist/client-exports.d.ts +2 -1
- package/dist/client-exports.js +2 -1
- package/dist/constants/models.d.ts +3 -0
- package/dist/constants/models.js +3 -0
- package/dist/constants/otel-attributes.d.ts +4 -0
- package/dist/constants/otel-attributes.js +4 -0
- package/dist/data-access/manage/agents.d.ts +26 -26
- package/dist/data-access/manage/agents.js +12 -10
- package/dist/data-access/manage/artifactComponents.d.ts +14 -14
- package/dist/data-access/manage/contextConfigs.d.ts +12 -12
- package/dist/data-access/manage/dataComponents.d.ts +6 -6
- package/dist/data-access/manage/functionTools.d.ts +18 -18
- package/dist/data-access/manage/skills.d.ts +15 -15
- package/dist/data-access/manage/subAgentExternalAgentRelations.d.ts +24 -24
- package/dist/data-access/manage/subAgentRelations.d.ts +26 -26
- package/dist/data-access/manage/subAgentTeamAgentRelations.d.ts +24 -24
- package/dist/data-access/manage/subAgents.d.ts +18 -18
- package/dist/data-access/manage/tools.d.ts +27 -27
- package/dist/data-access/manage/triggers.d.ts +4 -4
- package/dist/data-access/runtime/apiKeys.d.ts +20 -20
- package/dist/data-access/runtime/apps.d.ts +19 -19
- package/dist/data-access/runtime/conversations.d.ts +28 -28
- package/dist/data-access/runtime/events.d.ts +5 -5
- package/dist/data-access/runtime/feedback.d.ts +6 -6
- package/dist/data-access/runtime/messages.d.ts +21 -21
- package/dist/data-access/runtime/scheduledTriggerInvocations.d.ts +4 -4
- package/dist/data-access/runtime/scheduledTriggerUsers.d.ts +1 -1
- package/dist/data-access/runtime/tasks.d.ts +5 -5
- package/dist/db/manage/manage-schema.d.ts +484 -484
- package/dist/db/runtime/runtime-schema.d.ts +453 -453
- package/dist/index.d.ts +2 -2
- package/dist/index.js +2 -2
- package/dist/utils/cache-debug-query.d.ts +43 -0
- package/dist/utils/cache-debug-query.js +70 -0
- package/dist/utils/cache-debug-walk.d.ts +36 -0
- package/dist/utils/cache-debug-walk.js +91 -0
- package/dist/utils/cache-state.d.ts +36 -0
- package/dist/utils/cache-state.js +33 -0
- package/dist/utils/index.d.ts +2 -2
- package/dist/utils/index.js +2 -2
- package/dist/utils/model-factory.d.ts +7 -0
- package/dist/utils/model-factory.js +18 -2
- package/dist/utils/token-estimator.d.ts +1 -0
- package/dist/utils/usage-cost-middleware.d.ts +3 -1
- package/dist/utils/usage-cost-middleware.js +70 -12
- package/dist/validation/drizzle-schema-helpers.d.ts +3 -3
- package/dist/validation/schemas/skills.d.ts +63 -63
- package/dist/validation/schemas.d.ts +2760 -2740
- package/dist/validation/schemas.js +5 -1
- package/package.json +3 -2
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
import { AI_OPERATIONS, SPAN_KEYS } from "../constants/otel-attributes.js";
|
|
2
|
+
import { FIELD_CONTEXTS, FIELD_DATA_TYPES, OPERATORS, ORDER_DIRECTIONS, QUERY_DEFAULTS, QUERY_EXPRESSIONS, QUERY_TYPES, REQUEST_TYPES, SIGNALS, buildFilterExpression } from "../constants/signoz-queries.js";
|
|
3
|
+
|
|
4
|
+
//#region src/utils/cache-debug-query.ts
|
|
5
|
+
const CACHE_DEBUG_QUERY_NAME = QUERY_EXPRESSIONS.AI_LLM_CALLS;
|
|
6
|
+
/**
 * Assemble one `selectFields` entry for a builder query.
 *
 * @param name - Key of the field to select.
 * @param fieldDataType - Data-type tag for the field.
 * @param fieldContext - Context tag for the field.
 * @returns A plain object carrying exactly those three properties, in that order.
 */
const selectField = (name, fieldDataType, fieldContext) => {
	const entry = { name, fieldDataType, fieldContext };
	return entry;
};
|
|
11
|
+
/**
 * Build the raw traces query payload that lists the LLM-call spans of one
 * conversation, together with the attributes needed for cache debugging.
 *
 * @param conversationId - Conversation whose spans are requested.
 * @param options - `start` / `end` are copied onto the payload as-is; `projectId`
 *   (optional) adds both a filter item and a top-level `projectId` property;
 *   `limit` defaults to `QUERY_DEFAULTS.LIMIT_UNLIMITED`.
 * @returns The request object holding a single builder query named
 *   `CACHE_DEBUG_QUERY_NAME`, ordered by timestamp descending.
 */
function buildCacheDebugQuery(conversationId, options) {
	const { start, end, projectId, limit = QUERY_DEFAULTS.LIMIT_UNLIMITED } = options;

	// Always restrict to this conversation and to generate-text / stream-text operations.
	const filterItems = [
		{
			key: SPAN_KEYS.CONVERSATION_ID,
			op: OPERATORS.EQUALS,
			value: conversationId
		},
		{
			key: SPAN_KEYS.AI_OPERATION_ID,
			op: OPERATORS.IN,
			value: [AI_OPERATIONS.GENERATE_TEXT, AI_OPERATIONS.STREAM_TEXT]
		}
	];
	// The project filter is only added when a (truthy) project id was supplied.
	if (projectId) filterItems.push({
		key: SPAN_KEYS.PROJECT_ID,
		op: OPERATORS.EQUALS,
		value: projectId
	});

	// [key, data type] pairs, split by the field context they are selected from.
	const spanColumns = [
		[SPAN_KEYS.SPAN_ID, FIELD_DATA_TYPES.STRING],
		[SPAN_KEYS.TIMESTAMP, FIELD_DATA_TYPES.INT64]
	];
	const attributeColumns = [
		[SPAN_KEYS.AI_OPERATION_ID, FIELD_DATA_TYPES.STRING],
		[SPAN_KEYS.AI_MODEL_ID, FIELD_DATA_TYPES.STRING],
		[SPAN_KEYS.AI_TELEMETRY_GENERATION_TYPE, FIELD_DATA_TYPES.STRING],
		[SPAN_KEYS.AI_TELEMETRY_SUB_AGENT_ID, FIELD_DATA_TYPES.STRING],
		[SPAN_KEYS.AGENT_ID, FIELD_DATA_TYPES.STRING],
		[SPAN_KEYS.AI_MODEL_PROVIDER, FIELD_DATA_TYPES.STRING],
		[SPAN_KEYS.GEN_AI_RESPONSE_PROVIDER, FIELD_DATA_TYPES.STRING],
		[SPAN_KEYS.GEN_AI_USAGE_INPUT_TOKENS, FIELD_DATA_TYPES.INT64],
		[SPAN_KEYS.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS, FIELD_DATA_TYPES.INT64],
		[SPAN_KEYS.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS, FIELD_DATA_TYPES.INT64],
		[SPAN_KEYS.CACHE_INTENT_MARKER_COUNT, FIELD_DATA_TYPES.INT64],
		[SPAN_KEYS.CACHE_INTENT_PREFIX_SIGNATURE, FIELD_DATA_TYPES.STRING]
	];
	const selectFields = [
		...spanColumns.map(([key, dataType]) => selectField(key, dataType, FIELD_CONTEXTS.SPAN)),
		...attributeColumns.map(([key, dataType]) => selectField(key, dataType, FIELD_CONTEXTS.ATTRIBUTE))
	];

	const spec = {
		name: CACHE_DEBUG_QUERY_NAME,
		signal: SIGNALS.TRACES,
		filter: { expression: buildFilterExpression(filterItems) },
		selectFields,
		order: [{
			key: { name: SPAN_KEYS.TIMESTAMP },
			direction: ORDER_DIRECTIONS.DESC
		}],
		limit,
		stepInterval: QUERY_DEFAULTS.STEP_INTERVAL,
		disabled: QUERY_DEFAULTS.DISABLED
	};

	// Spread keeps `projectId` between `requestType` and `compositeQuery`, and omits it when unset.
	const projectScope = projectId ? { projectId } : {};
	return {
		start,
		end,
		requestType: REQUEST_TYPES.RAW,
		...projectScope,
		compositeQuery: { queries: [{
			type: QUERY_TYPES.BUILDER_QUERY,
			spec
		}] }
	};
}
|
|
68
|
+
|
|
69
|
+
//#endregion
|
|
70
|
+
export { CACHE_DEBUG_QUERY_NAME, buildCacheDebugQuery };
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import { CacheState } from "./cache-state.js";
|
|
2
|
+
|
|
3
|
+
//#region src/utils/cache-debug-walk.d.ts
|
|
4
|
+
/**
 * One span row as consumed by the cache-debug walk. Fields are looked up in the
 * optional nested `data` object first and on the row itself otherwise.
 */
type CacheDebugSpanRow = Record<string, unknown> & {
	data?: Record<string, unknown>;
};
/** One LLM call, flattened from a span row and annotated with its derived cache state. */
interface CacheDebugCall {
	/** Span id, stringified; `""` when missing. */
	spanId: string;
	/** Span timestamp, stringified; `""` when missing. */
	timestamp: string;
	/** AI operation id attribute of the span. */
	operationId: string;
	/** Model id attribute of the span. */
	model: string;
	/** Request-side model provider attribute of the span. */
	modelProvider: string;
	/** Generation-type telemetry attribute of the span. */
	generationType: string;
	/** Raw sub-agent id attribute (the agent-id / `'_default'` fallback is not applied here). */
	subAgentId: string;
	/** Input token count; `0` when missing or non-numeric. */
	inputTokens: number;
	/** Cache-read input token count; `0` when missing or non-numeric. */
	cacheReadTokens: number;
	/** Cache-creation input token count; `0` when missing or non-numeric. */
	cacheCreationTokens: number;
	/** Cache-intent marker count; `0` when missing or non-numeric. */
	markerCount: number;
	/** Cache-intent prefix signature; `""` when the span carried none. */
	prefixSignature: string;
	/** Cache state derived for this call. */
	cacheState: CacheState;
}
/**
 * Derive a cache state for every LLM-span row, visiting rows in chronological
 * order and keeping a separate prior-signature cursor per agent.
 *
 * Steps:
 * 1. Rows are sorted ascending by `SPAN_KEYS.TIMESTAMP`.
 * 2. Each row's agent key is resolved through the fallback chain
 *    `AI_TELEMETRY_SUB_AGENT_ID -> AGENT_ID -> '_default'`.
 * 3. That agent's prior signature is read from `priorSignatureByAgent`.
 * 4. `deriveCacheState` is called; when the row's `prefix_signature` is
 *    non-empty it then becomes the agent's new cursor value.
 *
 * @param rows - Span rows to analyse.
 * @returns One `CacheDebugCall` per row, in chronological order.
 */
declare function deriveCacheDebugCalls(rows: CacheDebugSpanRow[]): CacheDebugCall[];
|
|
35
|
+
//#endregion
|
|
36
|
+
export { CacheDebugCall, CacheDebugSpanRow, deriveCacheDebugCalls };
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
import { SPAN_KEYS } from "../constants/otel-attributes.js";
|
|
2
|
+
import { deriveCacheState, isProviderSupportedForCaching, resolveCachingProvider } from "./cache-state.js";
|
|
3
|
+
|
|
4
|
+
//#region src/utils/cache-debug-walk.ts
|
|
5
|
+
/**
|
|
6
|
+
* Pure cache-debug walk: turns the SigNoz LLM-span rows returned by
|
|
7
|
+
* `buildCacheDebugQuery` into a chronologically ordered list of `CacheDebugCall`
|
|
8
|
+
* records — each carrying the derived `CacheState`.
|
|
9
|
+
*
|
|
10
|
+
* Per-agent priorSignature tracking (mirrors the per-agent walk in
|
|
11
|
+
* agents-manage-ui/.../[conversationId]/route.ts). A single global cursor
|
|
12
|
+
* cross-contaminates `priorSignature` between sub-agents in multi-agent
|
|
13
|
+
* conversations and reports false MISS-expected / MISS-regression states; the
|
|
14
|
+
* Map<subAgentId, signature> keeps each agent's cursor independent.
|
|
15
|
+
*
|
|
16
|
+
* This is the pure, testable core of the `pnpm cache-debug` CLI. The CLI itself
|
|
17
|
+
* remains a thin shell that fetches from SigNoz and prints these records.
|
|
18
|
+
*/
|
|
19
|
+
/**
 * Read `key` from a span row, preferring the nested `data` object.
 *
 * @param row - Span row; may carry a nested `data` object.
 * @param key - Property name to look up.
 * @returns `row.data[key]` when `data` is an object that has `key` (even if the
 *   stored value is `undefined`); otherwise `row[key]`.
 */
function getField(row, key) {
	const nested = row.data;
	// Anything that is not an object containing the key falls through to the row itself.
	if (!nested || typeof nested !== "object" || !(key in nested)) return row[key];
	return nested[key];
}
|
|
24
|
+
/**
 * Read `key` from a span row as a string.
 *
 * @param row - Span row to read from.
 * @param key - Property name to look up (via `getField`).
 * @returns The value itself when it is a string, `""` when it is `null` or
 *   `undefined`, and `String(value)` for anything else.
 */
function getString(row, key) {
	const raw = getField(row, key);
	if (raw == null) return "";
	return typeof raw === "string" ? raw : String(raw);
}
|
|
29
|
+
/**
 * Read `key` from a span row as a finite number.
 *
 * @param row - Span row to read from.
 * @param key - Property name to look up (via `getField`).
 * @returns The numeric value, or `0` when the value does not convert to a
 *   finite number (NaN, ±Infinity, non-numeric text, ...).
 */
function getNumber(row, key) {
	// `Number()` is the identity on values that are already numbers.
	const parsed = Number(getField(row, key));
	if (!Number.isFinite(parsed)) return 0;
	return parsed;
}
|
|
34
|
+
/**
 * Resolve the key under which a row's prior-signature cursor is tracked.
 *
 * @param row - Span row to inspect.
 * @returns The first non-empty value of `AI_TELEMETRY_SUB_AGENT_ID`, then
 *   `AGENT_ID`; `"_default"` when both are empty.
 */
function subAgentKey(row) {
	const candidates = [SPAN_KEYS.AI_TELEMETRY_SUB_AGENT_ID, SPAN_KEYS.AGENT_ID];
	for (const attribute of candidates) {
		const id = getString(row, attribute);
		if (id) return id;
	}
	return "_default";
}
|
|
37
|
+
/**
 * Derive a cache state for every LLM-span row, visiting rows in chronological
 * order and keeping a separate prior-signature cursor per agent.
 *
 * Steps:
 * 1. Rows are copied and sorted ascending by the string form of
 *    `SPAN_KEYS.TIMESTAMP` (the input array is left untouched).
 * 2. Each row's agent key is resolved through the fallback chain
 *    `AI_TELEMETRY_SUB_AGENT_ID -> AGENT_ID -> '_default'`.
 * 3. That agent's prior signature is read from `priorSignatureByAgent`.
 * 4. `deriveCacheState` is called; when the row's `prefix_signature` is
 *    non-empty it then becomes the agent's new cursor value.
 *
 * @param rows - Span rows to analyse.
 * @returns One call record per row, in chronological order.
 */
function deriveCacheDebugCalls(rows) {
	const timestampOf = (row) => getString(row, SPAN_KEYS.TIMESTAMP);
	const chronological = [...rows].sort((left, right) => timestampOf(left).localeCompare(timestampOf(right)));
	const priorSignatureByAgent = /* @__PURE__ */ new Map();
	return chronological.map((row) => {
		// An empty signature is treated as "no signature" for state derivation.
		const rawSignature = getString(row, SPAN_KEYS.CACHE_INTENT_PREFIX_SIGNATURE);
		const prefixSignature = rawSignature === "" ? null : rawSignature;
		const cacheReadTokens = getNumber(row, SPAN_KEYS.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS);
		const markerCount = getNumber(row, SPAN_KEYS.CACHE_INTENT_MARKER_COUNT);
		const requestProvider = getString(row, SPAN_KEYS.AI_MODEL_PROVIDER);
		const responseProvider = getString(row, SPAN_KEYS.GEN_AI_RESPONSE_PROVIDER);
		const cachingProvider = resolveCachingProvider({ requestProvider, responseProvider });
		// With no provider recorded at all, the call is treated as caching-capable.
		const providerSupportsCaching = !cachingProvider || isProviderSupportedForCaching(cachingProvider);
		const agentKey = subAgentKey(row);
		const cacheState = deriveCacheState({
			markerCount,
			prefixSignature,
			cacheRead: cacheReadTokens,
			priorSignature: priorSignatureByAgent.get(agentKey) ?? null,
			providerSupportsCaching
		});
		// Advance this agent's cursor only after its state has been derived.
		if (prefixSignature) priorSignatureByAgent.set(agentKey, prefixSignature);
		return {
			spanId: getString(row, SPAN_KEYS.SPAN_ID),
			timestamp: timestampOf(row),
			operationId: getString(row, SPAN_KEYS.AI_OPERATION_ID),
			model: getString(row, SPAN_KEYS.AI_MODEL_ID),
			modelProvider: requestProvider,
			generationType: getString(row, SPAN_KEYS.AI_TELEMETRY_GENERATION_TYPE),
			subAgentId: getString(row, SPAN_KEYS.AI_TELEMETRY_SUB_AGENT_ID),
			inputTokens: getNumber(row, SPAN_KEYS.GEN_AI_USAGE_INPUT_TOKENS),
			cacheReadTokens,
			cacheCreationTokens: getNumber(row, SPAN_KEYS.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS),
			markerCount,
			prefixSignature: prefixSignature ?? "",
			cacheState
		};
	});
}
|
|
89
|
+
|
|
90
|
+
//#endregion
|
|
91
|
+
export { deriveCacheDebugCalls };
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
//#region src/utils/cache-state.d.ts
|
|
2
|
+
/** Every cache outcome that `deriveCacheState` can report for a single LLM call. */
type CacheState =
	| 'HIT'
	| 'MISS-regression'
	| 'MISS-expected'
	| 'NOT-ATTEMPTED'
	| 'NOT-SUPPORTED-BY-PROVIDER';
/**
 * Tell whether `provider` (compared case-insensitively) is one of the
 * providers treated as caching-capable.
 */
declare function isProviderSupportedForCaching(provider: string): boolean;
/**
 * Pick the provider name that the caching-support gate should be evaluated on.
 *
 * Deployments routed through the Vercel AI Gateway report
 * `ai.model.provider = 'gateway'`; that is a router, not a caching-capable
 * provider. The provider that actually produced the response (and owns the
 * cache keys) is recorded in `gen_ai.response.provider`, so it is preferred
 * whenever present (e.g. 'anthropic'). The request-side `ai.model.provider` is
 * used only when the resolved provider is absent.
 */
declare function resolveCachingProvider(providers: {
	requestProvider?: string | null;
	responseProvider?: string | null;
}): string;
/** Inputs to `deriveCacheState`. */
interface DeriveCacheStateInput {
	/** Number of cache markers on the request; `<= 0` yields 'NOT-ATTEMPTED'. */
	markerCount: number;
	/** Prefix signature of this call, or `null` when it has none. */
	prefixSignature: string | null;
	/** Cache-read token count; `> 0` yields 'HIT'. */
	cacheRead: number;
	/** Prefix signature of the preceding call; defaults to `null`. */
	priorSignature?: string | null;
	/** Whether the provider supports caching; defaults to `true`. */
	providerSupportsCaching?: boolean;
}
/** Classify one LLM call into a `CacheState`. */
declare function deriveCacheState(input: DeriveCacheStateInput): CacheState;
|
|
35
|
+
//#endregion
|
|
36
|
+
export { CacheState, DeriveCacheStateInput, deriveCacheState, isProviderSupportedForCaching, resolveCachingProvider };
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
//#region src/utils/cache-state.ts
|
|
2
|
+
/** Lower-case provider names that are treated as caching-capable. */
const CACHING_SUPPORTED_PROVIDERS = new Set([
	"anthropic",
	"openai",
	"google",
	"gemini"
]);
/**
 * Tell whether `provider` is one of the caching-capable providers.
 *
 * The name is trimmed and lower-cased before the lookup, so `" Anthropic "`
 * matches just like `"anthropic"` — consistent with `resolveCachingProvider`,
 * which also trims provider names.
 *
 * @param provider - Provider name to check.
 * @returns `true` when the normalised name is in `CACHING_SUPPORTED_PROVIDERS`;
 *   `false` otherwise, including for non-string input (instead of throwing).
 */
function isProviderSupportedForCaching(provider) {
	// A missing or non-string provider cannot be confirmed as supported.
	if (typeof provider !== "string") return false;
	return CACHING_SUPPORTED_PROVIDERS.has(provider.trim().toLowerCase());
}
|
|
11
|
+
/**
 * Pick the provider name that the caching-support gate should be evaluated on.
 *
 * Deployments routed through the Vercel AI Gateway report
 * `ai.model.provider = 'gateway'`; that is a router, not a caching-capable
 * provider. The provider that actually produced the response (and owns the
 * cache keys) is recorded in `gen_ai.response.provider`, so it is preferred
 * whenever present (e.g. 'anthropic'). The request-side `ai.model.provider` is
 * used only when the resolved provider is absent.
 *
 * Each candidate is trimmed BEFORE the fallback decision, so a blank or
 * whitespace-only `responseProvider` counts as absent and no longer masks a
 * usable `requestProvider`.
 *
 * @param requestProvider - Request-side provider (`ai.model.provider`); may be nullish.
 * @param responseProvider - Resolved provider (`gen_ai.response.provider`); may be nullish.
 * @returns The trimmed provider name, or `""` when neither candidate has content.
 */
function resolveCachingProvider({ requestProvider, responseProvider }) {
	// Non-string and nullish candidates are treated the same as an empty name.
	const normalize = (candidate) => typeof candidate === "string" ? candidate.trim() : "";
	return normalize(responseProvider) || normalize(requestProvider);
}
|
|
24
|
+
/**
 * Classify one LLM call into a cache state. Checks run in this order:
 *
 * 1. provider does not support caching -> "NOT-SUPPORTED-BY-PROVIDER"
 * 2. `markerCount <= 0`                 -> "NOT-ATTEMPTED"
 * 3. `cacheRead > 0`                    -> "HIT"
 * 4. non-empty signature equal to the non-empty prior one -> "MISS-regression"
 * 5. anything else                      -> "MISS-expected"
 *
 * @param markerCount - Number of cache markers on the request.
 * @param prefixSignature - Prefix signature of this call, or `null`.
 * @param cacheRead - Cache-read token count.
 * @param priorSignature - Prefix signature of the preceding call (default `null`).
 * @param providerSupportsCaching - Whether the provider supports caching (default `true`).
 * @returns The derived cache state string.
 */
function deriveCacheState({ markerCount, prefixSignature, cacheRead, priorSignature = null, providerSupportsCaching = true }) {
	if (!providerSupportsCaching) return "NOT-SUPPORTED-BY-PROVIDER";
	const attempted = !(markerCount <= 0);
	if (!attempted) return "NOT-ATTEMPTED";
	if (cacheRead > 0) return "HIT";
	// Same prefix as last time yet nothing was read from cache: the cache should have hit.
	const repeatsPriorPrefix = Boolean(priorSignature) && Boolean(prefixSignature) && prefixSignature === priorSignature;
	return repeatsPriorPrefix ? "MISS-regression" : "MISS-expected";
}
|
|
31
|
+
|
|
32
|
+
//#endregion
|
|
33
|
+
export { deriveCacheState, isProviderSupportedForCaching, resolveCachingProvider };
|
package/dist/utils/index.d.ts
CHANGED
|
@@ -34,9 +34,9 @@ import "./third-party-mcp-servers/index.js";
|
|
|
34
34
|
import { AssembleResult, estimateTokens } from "./token-estimator.js";
|
|
35
35
|
import { flushTraces, getTracer, setSpanWithError, unwrapError } from "./tracer-factory.js";
|
|
36
36
|
import { HashedHeaderValue, SignatureVerificationErrorCode, SignatureVerificationResult, TriggerAuthResult, hashAuthenticationHeaders, hashTriggerHeaderValue, validateTriggerHeaderValue, verifySignatureWithConfig, verifyTriggerAuth } from "./trigger-auth.js";
|
|
37
|
-
import { extractUsageTokens, gatewayCostMiddleware, normalizeModelId } from "./usage-cost-middleware.js";
|
|
37
|
+
import { computePrefixSignature, countCacheMarkers, extractUsageTokens, gatewayCostMiddleware, normalizeModelId } from "./usage-cost-middleware.js";
|
|
38
38
|
import "./usage-tracker.js";
|
|
39
39
|
import { ValidatePublicKeyResult, validatePublicKey } from "./validate-public-key.js";
|
|
40
40
|
import { _resetWaitUntilCache, getWaitUntil } from "./wait-until.js";
|
|
41
41
|
import { TRUSTED_WORK_APP_MCP_PATHS, isTrustedWorkAppMcpUrl } from "./work-app-mcp.js";
|
|
42
|
-
export { ApiKeyGenerationResult, AppCredentialResult, AssembleResult, CommonCreateErrorResponses, CommonDeleteErrorResponses, CommonGetErrorResponses, CommonUpdateErrorResponses, ComposioAuthResult, CredentialScope, ERROR_DOCS_BASE_URL, ErrorCode, ErrorCodes, ErrorResponse, GenerateInternalServiceTokenParams, GenerateServiceTokenParams, GenerationType, HashedHeaderValue, InternalServiceId, InternalServiceTokenPayload, InternalServices, JsonTransformer, JwtVerifyResult, LLMMessage, LoggerFactoryConfig, McpClient, McpClientOptions, McpOAuthFlowResult, McpSSEConfig, McpServerConfig, McpStreamableHttpConfig, McpTokenExchangeResult, MockLanguageModel, ModelFactory, OAuthConfig, ParsedSSEResponse, PinoLogger, PinoLoggerConfig, PoWError, PoWResult, ProblemDetails, SKILL_ENTRY_FILE_PATH, ServiceTokenPayload, SignJwtOptions, SignSlackLinkTokenParams, SignSlackUserTokenParams, SignatureVerificationErrorCode, SignatureVerificationResult, SlackAccessTokenPayload, SlackAccessTokenPayloadSchema, SlackLinkIntent, SlackLinkIntentSchema, SlackLinkTokenPayload, SlackLinkTokenPayloadSchema, TRUSTED_WORK_APP_MCP_PATHS, TempTokenPayload, TriggerAuthResult, ValidatePublicKeyResult, VerifyInternalServiceTokenResult, VerifyJwtOptions, VerifyServiceTokenResult, VerifySlackLinkTokenResult, VerifySlackUserTokenResult, _resetWaitUntilCache, activeMcpClients, buildConversationMetadata, buildConversationUserProperties, commonCreateErrorResponses, commonDeleteErrorResponses, commonGetErrorResponses, commonUpdateErrorResponses, computeNextRunAt, configureComposioMCPServer, convertZodToJsonSchema, convertZodToJsonSchemaWithPreview, createApiError, createMockModel, decodeJwtPayload, deleteComposioConnectedAccount, deriveKidFromPublicKey, deriveRelationId, detectAuthenticationRequired, errorResponseSchema, errorSchemaFactory, estimateTokens, exchangeMcpAuthorizationCode, extractBearerToken, extractComposioServerId, extractPreviewFields, extractPublicId, extractUsageTokens, fetchComposioServers, 
fetchSingleComposioServer, flushTraces, formatMessagesForLLM, formatMessagesForLLMContext, gatewayCostMiddleware, generateApiKey, generateAppCredential, generateId, generateInternalServiceToken, generateServiceToken, getComposioInstance, getComposioOAuthRedirectUrl, getComposioUserId, getConversationId, getConversationProperties, getConversationUserProperties, getCredentialStoreLookupKeyFromRetrievalParams, getDatabaseErrorLogContext, getInProcessFetch, getJwtSecret, getLogger, getMessageUserProperties, getMetadataFromApiKey, getPoWErrorMessage, getTracer, getWaitUntil, handleApiError, hasIssuer, hashApiKey, hashAuthenticationHeaders, hashTriggerHeaderValue, initiateMcpOAuthFlow, interpolateTemplate, isApiKeyExpired, isComposioMCPServerAuthenticated, isDevelopment, isInternalServiceToken, isPoWEnabled, isProduction, isSlackUserToken, isTest, isThirdPartyMCPServerAuthenticated, isTrustedWorkAppMcpUrl, isUniqueConstraintError, isZodSchema, loggerFactory, makeAllPropertiesRequired, maskApiKey, normalizeDataComponentSchema, normalizeDateString, normalizeModelId, parseEmbeddedJson, parseSSEResponse, parseSkillFromMarkdown, preview, problemDetailsSchema, registerAppFetch, retryWithBackoff, runWithLogContext, sanitizeAppConfig, serializeSkillToMarkdown, setSpanWithError, signJwt, signSlackLinkToken, signSlackUserToken, stripUnsupportedConstraints, throwIfUniqueConstraintError, toISODateString, unwrapError, validateApiKey, validateInternalServiceProjectAccess, validateInternalServiceTenantAccess, validateOrigin, validatePublicKey, validateTargetAgent, validateTenantId, validateTriggerHeaderValue, verifyAuthorizationHeader, verifyInternalServiceAuthHeader, verifyInternalServiceToken, verifyJwt, verifyPoW, verifyServiceToken, verifySignatureWithConfig, verifySlackLinkToken, verifySlackUserToken, verifyTempToken, verifyTriggerAuth };
|
|
42
|
+
export { ApiKeyGenerationResult, AppCredentialResult, AssembleResult, CommonCreateErrorResponses, CommonDeleteErrorResponses, CommonGetErrorResponses, CommonUpdateErrorResponses, ComposioAuthResult, CredentialScope, ERROR_DOCS_BASE_URL, ErrorCode, ErrorCodes, ErrorResponse, GenerateInternalServiceTokenParams, GenerateServiceTokenParams, GenerationType, HashedHeaderValue, InternalServiceId, InternalServiceTokenPayload, InternalServices, JsonTransformer, JwtVerifyResult, LLMMessage, LoggerFactoryConfig, McpClient, McpClientOptions, McpOAuthFlowResult, McpSSEConfig, McpServerConfig, McpStreamableHttpConfig, McpTokenExchangeResult, MockLanguageModel, ModelFactory, OAuthConfig, ParsedSSEResponse, PinoLogger, PinoLoggerConfig, PoWError, PoWResult, ProblemDetails, SKILL_ENTRY_FILE_PATH, ServiceTokenPayload, SignJwtOptions, SignSlackLinkTokenParams, SignSlackUserTokenParams, SignatureVerificationErrorCode, SignatureVerificationResult, SlackAccessTokenPayload, SlackAccessTokenPayloadSchema, SlackLinkIntent, SlackLinkIntentSchema, SlackLinkTokenPayload, SlackLinkTokenPayloadSchema, TRUSTED_WORK_APP_MCP_PATHS, TempTokenPayload, TriggerAuthResult, ValidatePublicKeyResult, VerifyInternalServiceTokenResult, VerifyJwtOptions, VerifyServiceTokenResult, VerifySlackLinkTokenResult, VerifySlackUserTokenResult, _resetWaitUntilCache, activeMcpClients, buildConversationMetadata, buildConversationUserProperties, commonCreateErrorResponses, commonDeleteErrorResponses, commonGetErrorResponses, commonUpdateErrorResponses, computeNextRunAt, computePrefixSignature, configureComposioMCPServer, convertZodToJsonSchema, convertZodToJsonSchemaWithPreview, countCacheMarkers, createApiError, createMockModel, decodeJwtPayload, deleteComposioConnectedAccount, deriveKidFromPublicKey, deriveRelationId, detectAuthenticationRequired, errorResponseSchema, errorSchemaFactory, estimateTokens, exchangeMcpAuthorizationCode, extractBearerToken, extractComposioServerId, extractPreviewFields, extractPublicId, 
extractUsageTokens, fetchComposioServers, fetchSingleComposioServer, flushTraces, formatMessagesForLLM, formatMessagesForLLMContext, gatewayCostMiddleware, generateApiKey, generateAppCredential, generateId, generateInternalServiceToken, generateServiceToken, getComposioInstance, getComposioOAuthRedirectUrl, getComposioUserId, getConversationId, getConversationProperties, getConversationUserProperties, getCredentialStoreLookupKeyFromRetrievalParams, getDatabaseErrorLogContext, getInProcessFetch, getJwtSecret, getLogger, getMessageUserProperties, getMetadataFromApiKey, getPoWErrorMessage, getTracer, getWaitUntil, handleApiError, hasIssuer, hashApiKey, hashAuthenticationHeaders, hashTriggerHeaderValue, initiateMcpOAuthFlow, interpolateTemplate, isApiKeyExpired, isComposioMCPServerAuthenticated, isDevelopment, isInternalServiceToken, isPoWEnabled, isProduction, isSlackUserToken, isTest, isThirdPartyMCPServerAuthenticated, isTrustedWorkAppMcpUrl, isUniqueConstraintError, isZodSchema, loggerFactory, makeAllPropertiesRequired, maskApiKey, normalizeDataComponentSchema, normalizeDateString, normalizeModelId, parseEmbeddedJson, parseSSEResponse, parseSkillFromMarkdown, preview, problemDetailsSchema, registerAppFetch, retryWithBackoff, runWithLogContext, sanitizeAppConfig, serializeSkillToMarkdown, setSpanWithError, signJwt, signSlackLinkToken, signSlackUserToken, stripUnsupportedConstraints, throwIfUniqueConstraintError, toISODateString, unwrapError, validateApiKey, validateInternalServiceProjectAccess, validateInternalServiceTenantAccess, validateOrigin, validatePublicKey, validateTargetAgent, validateTenantId, validateTriggerHeaderValue, verifyAuthorizationHeader, verifyInternalServiceAuthHeader, verifyInternalServiceToken, verifyJwt, verifyPoW, verifyServiceToken, verifySignatureWithConfig, verifySlackLinkToken, verifySlackUserToken, verifyTempToken, verifyTriggerAuth };
|
package/dist/utils/index.js
CHANGED
|
@@ -21,7 +21,7 @@ import { JsonTransformer } from "./JsonTransformer.js";
|
|
|
21
21
|
import { parseEmbeddedJson } from "./json-parser.js";
|
|
22
22
|
import { McpClient, activeMcpClients } from "./mcp-client.js";
|
|
23
23
|
import { MockLanguageModel, createMockModel } from "./mock-provider.js";
|
|
24
|
-
import { extractUsageTokens, gatewayCostMiddleware, normalizeModelId } from "./usage-cost-middleware.js";
|
|
24
|
+
import { computePrefixSignature, countCacheMarkers, extractUsageTokens, gatewayCostMiddleware, normalizeModelId } from "./usage-cost-middleware.js";
|
|
25
25
|
import { ModelFactory } from "./model-factory.js";
|
|
26
26
|
import { getPoWErrorMessage, isPoWEnabled, verifyPoW } from "./pow.js";
|
|
27
27
|
import { retryWithBackoff } from "./retry.js";
|
|
@@ -38,4 +38,4 @@ import { validatePublicKey } from "./validate-public-key.js";
|
|
|
38
38
|
import { _resetWaitUntilCache, getWaitUntil } from "./wait-until.js";
|
|
39
39
|
import { TRUSTED_WORK_APP_MCP_PATHS, isTrustedWorkAppMcpUrl } from "./work-app-mcp.js";
|
|
40
40
|
|
|
41
|
-
export { ERROR_DOCS_BASE_URL, ErrorCode, InternalServices, JsonTransformer, McpClient, MockLanguageModel, ModelFactory, PinoLogger, SKILL_ENTRY_FILE_PATH, SlackAccessTokenPayloadSchema, SlackLinkIntentSchema, SlackLinkTokenPayloadSchema, TRUSTED_WORK_APP_MCP_PATHS, _resetWaitUntilCache, activeMcpClients, buildConversationMetadata, buildConversationUserProperties, commonCreateErrorResponses, commonDeleteErrorResponses, commonGetErrorResponses, commonUpdateErrorResponses, computeNextRunAt, configureComposioMCPServer, convertZodToJsonSchema, convertZodToJsonSchemaWithPreview, createApiError, createMockModel, decodeJwtPayload, deleteComposioConnectedAccount, deriveKidFromPublicKey, deriveRelationId, detectAuthenticationRequired, errorResponseSchema, errorSchemaFactory, estimateTokens, exchangeMcpAuthorizationCode, extractBearerToken, extractComposioServerId, extractPreviewFields, extractPublicId, extractUsageTokens, fetchComposioServers, fetchSingleComposioServer, flushTraces, formatMessagesForLLM, formatMessagesForLLMContext, gatewayCostMiddleware, generateApiKey, generateAppCredential, generateId, generateInternalServiceToken, generateServiceToken, getComposioInstance, getComposioOAuthRedirectUrl, getComposioUserId, getConversationId, getConversationProperties, getConversationUserProperties, getCredentialStoreLookupKeyFromRetrievalParams, getDatabaseErrorLogContext, getInProcessFetch, getJwtSecret, getLogger, getMessageUserProperties, getMetadataFromApiKey, getPoWErrorMessage, getTracer, getWaitUntil, handleApiError, hasIssuer, hashApiKey, hashAuthenticationHeaders, hashTriggerHeaderValue, initiateMcpOAuthFlow, interpolateTemplate, isApiKeyExpired, isComposioMCPServerAuthenticated, isDevelopment, isInternalServiceToken, isPoWEnabled, isProduction, isSlackUserToken, isTest, isThirdPartyMCPServerAuthenticated, isTrustedWorkAppMcpUrl, isUniqueConstraintError, isZodSchema, loggerFactory, makeAllPropertiesRequired, maskApiKey, normalizeDataComponentSchema, 
normalizeDateString, normalizeModelId, parseEmbeddedJson, parseSSEResponse, parseSkillFromMarkdown, preview, problemDetailsSchema, registerAppFetch, retryWithBackoff, runWithLogContext, sanitizeAppConfig, serializeSkillToMarkdown, setSpanWithError, signJwt, signSlackLinkToken, signSlackUserToken, stripUnsupportedConstraints, throwIfUniqueConstraintError, toISODateString, unwrapError, validateApiKey, validateInternalServiceProjectAccess, validateInternalServiceTenantAccess, validateOrigin, validatePublicKey, validateTargetAgent, validateTenantId, validateTriggerHeaderValue, verifyAuthorizationHeader, verifyInternalServiceAuthHeader, verifyInternalServiceToken, verifyJwt, verifyPoW, verifyServiceToken, verifySignatureWithConfig, verifySlackLinkToken, verifySlackUserToken, verifyTempToken, verifyTriggerAuth };
|
|
41
|
+
export { ERROR_DOCS_BASE_URL, ErrorCode, InternalServices, JsonTransformer, McpClient, MockLanguageModel, ModelFactory, PinoLogger, SKILL_ENTRY_FILE_PATH, SlackAccessTokenPayloadSchema, SlackLinkIntentSchema, SlackLinkTokenPayloadSchema, TRUSTED_WORK_APP_MCP_PATHS, _resetWaitUntilCache, activeMcpClients, buildConversationMetadata, buildConversationUserProperties, commonCreateErrorResponses, commonDeleteErrorResponses, commonGetErrorResponses, commonUpdateErrorResponses, computeNextRunAt, computePrefixSignature, configureComposioMCPServer, convertZodToJsonSchema, convertZodToJsonSchemaWithPreview, countCacheMarkers, createApiError, createMockModel, decodeJwtPayload, deleteComposioConnectedAccount, deriveKidFromPublicKey, deriveRelationId, detectAuthenticationRequired, errorResponseSchema, errorSchemaFactory, estimateTokens, exchangeMcpAuthorizationCode, extractBearerToken, extractComposioServerId, extractPreviewFields, extractPublicId, extractUsageTokens, fetchComposioServers, fetchSingleComposioServer, flushTraces, formatMessagesForLLM, formatMessagesForLLMContext, gatewayCostMiddleware, generateApiKey, generateAppCredential, generateId, generateInternalServiceToken, generateServiceToken, getComposioInstance, getComposioOAuthRedirectUrl, getComposioUserId, getConversationId, getConversationProperties, getConversationUserProperties, getCredentialStoreLookupKeyFromRetrievalParams, getDatabaseErrorLogContext, getInProcessFetch, getJwtSecret, getLogger, getMessageUserProperties, getMetadataFromApiKey, getPoWErrorMessage, getTracer, getWaitUntil, handleApiError, hasIssuer, hashApiKey, hashAuthenticationHeaders, hashTriggerHeaderValue, initiateMcpOAuthFlow, interpolateTemplate, isApiKeyExpired, isComposioMCPServerAuthenticated, isDevelopment, isInternalServiceToken, isPoWEnabled, isProduction, isSlackUserToken, isTest, isThirdPartyMCPServerAuthenticated, isTrustedWorkAppMcpUrl, isUniqueConstraintError, isZodSchema, loggerFactory, makeAllPropertiesRequired, maskApiKey, 
normalizeDataComponentSchema, normalizeDateString, normalizeModelId, parseEmbeddedJson, parseSSEResponse, parseSkillFromMarkdown, preview, problemDetailsSchema, registerAppFetch, retryWithBackoff, runWithLogContext, sanitizeAppConfig, serializeSkillToMarkdown, setSpanWithError, signJwt, signSlackLinkToken, signSlackUserToken, stripUnsupportedConstraints, throwIfUniqueConstraintError, toISODateString, unwrapError, validateApiKey, validateInternalServiceProjectAccess, validateInternalServiceTenantAccess, validateOrigin, validatePublicKey, validateTargetAgent, validateTenantId, validateTriggerHeaderValue, verifyAuthorizationHeader, verifyInternalServiceAuthHeader, verifyInternalServiceToken, verifyJwt, verifyPoW, verifyServiceToken, verifySignatureWithConfig, verifySlackLinkToken, verifySlackUserToken, verifyTempToken, verifyTriggerAuth };
|
|
@@ -25,6 +25,13 @@ declare class ModelFactory {
|
|
|
25
25
|
* a provider-specific per-call option, e.g. anthropic.thinking, gateway.models.
|
|
26
26
|
*/
|
|
27
27
|
static extractStreamProviderOptions(providerOptions?: Record<string, unknown>): Record<string, JSONObject> | undefined;
|
|
28
|
+
/**
|
|
29
|
+
* Whether the given model settings will route through the Vercel AI Gateway
|
|
30
|
+
* vs. instantiating a direct provider client. Mirrors the gate used inside
|
|
31
|
+
* createModel so callers (e.g. the prompt-caching actuator) can detect mode
|
|
32
|
+
* without recreating the model instance.
|
|
33
|
+
*/
|
|
34
|
+
static shouldRouteViaGateway(modelSettings: ModelSettings): boolean;
|
|
28
35
|
/**
|
|
29
36
|
* Create a language model instance from configuration
|
|
30
37
|
* Throws error if no config provided - models must be configured at project level
|
|
@@ -94,6 +94,19 @@ var ModelFactory = class ModelFactory {
|
|
|
94
94
|
return Object.keys(result).length > 0 ? result : void 0;
|
|
95
95
|
}
|
|
96
96
|
/**
|
|
97
|
+
* Whether the given model settings will route through the Vercel AI Gateway
|
|
98
|
+
* vs. instantiating a direct provider client. Mirrors the gate used inside
|
|
99
|
+
* createModel so callers (e.g. the prompt-caching actuator) can detect mode
|
|
100
|
+
* without recreating the model instance.
|
|
101
|
+
*/
|
|
102
|
+
static shouldRouteViaGateway(modelSettings) {
|
|
103
|
+
const modelString = modelSettings.model?.trim();
|
|
104
|
+
if (!modelString) return false;
|
|
105
|
+
const { provider } = ModelFactory.parseModelString(modelString);
|
|
106
|
+
const providerConfig = ModelFactory.extractProviderConfig(modelSettings.providerOptions);
|
|
107
|
+
return !!process.env.AI_GATEWAY_API_KEY && GATEWAY_ROUTABLE_PROVIDERS_SET.has(provider) && Object.keys(providerConfig).length === 0;
|
|
108
|
+
}
|
|
109
|
+
/**
|
|
97
110
|
* Create a language model instance from configuration
|
|
98
111
|
* Throws error if no config provided - models must be configured at project level
|
|
99
112
|
*/
|
|
@@ -110,9 +123,12 @@ var ModelFactory = class ModelFactory {
|
|
|
110
123
|
hasProviderOptions: !!modelSettings.providerOptions
|
|
111
124
|
}, "Creating language model from config");
|
|
112
125
|
const providerConfig = ModelFactory.extractProviderConfig(modelSettings.providerOptions);
|
|
113
|
-
const
|
|
126
|
+
const routeViaGateway = ModelFactory.shouldRouteViaGateway({
|
|
127
|
+
model: modelSettings.model,
|
|
128
|
+
providerOptions: modelSettings.providerOptions
|
|
129
|
+
});
|
|
114
130
|
let model;
|
|
115
|
-
if (
|
|
131
|
+
if (routeViaGateway) model = gateway(!!modelSettings.allowedProviders?.length ? modelName : `${provider}/${modelName}`);
|
|
116
132
|
else if (provider !== "mock" && (provider === "azure" || Object.keys(providerConfig).length > 0)) {
|
|
117
133
|
logger.info({ config: providerConfig }, `Applying custom ${provider} provider configuration`);
|
|
118
134
|
model = ModelFactory.createProvider(provider, providerConfig).languageModel(modelName);
|
|
@@ -9,6 +9,8 @@ declare function extractUsageTokens(usage: any): {
|
|
|
9
9
|
cachedWriteTokens?: number;
|
|
10
10
|
};
|
|
11
11
|
declare function normalizeModelId(modelId: string): string;
|
|
12
|
+
/**
 * Counts the prompt-cache intent markers attached to a model call.
 *
 * Adds one when `callProviderOptions.gateway.caching` is set to anything other
 * than "off" or "disabled", plus the Anthropic `cacheControl` markers found in
 * `prompt`. The result is capped at 4.
 *
 * @param prompt Prompt messages to scan.
 * @param callProviderOptions Call-level provider options, if any.
 * @returns The marker count, between 0 and 4.
 */
declare function countCacheMarkers(prompt: readonly any[], callProviderOptions?: Record<string, any>): number;
|
|
13
|
+
/**
 * Computes a short fingerprint of the cacheable prompt prefix.
 *
 * The fingerprint is the first 10 hex characters of a SHA-256 digest over the
 * text of all system messages in `prompt` and the definition of each tool.
 *
 * @param prompt Prompt messages; only those with role "system" contribute.
 * @param tools Tool definitions included in the digest; omitted means none.
 * @returns A 10-character lowercase hex string.
 */
declare function computePrefixSignature(prompt: readonly any[], tools?: readonly any[]): string;
|
|
12
14
|
declare const gatewayCostMiddleware: LanguageModelMiddleware;
|
|
13
15
|
//#endregion
|
|
14
|
-
export { extractUsageTokens, gatewayCostMiddleware, normalizeModelId };
|
|
16
|
+
export { computePrefixSignature, countCacheMarkers, extractUsageTokens, gatewayCostMiddleware, normalizeModelId };
|
|
@@ -1,17 +1,27 @@
|
|
|
1
1
|
import { SPAN_KEYS } from "../constants/otel-attributes.js";
|
|
2
2
|
import { GATEWAY_ROUTABLE_PROVIDERS_SET } from "../constants/models.js";
|
|
3
3
|
import { getLogger } from "./logger.js";
|
|
4
|
+
import { createHash } from "node:crypto";
|
|
4
5
|
import { trace } from "@opentelemetry/api";
|
|
5
6
|
|
|
6
7
|
//#region src/utils/usage-cost-middleware.ts
|
|
7
8
|
const logger = getLogger("usage-cost-middleware");
|
|
8
9
|
/**
 * Flattens a language-model usage object into plain token counts.
 *
 * Two usage shapes are accepted:
 *  - nested: `inputTokens` / `outputTokens` are objects carrying `total`, and
 *    optionally `cacheRead` / `cacheWrite` (input) or `reasoning` (output);
 *  - flat: `inputTokens` / `outputTokens` are plain numbers.
 *
 * When the nested shape supplies no cache-read count, the flat fields
 * `inputTokenDetails.cacheReadTokens` and then `cachedInputTokens` are tried.
 *
 * @param usage Usage object from a model result; may be null or undefined.
 * @returns `inputTokens` and `outputTokens` (0 when absent) together with
 *   `reasoningTokens`, `cachedReadTokens` and `cachedWriteTokens`, each left
 *   undefined when the usage object does not report it.
 */
function extractUsageTokens(usage) {
	const rawInput = usage?.inputTokens;
	const rawOutput = usage?.outputTokens;
	// `typeof null` is "object", so null has to be ruled out explicitly;
	// otherwise reading `.total` from a null field throws a TypeError.
	const inputBreakdown = typeof rawInput === "object" && rawInput !== null ? rawInput : void 0;
	const outputBreakdown = typeof rawOutput === "object" && rawOutput !== null ? rawOutput : void 0;
	const inputTokens = inputBreakdown ? inputBreakdown.total ?? 0 : rawInput ?? 0;
	const outputTokens = outputBreakdown ? outputBreakdown.total ?? 0 : rawOutput ?? 0;
	const reasoningTokens = outputBreakdown?.reasoning;
	let cachedReadTokens = inputBreakdown?.cacheRead;
	const cachedWriteTokens = inputBreakdown?.cacheWrite;
	if (cachedReadTokens === void 0) {
		// Fall back to the flat cache-read fields; only a real number is accepted.
		const flatCacheRead = usage?.inputTokenDetails?.cacheReadTokens ?? usage?.cachedInputTokens;
		if (typeof flatCacheRead === "number") cachedReadTokens = flatCacheRead;
	}
	return {
		inputTokens,
		outputTokens,
		reasoningTokens,
		cachedReadTokens,
		cachedWriteTokens
	};
}
|
|
17
27
|
function extractGatewayCost(providerMetadata) {
|
|
@@ -43,29 +53,77 @@ function setGatewayAttributesOnSpan(providerMetadata) {
|
|
|
43
53
|
if (routing?.resolvedProvider) activeSpan.setAttribute(SPAN_KEYS.GEN_AI_REQUEST_PROVIDER, routing.resolvedProvider);
|
|
44
54
|
}
|
|
45
55
|
}
|
|
56
|
+
/**
 * Counts the prompt-cache intent markers attached to a model call.
 *
 * A marker is any of:
 *  - call-level `gateway.caching` set to a value other than "off"/"disabled";
 *  - `providerOptions.anthropic.cacheControl` on a prompt message;
 *  - `providerOptions.anthropic.cacheControl` on an individual content part
 *    of a message (markers may be attached to parts as well as to whole
 *    messages, so both levels are scanned).
 *
 * @param prompt Prompt messages to scan; null/undefined entries are ignored.
 * @param callProviderOptions Call-level provider options, if any.
 * @returns The number of markers found, capped at 4.
 */
function countCacheMarkers(prompt, callProviderOptions) {
	let count = 0;
	const caching = callProviderOptions?.gateway?.caching;
	if (caching && caching !== "off" && caching !== "disabled") count++;
	for (const msg of prompt) {
		if (msg?.providerOptions?.anthropic?.cacheControl) count++;
		// System messages carry string content; only array content has parts.
		if (!Array.isArray(msg?.content)) continue;
		for (const part of msg.content) if (part?.providerOptions?.anthropic?.cacheControl) count++;
	}
	return Math.min(count, 4);
}
|
|
63
|
+
function computePrefixSignature(prompt, tools) {
|
|
64
|
+
const systemParts = [];
|
|
65
|
+
for (const msg of prompt) {
|
|
66
|
+
if (msg.role !== "system") continue;
|
|
67
|
+
if (typeof msg.content === "string") systemParts.push(msg.content);
|
|
68
|
+
else if (Array.isArray(msg.content)) {
|
|
69
|
+
for (const part of msg.content) if (typeof part === "string") systemParts.push(part);
|
|
70
|
+
else if (part?.text) systemParts.push(part.text);
|
|
71
|
+
}
|
|
72
|
+
}
|
|
73
|
+
const toolParts = (tools ?? []).map((tool) => [
|
|
74
|
+
tool.name ?? "",
|
|
75
|
+
tool.description ?? "",
|
|
76
|
+
tool.parameters ? JSON.stringify(tool.parameters) : ""
|
|
77
|
+
]);
|
|
78
|
+
return createHash("sha256").update(JSON.stringify([systemParts, toolParts])).digest("hex").slice(0, 10);
|
|
79
|
+
}
|
|
80
|
+
/**
 * Records prompt-cache telemetry on the currently active span.
 *
 * Writes four attributes: cache-read and cache-creation input token counts
 * (0 when the usage object reports no numeric value), the number of cache
 * intent markers on the call, and the prefix signature of the prompt/tools.
 * Does nothing when there is no active span.
 *
 * @param params Call parameters; `prompt`, `providerOptions` and `tools` are read.
 * @param usage Usage object from the model result.
 */
function setCacheAttributesOnSpan(params, usage) {
	const span = trace.getActiveSpan();
	if (!span) return;
	const tokens = extractUsageTokens(usage);
	const cacheReadCount = typeof tokens.cachedReadTokens === "number" ? tokens.cachedReadTokens : 0;
	span.setAttribute(SPAN_KEYS.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS, cacheReadCount);
	const cacheWriteCount = typeof tokens.cachedWriteTokens === "number" ? tokens.cachedWriteTokens : 0;
	span.setAttribute(SPAN_KEYS.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS, cacheWriteCount);
	// A missing prompt is treated as an empty one for both derived attributes.
	const promptMessages = params.prompt ?? [];
	span.setAttribute(SPAN_KEYS.CACHE_INTENT_MARKER_COUNT, countCacheMarkers(promptMessages, params.providerOptions));
	span.setAttribute(SPAN_KEYS.CACHE_INTENT_PREFIX_SIGNATURE, computePrefixSignature(promptMessages, params.tools));
}
|
|
46
92
|
const gatewayCostMiddleware = {
|
|
47
93
|
specificationVersion: "v3",
|
|
48
94
|
overrideModelId({ model }) {
|
|
49
95
|
return normalizeModelId(model.modelId);
|
|
50
96
|
},
|
|
51
|
-
async wrapGenerate({ doGenerate }) {
|
|
97
|
+
async wrapGenerate({ doGenerate, params }) {
|
|
52
98
|
const result = await doGenerate();
|
|
53
99
|
try {
|
|
54
100
|
setGatewayAttributesOnSpan(result.providerMetadata);
|
|
55
101
|
} catch (error) {
|
|
56
102
|
logger.warn({ error }, "Failed to extract gateway cost in wrapGenerate");
|
|
57
103
|
}
|
|
104
|
+
try {
|
|
105
|
+
setCacheAttributesOnSpan(params, result.usage);
|
|
106
|
+
} catch (error) {
|
|
107
|
+
logger.warn({ error }, "Failed to set cache attributes in wrapGenerate");
|
|
108
|
+
}
|
|
58
109
|
return result;
|
|
59
110
|
},
|
|
60
|
-
async wrapStream({ doStream }) {
|
|
111
|
+
async wrapStream({ doStream, params }) {
|
|
61
112
|
const { stream, ...rest } = await doStream();
|
|
62
113
|
return {
|
|
63
114
|
stream: stream.pipeThrough(new TransformStream({ transform(chunk, controller) {
|
|
64
115
|
controller.enqueue(chunk);
|
|
65
|
-
if (chunk.type === "finish")
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
116
|
+
if (chunk.type === "finish") {
|
|
117
|
+
try {
|
|
118
|
+
setGatewayAttributesOnSpan(chunk.providerMetadata);
|
|
119
|
+
} catch (error) {
|
|
120
|
+
logger.warn({ error }, "Failed to extract gateway cost in wrapStream");
|
|
121
|
+
}
|
|
122
|
+
try {
|
|
123
|
+
setCacheAttributesOnSpan(params, chunk.usage);
|
|
124
|
+
} catch (error) {
|
|
125
|
+
logger.warn({ error }, "Failed to set cache attributes in wrapStream");
|
|
126
|
+
}
|
|
69
127
|
}
|
|
70
128
|
} })),
|
|
71
129
|
...rest
|
|
@@ -74,4 +132,4 @@ const gatewayCostMiddleware = {
|
|
|
74
132
|
};
|
|
75
133
|
|
|
76
134
|
//#endregion
|
|
77
|
-
export { extractUsageTokens, gatewayCostMiddleware, normalizeModelId };
|
|
135
|
+
export { computePrefixSignature, countCacheMarkers, extractUsageTokens, gatewayCostMiddleware, normalizeModelId };
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
import { z } from "@hono/zod-openapi";
|
|
2
|
-
import * as
|
|
2
|
+
import * as drizzle_zod15 from "drizzle-zod";
|
|
3
3
|
import { AnySQLiteTable } from "drizzle-orm/sqlite-core";
|
|
4
4
|
|
|
5
5
|
//#region src/validation/drizzle-schema-helpers.d.ts
|
|
6
|
-
declare function createSelectSchemaWithModifiers<T extends AnySQLiteTable>(table: T, overrides?: Partial<Record<keyof T['_']['columns'], (schema: z.ZodTypeAny) => z.ZodTypeAny>>):
|
|
7
|
-
declare function createInsertSchemaWithModifiers<T extends AnySQLiteTable>(table: T, overrides?: Partial<Record<keyof T['_']['columns'], (schema: z.ZodTypeAny) => z.ZodTypeAny>>):
|
|
6
|
+
/**
 * Builds the drizzle-zod "select" schema for a SQLite table.
 *
 * @param table The Drizzle SQLite table to derive the schema from.
 * @param overrides Optional map from column name to a function that takes
 *   a zod schema and returns a zod schema.
 *   NOTE(review): presumably each function is applied to that column's
 *   generated schema; the implementation is not visible here, so confirm.
 * @returns The drizzle-zod `BuildSchema<"select", ...>` for the table's columns.
 */
declare function createSelectSchemaWithModifiers<T extends AnySQLiteTable>(table: T, overrides?: Partial<Record<keyof T['_']['columns'], (schema: z.ZodTypeAny) => z.ZodTypeAny>>): drizzle_zod15.BuildSchema<"select", T["_"]["columns"], drizzle_zod15.BuildRefine<T["_"]["columns"], undefined>, undefined>;
|
|
7
|
+
/**
 * Builds the drizzle-zod "insert" schema for a SQLite table.
 *
 * @param table The Drizzle SQLite table to derive the schema from.
 * @param overrides Optional map from column name to a function that takes
 *   a zod schema and returns a zod schema.
 *   NOTE(review): presumably each function is applied to that column's
 *   generated schema; the implementation is not visible here, so confirm.
 * @returns The drizzle-zod `BuildSchema<"insert", ...>` for the table's
 *   columns, refined over the keys of the table's `$inferInsert` type.
 */
declare function createInsertSchemaWithModifiers<T extends AnySQLiteTable>(table: T, overrides?: Partial<Record<keyof T['_']['columns'], (schema: z.ZodTypeAny) => z.ZodTypeAny>>): drizzle_zod15.BuildSchema<"insert", T["_"]["columns"], drizzle_zod15.BuildRefine<Pick<T["_"]["columns"], keyof T["$inferInsert"]>, undefined>, undefined>;
|
|
8
8
|
declare const createSelectSchema: typeof createSelectSchemaWithModifiers;
|
|
9
9
|
declare const createInsertSchema: typeof createInsertSchemaWithModifiers;
|
|
10
10
|
/**
|