@x12i/ai-gateway 9.0.8 → 9.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. package/README.md +897 -998
  2. package/dist/activity-manager.js +46 -6
  3. package/dist/config/activity-tracking-config.d.ts +2 -1
  4. package/dist/config/activity-tracking-config.js +3 -2
  5. package/dist/gateway-memory.d.ts +1 -2
  6. package/dist/gateway-memory.js +1 -15
  7. package/dist/gateway-meta.js +3 -0
  8. package/dist/gateway-utils.d.ts +15 -1
  9. package/dist/gateway-utils.js +125 -17
  10. package/dist/gateway-validation.d.ts +3 -3
  11. package/dist/gateway-validation.js +10 -1
  12. package/dist/gateway.d.ts +2 -2
  13. package/dist/gateway.js +73 -22
  14. package/dist/index.d.ts +2 -2
  15. package/dist/instruction-optimizer.js +3 -0
  16. package/dist/runtime-objects.d.ts +2 -13
  17. package/dist/troubleshooting-helper.d.ts +0 -3
  18. package/dist/troubleshooting-helper.js +99 -20
  19. package/dist/types.d.ts +39 -89
  20. package/dist-cjs/activity-manager.cjs +45 -5
  21. package/dist-cjs/config/activity-tracking-config.cjs +3 -2
  22. package/dist-cjs/config/activity-tracking-config.d.ts +2 -1
  23. package/dist-cjs/gateway-memory.cjs +1 -15
  24. package/dist-cjs/gateway-memory.d.ts +1 -2
  25. package/dist-cjs/gateway-meta.cjs +3 -0
  26. package/dist-cjs/gateway-utils.cjs +128 -17
  27. package/dist-cjs/gateway-utils.d.ts +15 -1
  28. package/dist-cjs/gateway-validation.cjs +10 -1
  29. package/dist-cjs/gateway-validation.d.ts +3 -3
  30. package/dist-cjs/gateway.cjs +72 -21
  31. package/dist-cjs/gateway.d.ts +2 -2
  32. package/dist-cjs/index.d.ts +2 -2
  33. package/dist-cjs/instruction-optimizer.cjs +3 -0
  34. package/dist-cjs/runtime-objects.d.ts +2 -13
  35. package/dist-cjs/troubleshooting-helper.cjs +99 -20
  36. package/dist-cjs/troubleshooting-helper.d.ts +0 -3
  37. package/dist-cjs/types.d.ts +39 -89
  38. package/package.json +2 -2
@@ -4,7 +4,7 @@
4
4
  * Manages activity tracking for LLM requests.
5
5
  * Wraps the ActivityTracker and provides convenience methods.
6
6
  */
7
- import { Activix, activixActivityIo, activixOuterTier } from '@x12i/activix';
7
+ import { Activix, activixActivityIo, activixOuterTier, resolveActivixLogsDatabaseName, resolveActivixMongoUriFromEnv } from '@x12i/activix';
8
8
  import { resolveActivityTrackingConfig } from './config/activity-tracking-config.js';
9
9
  import { gatewayLogDebug, withActivityIdentity } from './gateway-log-meta.js';
10
10
  function readAiRequestIdFromRequest(request) {
@@ -161,6 +161,17 @@ function mergeGatewayActivityIdentity(request, aiRequestId, extras) {
161
161
  merged.aiRequestId = aiRequestId;
162
162
  merged.jobId = upstreamJobId;
163
163
  merged.taskId = upstreamTaskId;
164
+ // gateway.invoke (AIInvokeRequest): request root is canonical for Activix runContext.
165
+ if ('actionType' in request && 'actionRef' in request) {
166
+ const inv = request;
167
+ if (inv.actionType) {
168
+ merged.actionType = inv.actionType;
169
+ }
170
+ const ref = typeof inv.actionRef === 'string' ? inv.actionRef.trim() : '';
171
+ if (ref) {
172
+ merged.actionRef = ref;
173
+ }
174
+ }
164
175
  return merged;
165
176
  }
166
177
  /**
@@ -259,12 +270,37 @@ export class ActivityManager {
259
270
  }
260
271
  }
261
272
  });
262
- this.initPromise = this.activix.init().catch((error) => {
263
- // MongoDB config not available - log warning but don't throw.
264
- // This allows tests and development to work without MongoDB.
265
- this.logger.warn('Activity tracking enabled but MongoDB configuration not available. Activity records will not be persisted.', {
273
+ this.initPromise = this.activix
274
+ .init()
275
+ .then(() => {
276
+ const ax = this.activix;
277
+ if (!ax) {
278
+ return;
279
+ }
280
+ const backend = ax.storageBackend;
281
+ const mongoDb = backend === 'database' ? resolveActivixLogsDatabaseName() : undefined;
282
+ const mongoUriConfigured = Boolean(resolveActivixMongoUriFromEnv());
283
+ this.logger.info('Activity tracking persistence backend ready', {
284
+ storageBackend: backend,
285
+ mongoDatabase: mongoDb,
286
+ mongoUriConfigured,
287
+ mainCollection: collectionName,
288
+ badRequestsCollection: badRequestsCollectionName,
289
+ skillExecutionsCollection: this.skillExecutionsCollectionName,
290
+ ...(backend === 'local'
291
+ ? {
292
+ note: 'Activix is using local playground storage, not MongoDB. The ai-actions collection will not appear in Mongo until URI is set (MONGO_URI or MONGO_LOGS_URI), Activix can ping the database, and at least one activity is written.'
293
+ }
294
+ : {
295
+ note: 'MongoDB stores one document per activity; the ai-actions collection is created on first insert (empty collections may be hidden in some tools until then).'
296
+ })
297
+ });
298
+ })
299
+ .catch((error) => {
300
+ // Init threw — disable tracker so requests are not blocked.
301
+ this.logger.warn('Activity tracking enabled but Activix init failed. Activity records will not be persisted.', {
266
302
  error: error instanceof Error ? error.message : String(error),
267
- hint: 'Set MONGO_URI and MONGO_LOGS_DB (or MONGO_DB) environment variables to enable activity tracking persistence'
303
+ hint: 'Set MONGO_URI or MONGO_LOGS_URI and a database name (MONGO_LOGS_DB, MONGO_DB, MONGO_AI_LOGS_DB, or ACTIVIX_DB_NAME). See README: Activity tracking / persistence troubleshooting.'
268
304
  });
269
305
  this.activix = undefined;
270
306
  });
@@ -343,6 +379,8 @@ export class ActivityManager {
343
379
  startTime,
344
380
  status: 'started',
345
381
  activityType: 'gateway-invocation',
382
+ ...(identity.actionType !== undefined && { actionType: identity.actionType }),
383
+ ...(identity.actionRef !== undefined && identity.actionRef !== '' && { actionRef: identity.actionRef }),
346
384
  // Activix v5+: correlation BSON field is `runContext` (same object as `request.identity`)
347
385
  runContext: identity
348
386
  // Removed root-level fields per v2.3.2:
@@ -557,6 +595,8 @@ export class ActivityManager {
557
595
  taskTypeId: request.taskTypeId,
558
596
  startTime,
559
597
  status: 'started',
598
+ ...(identity.actionType !== undefined && { actionType: identity.actionType }),
599
+ ...(identity.actionRef !== undefined && identity.actionRef !== '' && { actionRef: identity.actionRef }),
560
600
  runContext: identity,
561
601
  ...(instructionMetadata.key && { instructionKey: instructionMetadata.key }),
562
602
  ...(instructionMetadata.version && { instructionVersion: instructionMetadata.version }),
@@ -1,6 +1,7 @@
1
1
  /**
2
2
  * Centralized activity tracking configuration.
3
- * Single source of truth for package-level collection names.
3
+ * Package-level Mongo collection names are fixed literals here (no env override).
4
+ * Main gateway rows: `ai-actions`; bad requests: `bad-requests` (see constants below).
4
5
  */
5
6
  export interface ActivityTrackingConfig {
6
7
  mongoUri: string;
@@ -1,8 +1,9 @@
1
1
  /**
2
2
  * Centralized activity tracking configuration.
3
- * Single source of truth for package-level collection names.
3
+ * Package-level Mongo collection names are fixed literals here (no env override).
4
+ * Main gateway rows: `ai-actions`; bad requests: `bad-requests` (see constants below).
4
5
  */
5
- const ACTIVITY_COLLECTION_NAME = 'ai-activities';
6
+ const ACTIVITY_COLLECTION_NAME = 'ai-actions';
6
7
  const BAD_REQUESTS_COLLECTION_NAME = 'bad-requests';
7
8
  export function resolveActivityTrackingConfig() {
8
9
  // Collection names are intentionally hardcoded at package level.
@@ -10,7 +10,6 @@ type Request = ChatRequest | AIRequest;
10
10
  * Merges existing workingMemory (from request or memory component) with request metadata
11
11
  *
12
12
  * Implements tiered token resolution:
13
- * - Tier 1 (highest): templateTokens (handled in resolveTemplateParams, merged into shortTermMemory)
14
13
  * - Tier 2: workingMemory (this method) - checks existing workingMemory first
15
14
  * - Tier 3: derived from request fields or other memories (fallback)
16
15
  */
@@ -27,7 +26,7 @@ export declare function buildWorkingMemory(request: Request, existingWorkingMemo
27
26
  }): unknown;
28
27
  /**
29
28
  * Resolves template parameters with smart fallback logic
30
- * Priority: Request args (tier 1) -> Memory component (tier 2) -> Gateway config (tier 3) -> defaults
29
+ * Priority: request.workingMemory -> memoryManager resolution -> buildWorkingMemory merge
31
30
  */
32
31
  export declare function resolveTemplateParams(request: Request, config: GatewayConfig, logger: Logxer): Promise<{
33
32
  workingMemory: unknown;
@@ -14,7 +14,6 @@ function isAIRequest(request) {
14
14
  * Merges existing workingMemory (from request or memory component) with request metadata
15
15
  *
16
16
  * Implements tiered token resolution:
17
- * - Tier 1 (highest): templateTokens (handled in resolveTemplateParams, merged into shortTermMemory)
18
17
  * - Tier 2: workingMemory (this method) - checks existing workingMemory first
19
18
  * - Tier 3: derived from request fields or other memories (fallback)
20
19
  */
@@ -34,7 +33,6 @@ export function buildWorkingMemory(request, existingWorkingMemory, otherMemories
34
33
  }
35
34
  /**
36
35
  * Token Resolution with Tiered Fallback
37
- * Tier 1: templateTokens (handled in resolveTemplateParams, merged into shortTermMemory)
38
36
  * Tier 2: workingMemory (check existing workingMemory first)
39
37
  * Tier 3: derive from request fields or other memories
40
38
  */
@@ -138,7 +136,7 @@ export function buildWorkingMemory(request, existingWorkingMemory, otherMemories
138
136
  }
139
137
  /**
140
138
  * Resolves template parameters with smart fallback logic
141
- * Priority: Request args (tier 1) -> Memory component (tier 2) -> Gateway config (tier 3) -> defaults
139
+ * Priority: request.workingMemory -> memoryManager resolution -> buildWorkingMemory merge
142
140
  */
143
141
  export async function resolveTemplateParams(request, config, logger) {
144
142
  // Tier 1: Request args (highest priority)
@@ -189,19 +187,7 @@ export async function resolveTemplateParams(request, config, logger) {
189
187
  // Build proper workingMemory structure (merge with request fields if needed)
190
188
  // This implements tiered token resolution: tier 2 (workingMemory) and tier 3 (derive from request fields)
191
189
  const finalWorkingMemory = buildWorkingMemory(request, workingMemory);
192
- // Merge templateTokens (tier 1 - highest priority) into shortTermMemory AFTER memory resolution
193
- // This ensures templateTokens override everything (workingMemory and other memories)
194
- // Rendrix priority: shortTermMemory > workingMemory > experienceMemory > knowledgeMemory
195
- if (request.templateTokens && Object.keys(request.templateTokens).length > 0) {
196
- logger?.debug('Merged templateTokens into shortTermMemory (tier 1 - highest priority)', {
197
- jobId: request.identity.jobId,
198
- tokenKeys: Object.keys(request.templateTokens)
199
- });
200
- }
201
- // Note: taskConfig removed - Rendrix 3.0.0+ no longer accepts it
202
- // taskConfig is deprecated and no longer used
203
190
  return {
204
191
  workingMemory: finalWorkingMemory
205
- // taskConfig removed - Rendrix 3.0.0+ no longer uses it
206
192
  };
207
193
  }
@@ -27,8 +27,11 @@ export async function testInstructions(instructions, testInput, expectedSchema,
27
27
  const testRequest = {
28
28
  aiRequestId,
29
29
  agentId,
30
+ actionType: 'skill',
31
+ actionRef: 'gateway-meta/test-instructions',
30
32
  instructions,
31
33
  identity: runtimeIdentity,
34
+ prompt: '{{input}}',
32
35
  workingMemory: { input: testInput },
33
36
  config: {
34
37
  model,
@@ -21,7 +21,7 @@ export declare function mergeConfig(request: ChatRequest & {
21
21
  }, config: GatewayConfig, logger: Logxer): Promise<ChatRequest['config']>;
22
22
  /**
23
23
  * Maps provider/router usage objects to gateway token counts (`metadata.tokens`, Activix, trace attempts).
24
- * Handles promptTokens/inputTokens, OpenAI-style snake_case, and missing total (sum prompt+completion).
24
+ * Handles promptTokens/inputTokens, OpenAI-style snake_case, Responses-style input/output tokens, and missing total (sum prompt+completion).
25
25
  */
26
26
  export declare function normalizeRouterUsageTokens(usage: unknown): {
27
27
  prompt: number;
@@ -30,9 +30,23 @@ export declare function normalizeRouterUsageTokens(usage: unknown): {
30
30
  } | undefined;
31
31
  /**
32
32
  * Reads token usage from every stable location the router may populate (see docs/PROVIDERS_ROUTER_DIAGNOSTICS_TRACE_REQUIREMENTS.md).
33
+ * Prefers the raw/provider body (`rawResponse` / `raw`) when it carries non-zero usage before re-reading the outer envelope.
33
34
  */
34
35
  export declare function extractTokenUsageFromRouterResponse(routerResponse: unknown): {
35
36
  prompt: number;
36
37
  completion: number;
37
38
  total: number;
38
39
  };
40
+ /**
41
+ * Best-effort USD cost from router/sync AIResponse shape: metadata.costUsd / metadata.cost (preferred),
42
+ * response root costUsd / cost, metadata.attempts[] (latest attempt first), then common raw payload locations.
43
+ * Does not compute cost from tokens — adapters must populate normalized fields or raw usage.cost-style keys.
44
+ */
45
+ export declare function extractCostUsdFromRouterResponse(routerResponse: unknown): number | undefined;
46
+ /** Default JSON string length cap for Activix `content.fullResponse` when diagnostics allow storing it. */
47
+ export declare const DEFAULT_ACTIVITY_FULL_RESPONSE_MAX_CHARS = 512000;
48
+ /**
49
+ * Size-cap a provider/router payload before storing on an activity record.
50
+ * Non-serializable values become a small marker object instead of throwing.
51
+ */
52
+ export declare function capActivityFullResponsePayload(payload: unknown, maxChars?: number): unknown;
@@ -183,51 +183,159 @@ function firstFiniteNumber(...vals) {
183
183
  for (const v of vals) {
184
184
  if (typeof v === 'number' && Number.isFinite(v))
185
185
  return v;
186
+ if (typeof v === 'string' && v.trim() !== '') {
187
+ const n = Number(v);
188
+ if (Number.isFinite(n))
189
+ return n;
190
+ }
186
191
  }
187
192
  return undefined;
188
193
  }
194
+ function isNonZeroTokenCount(n) {
195
+ return !!(n.prompt || n.completion || n.total);
196
+ }
189
197
  /**
190
198
  * Maps provider/router usage objects to gateway token counts (`metadata.tokens`, Activix, trace attempts).
191
- * Handles promptTokens/inputTokens, OpenAI-style snake_case, and missing total (sum prompt+completion).
199
+ * Handles promptTokens/inputTokens, OpenAI-style snake_case, Responses-style input/output tokens, and missing total (sum prompt+completion).
192
200
  */
193
201
  export function normalizeRouterUsageTokens(usage) {
194
202
  if (usage == null || typeof usage !== 'object')
195
203
  return undefined;
196
204
  const u = usage;
197
- const prompt = firstFiniteNumber(u.promptTokens, u.inputTokens, u.prompt, u.prompt_tokens) ?? 0;
198
- const completion = firstFiniteNumber(u.completionTokens, u.outputTokens, u.completion, u.completion_tokens) ?? 0;
199
- let total = firstFiniteNumber(u.totalTokens, u.total_tokens) ?? 0;
205
+ const prompt = firstFiniteNumber(u.promptTokens, u.inputTokens, u.input_tokens, u.prompt, u.prompt_tokens) ?? 0;
206
+ const completion = firstFiniteNumber(u.completionTokens, u.outputTokens, u.output_tokens, u.completion, u.completion_tokens) ?? 0;
207
+ let total = firstFiniteNumber(u.totalTokens, u.total_tokens, u.total) ?? 0;
200
208
  if (!total && (prompt || completion))
201
209
  total = prompt + completion;
202
210
  return { prompt, completion, total };
203
211
  }
212
+ /**
213
+ * Collect usage from one router/provider envelope (single object).
214
+ * When followRaw is true, also reads `(rawResponse ?? raw).usage` on that envelope.
215
+ */
216
+ function collectUsageBucketsFromRoot(root, followRaw) {
217
+ const meta = root.metadata != null && typeof root.metadata === 'object'
218
+ ? root.metadata
219
+ : undefined;
220
+ const buckets = [root.usage];
221
+ if (meta) {
222
+ buckets.push(meta.usage);
223
+ buckets.push(meta.tokens);
224
+ const nested = meta['ai-activities-response'];
225
+ if (nested != null && typeof nested === 'object') {
226
+ buckets.push(nested.usage);
227
+ }
228
+ }
229
+ if (followRaw) {
230
+ const raw = root.rawResponse ?? root.raw;
231
+ if (raw != null && typeof raw === 'object') {
232
+ buckets.push(raw.usage);
233
+ }
234
+ }
235
+ return buckets;
236
+ }
237
+ function firstNonZeroUsageFromBuckets(buckets) {
238
+ for (const b of buckets) {
239
+ const n = normalizeRouterUsageTokens(b);
240
+ if (n && isNonZeroTokenCount(n))
241
+ return n;
242
+ }
243
+ return undefined;
244
+ }
204
245
  /**
205
246
  * Reads token usage from every stable location the router may populate (see docs/PROVIDERS_ROUTER_DIAGNOSTICS_TRACE_REQUIREMENTS.md).
247
+ * Prefers the raw/provider body (`rawResponse` / `raw`) when it carries non-zero usage before re-reading the outer envelope.
206
248
  */
207
249
  export function extractTokenUsageFromRouterResponse(routerResponse) {
250
+ const zeros = { prompt: 0, completion: 0, total: 0 };
208
251
  if (routerResponse == null || typeof routerResponse !== 'object') {
209
- return { prompt: 0, completion: 0, total: 0 };
252
+ return zeros;
253
+ }
254
+ const r = routerResponse;
255
+ const raw = r.rawResponse ?? r.raw;
256
+ const inner = raw != null && typeof raw === 'object' ? raw : undefined;
257
+ const roots = inner != null && inner !== r
258
+ ? [
259
+ { root: inner, followRaw: false },
260
+ { root: r, followRaw: true }
261
+ ]
262
+ : [{ root: r, followRaw: true }];
263
+ for (const { root, followRaw } of roots) {
264
+ const buckets = collectUsageBucketsFromRoot(root, followRaw);
265
+ const found = firstNonZeroUsageFromBuckets(buckets);
266
+ if (found)
267
+ return found;
210
268
  }
269
+ return zeros;
270
+ }
271
+ /**
272
+ * Best-effort USD cost from router/sync AIResponse shape: metadata.costUsd / metadata.cost (preferred),
273
+ * response root costUsd / cost, metadata.attempts[] (latest attempt first), then common raw payload locations.
274
+ * Does not compute cost from tokens — adapters must populate normalized fields or raw usage.cost-style keys.
275
+ */
276
+ export function extractCostUsdFromRouterResponse(routerResponse) {
277
+ if (routerResponse == null || typeof routerResponse !== 'object')
278
+ return undefined;
211
279
  const r = routerResponse;
212
280
  const meta = r.metadata != null && typeof r.metadata === 'object'
213
281
  ? r.metadata
214
282
  : undefined;
215
- const buckets = [r.usage];
216
- if (meta) {
217
- buckets.push(meta.usage);
218
- const nested = meta['ai-activities-response'];
219
- if (nested != null && typeof nested === 'object') {
220
- buckets.push(nested.usage);
283
+ const pick = (...vals) => firstFiniteNumber(...vals);
284
+ const fromMeta = pick(meta?.costUsd, meta?.cost);
285
+ if (fromMeta !== undefined)
286
+ return fromMeta;
287
+ const fromRoot = pick(r.costUsd, r.cost);
288
+ if (fromRoot !== undefined)
289
+ return fromRoot;
290
+ const attempts = meta?.attempts;
291
+ if (Array.isArray(attempts)) {
292
+ for (let i = attempts.length - 1; i >= 0; i--) {
293
+ const a = attempts[i];
294
+ if (a != null && typeof a === 'object') {
295
+ const o = a;
296
+ const c = pick(o.costUsd, o.cost);
297
+ if (c !== undefined)
298
+ return c;
299
+ }
221
300
  }
222
301
  }
223
302
  const raw = r.rawResponse ?? r.raw;
224
303
  if (raw != null && typeof raw === 'object') {
225
- buckets.push(raw.usage);
304
+ const rawObj = raw;
305
+ const usage = rawObj.usage;
306
+ if (usage != null && typeof usage === 'object') {
307
+ const u = usage;
308
+ const fromUsage = pick(u.cost, u.costUsd, u.total_cost, u.totalCost);
309
+ if (fromUsage !== undefined)
310
+ return fromUsage;
311
+ }
312
+ const fromRawTop = pick(rawObj.cost, rawObj.costUsd);
313
+ if (fromRawTop !== undefined)
314
+ return fromRawTop;
226
315
  }
227
- for (const b of buckets) {
228
- const n = normalizeRouterUsageTokens(b);
229
- if (n && (n.prompt || n.completion || n.total))
230
- return n;
316
+ return undefined;
317
+ }
318
+ /** Default JSON string length cap for Activix `content.fullResponse` when diagnostics allow storing it. */
319
+ export const DEFAULT_ACTIVITY_FULL_RESPONSE_MAX_CHARS = 512_000;
320
+ /**
321
+ * Size-cap a provider/router payload before storing on an activity record.
322
+ * Non-serializable values become a small marker object instead of throwing.
323
+ */
324
+ export function capActivityFullResponsePayload(payload, maxChars = DEFAULT_ACTIVITY_FULL_RESPONSE_MAX_CHARS) {
325
+ if (payload == null)
326
+ return payload;
327
+ let serialized;
328
+ try {
329
+ serialized = typeof payload === 'string' ? payload : JSON.stringify(payload);
231
330
  }
232
- return { prompt: 0, completion: 0, total: 0 };
331
+ catch {
332
+ return { _truncated: true, _reason: 'not_serializable' };
333
+ }
334
+ if (serialized.length <= maxChars)
335
+ return payload;
336
+ return {
337
+ _truncated: true,
338
+ _originalCharLength: serialized.length,
339
+ _preview: serialized.slice(0, maxChars)
340
+ };
233
341
  }
@@ -2,12 +2,12 @@
2
2
  * Gateway Validation Module
3
3
  * Basic validation for clean proxy implementation
4
4
  */
5
- import type { ChatRequest, AIRequest } from './types.js';
5
+ import type { ChatRequest, AIInvokeRequest } from './types.js';
6
6
  /**
7
7
  * Validates ChatRequest has required fields
8
8
  */
9
9
  export declare function validateChatRequest(request: ChatRequest): void;
10
10
  /**
11
- * Validates AIRequest has required fields
11
+ * Validates AIInvokeRequest has required fields
12
12
  */
13
- export declare function validateAIRequest(request: AIRequest): void;
13
+ export declare function validateAIRequest(request: AIInvokeRequest): void;
@@ -32,8 +32,9 @@ export function validateChatRequest(request) {
32
32
  throw err;
33
33
  }
34
34
  }
35
+ const GATEWAY_ACTION_TYPES = ['skill', 'preSkill', 'postSkill'];
35
36
  /**
36
- * Validates AIRequest has required fields
37
+ * Validates AIInvokeRequest has required fields
37
38
  */
38
39
  export function validateAIRequest(request) {
39
40
  if (!request.aiRequestId) {
@@ -43,6 +44,14 @@ export function validateAIRequest(request) {
43
44
  throw new Error('agentId is required for AI requests');
44
45
  }
45
46
  validateMandatoryRuntimeIdentity(request);
47
+ if (!request.actionType ||
48
+ !GATEWAY_ACTION_TYPES.includes(request.actionType)) {
49
+ throw new Error(`actionType is required and must be one of: ${GATEWAY_ACTION_TYPES.join(', ')}`);
50
+ }
51
+ const ref = typeof request.actionRef === 'string' ? request.actionRef.trim() : '';
52
+ if (!ref) {
53
+ throw new Error('actionRef is required and must be a non-empty string');
54
+ }
46
55
  // Reject input field - it has been removed
47
56
  if ('input' in request && request.input !== undefined) {
48
57
  const err = new Error(`The 'input' field has been removed. Use workingMemory.input instead for template rendering. Prompt templates should contain {{input}} which will be resolved from workingMemory.input.`);
package/dist/gateway.d.ts CHANGED
@@ -4,7 +4,7 @@
4
4
  * Simplified AI Gateway - Clean proxy implementation
5
5
  */
6
6
  import { LLMProviderRouter } from '@x12i/ai-providers-router';
7
- import type { GatewayConfig, ChatRequest, AIRequest, EnhancedLLMResponse } from './types.js';
7
+ import type { GatewayConfig, ChatRequest, AIInvokeRequest, EnhancedLLMResponse } from './types.js';
8
8
  import type { Logxer } from '@x12i/logxer';
9
9
  import { ActivityManager } from './activity-manager.js';
10
10
  /**
@@ -25,7 +25,7 @@ export declare class AIGateway {
25
25
  /**
26
26
  * Invoke AI request (with structured output support)
27
27
  */
28
- invoke<TContent = unknown>(request: AIRequest): Promise<EnhancedLLMResponse<TContent>>;
28
+ invoke<TContent = unknown>(request: AIInvokeRequest): Promise<EnhancedLLMResponse<TContent>>;
29
29
  /**
30
30
  * Build simple messages from request (instructions and prompt as literal template text; no registry).
31
31
  */
package/dist/gateway.js CHANGED
@@ -8,7 +8,7 @@ import { ensureGatewayRequestIdentity } from './activity-manager.js';
8
8
  import { initializeGatewayComponents } from './gateway-config.js';
9
9
  import { buildMessages } from './message-builder.js';
10
10
  import { extractJsonFromFlexMd } from './flex-md-loader.js';
11
- import { extractTokenUsageFromRouterResponse, mergeConfig } from './gateway-utils.js';
11
+ import { capActivityFullResponsePayload, DEFAULT_ACTIVITY_FULL_RESPONSE_MAX_CHARS, extractCostUsdFromRouterResponse, extractTokenUsageFromRouterResponse, mergeConfig } from './gateway-utils.js';
12
12
  import { autoRegisterProviders } from './gateway-provider-auto-register.js';
13
13
  import { setGatewayLastJobId, setGatewayRuntimeClients } from './runtime-objects.js';
14
14
  import { gatewayLogDebug, withActivityIdentity } from './gateway-log-meta.js';
@@ -16,6 +16,25 @@ import { invokeWithRetry } from './gateway-retry.js';
16
16
  /** Error message thrown by the router when no provider is registered or specified */
17
17
  const NO_PROVIDER_ERROR = 'No provider specified and no providers registered';
18
18
  const NO_PROVIDER_HINT = ' Set OPEN_ROUTER_KEY (or OPENROUTER_API_KEY) in the environment to use OpenRouter, or register a provider with the router (e.g. via autoRegisterProviders or gateway config).';
19
+ /** Warn when a successful call reports no tokens and/or explicit zero cost (often missing adapter metadata). */
20
+ function warnIfSuccessfulInvokeReportsZeroUsageOrCost(logger, identity, meta, invokeKind) {
21
+ const { tokens, costUsd, cost } = meta;
22
+ const zeroTokens = tokens.prompt === 0 && tokens.completion === 0 && tokens.total === 0;
23
+ const zeroCostUsd = typeof costUsd === 'number' && costUsd === 0;
24
+ const zeroCost = typeof cost === 'number' && cost === 0;
25
+ if (!zeroTokens && !zeroCostUsd && !zeroCost)
26
+ return;
27
+ logger.warn('Successful provider response reported zero token usage and/or zero cost; verify router adapter usage and billing metadata', withActivityIdentity(identity, {
28
+ invokeKind,
29
+ zeroTokens,
30
+ zeroCostUsd,
31
+ zeroCostField: zeroCost,
32
+ tokens,
33
+ costUsd,
34
+ cost,
35
+ debugKind: gatewayLogDebug.anomaly
36
+ }));
37
+ }
19
38
  /**
20
39
  * Simplified AI Gateway - Clean proxy implementation
21
40
  */
@@ -87,6 +106,8 @@ export class AIGateway {
87
106
  },
88
107
  mode: 'sync'
89
108
  });
109
+ const costUsdChat = extractCostUsdFromRouterResponse(response);
110
+ const metaChat = response?.metadata || {};
90
111
  // Create enhanced response
91
112
  const enhancedResponse = {
92
113
  content: response.content || '',
@@ -96,13 +117,20 @@ export class AIGateway {
96
117
  latencyMs: Date.now() - startTime,
97
118
  tokens: extractTokenUsageFromRouterResponse(response),
98
119
  taskTypeId,
99
- agentType: 'chat'
120
+ agentType: 'chat',
121
+ ...(typeof costUsdChat === 'number'
122
+ ? {
123
+ costUsd: costUsdChat,
124
+ ...(typeof metaChat.cost === 'number' ? { cost: metaChat.cost } : { cost: costUsdChat })
125
+ }
126
+ : {})
100
127
  }
101
128
  };
102
129
  // Track activity success if activity was started
103
130
  if (activity) {
104
131
  try {
105
132
  await this.activityManager.logSuccess(activity, {
133
+ ...(typeof costUsdChat === 'number' ? { cost: costUsdChat } : {}),
106
134
  response: enhancedResponse,
107
135
  endTime: Date.now(),
108
136
  duration: Date.now() - startTime
@@ -116,6 +144,11 @@ export class AIGateway {
116
144
  });
117
145
  }
118
146
  }
147
+ warnIfSuccessfulInvokeReportsZeroUsageOrCost(this.logger, request.identity, {
148
+ tokens: enhancedResponse.metadata.tokens,
149
+ costUsd: enhancedResponse.metadata.costUsd,
150
+ cost: enhancedResponse.metadata.cost
151
+ }, 'invokeChat');
119
152
  return enhancedResponse;
120
153
  }
121
154
  catch (error) {
@@ -369,17 +402,9 @@ export class AIGateway {
369
402
  a.routing.requestIds = requestIds;
370
403
  a.modelUsed =
371
404
  meta?.modelUsed || meta?.model || respAny.model || candidate.model;
372
- const costUsd = typeof meta?.costUsd === 'number'
373
- ? meta.costUsd
374
- : typeof meta?.cost === 'number'
375
- ? meta.cost
376
- : typeof respAny?.costUsd === 'number'
377
- ? respAny.costUsd
378
- : typeof respAny?.cost === 'number'
379
- ? respAny.cost
380
- : undefined;
381
- if (typeof costUsd === 'number')
382
- a.costUsd = costUsd;
405
+ const attemptCostUsd = extractCostUsdFromRouterResponse(respAny);
406
+ if (typeof attemptCostUsd === 'number')
407
+ a.costUsd = attemptCostUsd;
383
408
  if (includeRawProviderPayload) {
384
409
  // Size-capped preview only.
385
410
  const raw = respAny.rawResponse ?? respAny.raw ?? respAny;
@@ -488,7 +513,17 @@ export class AIGateway {
488
513
  }
489
514
  contentType = 'structured';
490
515
  parsingMethod = 'flex-md';
491
- const tokens = extractTokenUsageFromRouterResponse(routerResponse);
516
+ let tokens = extractTokenUsageFromRouterResponse(routerResponse);
517
+ if (!(tokens.prompt || tokens.completion || tokens.total)) {
518
+ const alt = routerResponse?.rawResponse ?? routerResponse?.raw;
519
+ if (alt != null && typeof alt === 'object' && alt !== routerResponse) {
520
+ const second = extractTokenUsageFromRouterResponse(alt);
521
+ if (second.prompt || second.completion || second.total)
522
+ tokens = second;
523
+ }
524
+ }
525
+ const resolvedCostUsd = extractCostUsdFromRouterResponse(routerResponse);
526
+ const routerMetaForCost = routerResponse?.metadata || {};
492
527
  const enhancedResponse = {
493
528
  content: content,
494
529
  parsedContent: parsedContent,
@@ -501,6 +536,14 @@ export class AIGateway {
501
536
  agentType: 'ai',
502
537
  contentType,
503
538
  parsingMethod,
539
+ ...(typeof resolvedCostUsd === 'number'
540
+ ? {
541
+ costUsd: resolvedCostUsd,
542
+ ...(typeof routerMetaForCost.cost === 'number'
543
+ ? { cost: routerMetaForCost.cost }
544
+ : { cost: resolvedCostUsd })
545
+ }
546
+ : {}),
504
547
  ...(traceEnabled
505
548
  ? (() => {
506
549
  const meta = routerResponse?.metadata || {};
@@ -512,18 +555,11 @@ export class AIGateway {
512
555
  : typeof mergedConfig?.maxTokens === 'number'
513
556
  ? mergedConfig.maxTokens
514
557
  : undefined;
515
- const costUsd = typeof meta.costUsd === 'number'
516
- ? meta.costUsd
517
- : typeof meta.cost === 'number'
518
- ? meta.cost
519
- : undefined;
520
558
  return {
521
559
  provider,
522
560
  region,
523
561
  modelUsed,
524
562
  maxTokensRequested,
525
- cost: typeof meta.cost === 'number' ? meta.cost : undefined,
526
- costUsd,
527
563
  requestIds: traceRequestIds,
528
564
  retryCount: traceRetryCount,
529
565
  fallbackCount: traceFallbackCount,
@@ -536,11 +572,20 @@ export class AIGateway {
536
572
  // Track activity success if activity was started
537
573
  if (activity) {
538
574
  try {
575
+ const diag = request.diagnostics;
576
+ const includeFullProviderBlob = diag?.includeFullProviderResponseInActivity !== false;
577
+ const maxFullChars = typeof diag?.activityFullResponseMaxChars === 'number' && diag.activityFullResponseMaxChars > 0
578
+ ? diag.activityFullResponseMaxChars
579
+ : DEFAULT_ACTIVITY_FULL_RESPONSE_MAX_CHARS;
580
+ const rawFull = routerResponse.rawResponse || routerResponse;
581
+ const fullResponseForActivity = includeFullProviderBlob
582
+ ? capActivityFullResponsePayload(rawFull, maxFullChars)
583
+ : undefined;
539
584
  // Create activity response with proper structure for ActivityTracker
540
585
  const activityResponse = {
541
586
  content: {
542
587
  rawContent: content, // Store the actual response content as rawContent
543
- fullResponse: routerResponse.rawResponse || routerResponse // Include full router response
588
+ ...(fullResponseForActivity !== undefined ? { fullResponse: fullResponseForActivity } : {})
544
589
  },
545
590
  parsed: parsedContent, // Include parsed content in activity record
546
591
  metadata: enhancedResponse.metadata,
@@ -549,6 +594,7 @@ export class AIGateway {
549
594
  usage: tokens
550
595
  };
551
596
  await this.activityManager.logSuccess(activity, {
597
+ ...(typeof resolvedCostUsd === 'number' ? { cost: resolvedCostUsd } : {}),
552
598
  response: activityResponse,
553
599
  endTime: Date.now(),
554
600
  duration: Date.now() - startTime
@@ -562,6 +608,11 @@ export class AIGateway {
562
608
  });
563
609
  }
564
610
  }
611
+ warnIfSuccessfulInvokeReportsZeroUsageOrCost(this.logger, request.identity, {
612
+ tokens: enhancedResponse.metadata.tokens,
613
+ costUsd: enhancedResponse.metadata.costUsd,
614
+ cost: enhancedResponse.metadata.cost
615
+ }, 'invoke');
565
616
  this.logger.debug('gateway: enhancedResponse', withActivityIdentity(request.identity, {
566
617
  latencyMs: enhancedResponse.metadata?.latencyMs,
567
618
  contentType: enhancedResponse.metadata?.contentType,