@x12i/ai-gateway 9.2.0 → 9.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. package/README.md +105 -13
  2. package/dist/activity-manager.d.ts +1 -0
  3. package/dist/activity-manager.js +123 -26
  4. package/dist/ai-tools-client.d.ts +20 -0
  5. package/dist/ai-tools-client.js +91 -0
  6. package/dist/gateway-config.d.ts +2 -0
  7. package/dist/gateway-config.js +2 -1
  8. package/dist/gateway-mode.d.ts +40 -0
  9. package/dist/gateway-mode.js +75 -0
  10. package/dist/gateway-utils.d.ts +28 -1
  11. package/dist/gateway-utils.js +137 -12
  12. package/dist/gateway.d.ts +3 -0
  13. package/dist/gateway.js +34 -6
  14. package/dist/index.d.ts +3 -1
  15. package/dist/index.js +2 -1
  16. package/dist/types.d.ts +21 -0
  17. package/dist-cjs/activity-manager.cjs +137 -45
  18. package/dist-cjs/activity-manager.d.ts +1 -0
  19. package/dist-cjs/ai-tools-client.cjs +91 -0
  20. package/dist-cjs/ai-tools-client.d.ts +20 -0
  21. package/dist-cjs/config/activity-tracking-config.cjs +1 -4
  22. package/dist-cjs/content-normalizer/content-normalizer.cjs +3 -8
  23. package/dist-cjs/content-normalizer/index.cjs +1 -7
  24. package/dist-cjs/content-normalizer/types.cjs +1 -2
  25. package/dist-cjs/flex-md-loader.cjs +20 -67
  26. package/dist-cjs/gateway-config.cjs +25 -63
  27. package/dist-cjs/gateway-config.d.ts +2 -0
  28. package/dist-cjs/gateway-conversion.cjs +10 -48
  29. package/dist-cjs/gateway-instructions.cjs +5 -10
  30. package/dist-cjs/gateway-log-meta.cjs +9 -14
  31. package/dist-cjs/gateway-memory.cjs +2 -6
  32. package/dist-cjs/gateway-messages.cjs +3 -6
  33. package/dist-cjs/gateway-meta.cjs +1 -4
  34. package/dist-cjs/gateway-mode.cjs +75 -0
  35. package/dist-cjs/gateway-mode.d.ts +40 -0
  36. package/dist-cjs/gateway-provider-auto-register.cjs +2 -38
  37. package/dist-cjs/gateway-provider.cjs +10 -22
  38. package/dist-cjs/gateway-rate-limiter-constants.cjs +2 -5
  39. package/dist-cjs/gateway-rate-limiter.cjs +5 -9
  40. package/dist-cjs/gateway-retry.cjs +6 -14
  41. package/dist-cjs/gateway-utils.cjs +160 -89
  42. package/dist-cjs/gateway-utils.d.ts +28 -1
  43. package/dist-cjs/gateway-validation.cjs +2 -6
  44. package/dist-cjs/gateway.cjs +91 -67
  45. package/dist-cjs/gateway.d.ts +3 -0
  46. package/dist-cjs/index.cjs +22 -98
  47. package/dist-cjs/index.d.ts +3 -1
  48. package/dist-cjs/instruction-errors.cjs +2 -7
  49. package/dist-cjs/instruction-optimizer.cjs +4 -10
  50. package/dist-cjs/instructions-parser.cjs +5 -10
  51. package/dist-cjs/logger-factory.cjs +3 -6
  52. package/dist-cjs/memory-path-resolution.cjs +8 -18
  53. package/dist-cjs/message-builder.cjs +11 -47
  54. package/dist-cjs/object-types-library-integration.cjs +3 -8
  55. package/dist-cjs/object-types-library.cjs +5 -10
  56. package/dist-cjs/output-auditor.cjs +1 -4
  57. package/dist-cjs/output-contract-normalizer.cjs +9 -14
  58. package/dist-cjs/request-report-generator.cjs +1 -4
  59. package/dist-cjs/response-analyzer/format-type-detector.cjs +1 -5
  60. package/dist-cjs/response-analyzer/index.cjs +3 -9
  61. package/dist-cjs/response-analyzer/object-type-detector.cjs +1 -5
  62. package/dist-cjs/response-analyzer/response-analyzer.cjs +6 -10
  63. package/dist-cjs/response-analyzer/types.cjs +1 -2
  64. package/dist-cjs/response-fallback-fixer.cjs +1 -4
  65. package/dist-cjs/runtime-objects.cjs +7 -13
  66. package/dist-cjs/template-parser.cjs +5 -42
  67. package/dist-cjs/template-render-merge.cjs +2 -6
  68. package/dist-cjs/troubleshooting-helper.cjs +13 -28
  69. package/dist-cjs/types.cjs +1 -2
  70. package/dist-cjs/types.d.ts +21 -0
  71. package/dist-cjs/usage-tracker.cjs +3 -7
  72. package/package.json +11 -5
package/README.md CHANGED
@@ -63,6 +63,7 @@ npm install @x12i/ai-gateway
63
63
  **📚 Documentation**: After installation, documentation is available in:
64
64
  - `node_modules/@x12i/ai-gateway/CONTENT_RESOLVER_UPSTREAM_GUIDE.md` - **Content resolver (nx-content)**: config, keys, local/git, upstream checklist
65
65
  - `node_modules/@x12i/ai-gateway/docs/IDENTITY_OBJECT_CONTRACT.md` - **Identity contract** for Activix (`sessionId` + `instance`)
66
+ - `node_modules/@x12i/ai-gateway/docs/AI_GATEWAY_INVOKE_EXECUTION_METADATA.md` - **Invoke metadata**, cost/billing (G8), output contract (G6), Activix completion fields
66
67
  - `node_modules/@x12i/ai-gateway/docs/LOGGER_INITIALIZATION.md` - **Required reading**: How to properly initialize logger
67
68
  - `node_modules/@x12i/ai-gateway/TROUBLESHOOTING.md` - Troubleshooting guide
68
69
  - `node_modules/@x12i/ai-gateway/TROUBLESHOOTING_TOOLBOX.md` - Diagnostic tools
@@ -309,7 +310,7 @@ The gateway reads **Mongo connection** settings from the environment, but **coll
309
310
  `ActivityManager` drives **`@x12i/activix` v7** with a **two-phase** API:
310
311
 
311
312
  1. **`startRecord`** — Inserts a new document with **`status: 'started'`**, **`startTime`**, **`runContext`** (same object as **`request.identity`**), root **`request`** / **`config`** snapshots, gateway metadata (e.g. **`activityType`**, **`aiRequestId`**), and the initial **`outer`** fragment (see below). Activix returns **`activityId`** (prefix **`act-`**, configured as the collection **`primaryKey`**); that id is used for all later updates — **not** `jobId`.
312
- 2. **`completeRecord`** or **`failRecord`** — Patches the **same** document by **`activityId`**. Success adds **`response`**, **`endTime`**, **`duration`**, **`cost`**, refreshed **`status`**, and sets **`outer.output`** to the completion payload. Failure adds error details (and may attach **`outer.output`** for certain failure modes such as response parsing).
313
+ 2. **`completeRecord`** or **`failRecord`** — Patches the **same** document by **`activityId`**. On success, adds **`response`**, **`endTime`**, **`duration`**, root **`cost`** / **`costUsd`** / **`costStatus`**, sets **`outer.output`** to the completion payload, merges billing into **`outer.metadata`**, and, when the call is priced or is unpriced but has recorded usage, sets Activix **`outer.cost`** (`usd`, `tokens`, `provider`, `model`, optional `details`). Failure adds error details (and may attach **`outer.output`** for certain failure modes such as response parsing).
313
314
 
314
315
  **How a document is shaped (reading `ai-actions` in Mongo)**
315
316
 
@@ -317,7 +318,7 @@ The gateway reads **Mongo connection** settings from the environment, but **coll
317
318
  - **Root-level copies** of common identity fields may appear beside **`runContext`** for convenient indexing; treat **`runContext`** as the full envelope when in doubt.
318
319
  - **`request`**: Structured snapshot only — **`raw`** / **`parsed`** instructions, context, prompt; **`messages`**; **`workingMemory`** (template/user payload). There is **no** separate legacy **`input`** field on this object; use **`workingMemory`**.
319
320
  - **`config`**: `model`, `provider`, `temperature`, `maxTokens`, **`rawConfig`** (exact router config).
320
- - **`outer`**: Activix v7 **validated I/O** at the document root. At **start**, **`outer.input`** contains **`activityType`** and the same **`request`** snapshot as root **`request`** when a body exists (`{ activityType, request }`). At **success**, **`outer.output`** matches the **`response`** object written on completion. Root **`request`** / **`response`** support querying and older tooling; **`outer`** satisfies Activix’s envelope — so the same logical request snapshot can appear both at **`request`** and under **`outer.input.request`** by design. Large provider blobs (**`response.content.fullResponse`**) and size limits are described in [Activities outer duplication & payload controls](./docs/ACTIVITIES_OUTER_DUPLICATION.md).
321
+ - **`outer`**: Activix v7 **validated I/O** at the document root. At **start**, **`outer.input`** contains **`activityType`** and the same **`request`** snapshot as root **`request`** when a body exists (`{ activityType, request }`). At **success**, **`outer.output`** matches the **`response`** object written on completion; **`outer.metadata`** mirrors routing and billing from **`response.metadata`** (`modelUsed`, `provider`, `cost`, `costUsd`, `costStatus`, optional `costBreakdown`); **`outer.cost`** holds the canonical Activix cost object when usage or price is known (see [Cost reporting](#cost-reporting-invoke-response--activix-run-analysis-g8) below). Root **`request`** / **`response`** support querying and older tooling; **`outer`** satisfies Activix’s envelope — so the same logical request snapshot can appear both at **`request`** and under **`outer.input.request`** by design. Large provider blobs (**`response.content.fullResponse`**) and size limits are described in [Activities outer duplication & payload controls](./docs/ACTIVITIES_OUTER_DUPLICATION.md).
321
322
 
322
323
  **Environment variable priority (Activix / Mongo — implemented in `@x12i/activix`, not in `activity-tracking-config.ts`):**
323
324
  - **Mongo URI**: `MONGO_LOGS_URI` if set, otherwise **`MONGO_URI`**. If neither is set, Activix cannot use the database.
@@ -368,6 +369,69 @@ The gateway only exposes official queryable clients. It exposes `activixClient`
368
369
 
369
370
  See [Runtime Objects Observability Methodology](./docs/RUNTIME_OBJECTS_OBSERVABILITY.md) for the reusable package-level contract.
370
371
 
372
+ ### Model catalog resolution and defaults (`@x12i/ai-tools`)
373
+
374
+ Before each invoke, the gateway can normalize caller `config.model` / `modelConfig` via the **ai-models** Catalox catalog (`@x12i/ai-tools`). After invoke, when the router does not price the call, the gateway may compute USD from the same catalog; an explicit router **`unpriced`** status is never overridden.
375
+
376
+ **Environment variables:**
377
+
378
+ | Variable | Purpose |
379
+ |----------|---------|
380
+ | `AI_GATEWAY_DEFAULT_MODEL` | Default model when none is provided, or when resolution fails in **`mode=prod`**. Supports `provider/model` (e.g. `openrouter/openai/gpt-5-nano`) or a bare model id. |
381
+ | `mode` / `MODE` | `prod` — unresolved models fall back to the default chain (with **Logxer `warn`**). `dev` / `debug` / omitted — unresolved models throw **`ModelResolutionError`**. |
382
+
383
+ **Default model priority** (prod fallback only): `AI_GATEWAY_DEFAULT_MODEL` → `src/defaults/model-config.json` `defaultModel` → code constant `gpt-5-nano`.
384
+
385
+ **Logxer warnings** on default substitution include structured fields: `reason` (`no_model_provided`, `model_resolution_failed`, `ai_tools_unavailable`), `defaultSource` (`env`, `model-config.json`, `code`), `originalModel`, `defaultModel`, and `mode`.
386
+
387
+ Catalox/Firebase credentials are required for catalog bootstrap (same as `@x12i/ai-tools` — see that package’s README). Disable with `aiTools: { enabled: false }` on `GatewayConfig`, or inject `aiTools.catalox` for tests.
388
+
389
+ **GatewayConfig (optional overrides):**
390
+
391
+ ```typescript
392
+ const gateway = new AIGateway({
393
+ mode: 'prod', // or 'dev' | 'debug' — overrides process.env.mode
394
+ aiTools: {
395
+ enabled: true,
396
+ resolveModels: true,
397
+ calculateCost: true,
398
+ costIncludeBreakdown: false,
399
+ cacheTtlMs: 60_000,
400
+ // catalox: injectedCataloxInstance,
401
+ },
402
+ });
403
+ ```
404
+
405
+ #### Cost reporting (invoke response + Activix, Run Analysis G8)
406
+
407
+ Billing is resolved once per successful **`invoke()`** / **`invokeChat()`** via **`resolveCostCompletionWithAiTools`** (see [`docs/AI_GATEWAY_INVOKE_EXECUTION_METADATA.md`](./docs/AI_GATEWAY_INVOKE_EXECUTION_METADATA.md)):
408
+
409
+ | Layer | Fields |
410
+ |--------|--------|
411
+ | **Router** (`@x12i/ai-providers-router`) | Preferred source: **`metadata.costStatus`** (`priced` \| `unpriced`), **`metadata.costUsd`** / **`metadata.cost`** when priced |
412
+ | **Gateway response** | Same slice on **`response.metadata`**: **`costStatus`**, **`costUsd`**, **`cost`**, optional **`costBreakdown`** (when **`aiTools.calculateCost`** and catalog pricing apply and the router did not price the call) |
413
+ | **Activix activity (on `logSuccess`)** | Root **`cost`**, **`costUsd`**, **`costStatus`**; **`outer.metadata`** mirror; **`outer.cost`** (`usd`, `tokens` with `input`/`output`/`total`, `provider`, `model`, `details.costStatus`, optional `details.costBreakdown`) |
414
+
415
+ **`costStatus` semantics:**
416
+
417
+ - **`priced`** — **`costUsd`** / **`cost`** is a finite USD amount for this call (from the router or from **`@x12i/ai-tools`** catalog **`CostCalculator`** when the router did not price).
418
+ - **`unpriced`** — Token usage was recorded but no authoritative USD price was available (explicit router **`unpriced`** is never overridden by catalog).
419
+ - Omitted — Token usage is absent or zero, so there is no billing signal.
420
+
421
+ Activix persistence requires **`enableActivityTracking: true`** (default when Mongo/env is configured); invoke metadata is always set on the gateway response, whether or not activity tracking is enabled.
422
+
423
+ **Tests before release:**
424
+
425
+ ```bash
426
+ npm run build
427
+ npm test # integration (tsx)
428
+ npm run test:ai-tools # unit: mode, defaults, cost helper
429
+ npm run test:live # LIVE: catalog + invoke (needs .env + Firebase + LLM key)
430
+ npm run test:real:comprehensive # optional: compiled real router matrix + npm test
431
+ ```
432
+
433
+ See [`.env.example`](./.env.example) for `AI_GATEWAY_DEFAULT_MODEL`, `mode`, provider keys, and Firebase/Catalox variables.
434
+
371
435
  **Recommended (auto-configured from environment variables):**
372
436
 
373
437
  ```typescript
@@ -427,7 +491,7 @@ When the gateway constructs Activix internally, each collection uses **`primaryK
427
491
  - **Config data**: Stored in **`config`** (model, provider, temperature, maxTokens, **`rawConfig`**)
428
492
  - **Response data**: Stored in **`response`** on completion (content, metadata, optional **`fullResponse`** per diagnostics)
429
493
  - **Activix I/O**: Root **`outer`** — **`outer.input`** at start, **`outer.output`** on success (and some failure paths)
430
- - **Cost**: Calculated and stored per activity on success
494
+ - **Cost / billing**: On success, root **`cost`**, **`costUsd`**, **`costStatus`**, plus **`outer.metadata`** and **`outer.cost`** (same values as **`response.metadata`** from the invoke path — router passthrough or catalog pricing via **`@x12i/ai-tools`**)
431
495
 
432
496
  **Best Practices for Type IDs:**
433
497
  - **`jobTypeId`**: Use MD5 hash of your job type string (e.g., `MD5('data-processing-job')`) for consistent job-level aggregation
@@ -1074,7 +1138,7 @@ The gateway uses **`@x12i/activix` v7** (xronox-activitix) for full lifecycle lo
1074
1138
  - Sends **`runContext`**, **`request`**, **`config`**, **`startTime`**, **`status: 'started'`**, plus Activix **`outer.input`** (wraps **`activityType`** and the same **`request`** snapshot when present — see section 2).
1075
1139
  - Returns **`activityId`** (and record payload) for phase 2.
1076
1140
  - **Phase 2 (complete / fail)**: Updates the SAME document by **`activityId`**
1077
- - Success: **`response`**, **`cost`**, **`endTime`**, **`duration`**, **`status`**, and **`outer.output`** set to the completion **`response`** payload (request/config are **not** re-sent).
1141
+ - Success: **`response`**, root **`cost`** / **`costUsd`** / **`costStatus`**, **`endTime`**, **`duration`**, **`status`**, **`outer.output`** (completion payload), **`outer.metadata`** (routing + billing mirror), and **`outer.cost`** when usage or price is known (see [Cost reporting](#cost-reporting-invoke-response--activix-run-analysis-g8)).
1078
1142
  - Failure: error payload and timing; optional **`response`** / **`outer.output`** only for specific failure kinds.
1079
1143
 
1080
1144
  4. **Structured fields vs Activix `outer` (v2.6.0+):**
@@ -1219,8 +1283,22 @@ Example shape for a completed row in **`ai-actions`** (`activityType: 'gateway-i
1219
1283
  // completeRecord: outer.output ← same object as root `response` on success
1220
1284
  outer: {
1221
1285
  input: { activityType: 'gateway-invocation', request: { /* same snapshot as root request */ } },
1222
- output: { /* success: normalized gateway response object */ },
1223
- metadata: { /* tier metadata / aiRequestId routing — see @x12i/activix */ }
1286
+ output: { /* success: gateway activity response (content, parsed, metadata, usage) */ },
1287
+ metadata: {
1288
+ modelUsed: 'openai/gpt-5-nano-2025-08-07',
1289
+ provider: 'openrouter',
1290
+ cost: 0.0000348,
1291
+ costUsd: 0.0000348,
1292
+ costStatus: 'priced'
1293
+ },
1294
+ cost: {
1295
+ usd: 0.0000348,
1296
+ unit: 'USD',
1297
+ tokens: { input: 16, output: 85, total: 101 },
1298
+ provider: 'openrouter',
1299
+ model: 'openai/gpt-5-nano-2025-08-07',
1300
+ details: { costStatus: 'priced' /* optional costBreakdown when aiTools.costIncludeBreakdown */ }
1301
+ }
1224
1302
  },
1225
1303
  // inner: optional step array for multi-step flows (see @x12i/activix docs)
1226
1304
 
@@ -1261,8 +1339,10 @@ Example shape for a completed row in **`ai-actions`** (`activityType: 'gateway-i
1261
1339
  metadata: {...}
1262
1340
  },
1263
1341
 
1264
- // Cost (from logSuccess)
1265
- cost: 0.002,
1342
+ // Billing (from logSuccess — mirrors response.metadata from invoke)
1343
+ cost: 0.0000348,
1344
+ costUsd: 0.0000348,
1345
+ costStatus: 'priced',
1266
1346
 
1267
1347
  // Metadata
1268
1348
  createdAt: Date,
@@ -1274,7 +1354,7 @@ Example shape for a completed row in **`ai-actions`** (`activityType: 'gateway-i
1274
1354
  - ✅ Each activity = separate Mongo document (**`_id`**) with stable **`activityId`** (`act-…`) for Activix APIs
1275
1355
  - ✅ **`aiRequestId`** = per-request correlation (required on invoke)
1276
1356
  - ✅ **`runContext.jobId`** / **`runContext.taskId`** = upstream identity (required on invoke since v9+)
1277
- - ✅ Request/config sent at **start**; response/timing/cost at **complete**
1357
+ - ✅ Request/config sent at **start**; response/timing/billing (`cost`, `costUsd`, `costStatus`, `outer.cost`) at **complete**
1278
1358
  - ✅ Updates target **`activityId`** from **`startRecord`**, not **`jobId`**
1279
1359
 
1280
1360
  #### Retry Tracking (@x12i/activix v7)
@@ -1410,8 +1490,16 @@ const response = await gateway.invoke({
1410
1490
  cacheTotalTokens?: number
1411
1491
  },
1412
1492
  model?: string, // Model ID used (e.g., 'gpt-4o', 'claude-sonnet-4')
1493
+ modelUsed?: string, // Resolved/served model id (when distinct from request model)
1413
1494
  provider?: string, // Provider used (e.g., 'openai', 'anthropic')
1414
- cost?: number, // Cost in USD (if available)
1495
+ costStatus?: 'priced' | 'unpriced', // Billing state (Run Analysis G8)
1496
+ costUsd?: number, // USD when costStatus === 'priced' (preferred field)
1497
+ cost?: number, // USD mirror of costUsd when priced
1498
+ costBreakdown?: { // Optional when aiTools catalog pricing runs (calculateCost + breakdown)
1499
+ promptCostUsd?: number;
1500
+ completionCostUsd?: number;
1501
+ // ...other breakdown keys from @x12i/ai-tools
1502
+ },
1415
1503
 
1416
1504
  // ============================================
1417
1505
  // Inference Output Parsing (if inferenceType provided)
@@ -1458,8 +1546,10 @@ const response = await gateway.invoke({
1458
1546
  - `metadata.jobId` - Job ID for correlation
1459
1547
  - `metadata.latencyMs` - Request duration in milliseconds
1460
1548
  - `metadata.tokens` - Token breakdown (prompt, completion, total, cache tokens)
1461
- - `metadata.cost` - Cost in USD
1462
- - `metadata.model` - Model ID used
1549
+ - `metadata.costStatus` - `priced` | `unpriced` (see [Cost reporting](#cost-reporting-invoke-response--activix-run-analysis-g8))
1550
+ - `metadata.costUsd` / `metadata.cost` - USD when priced
1551
+ - `metadata.costBreakdown` - Optional catalog breakdown when `aiTools.calculateCost` applies
1552
+ - `metadata.model` / `metadata.modelUsed` - Model id used
1463
1553
  - `metadata.provider` - Provider used
1464
1554
 
1465
1555
  #### Example: Full Response
@@ -1509,8 +1599,10 @@ const response = await gateway.invoke({
1509
1599
  completion: 50,
1510
1600
  total: 150
1511
1601
  },
1512
- model: 'gpt-5-mini',
1602
+ modelUsed: 'gpt-5-mini',
1513
1603
  provider: 'openai',
1604
+ costStatus: 'priced',
1605
+ costUsd: 0.002,
1514
1606
  cost: 0.002,
1515
1607
 
1516
1608
  // Inference output (parsed)
@@ -121,6 +121,7 @@ export declare class ActivityManager {
121
121
  logSuccess(activity: ActivityMetadata | undefined, details: {
122
122
  cost?: number;
123
123
  costStatus?: 'priced' | 'unpriced';
124
+ costBreakdown?: Record<string, unknown>;
124
125
  response: any;
125
126
  endTime: number;
126
127
  duration: number;
@@ -133,34 +133,120 @@ function logUpstreamIdentityWarnings(logger, incomingIdentity, merged) {
133
133
  }));
134
134
  }
135
135
  }
136
- /** Routing / generation facts from gateway response metadata for Activix `outer.metadata` on completion. */
137
- function pickActivixCompletionRoutingMetadata(response) {
136
+ /** Token counts for Activix `outer.cost.tokens` (maps gateway prompt/completion → input/output). */
137
+ function pickActivixUsageTokens(response) {
138
138
  if (response == null || typeof response !== 'object')
139
- return {};
140
- const meta = response.metadata;
141
- if (meta == null || typeof meta !== 'object')
142
- return {};
143
- const m = meta;
139
+ return undefined;
140
+ const r = response;
141
+ const raw = (r.usage != null && typeof r.usage === 'object' ? r.usage : undefined) ??
142
+ (r.metadata != null && typeof r.metadata === 'object'
143
+ ? r.metadata.tokens
144
+ : undefined);
145
+ if (raw == null || typeof raw !== 'object')
146
+ return undefined;
147
+ const t = raw;
148
+ const input = typeof t.prompt === 'number'
149
+ ? t.prompt
150
+ : typeof t.input === 'number'
151
+ ? t.input
152
+ : undefined;
153
+ const output = typeof t.completion === 'number'
154
+ ? t.completion
155
+ : typeof t.output === 'number'
156
+ ? t.output
157
+ : undefined;
158
+ const total = typeof t.total === 'number' ? t.total : undefined;
159
+ if (input === undefined && output === undefined && total === undefined)
160
+ return undefined;
161
+ return {
162
+ ...(input !== undefined ? { input } : {}),
163
+ ...(output !== undefined ? { output } : {}),
164
+ ...(total !== undefined ? { total } : {})
165
+ };
166
+ }
167
+ /**
168
+ * Activix v6+ `outer.cost` from gateway billing + routing metadata (Run Analysis G8).
169
+ */
170
+ function buildActivixOuterCost(routingMeta, billing, response) {
171
+ const usd = typeof billing.cost === 'number' && Number.isFinite(billing.cost)
172
+ ? billing.cost
173
+ : typeof routingMeta.costUsd === 'number' && Number.isFinite(routingMeta.costUsd)
174
+ ? routingMeta.costUsd
175
+ : typeof routingMeta.cost === 'number' && Number.isFinite(routingMeta.cost)
176
+ ? routingMeta.cost
177
+ : undefined;
178
+ const tokens = pickActivixUsageTokens(response);
179
+ const provider = typeof routingMeta.provider === 'string' ? routingMeta.provider : undefined;
180
+ const model = typeof routingMeta.modelUsed === 'string'
181
+ ? routingMeta.modelUsed
182
+ : typeof routingMeta.model === 'string'
183
+ ? routingMeta.model
184
+ : undefined;
185
+ const details = {};
186
+ if (billing.costStatus === 'priced' || billing.costStatus === 'unpriced') {
187
+ details.costStatus = billing.costStatus;
188
+ }
189
+ if (billing.costBreakdown != null && typeof billing.costBreakdown === 'object') {
190
+ details.costBreakdown = billing.costBreakdown;
191
+ }
192
+ const hasDetails = Object.keys(details).length > 0;
193
+ if (usd === undefined && !tokens && !provider && !model && !hasDetails) {
194
+ return undefined;
195
+ }
196
+ return {
197
+ ...(usd !== undefined ? { usd, unit: 'USD' } : {}),
198
+ ...(tokens ? { tokens } : {}),
199
+ ...(provider ? { provider } : {}),
200
+ ...(model ? { model } : {}),
201
+ ...(hasDetails ? { details } : {})
202
+ };
203
+ }
204
+ /** Routing / generation facts for Activix `outer.metadata` on completion (includes billing mirror). */
205
+ function pickActivixCompletionRoutingMetadata(response, billing) {
144
206
  const out = {};
145
- if (typeof m.modelUsed === 'string')
146
- out.modelUsed = m.modelUsed;
147
- if (typeof m.model === 'string')
148
- out.model = m.model;
149
- if (typeof m.provider === 'string')
150
- out.provider = m.provider;
151
- if (typeof m.maxTokensRequested === 'number')
152
- out.maxTokensRequested = m.maxTokensRequested;
153
- if (typeof m.region === 'string')
154
- out.region = m.region;
155
- if (m.effectiveModelConfig != null && typeof m.effectiveModelConfig === 'object') {
156
- out.effectiveModelConfig = m.effectiveModelConfig;
207
+ if (response != null && typeof response === 'object') {
208
+ const meta = response.metadata;
209
+ if (meta != null && typeof meta === 'object') {
210
+ const m = meta;
211
+ if (typeof m.modelUsed === 'string')
212
+ out.modelUsed = m.modelUsed;
213
+ if (typeof m.model === 'string')
214
+ out.model = m.model;
215
+ if (typeof m.provider === 'string')
216
+ out.provider = m.provider;
217
+ if (typeof m.maxTokensRequested === 'number')
218
+ out.maxTokensRequested = m.maxTokensRequested;
219
+ if (typeof m.region === 'string')
220
+ out.region = m.region;
221
+ if (m.effectiveModelConfig != null && typeof m.effectiveModelConfig === 'object') {
222
+ out.effectiveModelConfig = m.effectiveModelConfig;
223
+ }
224
+ if (typeof m.cost === 'number' && Number.isFinite(m.cost))
225
+ out.cost = m.cost;
226
+ if (typeof m.costUsd === 'number' && Number.isFinite(m.costUsd))
227
+ out.costUsd = m.costUsd;
228
+ if (m.costStatus === 'priced' || m.costStatus === 'unpriced')
229
+ out.costStatus = m.costStatus;
230
+ if (m.costBreakdown != null && typeof m.costBreakdown === 'object') {
231
+ out.costBreakdown = m.costBreakdown;
232
+ }
233
+ }
234
+ }
235
+ if (billing) {
236
+ if ((out.costStatus !== 'priced' && out.costStatus !== 'unpriced') &&
237
+ (billing.costStatus === 'priced' || billing.costStatus === 'unpriced')) {
238
+ out.costStatus = billing.costStatus;
239
+ }
240
+ if (typeof billing.cost === 'number' && Number.isFinite(billing.cost)) {
241
+ if (out.cost === undefined)
242
+ out.cost = billing.cost;
243
+ if (out.costUsd === undefined)
244
+ out.costUsd = billing.cost;
245
+ }
246
+ if (out.costBreakdown === undefined && billing.costBreakdown != null) {
247
+ out.costBreakdown = billing.costBreakdown;
248
+ }
157
249
  }
158
- if (typeof m.cost === 'number' && Number.isFinite(m.cost))
159
- out.cost = m.cost;
160
- if (typeof m.costUsd === 'number' && Number.isFinite(m.costUsd))
161
- out.costUsd = m.costUsd;
162
- if (m.costStatus === 'priced' || m.costStatus === 'unpriced')
163
- out.costStatus = m.costStatus;
164
250
  return out;
165
251
  }
166
252
  function mergeGatewayActivityIdentity(request, aiRequestId, extras) {
@@ -848,13 +934,24 @@ export class ActivityManager {
848
934
  });
849
935
  return;
850
936
  }
937
+ const billingSlice = {
938
+ cost: details.cost,
939
+ costStatus: details.costStatus,
940
+ costBreakdown: details.costBreakdown
941
+ };
942
+ const outerMetadata = pickActivixCompletionRoutingMetadata(details.response, billingSlice);
943
+ const outerCost = buildActivixOuterCost(outerMetadata, billingSlice, details.response);
851
944
  await this.activix.completeRecord(activity.activityId, {
852
945
  cost: details.cost,
946
+ ...(typeof details.cost === 'number' && Number.isFinite(details.cost)
947
+ ? { costUsd: details.cost }
948
+ : {}),
853
949
  ...(details.costStatus ? { costStatus: details.costStatus } : {}),
854
950
  response: details.response,
855
951
  outer: {
856
952
  output: details.response,
857
- metadata: pickActivixCompletionRoutingMetadata(details.response)
953
+ metadata: outerMetadata,
954
+ ...(outerCost ? { cost: outerCost } : {})
858
955
  },
859
956
  endTime: details.endTime,
860
957
  duration: details.duration
@@ -0,0 +1,20 @@
1
+ /**
2
+ * Lazy @x12i/ai-tools catalog + cost calculator bootstrap.
3
+ */
4
+ import { AiModelsCatalogClient, CostCalculator, type ModelResolutionSuccess } from '@x12i/ai-tools';
5
+ import type { Logxer } from '@x12i/logxer';
6
+ import type { ChatRequest, GatewayConfig } from './types.js';
7
+ export type AiToolsClientBundle = {
8
+ catalog: AiModelsCatalogClient;
9
+ calculator: CostCalculator;
10
+ };
11
+ /**
12
+ * Returns catalog + calculator, or null when disabled or bootstrap fails.
13
+ */
14
+ export declare function getAiToolsClient(config: GatewayConfig, logger: Logxer): Promise<AiToolsClientBundle | null>;
15
+ /** Reset singleton (tests). */
16
+ export declare function resetAiToolsClientForTests(): void;
17
+ /**
18
+ * Map catalog resolution to router config provider/model fields.
19
+ */
20
+ export declare function applyModelResolution(merged: NonNullable<ChatRequest['config']>, resolution: ModelResolutionSuccess, gatewayDefaultEngine?: string): void;
@@ -0,0 +1,91 @@
1
+ /**
2
+ * Lazy @x12i/ai-tools catalog + cost calculator bootstrap.
3
+ */
4
+ import { AiModelsCatalogClient, CostCalculator, ensureAiModelsCatalog } from '@x12i/ai-tools';
5
+ import { gatewayLogDebug, withActivityIdentity } from './gateway-log-meta.js';
6
+ let sharedClientPromise = null;
7
+ let sharedConfigKey;
8
+ let bootstrapFailedLogged = false;
9
+ function configKey(config) {
10
+ const injected = config.aiTools?.catalox ? 'injected' : 'env';
11
+ return `${injected}:${config.aiTools?.cacheTtlMs ?? ''}:${config.aiTools?.costIncludeBreakdown ?? ''}`;
12
+ }
13
+ /**
14
+ * Returns catalog + calculator, or null when disabled or bootstrap fails.
15
+ */
16
+ export async function getAiToolsClient(config, logger) {
17
+ if (config.aiTools?.enabled === false) {
18
+ return null;
19
+ }
20
+ const key = configKey(config);
21
+ if (sharedClientPromise && sharedConfigKey !== key) {
22
+ sharedClientPromise = null;
23
+ }
24
+ sharedConfigKey = key;
25
+ if (!sharedClientPromise) {
26
+ sharedClientPromise = bootstrapAiTools(config, logger);
27
+ }
28
+ return sharedClientPromise;
29
+ }
30
+ /** Reset singleton (tests). */
31
+ export function resetAiToolsClientForTests() {
32
+ sharedClientPromise = null;
33
+ sharedConfigKey = undefined;
34
+ bootstrapFailedLogged = false;
35
+ }
36
+ async function bootstrapAiTools(config, logger) {
37
+ try {
38
+ let catalox = config.aiTools?.catalox;
39
+ if (!catalox) {
40
+ const { createCataloxFromEnv } = await import('@x12i/catalox/firebase');
41
+ const bootstrapped = createCataloxFromEnv();
42
+ catalox = bootstrapped.catalox;
43
+ }
44
+ await ensureAiModelsCatalog(catalox);
45
+ const catalog = new AiModelsCatalogClient({
46
+ catalox,
47
+ cacheTtlMs: config.aiTools?.cacheTtlMs
48
+ });
49
+ const calculator = new CostCalculator(catalog, {
50
+ includeBreakdown: config.aiTools?.costIncludeBreakdown === true
51
+ });
52
+ logger.debug('ai-tools catalog client ready', {
53
+ debugKind: gatewayLogDebug.state
54
+ });
55
+ return { catalog, calculator };
56
+ }
57
+ catch (error) {
58
+ if (!bootstrapFailedLogged) {
59
+ bootstrapFailedLogged = true;
60
+ logger.warn('ai-tools catalog bootstrap failed; model resolution and catalog cost calculation disabled', withActivityIdentity(undefined, {
61
+ error: error instanceof Error ? error.message : String(error),
62
+ debugKind: gatewayLogDebug.anomaly
63
+ }));
64
+ }
65
+ return null;
66
+ }
67
+ }
68
+ /**
69
+ * Map catalog resolution to router config provider/model fields.
70
+ */
71
+ export function applyModelResolution(merged, resolution, gatewayDefaultEngine) {
72
+ if (resolution.routedViaOpenRouter) {
73
+ merged.provider = 'openrouter';
74
+ merged.model = resolution.modelId;
75
+ return;
76
+ }
77
+ const slash = resolution.modelId.indexOf('/');
78
+ if (slash > 0) {
79
+ merged.provider = resolution.record?.providerId ?? resolution.modelId.slice(0, slash);
80
+ merged.model = resolution.modelId.slice(slash + 1);
81
+ }
82
+ else {
83
+ merged.model = resolution.modelId;
84
+ if (resolution.record?.providerId) {
85
+ merged.provider = resolution.record.providerId;
86
+ }
87
+ }
88
+ if (!merged.provider && gatewayDefaultEngine) {
89
+ merged.provider = gatewayDefaultEngine;
90
+ }
91
+ }
@@ -19,6 +19,7 @@ export interface GatewayConfigContext {
19
19
  usageTracker: UsageTracker;
20
20
  messageBuilderConfig: MessageBuilderConfig;
21
21
  }
22
+ export type InitializedGatewayComponents = ReturnType<typeof initializeGatewayComponents>;
22
23
  /**
23
24
  * Loads configuration from JSON files (model config and instructionsBlocks).
24
25
  * Pass a {@link Logxer} instance so load diagnostics go through logxer (not console).
@@ -46,4 +47,5 @@ export declare function initializeGatewayComponents(config: GatewayConfig): {
46
47
  activityManager: ActivityManager;
47
48
  usageTracker: UsageTracker;
48
49
  messageBuilderConfig: MessageBuilderConfig;
50
+ defaultModelConfig: Record<string, unknown>;
49
51
  };
@@ -283,6 +283,7 @@ export function initializeGatewayComponents(config) {
283
283
  router,
284
284
  activityManager,
285
285
  usageTracker,
286
- messageBuilderConfig
286
+ messageBuilderConfig,
287
+ defaultModelConfig
287
288
  };
288
289
  }
@@ -0,0 +1,40 @@
1
+ /**
2
+ * Gateway operational mode (prod vs dev/debug) and default model resolution.
3
+ */
4
+ import type { Logxer } from '@x12i/logxer';
5
+ import type { ActivityIdentity, GatewayConfig } from './types.js';
6
/** Operational modes the gateway can run in. */
export type GatewayOperationalMode = 'prod' | 'debug' | 'dev';

/** Origin of the resolved default model: environment, model-config.json, or the code constant. */
export type GatewayDefaultModelSource = 'env' | 'model-config.json' | 'code';

/** Reason code carried by the default-model substitution warning. */
export type DefaultModelSubstitutionReason =
    | 'no_model_provided'
    | 'model_resolution_failed'
    | 'ai_tools_unavailable';

/** Model id used when neither the environment nor model-config.json supplies one. */
export declare const CODE_DEFAULT_MODEL = "gpt-5-nano";

/** Result of {@link resolveGatewayDefaultModel}. */
export type ResolvedGatewayDefault = {
    model: string;
    provider?: string;
    source: GatewayDefaultModelSource;
};

/**
 * Operational mode: `GatewayConfig.mode` overrides `process.env.mode` / `MODE`.
 * Only `prod` allows silent default-model substitution; all other values are strict.
 */
export declare function getGatewayOperationalMode(
    config?: Pick<GatewayConfig, 'mode'>
): GatewayOperationalMode;

/** True only when `mode` is `prod`. */
export declare function isProdGatewayMode(mode: GatewayOperationalMode): boolean;

/**
 * Parse `provider/model` or bare model id (OpenRouter ids may contain multiple slashes).
 */
export declare function parseModelProviderSpec(spec: string): {
    provider?: string;
    model: string;
};

/**
 * Default model priority: AI_GATEWAY_DEFAULT_MODEL → model-config.json → code constant.
 */
export declare function resolveGatewayDefaultModel(
    defaultModelConfig?: Record<string, unknown>,
    gatewayDefaultEngine?: string
): ResolvedGatewayDefault;

/**
 * Emit a warning through `logger` recording that the gateway substituted the
 * default model for a request.
 */
export declare function warnDefaultModelSubstitution(
    logger: Logxer,
    identity: Partial<ActivityIdentity> | undefined,
    details: {
        reason: DefaultModelSubstitutionReason;
        mode: GatewayOperationalMode;
        defaultSource: GatewayDefaultModelSource;
        defaultProvider?: string;
        defaultModel: string;
        originalProvider?: string;
        originalModel?: string;
    }
): void;
@@ -0,0 +1,75 @@
1
+ /**
2
+ * Gateway operational mode (prod vs dev/debug) and default model resolution.
3
+ */
4
+ import { gatewayLogDebug, withActivityIdentity } from './gateway-log-meta.js';
5
+ export const CODE_DEFAULT_MODEL = 'gpt-5-nano';
6
/**
 * Operational mode: `GatewayConfig.mode` overrides `process.env.mode` / `MODE`.
 * Only `prod` allows silent default-model substitution; all other values are strict.
 *
 * A truthy `config.mode` is returned unchanged. Otherwise the first non-blank
 * value of `process.env.mode`, then `process.env.MODE`, is trimmed and
 * lower-cased; `prod` and `dev` map to themselves and everything else
 * (including "nothing set") resolves to `debug`.
 *
 * Blank variables count as unset, so an empty `mode` does not hide a populated
 * `MODE`, and surrounding whitespace does not turn `prod` into `debug`.
 *
 * @param {{ mode?: 'prod' | 'debug' | 'dev' }} [config] Optional gateway config slice.
 * @returns {'prod' | 'debug' | 'dev'} The resolved operational mode.
 */
export function getGatewayOperationalMode(config) {
    if (config?.mode) {
        return config.mode;
    }
    // `??` alone would stop at an empty-string `mode`; normalise first, then
    // pick the first candidate that still has content.
    const raw = [process.env.mode, process.env.MODE]
        .map((value) => (value ?? '').trim().toLowerCase())
        .find((value) => value !== '') ?? '';
    if (raw === 'prod') {
        return 'prod';
    }
    if (raw === 'dev') {
        return 'dev';
    }
    return 'debug';
}
21
/**
 * Tell whether the given operational mode is production.
 *
 * @param mode Operational mode value to test.
 * @returns `true` only for the exact string `prod`.
 */
export function isProdGatewayMode(mode) {
    switch (mode) {
        case 'prod':
            return true;
        default:
            return false;
    }
}
24
/**
 * Parse `provider/model` or bare model id (OpenRouter ids may contain multiple slashes).
 *
 * Rules, applied to the trimmed spec:
 * - empty, blank or non-string input → `{ model: CODE_DEFAULT_MODEL }`;
 * - no slash → `{ model }` with no provider;
 * - leading slash(es) are ignored, so `/gpt-4o` parses like `gpt-4o` instead of
 *   producing an empty-string provider;
 * - `openrouter/...` or `open-router/...` with at least one more slash →
 *   provider `openrouter` and the model keeps the whole trimmed spec;
 * - otherwise the text before the first slash is the provider and the rest is the model.
 *
 * @param {string} spec Model spec such as `openai/gpt-4o` or `gpt-4o`.
 * @returns {{ provider?: string, model: string }} Parsed provider/model pair.
 */
export function parseModelProviderSpec(spec) {
    const trimmed = typeof spec === 'string' ? spec.trim() : '';
    if (!trimmed) {
        return { model: CODE_DEFAULT_MODEL };
    }
    const slash = trimmed.indexOf('/');
    if (slash === -1) {
        return { model: trimmed };
    }
    const first = trimmed.slice(0, slash);
    const rest = trimmed.slice(slash + 1);
    if (!first) {
        // An empty provider segment is not a provider: '' is not nullish, so it
        // would block callers' `provider ?? fallback` logic. Re-parse without it.
        return parseModelProviderSpec(rest);
    }
    if (rest.includes('/') && (first === 'openrouter' || first === 'open-router')) {
        return { provider: 'openrouter', model: trimmed };
    }
    return { provider: first, model: rest };
}
43
/**
 * Pick the gateway's default model.
 *
 * Priority: `AI_GATEWAY_DEFAULT_MODEL` environment variable (trimmed, non-empty)
 * → string `defaultModel` from the model-config.json data → `CODE_DEFAULT_MODEL`.
 *
 * Provider handling differs per source:
 * - `env`: only the provider parsed out of the spec (may be undefined);
 * - `model-config.json`: parsed provider, else a string `defaultEngine` from the
 *   config, else `gatewayDefaultEngine`;
 * - `code`: `gatewayDefaultEngine`.
 *
 * @param defaultModelConfig Optional parsed model-config.json content.
 * @param gatewayDefaultEngine Optional gateway-level default provider.
 * @returns `{ model, provider, source }` describing the chosen default.
 */
export function resolveGatewayDefaultModel(defaultModelConfig, gatewayDefaultEngine) {
    // Read a config field only when it actually holds a string.
    const stringField = (key) => {
        const value = defaultModelConfig?.[key];
        return typeof value === 'string' ? value : undefined;
    };
    const fromEnv = process.env.AI_GATEWAY_DEFAULT_MODEL?.trim();
    if (fromEnv) {
        const { model, provider } = parseModelProviderSpec(fromEnv);
        return { model, provider, source: 'env' };
    }
    const fromJson = stringField('defaultModel');
    if (!fromJson) {
        return {
            model: CODE_DEFAULT_MODEL,
            provider: gatewayDefaultEngine,
            source: 'code'
        };
    }
    const { model, provider } = parseModelProviderSpec(fromJson);
    return {
        model,
        provider: provider ?? stringField('defaultEngine') ?? gatewayDefaultEngine,
        source: 'model-config.json'
    };
}
70
/**
 * Log a warning that the gateway replaced the requested model with the default one.
 *
 * The log metadata is `details` plus `debugKind: gatewayLogDebug.anomaly`,
 * passed through `withActivityIdentity` together with `identity`.
 *
 * @param logger Logger exposing `warn`.
 * @param identity Optional activity identity forwarded to `withActivityIdentity`.
 * @param details Substitution details spread into the log metadata.
 */
export function warnDefaultModelSubstitution(logger, identity, details) {
    const payload = {
        ...details,
        debugKind: gatewayLogDebug.anomaly
    };
    logger.warn('Gateway substituted default model for request', withActivityIdentity(identity, payload));
}