@x12i/ai-gateway 9.7.9 → 10.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. package/README.md +67 -12
  2. package/dist/defaults/log-diagnostics.json +0 -68
  3. package/dist/gateway-config.d.ts +1 -15
  4. package/dist/gateway-config.js +17 -134
  5. package/dist/gateway-defaults.d.ts +23 -0
  6. package/dist/gateway-defaults.js +29 -0
  7. package/dist/gateway-log-diagnostics.d.ts +0 -4
  8. package/dist/gateway-log-diagnostics.js +1 -5
  9. package/dist/gateway-log-levels.d.ts +0 -1
  10. package/dist/gateway-log-levels.js +0 -1
  11. package/dist/gateway-messages.js +0 -3
  12. package/dist/gateway-meta.js +12 -10
  13. package/dist/gateway-mode.d.ts +3 -26
  14. package/dist/gateway-mode.js +3 -48
  15. package/dist/gateway-retry.js +7 -6
  16. package/dist/gateway-utils.d.ts +1 -19
  17. package/dist/gateway-utils.js +37 -199
  18. package/dist/gateway.d.ts +0 -3
  19. package/dist/gateway.js +4 -63
  20. package/dist/index.d.ts +4 -6
  21. package/dist/index.js +4 -7
  22. package/dist/instruction-errors.d.ts +9 -1
  23. package/dist/instruction-errors.js +15 -1
  24. package/dist/instruction-optimizer.js +5 -1
  25. package/dist/message-builder.d.ts +0 -6
  26. package/dist/message-builder.js +4 -145
  27. package/dist/types.d.ts +16 -57
  28. package/dist-cjs/defaults/log-diagnostics.json +0 -68
  29. package/dist-cjs/gateway-config.cjs +17 -134
  30. package/dist-cjs/gateway-config.d.ts +1 -15
  31. package/dist-cjs/gateway-defaults.cjs +29 -0
  32. package/dist-cjs/gateway-defaults.d.ts +23 -0
  33. package/dist-cjs/gateway-log-diagnostics.cjs +1 -5
  34. package/dist-cjs/gateway-log-diagnostics.d.ts +0 -4
  35. package/dist-cjs/gateway-log-levels.cjs +0 -1
  36. package/dist-cjs/gateway-log-levels.d.ts +0 -1
  37. package/dist-cjs/gateway-messages.cjs +0 -3
  38. package/dist-cjs/gateway-meta.cjs +12 -10
  39. package/dist-cjs/gateway-mode.cjs +3 -48
  40. package/dist-cjs/gateway-mode.d.ts +3 -26
  41. package/dist-cjs/gateway-retry.cjs +7 -6
  42. package/dist-cjs/gateway-utils.cjs +37 -199
  43. package/dist-cjs/gateway-utils.d.ts +1 -19
  44. package/dist-cjs/gateway.cjs +4 -63
  45. package/dist-cjs/gateway.d.ts +0 -3
  46. package/dist-cjs/index.cjs +4 -7
  47. package/dist-cjs/index.d.ts +4 -6
  48. package/dist-cjs/instruction-errors.cjs +15 -1
  49. package/dist-cjs/instruction-errors.d.ts +9 -1
  50. package/dist-cjs/instruction-optimizer.cjs +5 -1
  51. package/dist-cjs/message-builder.cjs +4 -145
  52. package/dist-cjs/message-builder.d.ts +0 -6
  53. package/dist-cjs/types.d.ts +16 -57
  54. package/package.json +1 -2
  55. package/dist/defaults/instructions-blocks.json +0 -61
  56. package/dist/defaults/model-config.json +0 -15
  57. package/dist/gateway-instructions.d.ts +0 -30
  58. package/dist/gateway-instructions.js +0 -62
  59. package/dist/gateway-rate-limiter-constants.d.ts +0 -16
  60. package/dist/gateway-rate-limiter-constants.js +0 -16
  61. package/dist/gateway-rate-limiter.d.ts +0 -56
  62. package/dist/gateway-rate-limiter.js +0 -107
  63. package/dist/optimixer-manager.d.ts +0 -33
  64. package/dist/optimixer-manager.js +0 -142
  65. package/dist/token-estimate.d.ts +0 -12
  66. package/dist/token-estimate.js +0 -30
  67. package/dist-cjs/defaults/instructions-blocks.json +0 -61
  68. package/dist-cjs/defaults/model-config.json +0 -15
  69. package/dist-cjs/gateway-instructions.cjs +0 -62
  70. package/dist-cjs/gateway-instructions.d.ts +0 -30
  71. package/dist-cjs/gateway-rate-limiter-constants.cjs +0 -16
  72. package/dist-cjs/gateway-rate-limiter-constants.d.ts +0 -16
  73. package/dist-cjs/gateway-rate-limiter.cjs +0 -107
  74. package/dist-cjs/gateway-rate-limiter.d.ts +0 -56
  75. package/dist-cjs/optimixer-manager.cjs +0 -142
  76. package/dist-cjs/optimixer-manager.d.ts +0 -33
  77. package/dist-cjs/token-estimate.cjs +0 -30
  78. package/dist-cjs/token-estimate.d.ts +0 -12
package/README.md CHANGED
@@ -118,23 +118,76 @@ const router = new LLMProviderRouter({ defaultProvider: 'openai' });
118
118
  | `activityTracker` | — | Custom `Activix` instance (collection names must still match package constants) |
119
119
  | `enableUsageTracking` | `true` | In-process usage tier helper |
120
120
  | `aiTools` | see below | Model resolution + catalog pricing |
121
- | `mode` | `'debug'` | `'dev'` \| `'debug'` \| `'prod'` — affects strict model resolution |
121
+ | `mode` | `'debug'` | `'dev'` \| `'debug'` \| `'prod'` — ai-tools model resolution strictness (see below) |
122
122
  | `diagnostics` | — | `{ mode: 'trace' }` for rich `metadata.attempts` / `metadata.usage` |
123
- | `retry` / `rateLimit` | from `defaults/model-config.json` | Router retry and between-call spacing |
123
+ | `retry` | code defaults | Provider invoke retry; override per request (see **Runtime defaults**) |
124
+ | `temperature`, `topP`, `frequencyPenalty`, `presencePenalty` | code defaults | Gateway-wide sampling; override per request |
125
+ | `maxTokens` | — | **Required** on every invoke (see below); optional gateway-wide default |
124
126
 
125
- Defaults load from `defaults/model-config.json`, `instructions-blocks.json`, and `template-rendering.json` (copied into `dist/` on build).
127
+ Packaged defaults: only **`defaults/template-rendering.json`** (Rendrix merge at init). **No** packaged model, instructions blocks, or rate-limit JSON.
128
+
129
+ ### Runtime defaults (v10+)
130
+
131
+ Constants exported from `@x12i/ai-gateway` — **not** env vars. Downstream packages should re-export or pass through on their public invoke API.
132
+
133
+ | Constant | Default | Override priority |
134
+ |----------|---------|-------------------|
135
+ | `GATEWAY_DEFAULT_TEMPERATURE` | `0.7` | `modelConfig` > `request.config` > `GatewayConfig` > constant |
136
+ | `GATEWAY_DEFAULT_TOP_P` | `1.0` | same |
137
+ | `GATEWAY_DEFAULT_FREQUENCY_PENALTY` | `0.0` | same |
138
+ | `GATEWAY_DEFAULT_PRESENCE_PENALTY` | `0.0` | same |
139
+ | `GATEWAY_DEFAULT_RETRY` | `{ maxRetries: 3, initialDelay: 1000, maxDelay: 30000, backoffMultiplier: 2, enableJitter: true, throttlingDelay: 5000 }` | `request.config.retry` > `request.retry` > `GatewayConfig.retry` > constant |
140
+
141
+ ```typescript
142
+ import {
143
+ GATEWAY_DEFAULT_RETRY,
144
+ GATEWAY_DEFAULT_TEMPERATURE,
145
+ resolveRetryConfig
146
+ } from '@x12i/ai-gateway';
147
+ ```
148
+
149
+ **Required on every invoke:** `config.model` (or `modelConfig.model`) and `maxTokens` (`request.config`, `modelConfig`, `GatewayConfig`, or `internalSystemActions`). Missing model → `ModelRequiredError` (`code: 'MODEL_REQUIRED'`). Missing `maxTokens` → `MaxTokensRequiredError` (`code: 'MAX_TOKENS_REQUIRED'`). There is **no** packaged default model, **no** flex-md / Optimixer auto-fill, and **no** `GATEWAY_DEFAULT_MAX_TOKENS`. Use [@x12i/optimixer](https://www.npmjs.com/package/@x12i/optimixer) in the **client** that wraps this gateway if you want adaptive completion budgets.
150
+
151
+ **Rate limiting:** removed from the gateway. See [AI_PROVIDER_ROUTER_RATE_LIMITING_FEATURE_REQUEST.md](./docs/AI_PROVIDER_ROUTER_RATE_LIMITING_FEATURE_REQUEST.md) — implement in `@x12i/ai-providers-router`.
152
+
153
+ ### Template rendering (`defaults/template-rendering.json`)
154
+
155
+ Used by **@x12i/rendrix** when parsing `instructions`, `prompt`, and `context`:
156
+
157
+ 1. Loaded at gateway init from `defaults/template-rendering.json` (copied to `dist/defaults/` on build).
158
+ 2. Merged with `GatewayConfig.templateRendering`.
159
+ 3. Per-request override via `templateRenderOptions`, `smartInput`, `smartInputRenderOptions`.
160
+
161
+ Flow: `mergeGatewayAndRequestTemplateRenderOptions()` → `parseTemplate()` → Rendrix `render()`. Details: [UPSTREAM_TEMPLATE_RENDERING_AND_PARSER_V4.md](./docs/UPSTREAM_TEMPLATE_RENDERING_AND_PARSER_V4.md).
162
+
163
+ ### Downstream passthrough (ai-skills, ai-tasks, graph-engine)
164
+
165
+ Hosts wrapping the gateway should expose on **their** public API:
166
+
167
+ | Field | Required | Notes |
168
+ |-------|----------|-------|
169
+ | `model` | **Yes** | Never omit — gateway does not infer a model |
170
+ | `provider` | When not fully resolved by OpenRouter + ai-tools | |
171
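+ | `temperature`, `topP`, `frequencyPenalty`, `presencePenalty`, `maxTokens` | Optional | Document defaults from `GATEWAY_DEFAULT_*`; `maxTokens` has no gateway default constant, so the host must supply a value whenever the caller omits it |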
172
+ | `retry` | Optional | Same shape as `RetryConfig`; defaults from `GATEWAY_DEFAULT_RETRY` |
173
+ | `mode` | Optional | `'dev'` \| `'debug'` \| `'prod'` — pass through to `GatewayConfig.mode` |
174
+ | `templateRenderOptions` / `smartInput` | Optional | Rendrix overrides |
175
+
176
+ Instructions must be **complete caller text** — the gateway no longer injects packaged instruction blocks.
177
+
178
+ ### Activix response size cap
179
+
180
+ `DEFAULT_ACTIVITY_FULL_RESPONSE_MAX_CHARS` (`512_000`) caps JSON stored in Activix `content.fullResponse` when diagnostics allow it. Override with `diagnostics.activityFullResponseMaxChars` on the invoke request.
126
181
 
127
182
  ### Environment (selected)
128
183
 
129
184
  | Variable | Role |
130
185
  |----------|------|
131
186
  | `MONGO_URI`, `MONGO_LOGS_DB` / `MONGO_DB` | Activix when no custom tracker |
132
- | `AI_GATEWAY_DEFAULT_MODEL` | Default model slug (`provider/model` or OpenRouter id) |
133
- | `mode` / `MODE` | Operational mode (`dev`, `debug`, `prod`) |
187
+ | `mode` / `MODE` | Operational mode (`dev`, `debug`, `prod`); downstream hosts should expose it to their clients |
134
188
  | `AI_GATEWAY_LOGS_LEVEL` | Log threshold for gateway diagnostics (`AI_GATEWAY` prefix): `error` … `verbose` |
135
189
  | `AI_GATEWAY_VERBOSE` | Full payload lines (still requires `AI_GATEWAY_LOGS_LEVEL=verbose`) |
136
190
  | `LOGXER_PACKAGE_LEVELS` | Bulk stack levels, e.g. `AI_GATEWAY:info,AI_PROVIDER_ROUTER:debug` |
137
- | `FLEX_MD_MIN_COMPLIANCE_LEVEL` | `L0`–`L3` output-format validation (default `L0`) |
138
191
  | `OPENROUTER_API_KEY` | OpenRouter key; always wired when set (required for profile/OpenRouter routes) |
139
192
  | `USE_OPENROUTER` | Optional; default **prefer** OpenRouter when key is set. `false` = use direct provider keys when present; OpenRouter still used as fallback when a provider has no key |
140
193
  | Other provider keys | `OPENAI_API_KEY`, `GROK_API_KEY`, etc. |
@@ -217,11 +270,13 @@ Adds **`metadata.attempts`**, **`metadata.usage`**, **`metadata.requestIds`**, a
217
270
 
218
271
  | Mode | Model resolution | Notes |
219
272
  |------|------------------|-------|
220
- | `dev` | Strict — unknown models fail at `mergeConfig` | Best for CI / local |
221
- | `debug` | Lenient defaults | Default when env unset |
222
- | `prod` | Falls back to configured default model when resolution fails | See `src/gateway-mode.ts` |
273
+ | `dev` | Strict — unknown profile/model fails at `mergeConfig` when `aiTools.resolveModels` is on | Best for CI / local |
274
+ | `debug` | Same strict resolution | Default when env unset |
275
+ | `prod` | Same strict resolution | **No** implicit default model — callers must pass `model` |
276
+
277
+ Set via constructor `mode` or env `mode` / `MODE`. **Downstream hosts should document and expose `mode`** so graph/skill callers know resolution behavior.
223
278
 
224
- Set via constructor `mode` or env `mode` / `MODE`.
279
+ Every mode requires an explicit **`model`** on the request. Unresolved catalog profiles throw (e.g. `ModelProfileUnroutableError` in dev when profile has no routable target).
225
280
 
226
281
  ---
227
282
 
@@ -252,8 +307,9 @@ Live tests use `LIVE_TEST_PROVIDER` / `LIVE_TEST_MODEL` (default `openrouter` +
252
307
  | [OPENROUTER_ENV.md](./docs/OPENROUTER_ENV.md) | `OPENROUTER_API_KEY` and `USE_OPENROUTER` semantics |
253
308
  | [UPSTREAM_PROFILE_RESOLUTION_AND_OPENROUTER_FALLBACK.md](./docs/UPSTREAM_PROFILE_RESOLUTION_AND_OPENROUTER_FALLBACK.md) | Profile routing and OpenRouter fallback checklist |
254
309
  | [upstream-reports/README.md](./docs/upstream-reports/README.md) | Upstream issues (one file per package/gap) |
255
- | [UPSTREAM_TEMPLATE_RENDERING_AND_PARSER_V4.md](./docs/UPSTREAM_TEMPLATE_RENDERING_AND_PARSER_V4.md) | Parser v4 |
310
+ | [AI_PROVIDER_ROUTER_RATE_LIMITING_FEATURE_REQUEST.md](./docs/AI_PROVIDER_ROUTER_RATE_LIMITING_FEATURE_REQUEST.md) | Router rate-limit FR (gateway no longer sleeps between calls) |
256
311
  | [RUNTIME_OBJECTS_OBSERVABILITY.md](./docs/RUNTIME_OBJECTS_OBSERVABILITY.md) | Runtime object keys |
312
+ | [UPSTREAM_TEMPLATE_RENDERING_AND_PARSER_V4.md](./docs/UPSTREAM_TEMPLATE_RENDERING_AND_PARSER_V4.md) | Parser v4 + `template-rendering.json` |
257
313
  | [GRAPH_EXECUTION_SUPPORT.md](./docs/GRAPH_EXECUTION_SUPPORT.md) | Graph / node identity |
258
314
  | [DUAL_PACKAGE_SETUP_GUIDE.md](./docs/DUAL_PACKAGE_SETUP_GUIDE.md) | ESM + CJS publish layout |
259
315
 
@@ -273,7 +329,6 @@ export AI_PROVIDER_ROUTER_LOGS_LEVEL=debug
273
329
  # Optional full I/O payloads (requires _LOGS_LEVEL=verbose on the relevant package):
274
330
  export AI_GATEWAY_VERBOSE=true
275
331
  export AI_PROVIDER_ROUTER_VERBOSE=true
276
- export FLEX_MD_MIN_COMPLIANCE_LEVEL=L0
277
332
  ```
278
333
 
279
334
  ---
@@ -70,23 +70,6 @@
70
70
  "userActionRequired": false,
71
71
  "confidence": "medium"
72
72
  },
73
- "GATEWAY_DEFAULT_MODEL_SUBSTITUTED": {
74
- "defaultLevel": "warn",
75
- "title": "Gateway substituted the configured default model",
76
- "impact": "The request may run on a different provider/model than the caller specified.",
77
- "possibleCauses": [
78
- "Request omitted model while gateway defaults apply.",
79
- "Operational mode requires a packaged default engine.",
80
- "Profile resolution fell back to gateway defaults."
81
- ],
82
- "remediation": [
83
- "Pass an explicit model on the request when substitution is undesired.",
84
- "Review default model configuration and AI_TOOLS routing."
85
- ],
86
- "retryable": false,
87
- "userActionRequired": false,
88
- "confidence": "high"
89
- },
90
73
  "GATEWAY_RETRY_MAX_EXCEEDED": {
91
74
  "defaultLevel": "warn",
92
75
  "title": "Provider invoke retries exhausted",
@@ -121,56 +104,5 @@
121
104
  "retryable": true,
122
105
  "userActionRequired": false,
123
106
  "confidence": "medium"
124
- },
125
- "GATEWAY_OPTIMIXER_ACTIVIX_UNAVAILABLE": {
126
- "defaultLevel": "warn",
127
- "title": "Optimixer enabled but Activix is unavailable",
128
- "impact": "Adaptive max_tokens prediction is disabled for this gateway instance.",
129
- "possibleCauses": [
130
- "Activity tracking is disabled or Activix failed to initialize.",
131
- "Mongo connection or collection configuration is missing."
132
- ],
133
- "remediation": [
134
- "Enable activity tracking with a working Activix connection.",
135
- "Verify activixCollection configuration."
136
- ],
137
- "retryable": false,
138
- "userActionRequired": true,
139
- "confidence": "high"
140
- },
141
- "GATEWAY_OPTIMIXER_INIT_FAILED": {
142
- "defaultLevel": "warn",
143
- "title": "Optimixer initialization failed",
144
- "impact": "Adaptive max_tokens prediction is disabled.",
145
- "possibleCauses": [
146
- "Activix schema or collection mismatch.",
147
- "Optimixer configuration error.",
148
- "Dependency or network failure during create()."
149
- ],
150
- "remediation": [
151
- "Check Activix connectivity and collection names.",
152
- "Review optimixer gateway config."
153
- ],
154
- "retryable": false,
155
- "userActionRequired": true,
156
- "confidence": "high"
157
- },
158
- "GATEWAY_OPTIMIXER_PREDICT_FAILED": {
159
- "defaultLevel": "warn",
160
- "title": "Optimixer predictAiMaxTokens failed",
161
- "impact": "Caller should use fallback max_tokens for the invoke.",
162
- "possibleCauses": [
163
- "Insufficient historical samples for the template.",
164
- "Token estimation or profile resolution failed.",
165
- "Optimixer internal error."
166
- ],
167
- "remediation": [
168
- "Set explicit max_tokens on the request.",
169
- "Verify templateId and model profile fields.",
170
- "Check prediction history in Activix."
171
- ],
172
- "retryable": true,
173
- "userActionRequired": false,
174
- "confidence": "medium"
175
107
  }
176
108
  }
@@ -6,36 +6,24 @@ import type { GatewayConfig } from './types.js';
6
6
  import type { Logxer } from '@x12i/logxer';
7
7
  import { LLMProviderRouter } from '@x12i/ai-providers-router';
8
8
  import { ActivityManager } from './activity-manager.js';
9
- import { OptimixerManager } from './optimixer-manager.js';
10
9
  import { UsageTracker } from './usage-tracker.js';
11
10
  import type { MessageBuilderConfig } from './message-builder.js';
12
11
  import type { TemplateRenderOptions } from '@x12i/rendrix';
13
12
  export interface GatewayConfigContext {
14
- defaultModelConfig: Record<string, unknown>;
15
- defaultInstructionsBlocks: Record<string, any>;
16
13
  config: GatewayConfig;
17
14
  logger: Logxer;
18
15
  router: LLMProviderRouter;
19
16
  activityManager: ActivityManager;
20
- optimixerManager: OptimixerManager;
21
17
  usageTracker: UsageTracker;
22
18
  messageBuilderConfig: MessageBuilderConfig;
23
19
  }
24
20
  export type InitializedGatewayComponents = ReturnType<typeof initializeGatewayComponents>;
25
21
  /**
26
- * Loads configuration from JSON files (model config and instructionsBlocks).
27
- * Pass a {@link Logxer} instance so load diagnostics go through logxer (not console).
22
+ * Loads packaged template-rendering defaults for Rendrix merge at init.
28
23
  */
29
24
  export declare function loadConfig(logger: Logxer): {
30
- defaultModelConfig: Record<string, unknown>;
31
- defaultInstructionsBlocks: Record<string, any>;
32
25
  defaultTemplateRendering?: TemplateRenderOptions;
33
26
  };
34
- /**
35
- * Gets the minimum flex-md compliance level from environment variable
36
- * Defaults to 'L0' if not set or invalid
37
- */
38
- export declare function getFlexMdMinComplianceLevel(): 'L0' | 'L1' | 'L2' | 'L3';
39
27
  /**
40
28
  * Sets up request interceptor for jobId propagation and config cleanup
41
29
  */
@@ -47,10 +35,8 @@ export declare function initializeGatewayComponents(config: GatewayConfig): {
47
35
  logger: Logxer;
48
36
  router: LLMProviderRouter;
49
37
  activityManager: ActivityManager;
50
- optimixerManager: OptimixerManager;
51
38
  usageTracker: UsageTracker;
52
39
  messageBuilderConfig: MessageBuilderConfig;
53
- defaultModelConfig: Record<string, unknown>;
54
40
  preferOpenRouter: boolean;
55
41
  openRouterApiKey?: string;
56
42
  };
@@ -6,6 +6,11 @@ import * as fs from 'fs';
6
6
  import * as path from 'path';
7
7
  import { fileURLToPath } from 'url';
8
8
  import { resolveOpenRouterApiKey, resolvePreferOpenRouter, } from './openrouter-routing.js';
9
+ import { LLMProviderRouter } from '@x12i/ai-providers-router';
10
+ import { createGatewayLogger } from './logger-factory.js';
11
+ import { ActivityManager } from './activity-manager.js';
12
+ import { UsageTracker } from './usage-tracker.js';
13
+ import { mergeTemplateRenderOptions } from './template-render-merge.js';
9
14
  /** Resolve current module directory across ESM/CJS builds. */
10
15
  function getModuleDir() {
11
16
  if (typeof __dirname !== 'undefined') {
@@ -38,29 +43,17 @@ function getDefaultsDir() {
38
43
  path.join(cwd, 'src'),
39
44
  ];
40
45
  for (const dir of candidates) {
41
- const modelConfigPath = path.join(dir, 'defaults', 'model-config.json');
42
- if (fs.existsSync(modelConfigPath)) {
46
+ const templateRenderingPath = path.join(dir, 'defaults', 'template-rendering.json');
47
+ if (fs.existsSync(templateRenderingPath)) {
43
48
  return dir;
44
49
  }
45
50
  }
46
- // Keep existing behavior as a last fallback.
47
51
  return path.join(cwd, 'dist');
48
52
  }
49
- import { LLMProviderRouter } from '@x12i/ai-providers-router';
50
- import { createGatewayLogger } from './logger-factory.js';
51
- import { ActivityManager } from './activity-manager.js';
52
- import { OptimixerManager } from './optimixer-manager.js';
53
- import { UsageTracker } from './usage-tracker.js';
54
- import { mergeTemplateRenderOptions } from './template-render-merge.js';
55
- import { GatewayRateLimiter } from './gateway-rate-limiter.js';
56
- import { DEFAULT_RATE_LIMIT_MIN_INTERVAL_MS, DEFAULT_RATE_LIMIT_ENABLED } from './gateway-rate-limiter-constants.js';
57
53
  /**
58
- * Loads configuration from JSON files (model config and instructionsBlocks).
59
- * Pass a {@link Logxer} instance so load diagnostics go through logxer (not console).
54
+ * Loads packaged template-rendering defaults for Rendrix merge at init.
60
55
  */
61
56
  export function loadConfig(logger) {
62
- const defaultModelConfig = {};
63
- const defaultInstructionsBlocks = {};
64
57
  let defaultTemplateRendering;
65
58
  try {
66
59
  const defaultsDir = getDefaultsDir();
@@ -68,57 +61,21 @@ export function loadConfig(logger) {
68
61
  if (fs.existsSync(templateRenderingPath)) {
69
62
  const trContent = fs.readFileSync(templateRenderingPath, 'utf-8');
70
63
  defaultTemplateRendering = JSON.parse(trContent);
71
- }
72
- // Load model config (includes rate limiting and retry defaults)
73
- const modelConfigPath = path.join(defaultsDir, 'defaults', 'model-config.json');
74
- if (fs.existsSync(modelConfigPath)) {
75
- const content = fs.readFileSync(modelConfigPath, 'utf-8');
76
- const parsed = JSON.parse(content);
77
- Object.assign(defaultModelConfig, parsed);
78
- }
79
- // Load instructionsBlocks
80
- const instructionsBlocksPath = path.join(defaultsDir, 'defaults', 'instructions-blocks.json');
81
- if (fs.existsSync(instructionsBlocksPath)) {
82
- const content = fs.readFileSync(instructionsBlocksPath, 'utf-8');
83
- const parsed = JSON.parse(content);
84
- // Use Object.assign to merge, preserving nested structure
85
- Object.assign(defaultInstructionsBlocks, parsed);
86
- logger.debug('Loaded instructions blocks from defaults', {
87
- topLevelKeys: Object.keys(defaultInstructionsBlocks),
88
- hasOutput: 'output' in defaultInstructionsBlocks,
89
- outputKeys: 'output' in defaultInstructionsBlocks ? Object.keys(defaultInstructionsBlocks.output) : []
64
+ logger.debug('Loaded template rendering defaults', {
65
+ path: templateRenderingPath,
66
+ hasSubPathSearch: !!defaultTemplateRendering?.subPathSearch
90
67
  });
91
68
  }
92
69
  else {
93
- logger.verbose('Optional instructions blocks file not found; using packaged fallbacks', {
94
- instructionsBlocksPath
95
- });
70
+ logger.verbose('Packaged template-rendering defaults not found', { templateRenderingPath });
96
71
  }
97
72
  }
98
73
  catch (error) {
99
- logger.warn('Failed to load defaults from JSON files', {
74
+ logger.warn('Failed to load template-rendering defaults', {
100
75
  error: error instanceof Error ? error.message : String(error)
101
76
  });
102
77
  }
103
- // Ensure critical blocks exist even if file loading failed
104
- if (!defaultInstructionsBlocks['outputObjectPrefix']) {
105
- defaultInstructionsBlocks['outputObjectPrefix'] = "Reply in Markdown. Return your entire answer inside a single ```markdown fenced block and nothing else. The content must conform to the schema provided below. If no items are found, return empty arrays (e.g., emails: []). Never ask for more input. Do not write conversational text. Do not write explanations. Do not ask questions.\n\n";
106
- }
107
- if (!defaultInstructionsBlocks['outputObjectTypesPrefix']) {
108
- defaultInstructionsBlocks['outputObjectTypesPrefix'] = "Reply in Markdown. Return your entire answer inside a single ```markdown fenced block and nothing else. Select ONE of the following object types based on the input. The content must conform to the chosen schema. Do not write conversational text. Do not write explanations.\n\n";
109
- }
110
- return { defaultModelConfig, defaultInstructionsBlocks, defaultTemplateRendering };
111
- }
112
- /**
113
- * Gets the minimum flex-md compliance level from environment variable
114
- * Defaults to 'L0' if not set or invalid
115
- */
116
- export function getFlexMdMinComplianceLevel() {
117
- const envValue = process.env.FLEX_MD_MIN_COMPLIANCE_LEVEL;
118
- if (envValue === 'L0' || envValue === 'L1' || envValue === 'L2' || envValue === 'L3') {
119
- return envValue;
120
- }
121
- return 'L0'; // Default: allow anything
78
+ return { defaultTemplateRendering };
122
79
  }
123
80
  /**
124
81
  * Sets up request interceptor for jobId propagation and config cleanup
@@ -140,9 +97,6 @@ export function setupRequestInterceptor(router, logger) {
140
97
  }
141
98
  request.config.metadata.jobId = identityJobId;
142
99
  }
143
- // Remove 'provider' from config - router uses it for routing but providers don't accept it
144
- // Router reads config.provider to determine which provider to call, but then passes
145
- // the entire config to the provider, which rejects 'provider' as invalid
146
100
  if (request.config && 'provider' in request.config) {
147
101
  logger.debug('Removing provider from config before passing to provider', {
148
102
  provider: request.config.provider
@@ -158,7 +112,6 @@ export function setupRequestInterceptor(router, logger) {
158
112
  * Initializes gateway components
159
113
  */
160
114
  export function initializeGatewayComponents(config) {
161
- // Initialize logger FIRST (before other components that might need it)
162
115
  const logger = createGatewayLogger({
163
116
  enableLogging: config.enableLogging ?? true,
164
117
  customLogger: config.logger,
@@ -167,14 +120,11 @@ export function initializeGatewayComponents(config) {
167
120
  logLevel: config.logLevel,
168
121
  verbose: config.verbose
169
122
  });
170
- const { defaultModelConfig, defaultInstructionsBlocks, defaultTemplateRendering } = loadConfig(logger);
123
+ const { defaultTemplateRendering } = loadConfig(logger);
171
124
  logger.verbose('Gateway initializing', {
172
125
  defaultEngine: config.defaultEngine,
173
- hasDefaultInstructionsBlocks: Object.keys(defaultInstructionsBlocks).length > 0
126
+ hasTemplateRenderingDefaults: !!defaultTemplateRendering
174
127
  });
175
- // Activity tracking is handled by Activix internally.
176
- // Initialize router - this is the ONLY way to access providers
177
- // RouterConfig properties are inherited from RouterConfig interface
178
128
  const routerConfig = {};
179
129
  const defaultTarget = config.defaultTarget;
180
130
  if (defaultTarget) {
@@ -203,8 +153,6 @@ export function initializeGatewayComponents(config) {
203
153
  routerConfig.logLevel = config.logLevel;
204
154
  if (config.logging !== undefined)
205
155
  routerConfig.logging = config.logging;
206
- // OpenRouter: always pass apiKey when set (fallback for providers without direct keys).
207
- // PREFER_OPENROUTER=false only disables *preferring* OpenRouter when direct provider keys exist.
208
156
  const openRouterKey = resolveOpenRouterApiKey(config);
209
157
  const preferOpenRouter = resolvePreferOpenRouter(config);
210
158
  if (openRouterKey) {
@@ -218,64 +166,12 @@ export function initializeGatewayComponents(config) {
218
166
  }
219
167
  }
220
168
  const router = new LLMProviderRouter(routerConfig);
221
- // Set up BETWEEN-CALLS rate limiting as a request interceptor (applies to all provider calls)
222
- // This ensures rate limiting works even when router is used directly without gateway
223
- // Hidden in the flow - automatic and transparent
224
- //
225
- // NOTE: This is for BETWEEN-CALLS rate limiting (smart, tracks last call time).
226
- // Retry delays are handled separately in gateway-retry.ts (simple sleep, not smart).
227
- const rateLimitConfig = config.rateLimit;
228
- // Get defaults from JSON config, fallback to constants
229
- const jsonRateLimitConfig = defaultModelConfig.rateLimit || {};
230
- const rateLimitEnabled = rateLimitConfig?.enabled ?? jsonRateLimitConfig.enabled ?? DEFAULT_RATE_LIMIT_ENABLED;
231
- if (rateLimitEnabled) {
232
- // Priority: explicit config > JSON defaults > constants
233
- const defaultMinIntervalMs = rateLimitConfig?.defaultMinIntervalMs
234
- ?? jsonRateLimitConfig.defaultMinIntervalMs
235
- ?? DEFAULT_RATE_LIMIT_MIN_INTERVAL_MS;
236
- const providerIntervals = rateLimitConfig?.providerIntervals;
237
- const rateLimiter = new GatewayRateLimiter(defaultMinIntervalMs, providerIntervals, logger);
238
- // Add request interceptor for BETWEEN-CALLS rate limiting (hidden in the flow)
239
- router.addRequestInterceptor(async (request, provider) => {
240
- // Get provider name
241
- const providerName = typeof provider?.getProviderName === 'function'
242
- ? provider.getProviderName()
243
- : 'global';
244
- // Smart rate limiting: wait only if necessary based on last call time
245
- // This is for BETWEEN-CALLS, not retries (retries use simple sleep in gateway-retry.ts)
246
- await rateLimiter.waitIfNeeded(providerName);
247
- // Return request unchanged (interceptor can modify request, but we just need to wait)
248
- return request;
249
- });
250
- // Add response interceptor to record call completion
251
- // Note: Type assertion needed due to ResponseInterceptor type definition mismatch
252
- router.addResponseInterceptor((async (response, request, provider) => {
253
- // Get provider name
254
- const providerName = typeof provider?.getProviderName === 'function'
255
- ? provider.getProviderName()
256
- : 'global';
257
- // Record the call time after completion (for smart between-calls rate limiting)
258
- rateLimiter.recordCall(providerName);
259
- // Return response unchanged
260
- return response;
261
- }));
262
- logger.debug('Between-calls rate limiting configured as router interceptor', {
263
- defaultMinIntervalMs,
264
- providerIntervals: providerIntervals ? Object.keys(providerIntervals).length : 0,
265
- enabled: true,
266
- note: 'Smart rate limiting (between-calls only). Retry delays handled separately (simple sleep).'
267
- });
268
- }
269
- else {
270
- logger.debug('Rate limiting disabled');
271
- }
272
- // Initialize usage tracking
169
+ setupRequestInterceptor(router, logger);
273
170
  const usageTracker = new UsageTracker({
274
171
  enableUsageTracking: config.enableUsageTracking ?? true,
275
172
  usageTier: config.usageTier,
276
173
  logger
277
174
  });
278
- // Initialize activity tracking
279
175
  const activityManager = new ActivityManager({
280
176
  enableActivityTracking: config.enableActivityTracking ?? true,
281
177
  customTracker: config.activityTracker,
@@ -292,19 +188,8 @@ export function initializeGatewayComponents(config) {
292
188
  }
293
189
  })
294
190
  });
295
- const optimixerManager = new OptimixerManager({
296
- optimixer: config.optimixer,
297
- logger,
298
- getActivix: () => activityManager.getReadyTracker()
299
- });
300
191
  const templateRendering = mergeTemplateRenderOptions(defaultTemplateRendering, config.templateRendering);
301
- const instructionsBlockOverrides = {
302
- ...(config.instructionsBlocks ?? {})
303
- };
304
- // Initialize message builder config - for direct message construction
305
192
  const messageBuilderConfig = {
306
- defaultInstructionsBlocks,
307
- instructionsBlockOverrides,
308
193
  logger,
309
194
  templateRendering
310
195
  };
@@ -312,10 +197,8 @@ export function initializeGatewayComponents(config) {
312
197
  logger,
313
198
  router,
314
199
  activityManager,
315
- optimixerManager,
316
200
  usageTracker,
317
201
  messageBuilderConfig,
318
- defaultModelConfig,
319
202
  preferOpenRouter,
320
203
  openRouterApiKey: openRouterKey,
321
204
  };
@@ -0,0 +1,23 @@
1
+ /**
2
+ * Documented gateway runtime defaults (code constants — not env or packaged JSON).
3
+ * Downstream clients (ai-skills, ai-tasks, graph-engine) should re-export or pass these through.
4
+ */
5
+ import type { GatewayConfig, RetryConfig } from './types.js';
6
+ export declare const GATEWAY_DEFAULT_TEMPERATURE = 0.7;
7
+ export declare const GATEWAY_DEFAULT_TOP_P = 1;
8
+ export declare const GATEWAY_DEFAULT_FREQUENCY_PENALTY = 0;
9
+ export declare const GATEWAY_DEFAULT_PRESENCE_PENALTY = 0;
10
+ /** Caps JSON stored in Activix `content.fullResponse` when diagnostics allow storing it. */
11
+ export declare const DEFAULT_ACTIVITY_FULL_RESPONSE_MAX_CHARS = 512000;
12
+ export declare const GATEWAY_DEFAULT_RETRY: Required<Pick<RetryConfig, 'maxRetries' | 'initialDelay' | 'maxDelay' | 'backoffMultiplier' | 'enableJitter' | 'throttlingDelay'>>;
13
+ type RetryRequestSlice = {
14
+ retry?: RetryConfig;
15
+ config?: {
16
+ retry?: RetryConfig;
17
+ };
18
+ };
19
+ /**
20
+ * Merge retry config: request.config.retry > request.retry > GatewayConfig.retry > GATEWAY_DEFAULT_RETRY.
21
+ */
22
+ export declare function resolveRetryConfig(request: RetryRequestSlice, config: GatewayConfig): RetryConfig;
23
+ export {};
@@ -0,0 +1,29 @@
1
+ /**
2
+ * Documented gateway runtime defaults (code constants — not env or packaged JSON).
3
+ * Downstream clients (ai-skills, ai-tasks, graph-engine) should re-export or pass these through.
4
+ */
5
+ export const GATEWAY_DEFAULT_TEMPERATURE = 0.7;
6
+ export const GATEWAY_DEFAULT_TOP_P = 1.0;
7
+ export const GATEWAY_DEFAULT_FREQUENCY_PENALTY = 0.0;
8
+ export const GATEWAY_DEFAULT_PRESENCE_PENALTY = 0.0;
9
+ /** Caps JSON stored in Activix `content.fullResponse` when diagnostics allow storing it. */
10
+ export const DEFAULT_ACTIVITY_FULL_RESPONSE_MAX_CHARS = 512_000;
11
+ export const GATEWAY_DEFAULT_RETRY = {
12
+ maxRetries: 3,
13
+ initialDelay: 1000,
14
+ maxDelay: 30000,
15
+ backoffMultiplier: 2,
16
+ enableJitter: true,
17
+ throttlingDelay: 5000
18
+ };
19
+ /**
20
+ * Merge retry config: request.config.retry > request.retry > GatewayConfig.retry > GATEWAY_DEFAULT_RETRY.
21
+ */
22
+ export function resolveRetryConfig(request, config) {
23
+ return {
24
+ ...GATEWAY_DEFAULT_RETRY,
25
+ ...config.retry,
26
+ ...request.retry,
27
+ ...request.config?.retry
28
+ };
29
+ }
@@ -7,12 +7,8 @@ export declare const GatewayLogCode: {
7
7
  readonly FALLBACK_CHAIN_EXHAUSTED: "GATEWAY_FALLBACK_CHAIN_EXHAUSTED";
8
8
  readonly FLEX_MD_EXTRACTION_FAILED: "GATEWAY_FLEX_MD_EXTRACTION_FAILED";
9
9
  readonly FLEX_MD_EXTRACTION_ERROR: "GATEWAY_FLEX_MD_EXTRACTION_ERROR";
10
- readonly DEFAULT_MODEL_SUBSTITUTED: "GATEWAY_DEFAULT_MODEL_SUBSTITUTED";
11
10
  readonly RETRY_MAX_EXCEEDED: "GATEWAY_RETRY_MAX_EXCEEDED";
12
11
  readonly RETRY_ATTEMPT: "GATEWAY_RETRY_ATTEMPT";
13
- readonly OPTIMIXER_ACTIVIX_UNAVAILABLE: "GATEWAY_OPTIMIXER_ACTIVIX_UNAVAILABLE";
14
- readonly OPTIMIXER_INIT_FAILED: "GATEWAY_OPTIMIXER_INIT_FAILED";
15
- readonly OPTIMIXER_PREDICT_FAILED: "GATEWAY_OPTIMIXER_PREDICT_FAILED";
16
12
  };
17
13
  export type GatewayLogCode = (typeof GatewayLogCode)[keyof typeof GatewayLogCode];
18
14
  /** Resolve packaged `defaults/log-diagnostics.json` for createLogxer diagnostics.catalogPath. */
@@ -13,12 +13,8 @@ export const GatewayLogCode = {
13
13
  FALLBACK_CHAIN_EXHAUSTED: 'GATEWAY_FALLBACK_CHAIN_EXHAUSTED',
14
14
  FLEX_MD_EXTRACTION_FAILED: 'GATEWAY_FLEX_MD_EXTRACTION_FAILED',
15
15
  FLEX_MD_EXTRACTION_ERROR: 'GATEWAY_FLEX_MD_EXTRACTION_ERROR',
16
- DEFAULT_MODEL_SUBSTITUTED: 'GATEWAY_DEFAULT_MODEL_SUBSTITUTED',
17
16
  RETRY_MAX_EXCEEDED: 'GATEWAY_RETRY_MAX_EXCEEDED',
18
- RETRY_ATTEMPT: 'GATEWAY_RETRY_ATTEMPT',
19
- OPTIMIXER_ACTIVIX_UNAVAILABLE: 'GATEWAY_OPTIMIXER_ACTIVIX_UNAVAILABLE',
20
- OPTIMIXER_INIT_FAILED: 'GATEWAY_OPTIMIXER_INIT_FAILED',
21
- OPTIMIXER_PREDICT_FAILED: 'GATEWAY_OPTIMIXER_PREDICT_FAILED'
17
+ RETRY_ATTEMPT: 'GATEWAY_RETRY_ATTEMPT'
22
18
  };
23
19
  function getModuleDir() {
24
20
  if (typeof __dirname !== 'undefined') {
@@ -19,7 +19,6 @@ export declare const GATEWAY_STACK_LOG_PREFIXES: {
19
19
  readonly gateway: "AI_GATEWAY";
20
20
  readonly router: "AI_PROVIDER_ROUTER";
21
21
  readonly flexMd: "FLEX_MD";
22
- readonly optimixer: "OPTIMIXER";
23
22
  };
24
23
  /**
25
24
  * Load bulk env (`LOGXER_PACKAGE_LEVELS`, `LOGXER_PACKAGE_LOGS_DEFAULT`) and merge optional host config.
@@ -20,7 +20,6 @@ export const GATEWAY_STACK_LOG_PREFIXES = {
20
20
  gateway: GATEWAY_LOG_ENV_PREFIX,
21
21
  router: ROUTER_LOG_ENV_PREFIX,
22
22
  flexMd: 'FLEX_MD',
23
- optimixer: 'OPTIMIXER'
24
23
  };
25
24
  let packageLogLevelsInitialized = false;
26
25
  /**
@@ -71,9 +71,6 @@ export async function constructMessages(request, config, logger, parsedSnapshot)
71
71
  const requestWithExamples = { ...request, instructions: finalInstructions };
72
72
  // Build messages using direct message builder
73
73
  const result = await buildMessages(requestWithExamples, config, {
74
- useSystemContextFallback: true,
75
- includeInputRecognition: isAIRequest(request),
76
- includeReinforcement: isAIRequest(request),
77
74
  parsedSnapshot
78
75
  });
79
76
  if (parsedSnapshot && result.metadata) {