@x12i/ai-gateway 9.7.8 → 10.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. package/README.md +67 -12
  2. package/dist/defaults/log-diagnostics.json +0 -68
  3. package/dist/gateway-config.d.ts +1 -15
  4. package/dist/gateway-config.js +17 -134
  5. package/dist/gateway-defaults.d.ts +23 -0
  6. package/dist/gateway-defaults.js +29 -0
  7. package/dist/gateway-log-diagnostics.d.ts +0 -4
  8. package/dist/gateway-log-diagnostics.js +1 -5
  9. package/dist/gateway-log-levels.d.ts +0 -1
  10. package/dist/gateway-log-levels.js +0 -1
  11. package/dist/gateway-messages.js +0 -3
  12. package/dist/gateway-meta.js +12 -10
  13. package/dist/gateway-mode.d.ts +3 -26
  14. package/dist/gateway-mode.js +3 -48
  15. package/dist/gateway-retry.js +7 -6
  16. package/dist/gateway-utils.d.ts +1 -19
  17. package/dist/gateway-utils.js +37 -199
  18. package/dist/gateway.d.ts +0 -3
  19. package/dist/gateway.js +4 -63
  20. package/dist/index.d.ts +4 -6
  21. package/dist/index.js +4 -7
  22. package/dist/instruction-errors.d.ts +9 -1
  23. package/dist/instruction-errors.js +15 -1
  24. package/dist/instruction-optimizer.js +5 -1
  25. package/dist/message-builder.d.ts +0 -6
  26. package/dist/message-builder.js +4 -145
  27. package/dist/types.d.ts +16 -57
  28. package/dist-cjs/defaults/log-diagnostics.json +0 -68
  29. package/dist-cjs/gateway-config.cjs +17 -134
  30. package/dist-cjs/gateway-config.d.ts +1 -15
  31. package/dist-cjs/gateway-defaults.cjs +29 -0
  32. package/dist-cjs/gateway-defaults.d.ts +23 -0
  33. package/dist-cjs/gateway-log-diagnostics.cjs +1 -5
  34. package/dist-cjs/gateway-log-diagnostics.d.ts +0 -4
  35. package/dist-cjs/gateway-log-levels.cjs +0 -1
  36. package/dist-cjs/gateway-log-levels.d.ts +0 -1
  37. package/dist-cjs/gateway-messages.cjs +0 -3
  38. package/dist-cjs/gateway-meta.cjs +12 -10
  39. package/dist-cjs/gateway-mode.cjs +3 -48
  40. package/dist-cjs/gateway-mode.d.ts +3 -26
  41. package/dist-cjs/gateway-retry.cjs +7 -6
  42. package/dist-cjs/gateway-utils.cjs +37 -199
  43. package/dist-cjs/gateway-utils.d.ts +1 -19
  44. package/dist-cjs/gateway.cjs +4 -63
  45. package/dist-cjs/gateway.d.ts +0 -3
  46. package/dist-cjs/index.cjs +4 -7
  47. package/dist-cjs/index.d.ts +4 -6
  48. package/dist-cjs/instruction-errors.cjs +15 -1
  49. package/dist-cjs/instruction-errors.d.ts +9 -1
  50. package/dist-cjs/instruction-optimizer.cjs +5 -1
  51. package/dist-cjs/message-builder.cjs +4 -145
  52. package/dist-cjs/message-builder.d.ts +0 -6
  53. package/dist-cjs/types.d.ts +16 -57
  54. package/package.json +2 -3
  55. package/dist/defaults/instructions-blocks.json +0 -61
  56. package/dist/defaults/model-config.json +0 -15
  57. package/dist/gateway-instructions.d.ts +0 -30
  58. package/dist/gateway-instructions.js +0 -62
  59. package/dist/gateway-rate-limiter-constants.d.ts +0 -16
  60. package/dist/gateway-rate-limiter-constants.js +0 -16
  61. package/dist/gateway-rate-limiter.d.ts +0 -56
  62. package/dist/gateway-rate-limiter.js +0 -107
  63. package/dist/optimixer-manager.d.ts +0 -33
  64. package/dist/optimixer-manager.js +0 -142
  65. package/dist/token-estimate.d.ts +0 -12
  66. package/dist/token-estimate.js +0 -30
  67. package/dist-cjs/defaults/instructions-blocks.json +0 -61
  68. package/dist-cjs/defaults/model-config.json +0 -15
  69. package/dist-cjs/gateway-instructions.cjs +0 -62
  70. package/dist-cjs/gateway-instructions.d.ts +0 -30
  71. package/dist-cjs/gateway-rate-limiter-constants.cjs +0 -16
  72. package/dist-cjs/gateway-rate-limiter-constants.d.ts +0 -16
  73. package/dist-cjs/gateway-rate-limiter.cjs +0 -107
  74. package/dist-cjs/gateway-rate-limiter.d.ts +0 -56
  75. package/dist-cjs/optimixer-manager.cjs +0 -142
  76. package/dist-cjs/optimixer-manager.d.ts +0 -33
  77. package/dist-cjs/token-estimate.cjs +0 -30
  78. package/dist-cjs/token-estimate.d.ts +0 -12
package/dist/gateway.js CHANGED
@@ -7,10 +7,11 @@ import { FallbackExhaustedError } from '@x12i/ai-providers-router';
7
7
  import { validateChatRequest, validateAIRequest } from './gateway-validation.js';
8
8
  import { ensureGatewayRequestIdentity } from './activity-manager.js';
9
9
  import { initializeGatewayComponents } from './gateway-config.js';
10
+ import { resolveRetryConfig } from './gateway-defaults.js';
10
11
  import { buildMessages } from './message-builder.js';
11
- import { extractJsonFromFlexMd, getModelMaxTokensFromFlexMd } from './flex-md-loader.js';
12
+ import { extractJsonFromFlexMd } from './flex-md-loader.js';
12
13
  import { enrichParsedContentForOutputContract, resolveOutputContractFieldKeys } from './output-contract-normalizer.js';
13
- import { attachGatewayInvokeRejectionMetadata, buildGatewayFallbackAttemptsFromTrace, buildInvokeRejectionMetadata, capActivityFullResponsePayload, formatFallbackExhaustionMessage, logResolvedModelRouting, mapGatewayFallbackAttemptsToRouter, hasNonZeroTokenUsage, DEFAULT_ACTIVITY_FULL_RESPONSE_MAX_CHARS, extractCostUsdFromRouterResponse, extractTokenUsageFromRouterResponse, mergeConfig, pickEffectiveModelConfigForMetadata, pickInvokeRoutingMetadataSlice, pickTraceMergedRouterConfig, resolveCostCompletionWithAiTools, buildOptimixerActualUsage, buildTraceUsageSummary, enrichTraceAttemptsWithBilling, isMaxTokensExplicitlySet, tryExtractRouterLikePayloadFromErrorChain } from './gateway-utils.js';
14
+ import { attachGatewayInvokeRejectionMetadata, buildGatewayFallbackAttemptsFromTrace, buildInvokeRejectionMetadata, capActivityFullResponsePayload, formatFallbackExhaustionMessage, logResolvedModelRouting, mapGatewayFallbackAttemptsToRouter, hasNonZeroTokenUsage, DEFAULT_ACTIVITY_FULL_RESPONSE_MAX_CHARS, extractCostUsdFromRouterResponse, extractTokenUsageFromRouterResponse, mergeConfig, pickEffectiveModelConfigForMetadata, pickInvokeRoutingMetadataSlice, pickTraceMergedRouterConfig, resolveCostCompletionWithAiTools, buildTraceUsageSummary, enrichTraceAttemptsWithBilling, tryExtractRouterLikePayloadFromErrorChain } from './gateway-utils.js';
14
15
  import { getAiToolsClient } from './ai-tools-client.js';
15
16
  import { autoRegisterProviders } from './gateway-provider-auto-register.js';
16
17
  import { applyOpenRouterInvokePolicy } from './ai-tools-client.js';
@@ -56,9 +57,7 @@ export class AIGateway {
56
57
  config;
57
58
  logger;
58
59
  activityManager;
59
- optimixerManager;
60
60
  messageBuilderConfig;
61
- defaultModelConfig = {};
62
61
  _autoRegisterDone = false;
63
62
  _aiToolsClient = null;
64
63
  preferOpenRouter;
@@ -70,9 +69,7 @@ export class AIGateway {
70
69
  this.logger = components.logger;
71
70
  this.router = components.router;
72
71
  this.activityManager = components.activityManager;
73
- this.optimixerManager = components.optimixerManager;
74
72
  this.messageBuilderConfig = components.messageBuilderConfig;
75
- this.defaultModelConfig = components.defaultModelConfig ?? {};
76
73
  this.preferOpenRouter = components.preferOpenRouter;
77
74
  this.openRouterApiKey = components.openRouterApiKey;
78
75
  setGatewayRuntimeClients({
@@ -101,7 +98,6 @@ export class AIGateway {
101
98
  // Merge config (modelConfig > request.config > gateway defaults)
102
99
  const aiTools = await this.getAiTools();
103
100
  const mergedConfig = await mergeConfig(request, this.config, this.logger, {
104
- defaultModelConfig: this.defaultModelConfig,
105
101
  catalog: aiTools?.catalog ?? null,
106
102
  routingEnv: aiTools?.routingEnv,
107
103
  });
@@ -118,7 +114,6 @@ export class AIGateway {
118
114
  await autoRegisterProviders(this.router, this.logger);
119
115
  this._autoRegisterDone = true;
120
116
  }
121
- const optimixerPrediction = await this.applyAdaptiveMaxTokensIfEnabled(request, mergedConfig, messages);
122
117
  // Start activity tracking if available
123
118
  let activity = undefined;
124
119
  if (this.activityManager) {
@@ -192,9 +187,6 @@ export class AIGateway {
192
187
  });
193
188
  }
194
189
  }
195
- if (optimixerPrediction) {
196
- await this.optimixerManager?.completePrediction(optimixerPrediction.requestId, buildOptimixerActualUsage(tokensChat, response, Date.now() - startTime));
197
- }
198
190
  warnIfSuccessfulInvokeReportsZeroUsageOrCost(this.logger, request.identity, {
199
191
  tokens: enhancedResponse.metadata.tokens,
200
192
  costUsd: enhancedResponse.metadata.costUsd,
@@ -298,7 +290,6 @@ export class AIGateway {
298
290
  // Merge config (modelConfig > request.config > gateway defaults)
299
291
  const aiTools = await this.getAiTools();
300
292
  const mergedConfig = await mergeConfig(request, this.config, this.logger, {
301
- defaultModelConfig: this.defaultModelConfig,
302
293
  catalog: aiTools?.catalog ?? null,
303
294
  routingEnv: aiTools?.routingEnv,
304
295
  });
@@ -318,7 +309,6 @@ export class AIGateway {
318
309
  await autoRegisterProviders(this.router, this.logger);
319
310
  this._autoRegisterDone = true;
320
311
  }
321
- const optimixerPrediction = await this.applyAdaptiveMaxTokensIfEnabled(request, mergedConfig, messages);
322
312
  // Start activity tracking if available
323
313
  let activity = undefined;
324
314
  if (this.activityManager) {
@@ -417,7 +407,7 @@ export class AIGateway {
417
407
  model: candidate.model
418
408
  }
419
409
  }
420
- }, (this.config.retry ?? {}), request.identity.jobId || request.aiRequestId, this.router, this.logger, {
410
+ }, resolveRetryConfig(request, this.config), request.identity.jobId || request.aiRequestId, this.router, this.logger, {
421
411
  onTryStart: ({ retryIndex, startedAt }) => {
422
412
  const idx = traceAttempts.push({
423
413
  timing: { startedAt, endedAt: startedAt, durationMs: 0 },
@@ -727,9 +717,6 @@ export class AIGateway {
727
717
  });
728
718
  }
729
719
  }
730
- if (optimixerPrediction) {
731
- await this.optimixerManager?.completePrediction(optimixerPrediction.requestId, buildOptimixerActualUsage(tokens, routerResponse, Date.now() - startTime));
732
- }
733
720
  warnIfSuccessfulInvokeReportsZeroUsageOrCost(this.logger, request.identity, {
734
721
  tokens: enhancedResponse.metadata.tokens,
735
722
  costUsd: enhancedResponse.metadata.costUsd,
@@ -765,52 +752,6 @@ export class AIGateway {
765
752
  }
766
753
  });
767
754
  }
768
- async applyAdaptiveMaxTokensIfEnabled(request, mergedConfig, messages) {
769
- if (!this.optimixerManager?.isEnabled() || isMaxTokensExplicitlySet(request, this.config)) {
770
- return undefined;
771
- }
772
- const prediction = await this.optimixerManager.predictMaxTokens({
773
- request,
774
- mergedConfig,
775
- messages
776
- });
777
- if (prediction) {
778
- let maxTokens = prediction.recommendedMaxTokens;
779
- const useCeiling = this.config.optimixer?.useFlexMdCeiling !== false;
780
- if (useCeiling && mergedConfig?.model && mergedConfig?.provider) {
781
- try {
782
- const ceiling = await getModelMaxTokensFromFlexMd(mergedConfig.provider, mergedConfig.model);
783
- if (typeof ceiling === 'number' && ceiling > 0 && maxTokens > ceiling) {
784
- maxTokens = ceiling;
785
- }
786
- }
787
- catch {
788
- // Non-blocking: use uncapped prediction
789
- }
790
- }
791
- mergedConfig.maxTokens = maxTokens;
792
- request._mergedRouterConfig = mergedConfig;
793
- this.logger.debug('Applied Optimixer recommended max_tokens', {
794
- aiRequestId: request.aiRequestId,
795
- recommendedMaxTokens: prediction.recommendedMaxTokens,
796
- maxTokens,
797
- confidence: prediction.confidence,
798
- requestId: prediction.requestId
799
- });
800
- return prediction;
801
- }
802
- if (mergedConfig?.maxTokens === undefined && mergedConfig?.model && mergedConfig?.provider) {
803
- try {
804
- const flexMdMaxTokens = await getModelMaxTokensFromFlexMd(mergedConfig.provider, mergedConfig.model);
805
- mergedConfig.maxTokens = flexMdMaxTokens && flexMdMaxTokens > 0 ? flexMdMaxTokens : 2000;
806
- }
807
- catch {
808
- mergedConfig.maxTokens = 2000;
809
- }
810
- request._mergedRouterConfig = mergedConfig;
811
- }
812
- return undefined;
813
- }
814
755
  /**
815
756
  * Build simple messages from request (instructions and prompt as literal template text; no registry).
816
757
  */
package/dist/index.d.ts CHANGED
@@ -14,12 +14,13 @@ export type { RequestInterceptor, ResponseInterceptor } from '@x12i/ai-providers
14
14
  export type { UsageTracker } from '@x12i/ai-providers-router';
15
15
  export * from '@x12i/ai-providers-router';
16
16
  export { AIGateway } from './gateway.js';
17
- export { InstructionNotFoundError, InstructionBackendError } from './instruction-errors.js';
17
+ export { InstructionNotFoundError, InstructionBackendError, ModelRequiredError, MaxTokensRequiredError } from './instruction-errors.js';
18
18
  export { autoRegisterProviders } from './gateway-provider-auto-register.js';
19
19
  export type { GatewayConfig, ProviderModelRef, ModelConfig, RetryConfig, ChatRequest, AIInvokeRequest, AIRequest, GatewayActionType, GatewayInvokeRejectionMetadata, GatewayFallbackAttempt, GatewayTraceRequestIds, GatewayTraceAttempt, GatewayTraceUsageSummary, GatewayTraceMergedConfig, EnhancedLLMResponse, InstructionMetadata, ValidationRule, TemplateRenderOptions, SmartInputConfig, SmartInputRenderOptions } from './types.js';
20
20
  export { attachGatewayInvokeRejectionMetadata, buildInvokeRejectionMetadata, tryExtractRouterLikePayloadFromErrorChain, tryExtractFallbackAttemptsFromErrorChain, pickRequestIdsFromRouterLike, resolveActivityCostCompletion, resolveCostCompletionForActivity, resolveCostCompletionWithAiTools, buildGatewayPricingRecord, mapAiCostResultToResolvedActivityCost, buildTraceUsageSummary, enrichTraceAttemptsWithBilling, hasNonZeroTokenUsage, MODEL_PROFILE_UNROUTABLE, ModelProfileUnroutableError, buildGatewayFallbackAttemptsFromTrace, formatFallbackExhaustionMessage, logResolvedModelRouting, mapGatewayFallbackAttemptsToRouter } from './gateway-utils.js';
21
- export { getGatewayOperationalMode, isProdGatewayMode, resolveGatewayDefaultModel, parseModelProviderSpec, CODE_DEFAULT_MODEL } from './gateway-mode.js';
22
- export type { GatewayOperationalMode, GatewayDefaultModelSource, DefaultModelSubstitutionReason, ResolvedGatewayDefault } from './gateway-mode.js';
21
+ export { getGatewayOperationalMode, isProdGatewayMode, parseModelProviderSpec } from './gateway-mode.js';
22
+ export type { GatewayOperationalMode } from './gateway-mode.js';
23
+ export { DEFAULT_ACTIVITY_FULL_RESPONSE_MAX_CHARS, GATEWAY_DEFAULT_FREQUENCY_PENALTY, GATEWAY_DEFAULT_PRESENCE_PENALTY, GATEWAY_DEFAULT_RETRY, GATEWAY_DEFAULT_TEMPERATURE, GATEWAY_DEFAULT_TOP_P, resolveRetryConfig } from './gateway-defaults.js';
23
24
  export type { ActivityCostStatus, ResolvedActivityCost } from './gateway-utils.js';
24
25
  export { contractSpecToFieldKeys, enrichParsedContentForOutputContract, resolveOutputContractFieldKeys } from './output-contract-normalizer.js';
25
26
  export type { OutputContractSpec } from './output-contract-normalizer.js';
@@ -32,7 +33,6 @@ export { Activix } from '@x12i/activix';
32
33
  export type { ActivixRunContext, ActivixAutoCostOptions, ActivixCostShape, FindByRunContextCriteria, GetJobActivitiesInput, GetJobActivitiesResult } from '@x12i/activix';
33
34
  export { normalizeToActivixCostShape } from '@x12i/activix';
34
35
  export { ActivityManager, ensureGatewayRequestIdentity } from './activity-manager.js';
35
- export { OptimixerManager } from './optimixer-manager.js';
36
36
  export type { ActivityIdentity } from './types.js';
37
37
  export { activityIdentityToLogContext, activityIdentityToLogMeta, withActivityIdentity, withGatewayLogContext, gatewayLogDebug } from './gateway-log-meta.js';
38
38
  export { createGatewayLogger, resolveGatewayVerboseEnabled } from './logger-factory.js';
@@ -45,8 +45,6 @@ export { ROUTER_LOG_ENV_PREFIX } from '@x12i/ai-providers-router';
45
45
  export type { Logxer, LogMeta, RuntimeIdentity, LogRuntimeContext, GetJobLogsInput, GetJobLogsResult, QueryableLogLine, LogDiagnostics, DiagnosticEvidence, ScopeCriteria, ScopeLogsResult, StackLoggingOptions, PackageLogLevelsConfig, PackageLogLevelSetting } from '@x12i/logxer';
46
46
  export { runtimeObjects } from './runtime-objects.js';
47
47
  export type { ActivixQueryableClient, LogxerQueryableClient, PackageRuntimeObjects, RuntimeObjects } from './runtime-objects.js';
48
- export { GatewayRateLimiter } from './gateway-rate-limiter.js';
49
- export { DEFAULT_RATE_LIMIT_MIN_INTERVAL_MS, DEFAULT_RATE_LIMIT_ENABLED } from './gateway-rate-limiter-constants.js';
50
48
  export { validateAIRequest, validateJSON, extractJSON, validateResponse, diagnoseRequest, diagnoseResponse, supportsJSONMode, createTestAIRequest, createValidationTestCases, runValidationTests, formatDiagnostic, assertValidAIRequest } from './troubleshooting-helper.js';
51
49
  export type { ValidationResult, DiagnosticInfo } from './troubleshooting-helper.js';
52
50
  export { OBJECT_TYPES_LIBRARY, getObjectType, getObjectTypesForAgent } from './object-types-library.js';
package/dist/index.js CHANGED
@@ -15,10 +15,11 @@ export { ProviderNotFoundError, FallbackExhaustedError } from '@x12i/ai-provider
15
15
  export * from '@x12i/ai-providers-router';
16
16
  // Export enhanced gateway
17
17
  export { AIGateway } from './gateway.js';
18
- export { InstructionNotFoundError, InstructionBackendError } from './instruction-errors.js';
18
+ export { InstructionNotFoundError, InstructionBackendError, ModelRequiredError, MaxTokensRequiredError } from './instruction-errors.js';
19
19
  export { autoRegisterProviders } from './gateway-provider-auto-register.js';
20
20
  export { attachGatewayInvokeRejectionMetadata, buildInvokeRejectionMetadata, tryExtractRouterLikePayloadFromErrorChain, tryExtractFallbackAttemptsFromErrorChain, pickRequestIdsFromRouterLike, resolveActivityCostCompletion, resolveCostCompletionForActivity, resolveCostCompletionWithAiTools, buildGatewayPricingRecord, mapAiCostResultToResolvedActivityCost, buildTraceUsageSummary, enrichTraceAttemptsWithBilling, hasNonZeroTokenUsage, MODEL_PROFILE_UNROUTABLE, ModelProfileUnroutableError, buildGatewayFallbackAttemptsFromTrace, formatFallbackExhaustionMessage, logResolvedModelRouting, mapGatewayFallbackAttemptsToRouter } from './gateway-utils.js';
21
- export { getGatewayOperationalMode, isProdGatewayMode, resolveGatewayDefaultModel, parseModelProviderSpec, CODE_DEFAULT_MODEL } from './gateway-mode.js';
21
+ export { getGatewayOperationalMode, isProdGatewayMode, parseModelProviderSpec } from './gateway-mode.js';
22
+ export { DEFAULT_ACTIVITY_FULL_RESPONSE_MAX_CHARS, GATEWAY_DEFAULT_FREQUENCY_PENALTY, GATEWAY_DEFAULT_PRESENCE_PENALTY, GATEWAY_DEFAULT_RETRY, GATEWAY_DEFAULT_TEMPERATURE, GATEWAY_DEFAULT_TOP_P, resolveRetryConfig } from './gateway-defaults.js';
22
23
  export { contractSpecToFieldKeys, enrichParsedContentForOutputContract, resolveOutputContractFieldKeys } from './output-contract-normalizer.js';
23
24
  export { mergeGatewayAndRequestTemplateRenderOptions, mergeTemplateRenderOptions } from './template-render-merge.js';
24
25
  export { GATEWAY_DUAL_MEMORY_ROOTS, buildMemoryResolutionRootFromWorkingMemory, coalesceMergedInputBucket, extractCallerInputsBag, mapSmartInputPathsInputsToInput, parseLooseJsonObject, prepareWorkingMemoryForTemplateRender, resolveGatewayMemoryPathValue } from './memory-path-resolution.js';
@@ -28,7 +29,6 @@ export { GATEWAY_DUAL_MEMORY_ROOTS, buildMemoryResolutionRootFromWorkingMemory,
28
29
  export { Activix } from '@x12i/activix';
29
30
  export { normalizeToActivixCostShape } from '@x12i/activix';
30
31
  export { ActivityManager, ensureGatewayRequestIdentity } from './activity-manager.js';
31
- export { OptimixerManager } from './optimixer-manager.js';
32
32
  export { activityIdentityToLogContext, activityIdentityToLogMeta, withActivityIdentity, withGatewayLogContext, gatewayLogDebug } from './gateway-log-meta.js';
33
33
  export { createGatewayLogger, resolveGatewayVerboseEnabled } from './logger-factory.js';
34
34
  export { GATEWAY_LOG_ENV_PREFIX, GATEWAY_LOGXER_PACKAGE, GATEWAY_STACK_LOG_PREFIXES, initializeGatewayPackageLogLevels, resetGatewayPackageLogLevelsInit } from './gateway-log-levels.js';
@@ -38,10 +38,7 @@ export { createLogxer, DebugLogAbstract, runWithLogContext, getStationRuntimeIde
38
38
  export { ROUTER_LOG_ENV_PREFIX } from '@x12i/ai-providers-router';
39
39
  // Runtime observability surface (leaf package: no downstream runtime objects)
40
40
  export { runtimeObjects } from './runtime-objects.js';
41
- // Export rate limiter
42
- export { GatewayRateLimiter } from './gateway-rate-limiter.js';
43
- export { DEFAULT_RATE_LIMIT_MIN_INTERVAL_MS, DEFAULT_RATE_LIMIT_ENABLED } from './gateway-rate-limiter-constants.js';
44
- // Export troubleshooting helpers
41
+ // Runtime observability surface (leaf package: no downstream runtime objects)
45
42
  export { validateAIRequest, validateJSON, extractJSON, validateResponse, diagnoseRequest, diagnoseResponse, supportsJSONMode, createTestAIRequest, createValidationTestCases, runValidationTests, formatDiagnostic, assertValidAIRequest } from './troubleshooting-helper.js';
46
43
  // Export object types library
47
44
  export { OBJECT_TYPES_LIBRARY, getObjectType, getObjectTypesForAgent } from './object-types-library.js';
@@ -1,6 +1,14 @@
1
1
  /**
2
- * Errors for instruction / prompt template and instructions-block resolution.
2
+ * Errors for instruction / prompt template resolution.
3
3
  */
4
+ export declare class ModelRequiredError extends Error {
5
+ readonly code = "MODEL_REQUIRED";
6
+ constructor(message?: string);
7
+ }
8
+ export declare class MaxTokensRequiredError extends Error {
9
+ readonly code = "MAX_TOKENS_REQUIRED";
10
+ constructor(message?: string);
11
+ }
4
12
  export declare class InstructionNotFoundError extends Error {
5
13
  key: string;
6
14
  backend: string;
@@ -1,6 +1,20 @@
1
1
  /**
2
- * Errors for instruction / prompt template and instructions-block resolution.
2
+ * Errors for instruction / prompt template resolution.
3
3
  */
4
+ export class ModelRequiredError extends Error {
5
+ code = 'MODEL_REQUIRED';
6
+ constructor(message = 'model is required on every invoke request (request.config.model or request.modelConfig.model)') {
7
+ super(message);
8
+ this.name = 'ModelRequiredError';
9
+ }
10
+ }
11
+ export class MaxTokensRequiredError extends Error {
12
+ code = 'MAX_TOKENS_REQUIRED';
13
+ constructor(message = 'maxTokens is required on every invoke request (request.config.maxTokens, request.modelConfig.maxTokens, GatewayConfig.maxTokens, or internalSystemActions)') {
14
+ super(message);
15
+ this.name = 'MaxTokensRequiredError';
16
+ }
17
+ }
4
18
  export class InstructionNotFoundError extends Error {
5
19
  key;
6
20
  backend;
@@ -4,6 +4,7 @@
4
4
  * Uses AI to analyze and fix poorly-written instructions.
5
5
  * This is a meta-feature that uses the AI Gateway (via router) to improve AI instructions.
6
6
  */
7
+ import { MaxTokensRequiredError } from './instruction-errors.js';
7
8
  /**
8
9
  * The meta-instructions used to fix other instructions
9
10
  * Loaded from instructions-audit.md
@@ -128,6 +129,9 @@ export async function optimizeInstructions(gateway, originalInstructions, option
128
129
  if (enforceJsonOutput) {
129
130
  additionalContext += '\n\nIMPORTANT: The fixed instructions MUST include strict JSON-only output enforcement rules.';
130
131
  }
132
+ if (typeof internalConfig?.maxTokens !== 'number' || internalConfig.maxTokens <= 0) {
133
+ throw new MaxTokensRequiredError('maxTokens must be set in internalSystemActions.instructionOptimization for optimizeInstructions');
134
+ }
131
135
  const aiRequestId = `optimize-instructions-${Date.now()}`;
132
136
  const identity = {
133
137
  ...options.identity,
@@ -149,7 +153,7 @@ export async function optimizeInstructions(gateway, originalInstructions, option
149
153
  model,
150
154
  provider,
151
155
  temperature: internalConfig?.temperature ?? 0.3, // Use internal config or default
152
- maxTokens: internalConfig?.maxTokens ?? 4000 // Use internal config or default
156
+ maxTokens: internalConfig?.maxTokens
153
157
  },
154
158
  // Use JSON output type to ensure we get structured response
155
159
  primaryObjectType: {
@@ -8,17 +8,11 @@ import type { TemplateRenderOptions } from '@x12i/rendrix';
8
8
  import type { Logxer } from '@x12i/logxer';
9
9
  type Request = ChatRequest | AIRequest;
10
10
  export interface MessageBuilderConfig {
11
- defaultInstructionsBlocks: Record<string, any>;
12
- /** Flat block overrides from gateway `instructionsBlocks` (merged at init). */
13
- instructionsBlockOverrides: Record<string, string>;
14
11
  logger: Logxer;
15
12
  /** From packaged defaults + gateway `templateRendering`; merged per request with `templateRenderOptions`. */
16
13
  templateRendering?: TemplateRenderOptions;
17
14
  }
18
15
  export interface BuildMessagesOptions {
19
- useSystemContextFallback?: boolean;
20
- includeInputRecognition?: boolean;
21
- includeReinforcement?: boolean;
22
16
  parsedSnapshot?: any;
23
17
  shortTermMemory?: Record<string, any>;
24
18
  experienceMemory?: Record<string, any>;
@@ -5,137 +5,12 @@
5
5
  */
6
6
  import { parseTemplate } from './template-parser.js';
7
7
  import { mergeGatewayAndRequestTemplateRenderOptions } from './template-render-merge.js';
8
- import { resolveNestedInstructionsBlock } from './gateway-instructions.js';
9
8
  // Type guard
10
9
  // AIRequest is distinguished by having primaryObjectType or objectTypes
11
10
  // ChatRequest does not have these fields
12
11
  function isAIRequest(request) {
13
12
  return 'primaryObjectType' in request || ('objectTypes' in request && Array.isArray(request.objectTypes));
14
13
  }
15
- /**
16
- * Builds input recognition rules
17
- */
18
- async function buildInputRecognitionRules(request, config, options) {
19
- const { defaultInstructionsBlocks, instructionsBlockOverrides, logger } = config;
20
- if (!options.includeInputRecognition || !isAIRequest(request)) {
21
- return '';
22
- }
23
- const rules = [];
24
- // Add input recognition rules
25
- // Try direct access first (faster, more reliable), then fallback to resolver
26
- const rulePaths = [
27
- 'input.inputRecognitionRule',
28
- 'input.emptyInputHandling',
29
- 'input.testInputHandling',
30
- 'input.inputLocationClarifier'
31
- ];
32
- const requestInstructionsBlocks = isAIRequest(request) && request.config?.instructionsBlocks
33
- ? request.config.instructionsBlocks
34
- : undefined;
35
- const blockContext = {
36
- defaultInstructionsBlocks,
37
- instructionsBlockOverrides,
38
- requestInstructionsBlocks,
39
- config: {},
40
- logger
41
- };
42
- for (const rulePath of rulePaths) {
43
- try {
44
- // Try direct access to nested structure first
45
- const pathParts = rulePath.split('.');
46
- let rule;
47
- if (pathParts.length === 2) {
48
- const [parent, child] = pathParts;
49
- const parentObj = defaultInstructionsBlocks[parent];
50
- if (parentObj && typeof parentObj === 'object' && !Array.isArray(parentObj)) {
51
- rule = parentObj[child];
52
- if (rule && typeof rule === 'string') {
53
- logger.debug('Resolved rule via direct access', {
54
- rulePath,
55
- valueLength: rule.length
56
- });
57
- }
58
- }
59
- }
60
- // If direct access didn't work, try merged inline overrides / nested defaults
61
- if (!rule) {
62
- rule = await resolveNestedInstructionsBlock(rulePath, request.agentId || '', request.taskTypeId, blockContext);
63
- }
64
- if (rule && typeof rule === 'string' && rule.trim() !== '') {
65
- rules.push(rule);
66
- }
67
- }
68
- catch (error) {
69
- logger.debug('Failed to resolve input rule', {
70
- rulePath,
71
- error: error instanceof Error ? error.message : String(error)
72
- });
73
- }
74
- }
75
- return rules.join('\n\n');
76
- }
77
- /**
78
- * Builds reinforcement rules
79
- */
80
- async function buildReinforcementRules(request, config, options) {
81
- const { defaultInstructionsBlocks, instructionsBlockOverrides, logger } = config;
82
- if (!options.includeReinforcement || !isAIRequest(request)) {
83
- return '';
84
- }
85
- const rules = [];
86
- // Add reinforcement rules
87
- // Try direct access first (faster, more reliable), then fallback to resolver
88
- const rulePaths = [
89
- 'reinforcement.emptyIsSuccess',
90
- 'reinforcement.inputAlreadyProvided',
91
- 'reinforcement.noConversation',
92
- 'reinforcement.failureIndicators'
93
- ];
94
- const requestInstructionsBlocks = isAIRequest(request) && request.config?.instructionsBlocks
95
- ? request.config.instructionsBlocks
96
- : undefined;
97
- const blockContext = {
98
- defaultInstructionsBlocks,
99
- instructionsBlockOverrides,
100
- requestInstructionsBlocks,
101
- config: {},
102
- logger
103
- };
104
- for (const rulePath of rulePaths) {
105
- try {
106
- // Try direct access to nested structure first
107
- const pathParts = rulePath.split('.');
108
- let rule;
109
- if (pathParts.length === 2) {
110
- const [parent, child] = pathParts;
111
- const parentObj = defaultInstructionsBlocks[parent];
112
- if (parentObj && typeof parentObj === 'object' && !Array.isArray(parentObj)) {
113
- rule = parentObj[child];
114
- if (rule && typeof rule === 'string') {
115
- logger.debug('Resolved rule via direct access', {
116
- rulePath,
117
- valueLength: rule.length
118
- });
119
- }
120
- }
121
- }
122
- // If direct access didn't work, try merged inline overrides / nested defaults
123
- if (!rule) {
124
- rule = await resolveNestedInstructionsBlock(rulePath, request.agentId || '', request.taskTypeId, blockContext);
125
- }
126
- if (rule && typeof rule === 'string' && rule.trim() !== '') {
127
- rules.push(rule);
128
- }
129
- }
130
- catch (error) {
131
- logger.debug('Failed to resolve reinforcement rule', {
132
- rulePath,
133
- error: error instanceof Error ? error.message : String(error)
134
- });
135
- }
136
- }
137
- return rules.join('\n\n');
138
- }
139
14
  /**
140
15
  * Builds user message (prompt + input)
141
16
  */
@@ -369,10 +244,9 @@ async function hasFlexMdContract(instructionsText, complianceLevel = 'L0') {
369
244
  * Main function to build messages
370
245
  */
371
246
  export async function buildMessages(request, config, options = {}) {
372
- const { useSystemContextFallback = true, includeInputRecognition = true, includeReinforcement = true, parsedSnapshot } = options;
247
+ const { parsedSnapshot } = options;
373
248
  const { logger } = config;
374
249
  const messages = [];
375
- let usingSystemContext = false;
376
250
  // Step 1: Instructions as template text (parsed with full memory context)
377
251
  let instructionsText = '';
378
252
  // Extract memory context from options
@@ -403,27 +277,14 @@ export async function buildMessages(request, config, options = {}) {
403
277
  instructionsText = await parseTemplate(instructionsText, request.workingMemory, undefined, // taskConfig removed - no longer used
404
278
  shortTermMemory, experienceMemory, knowledgeMemory, templateRenderOptions, logger);
405
279
  }
406
- // Step 4: Add input recognition rules
407
- const inputRules = await buildInputRecognitionRules(request, config, options);
408
- if (inputRules) {
409
- instructionsText = `${instructionsText}\n\n${inputRules}`;
410
- }
411
- // Step 5: Add reinforcement rules
412
- const reinforcementRules = await buildReinforcementRules(request, config, options);
413
- if (reinforcementRules) {
414
- instructionsText = `${instructionsText}\n\n${reinforcementRules}`;
415
- }
416
- // Step 6: Add system message
417
- // CRITICAL: We must have instructions - this is a bad request if we don't
280
+ // Instructions must be provided explicitly — no packaged block injection
418
281
  if (!instructionsText || instructionsText.trim() === '') {
419
282
  const errorMessage = 'No instructions available - cannot proceed without clear instructions. This is a bad request.';
420
283
  logger.error(errorMessage, {
421
284
  jobId: request.identity.jobId,
422
285
  agentId: request.agentId,
423
286
  hasRequestInstructions: !!request.instructions,
424
- instructionType: typeof request.instructions,
425
- usedSystemContextFallback: usingSystemContext,
426
- systemContextFallbackEnabled: useSystemContextFallback
287
+ instructionType: typeof request.instructions
427
288
  });
428
289
  throw new Error(errorMessage);
429
290
  }
@@ -515,8 +376,6 @@ export async function buildMessages(request, config, options = {}) {
515
376
  });
516
377
  return {
517
378
  messages,
518
- metadata: {
519
- usingSystemContext
520
- }
379
+ metadata: {}
521
380
  };
522
381
  }
package/dist/types.d.ts CHANGED
@@ -398,9 +398,10 @@ export interface GatewayConfig extends Omit<RouterConfig, 'defaultEngine' | 'log
398
398
  prefer?: boolean;
399
399
  };
400
400
  /**
401
- * Operational mode override (`process.env.mode` / `MODE` when omitted).
402
- * - `prod`: unresolved models fall back to {@link AI_GATEWAY_DEFAULT_MODEL} / packaged default (with Logxer warn).
403
- * - `dev` / `debug`: unresolved models throw {@link ModelResolutionError} from `@x12i/ai-tools`.
401
+ * Operational mode override (`process.env.mode` / `MODE` when omitted; default `debug`).
402
+ * Downstream hosts (ai-skills, ai-tasks, graph-engine) should expose this to their clients.
403
+ * - `dev` / `debug`: unresolved profile/model names throw {@link ModelResolutionError} from `@x12i/ai-tools` when catalog resolution is enabled.
404
+ * - `prod`: same strict resolution — every invoke must include an explicit `model`; the gateway never substitutes a packaged or env default.
404
405
  */
405
406
  mode?: 'dev' | 'debug' | 'prod';
406
407
  /**
@@ -421,70 +422,24 @@ export interface GatewayConfig extends Omit<RouterConfig, 'defaultEngine' | 'log
421
422
  costIncludeBreakdown?: boolean;
422
423
  };
423
424
  /**
424
- * Adaptive `max_tokens` via @x12i/optimixer (embedded Activix mode).
425
- * When enabled, the gateway predicts completion budget before each LLM call unless
426
- * the caller explicitly sets `maxTokens` on the request / modelConfig / gateway config.
427
- */
428
- optimixer?: {
429
- /** @default false */
430
- enabled?: boolean;
431
- acceptableRisk?: 'very-low' | 'low' | 'medium' | 'high' | number;
432
- /** Cap predicted max tokens with flex-md model limit when available. @default true */
433
- useFlexMdCeiling?: boolean;
434
- /** Passed to Optimixer warmup on create. */
435
- warmupLimit?: number;
436
- };
437
- /**
438
- * InstructionsBlocks overrides
439
- * Key: block name, Value: block content
440
- */
441
- instructionsBlocks?: Record<string, string>;
442
- /**
443
- * Default temperature for LLM requests
425
+ * Default temperature for LLM requests when not set on the invoke request.
426
+ * @default 0.7 see {@link GATEWAY_DEFAULT_TEMPERATURE} in `@x12i/ai-gateway`.
444
427
  */
445
428
  temperature?: number;
446
429
  /**
447
- * Other LLM config options
430
+ * Gateway-wide completion budget. Merged when the invoke does not set `maxTokens` on
431
+ * `request.config` / `modelConfig` (lower priority than per-request values).
432
+ * Every invoke must end up with a positive `maxTokens` after merge — no code default.
448
433
  */
449
434
  maxTokens?: number;
450
435
  topP?: number;
451
436
  frequencyPenalty?: number;
452
437
  presencePenalty?: number;
453
438
  /**
454
- * Retry configuration for network and server errors
439
+ * Retry configuration for network and server errors on provider invoke.
440
+ * Defaults: {@link GATEWAY_DEFAULT_RETRY}. Override per request via `request.retry` or `request.config.retry`.
455
441
  */
456
442
  retry?: RetryConfig;
457
- /**
458
- * Rate limiting configuration
459
- * Smart rate limiting that tracks when the last API call was made
460
- * and only waits if necessary to maintain minimum intervals between calls.
461
- * Applied automatically to all provider calls via router interceptors.
462
- */
463
- rateLimit?: {
464
- /**
465
- * Enable rate limiting
466
- * @default true
467
- */
468
- enabled?: boolean;
469
- /**
470
- * Default minimum interval in milliseconds between API calls (used if provider-specific not set)
471
- * @default 500
472
- */
473
- defaultMinIntervalMs?: number;
474
- /**
475
- * Per-provider minimum intervals in milliseconds
476
- * Key: provider name (e.g., 'openai', 'grok')
477
- * Value: minimum milliseconds between calls for that provider
478
- *
479
- * @example
480
- * {
481
- * openai: 500, // 500ms between OpenAI calls
482
- * grok: 1000, // 1 second between Grok calls
483
- * anthropic: 300 // 300ms between Anthropic calls
484
- * }
485
- */
486
- providerIntervals?: Record<string, number>;
487
- };
488
443
  /**
489
444
  * Default task configuration for template rendering
490
445
  * @deprecated taskConfig is no longer used by Rendrix 3.0.0+
@@ -744,6 +699,10 @@ interface BaseLLMRequest extends Omit<LLMRequest, 'messages' | 'input' | 'reques
744
699
  * Merged the same way as `smartInput`; `templateRenderOptions.smartInputRenderOptions` wins when both are set.
745
700
  */
746
701
  smartInputRenderOptions?: SmartInputRenderOptions;
702
+ /**
703
+ * Per-request retry overrides (merged over gateway `retry` and {@link GATEWAY_DEFAULT_RETRY}).
704
+ */
705
+ retry?: RetryConfig;
747
706
  /**
748
707
  * Messages array - Optional, can be used instead of instructions/prompt
749
708
  * If provided, will be appended as-is after built messages; instructions template text is still parsed for the system message when present
@@ -1089,7 +1048,7 @@ export interface EnhancedLLMResponse<TContent = unknown> extends Omit<AIResponse
1089
1048
  usage?: GatewayTraceUsageSummary;
1090
1049
  /**
1091
1050
  * Merged gateway/router generation config actually used for the invocation (after
1092
- * {@link mergeConfig}: modelConfig / request.config / defaults / flex-md maxTokens).
1051
+ * {@link mergeConfig}: modelConfig / request.config / gateway maxTokens).
1093
1052
  * Only populated when diagnostics trace mode is enabled.
1094
1053
  */
1095
1054
  mergedRouterConfig?: GatewayTraceMergedConfig;