@lota-sdk/core 0.4.7 → 0.4.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (259)
  1. package/package.json +11 -12
  2. package/src/ai/embedding-cache.ts +94 -22
  3. package/src/ai-gateway/ai-gateway.ts +738 -223
  4. package/src/config/agent-defaults.ts +176 -75
  5. package/src/config/agent-types.ts +54 -4
  6. package/src/config/constants.ts +8 -2
  7. package/src/config/logger.ts +286 -19
  8. package/src/config/model-constants.ts +1 -0
  9. package/src/config/thread-defaults.ts +33 -21
  10. package/src/create-runtime.ts +725 -383
  11. package/src/db/base.service.ts +52 -28
  12. package/src/db/cursor-pagination.ts +71 -30
  13. package/src/db/memory-store.helpers.ts +4 -7
  14. package/src/db/memory-store.ts +856 -598
  15. package/src/db/memory.ts +398 -275
  16. package/src/db/record-id.ts +32 -10
  17. package/src/db/schema-fingerprint.ts +30 -12
  18. package/src/db/service-normalization.ts +255 -0
  19. package/src/db/service.ts +726 -761
  20. package/src/db/startup.ts +140 -66
  21. package/src/db/transaction-conflict.ts +15 -0
  22. package/src/effect/awaitable-effect.ts +87 -0
  23. package/src/effect/errors.ts +121 -0
  24. package/src/effect/helpers.ts +98 -0
  25. package/src/effect/index.ts +22 -0
  26. package/src/effect/layers.ts +228 -0
  27. package/src/effect/runtime-ref.ts +25 -0
  28. package/src/effect/runtime.ts +31 -0
  29. package/src/effect/services.ts +57 -0
  30. package/src/effect/zod.ts +43 -0
  31. package/src/embeddings/provider.ts +122 -71
  32. package/src/index.ts +46 -1
  33. package/src/openrouter/direct-provider.ts +29 -0
  34. package/src/queues/autonomous-job.queue.ts +130 -74
  35. package/src/queues/context-compaction.queue.ts +60 -15
  36. package/src/queues/delayed-node-promotion.queue.ts +52 -15
  37. package/src/queues/document-processor.queue.ts +52 -77
  38. package/src/queues/memory-consolidation.queue.ts +47 -32
  39. package/src/queues/organization-learning.queue.ts +13 -4
  40. package/src/queues/plan-agent-heartbeat.queue.ts +65 -21
  41. package/src/queues/plan-scheduler.queue.ts +107 -31
  42. package/src/queues/post-chat-memory.queue.ts +66 -24
  43. package/src/queues/queue-factory.ts +142 -52
  44. package/src/queues/standalone-worker.ts +39 -0
  45. package/src/queues/title-generation.queue.ts +54 -9
  46. package/src/redis/connection.ts +84 -32
  47. package/src/redis/index.ts +6 -8
  48. package/src/redis/org-memory-lock.ts +60 -27
  49. package/src/redis/redis-lease-lock.ts +200 -121
  50. package/src/redis/runtime-connection.ts +10 -0
  51. package/src/redis/stream-context.ts +84 -46
  52. package/src/runtime/agent-identity-overrides.ts +2 -2
  53. package/src/runtime/agent-runtime-policy.ts +4 -1
  54. package/src/runtime/agent-stream-helpers.ts +20 -9
  55. package/src/runtime/chat-run-orchestration.ts +102 -19
  56. package/src/runtime/chat-run-registry.ts +36 -2
  57. package/src/runtime/context-compaction/context-compaction-runtime.ts +107 -0
  58. package/src/runtime/{context-compaction.ts → context-compaction/context-compaction.ts} +114 -91
  59. package/src/runtime/execution-plan-visibility.ts +2 -2
  60. package/src/runtime/execution-plan.ts +42 -15
  61. package/src/runtime/graph-designer.ts +11 -7
  62. package/src/runtime/helper-model.ts +135 -48
  63. package/src/runtime/index.ts +7 -7
  64. package/src/runtime/indexed-repositories-policy.ts +3 -3
  65. package/src/runtime/{memory-block.ts → memory/memory-block.ts} +40 -36
  66. package/src/runtime/{memory-digest-policy.ts → memory/memory-digest-policy.ts} +1 -1
  67. package/src/runtime/{memory-pipeline.ts → memory/memory-pipeline.ts} +1 -1
  68. package/src/runtime/{memory-prompts-fact.ts → memory/memory-prompts-fact.ts} +2 -2
  69. package/src/runtime/{memory-scope.ts → memory/memory-scope.ts} +12 -6
  70. package/src/runtime/plugin-resolution.ts +144 -24
  71. package/src/runtime/plugin-types.ts +9 -1
  72. package/src/runtime/post-turn-side-effects.ts +197 -130
  73. package/src/runtime/retrieval-adapters.ts +38 -4
  74. package/src/runtime/runtime-config.ts +165 -61
  75. package/src/runtime/runtime-extensions.ts +21 -34
  76. package/src/runtime/social-chat/social-chat-agent-runner.ts +157 -0
  77. package/src/runtime/{social-chat-history.ts → social-chat/social-chat-history.ts} +42 -20
  78. package/src/runtime/social-chat/social-chat.ts +594 -0
  79. package/src/runtime/specialist-runner.ts +36 -10
  80. package/src/runtime/team-consultation/team-consultation-orchestrator.ts +427 -0
  81. package/src/runtime/{team-consultation-prompts.ts → team-consultation/team-consultation-prompts.ts} +6 -2
  82. package/src/runtime/thread-chat-helpers.ts +2 -2
  83. package/src/runtime/thread-plan-turn.ts +2 -1
  84. package/src/runtime/thread-turn-context.ts +172 -94
  85. package/src/runtime/turn-lifecycle.ts +93 -27
  86. package/src/services/agent-activity.service.ts +287 -203
  87. package/src/services/agent-executor.service.ts +329 -217
  88. package/src/services/artifact.service.ts +225 -148
  89. package/src/services/attachment.service.ts +137 -115
  90. package/src/services/autonomous-job.service.ts +888 -491
  91. package/src/services/chat-run-registry.service.ts +11 -1
  92. package/src/services/context-compaction.service.ts +136 -86
  93. package/src/services/document-chunk.service.ts +162 -90
  94. package/src/services/execution-plan/execution-plan-approval.ts +26 -0
  95. package/src/services/execution-plan/execution-plan-context.ts +29 -0
  96. package/src/services/execution-plan/execution-plan-graph.ts +256 -0
  97. package/src/services/execution-plan/execution-plan-schedule.ts +84 -0
  98. package/src/services/execution-plan/execution-plan-spec.ts +75 -0
  99. package/src/services/execution-plan/execution-plan.service.ts +1041 -0
  100. package/src/services/feedback-loop.service.ts +132 -76
  101. package/src/services/global-orchestrator.service.ts +80 -170
  102. package/src/services/graph-full-routing.ts +182 -0
  103. package/src/services/index.ts +18 -20
  104. package/src/services/institutional-memory.service.ts +220 -123
  105. package/src/services/learned-skill.service.ts +364 -259
  106. package/src/services/memory/memory-conversation.ts +95 -0
  107. package/src/services/memory/memory-org-memory.ts +39 -0
  108. package/src/services/memory/memory-preseeded.ts +80 -0
  109. package/src/services/memory/memory-rerank.ts +297 -0
  110. package/src/services/{memory-utils.ts → memory/memory-utils.ts} +5 -5
  111. package/src/services/memory/memory.service.ts +692 -0
  112. package/src/services/memory/rerank.service.ts +209 -0
  113. package/src/services/monitoring-window.service.ts +92 -70
  114. package/src/services/mutating-approval.service.ts +62 -53
  115. package/src/services/node-workspace.service.ts +141 -98
  116. package/src/services/notification.service.ts +17 -16
  117. package/src/services/organization-member.service.ts +120 -66
  118. package/src/services/organization.service.ts +144 -51
  119. package/src/services/ownership-dispatcher.service.ts +415 -264
  120. package/src/services/plan/plan-agent-heartbeat.service.ts +234 -0
  121. package/src/services/plan/plan-agent-query.service.ts +322 -0
  122. package/src/services/plan/plan-approval.service.ts +102 -0
  123. package/src/services/plan/plan-artifact.service.ts +60 -0
  124. package/src/services/plan/plan-builder.service.ts +76 -0
  125. package/src/services/plan/plan-checkpoint.service.ts +103 -0
  126. package/src/services/{plan-compiler.service.ts → plan/plan-compiler.service.ts} +26 -9
  127. package/src/services/plan/plan-completion-side-effects.ts +175 -0
  128. package/src/services/plan/plan-coordination.service.ts +181 -0
  129. package/src/services/plan/plan-cycle.service.ts +398 -0
  130. package/src/services/plan/plan-deadline.service.ts +547 -0
  131. package/src/services/plan/plan-event-delivery.service.ts +261 -0
  132. package/src/services/plan/plan-executor-context.ts +35 -0
  133. package/src/services/plan/plan-executor-graph.ts +475 -0
  134. package/src/services/plan/plan-executor-helpers.ts +322 -0
  135. package/src/services/plan/plan-executor-persistence.ts +209 -0
  136. package/src/services/plan/plan-executor.service.ts +1654 -0
  137. package/src/services/{plan-helpers.ts → plan/plan-helpers.ts} +1 -1
  138. package/src/services/{plan-run-data.ts → plan/plan-run-data.ts} +4 -4
  139. package/src/services/plan/plan-run-serialization.ts +15 -0
  140. package/src/services/plan/plan-run.service.ts +644 -0
  141. package/src/services/plan/plan-scheduler.service.ts +385 -0
  142. package/src/services/plan/plan-template.service.ts +224 -0
  143. package/src/services/plan/plan-transaction-events.ts +33 -0
  144. package/src/services/plan/plan-validator.service.ts +907 -0
  145. package/src/services/plan/plan-workspace.service.ts +125 -0
  146. package/src/services/plugin-executor.service.ts +97 -68
  147. package/src/services/quality-metrics.service.ts +112 -94
  148. package/src/services/queue-job.service.ts +296 -230
  149. package/src/services/recent-activity-title.service.ts +65 -36
  150. package/src/services/recent-activity.service.ts +274 -259
  151. package/src/services/skill-resolver.service.ts +38 -12
  152. package/src/services/social-chat-history.service.ts +176 -125
  153. package/src/services/system-executor.service.ts +91 -61
  154. package/src/services/thread/thread-active-run.ts +203 -0
  155. package/src/services/thread/thread-bootstrap.ts +369 -0
  156. package/src/services/thread/thread-listing.ts +198 -0
  157. package/src/services/thread/thread-memory-block.ts +117 -0
  158. package/src/services/thread/thread-message.service.ts +363 -0
  159. package/src/services/thread/thread-record-store.ts +155 -0
  160. package/src/services/thread/thread-title.service.ts +74 -0
  161. package/src/services/thread/thread-turn-execution.ts +280 -0
  162. package/src/services/thread/thread-turn-message-context.ts +73 -0
  163. package/src/services/thread/thread-turn-preparation.service.ts +1146 -0
  164. package/src/services/thread/thread-turn-streaming.ts +402 -0
  165. package/src/services/thread/thread-turn-tracing.ts +35 -0
  166. package/src/services/thread/thread-turn.ts +343 -0
  167. package/src/services/thread/thread.service.ts +335 -0
  168. package/src/services/user.service.ts +82 -32
  169. package/src/services/write-intent-validator.service.ts +63 -51
  170. package/src/storage/attachment-parser.ts +69 -27
  171. package/src/storage/attachment-storage.service.ts +331 -275
  172. package/src/storage/generated-document-storage.service.ts +66 -34
  173. package/src/system-agents/agent-result.ts +3 -1
  174. package/src/system-agents/context-compaction.agent.ts +2 -2
  175. package/src/system-agents/delegated-agent-factory.ts +159 -90
  176. package/src/system-agents/memory-reranker.agent.ts +2 -2
  177. package/src/system-agents/memory.agent.ts +2 -2
  178. package/src/system-agents/recent-activity-title-refiner.agent.ts +2 -2
  179. package/src/system-agents/regular-chat-memory-digest.agent.ts +2 -2
  180. package/src/system-agents/skill-extractor.agent.ts +2 -2
  181. package/src/system-agents/skill-manager.agent.ts +2 -2
  182. package/src/system-agents/thread-router.agent.ts +157 -113
  183. package/src/system-agents/title-generator.agent.ts +2 -2
  184. package/src/tools/execution-plan.tool.ts +220 -161
  185. package/src/tools/fetch-webpage.tool.ts +21 -17
  186. package/src/tools/firecrawl-client.ts +16 -6
  187. package/src/tools/index.ts +1 -0
  188. package/src/tools/memory-block.tool.ts +14 -6
  189. package/src/tools/plan-approval.tool.ts +49 -47
  190. package/src/tools/read-file-parts.tool.ts +44 -33
  191. package/src/tools/remember-memory.tool.ts +65 -45
  192. package/src/tools/search-web.tool.ts +26 -22
  193. package/src/tools/search.tool.ts +41 -29
  194. package/src/tools/team-think.tool.ts +124 -83
  195. package/src/tools/user-questions.tool.ts +4 -3
  196. package/src/tools/web-tool-shared.ts +6 -0
  197. package/src/utils/async.ts +17 -23
  198. package/src/utils/crypto.ts +21 -0
  199. package/src/utils/date-time.ts +40 -1
  200. package/src/utils/errors.ts +95 -16
  201. package/src/utils/hono-error-handler.ts +24 -39
  202. package/src/utils/index.ts +2 -1
  203. package/src/utils/null-proto-record.ts +41 -0
  204. package/src/utils/sse-keepalive.ts +124 -21
  205. package/src/workers/bootstrap.ts +186 -51
  206. package/src/workers/memory-consolidation.worker.ts +325 -237
  207. package/src/workers/organization-learning.worker.ts +50 -16
  208. package/src/workers/regular-chat-memory-digest.helpers.ts +28 -27
  209. package/src/workers/regular-chat-memory-digest.runner.ts +175 -114
  210. package/src/workers/skill-extraction.runner.ts +176 -93
  211. package/src/workers/utils/file-section-chunker.ts +8 -10
  212. package/src/workers/utils/repo-structure-extractor.ts +349 -260
  213. package/src/workers/utils/repomix-file-sections.ts +2 -2
  214. package/src/workers/utils/thread-message-query.ts +97 -38
  215. package/src/workers/worker-utils.ts +56 -31
  216. package/src/config/debug-logger.ts +0 -47
  217. package/src/redis/connection-accessor.ts +0 -26
  218. package/src/runtime/context-compaction-runtime.ts +0 -87
  219. package/src/runtime/social-chat-agent-runner.ts +0 -118
  220. package/src/runtime/social-chat.ts +0 -516
  221. package/src/runtime/team-consultation-orchestrator.ts +0 -272
  222. package/src/services/adaptive-playbook.service.ts +0 -152
  223. package/src/services/artifact-provenance.service.ts +0 -172
  224. package/src/services/chat-attachments.service.ts +0 -17
  225. package/src/services/context-compaction-runtime.singleton.ts +0 -13
  226. package/src/services/execution-plan.service.ts +0 -1118
  227. package/src/services/memory.service.ts +0 -844
  228. package/src/services/plan-agent-heartbeat.service.ts +0 -136
  229. package/src/services/plan-agent-query.service.ts +0 -267
  230. package/src/services/plan-approval.service.ts +0 -83
  231. package/src/services/plan-artifact.service.ts +0 -50
  232. package/src/services/plan-builder.service.ts +0 -67
  233. package/src/services/plan-checkpoint.service.ts +0 -81
  234. package/src/services/plan-completion-side-effects.ts +0 -80
  235. package/src/services/plan-coordination.service.ts +0 -157
  236. package/src/services/plan-cycle.service.ts +0 -284
  237. package/src/services/plan-deadline.service.ts +0 -430
  238. package/src/services/plan-event-delivery.service.ts +0 -166
  239. package/src/services/plan-executor.service.ts +0 -1950
  240. package/src/services/plan-run.service.ts +0 -515
  241. package/src/services/plan-scheduler.service.ts +0 -240
  242. package/src/services/plan-template.service.ts +0 -177
  243. package/src/services/plan-validator.service.ts +0 -818
  244. package/src/services/plan-workspace.service.ts +0 -83
  245. package/src/services/thread-message.service.ts +0 -275
  246. package/src/services/thread-plan-registry.service.ts +0 -22
  247. package/src/services/thread-title.service.ts +0 -39
  248. package/src/services/thread-turn-preparation.service.ts +0 -1147
  249. package/src/services/thread-turn.ts +0 -172
  250. package/src/services/thread.service.ts +0 -869
  251. package/src/utils/env.ts +0 -8
  252. /package/src/runtime/{context-compaction-constants.ts → context-compaction/context-compaction-constants.ts} +0 -0
  253. /package/src/runtime/{memory-format.ts → memory/memory-format.ts} +0 -0
  254. /package/src/runtime/{memory-prompts-parse.ts → memory/memory-prompts-parse.ts} +0 -0
  255. /package/src/runtime/{memory-prompts-update.ts → memory/memory-prompts-update.ts} +0 -0
  256. /package/src/runtime/{social-chat-prompts.ts → social-chat/social-chat-prompts.ts} +0 -0
  257. /package/src/services/{plan-node-spec.ts → plan/plan-node-spec.ts} +0 -0
  258. /package/src/services/{thread-constants.ts → thread/thread-constants.ts} +0 -0
  259. /package/src/services/{thread.types.ts → thread/thread.types.ts} +0 -0
@@ -1,33 +1,438 @@
1
1
  import { devToolsMiddleware } from '@ai-sdk/devtools'
2
2
  import { createOpenAI } from '@ai-sdk/openai'
3
- import { wrapLanguageModel } from 'ai'
3
+ import { wrapEmbeddingModel, wrapLanguageModel } from 'ai'
4
4
  import type { LanguageModelMiddleware } from 'ai'
5
+ import { Cause, Clock, Context, Duration, Effect, ExecutionPlan, Fiber, Layer, Semaphore } from 'effect'
5
6
 
6
- import { getRuntimeConfig } from '../runtime/runtime-config'
7
+ import { DEFAULT_AI_GATEWAY_URL } from '../config/constants'
8
+ import { AiGenerationError, ConfigurationError } from '../effect/errors'
9
+ import { getLotaSdkRuntime } from '../effect/runtime'
10
+ import { RuntimeConfigServiceTag } from '../effect/services'
11
+ import { getDirectOpenRouterProvider, normalizeDirectOpenRouterModelId } from '../openrouter/direct-provider'
7
12
  import { isRecord, readString } from '../utils/string'
8
13
  import { buildAiGatewayCacheHeaders } from './cache-headers'
9
14
 
10
- type AiGatewayLanguageModel = Parameters<typeof wrapLanguageModel>[0]['model']
11
- type AiGatewayExtraParams = Record<string, unknown>
12
15
  type AiGatewayChatResponse = { body?: unknown }
13
16
  type AiGatewayTransformParamsOptions = Parameters<NonNullable<LanguageModelMiddleware['transformParams']>>[0]
14
17
  type WrapStreamOptions = Parameters<NonNullable<LanguageModelMiddleware['wrapStream']>>[0]
18
+ type AiGatewayLanguageModel = Parameters<typeof wrapLanguageModel>[0]['model']
19
+ type AiGatewayEmbeddingModel = Parameters<typeof wrapEmbeddingModel>[0]['model']
15
20
  type AiGatewayCallOptions = WrapStreamOptions['params']
16
21
  type AiGatewayGenerateResult = Awaited<ReturnType<WrapStreamOptions['doGenerate']>>
17
22
  type AiGatewayStreamResult = Awaited<ReturnType<WrapStreamOptions['doStream']>>
18
23
  type AiGatewayGeneratedContent = AiGatewayGenerateResult['content'][number]
19
24
  type AiGatewayStreamPart = AiGatewayStreamResult['stream'] extends ReadableStream<infer T> ? T : never
20
- type AiGatewayConfig = { apiKey: string; baseURL: string }
21
25
  type AiGatewayProviderOptions = NonNullable<AiGatewayCallOptions['providerOptions']>
26
+ type AiGatewayAttemptResult<A> = { source: string; result: A }
27
+
28
// Effect service tag (key 'AiGatewayGenerateAttempt') for a single non-streaming gateway attempt.
// The service value exposes `execute`, an effect that yields `{ source, result }` — the generate result
// plus the label of the source that produced it — or fails with `AiGenerationError`.
// NOTE(review): where this service is provided is outside this view; presumably one implementation
// per fallback step of an `ExecutionPlan` — confirm against the rest of the file.
+ class AiGatewayGenerateAttempt extends Context.Service<
29
+ AiGatewayGenerateAttempt,
30
+ { readonly execute: Effect.Effect<AiGatewayAttemptResult<AiGatewayGenerateResult>, AiGenerationError> }
31
+ >()('AiGatewayGenerateAttempt') {}
32
+
33
// Effect service tag (key 'AiGatewayStreamAttempt') for a single streaming gateway attempt.
// The service value exposes `execute`, an effect that yields `{ source, result }` — the stream result
// plus the label of the source that produced it — or fails with `AiGenerationError`.
// NOTE(review): where this service is provided is outside this view — confirm against the rest of the file.
+ class AiGatewayStreamAttempt extends Context.Service<
34
+ AiGatewayStreamAttempt,
35
+ { readonly execute: Effect.Effect<AiGatewayAttemptResult<AiGatewayStreamResult>, AiGenerationError> }
36
+ >()('AiGatewayStreamAttempt') {}
22
37
 
23
38
  const EXPECTED_GATEWAY_KEY_PREFIX = 'sk-bf-'
24
39
  const AI_GATEWAY_VIRTUAL_KEY_HEADER = 'x-bf-vk'
25
- const AI_GATEWAY_EXTRA_PARAMS_HEADER = 'x-bf-passthrough-extra-params'
26
- const DEFAULT_AI_GATEWAY_URL = 'https://ai-gateway.gobrainy.ai' as const
27
- const OPENAI_PROMPT_CACHE_RETENTION = '24h' as const
28
- const OPENROUTER_RESPONSE_HEALING_EXTRA_PARAMS = {
29
- plugins: [{ id: 'response-healing' }],
30
- } as const satisfies AiGatewayExtraParams
40
// Overall time budget that withAiGatewayResilience applies around the whole retry sequence
// (all attempts plus the sleeps between them), not per attempt.
+ const AI_GATEWAY_TIMEOUT_MS = 30_000
41
// Longest gap allowed between stream chunks before withAiGatewayStreamIdleTimeout errors the stream.
+ const AI_GATEWAY_STREAM_IDLE_TIMEOUT_MS = 30_000
42
// Total number of attempts, including the first: retrying stops once the zero-based attempt index
// reaches AI_GATEWAY_MAX_RETRIES - 1.
+ const AI_GATEWAY_MAX_RETRIES = 4
43
// Upper bound for a single retry delay; caps both the exponential backoff and any Retry-After hint.
+ const AI_GATEWAY_MAX_RETRY_DELAY_MS = 15_000
44
// Provider identifiers. NOTE(review): their use is outside this view — presumably compared against
// a model's `provider` field; confirm.
+ const OPENAI_RESPONSES_PROVIDER_ID = 'openai.responses'
45
+ const OPENAI_CHAT_PROVIDER_ID = 'openai.chat'
46
+ const OPENAI_EMBEDDING_PROVIDER_ID = 'openai.embedding'
47
// NOTE(review): use is outside this view — presumably the embedding batch-size limit per request; confirm.
+ const OPENAI_EMBEDDING_MAX_PER_CALL = 2_048
48
// Network error codes treated as retryable. isRetryableAiGatewayError looks for each code as a
// case-insensitive substring of the error message, not as a structured `code` property.
+ const RETRYABLE_NETWORK_ERROR_CODES = new Set([
49
+ 'ECONNABORTED',
50
+ 'ECONNREFUSED',
51
+ 'ECONNRESET',
52
+ 'EAI_AGAIN',
53
+ 'EHOSTUNREACH',
54
+ 'ENETDOWN',
55
+ 'ENETUNREACH',
56
+ 'ENOTFOUND',
57
+ 'ETIMEDOUT',
58
+ 'UND_ERR_CONNECT_TIMEOUT',
59
+ 'UND_ERR_SOCKET',
60
+ ])
61
// Message patterns treated as retryable; isRetryableAiGatewayError tests them against the error message.
// Several are deliberately loose (`/dns/i`, `/timed out/i`) and match anywhere in the message.
+ const RETRYABLE_NETWORK_ERROR_PATTERNS = [
62
+ /fetch failed/i,
63
+ /network error/i,
64
+ /socket hang up/i,
65
+ /socket closed/i,
66
+ /connection (?:reset|refused|closed|timed out|timeout)/i,
67
+ /dns/i,
68
+ /temporary failure in name resolution/i,
69
+ /timed out/i,
70
+ ]
71
+
72
/**
 * Reads `value[key]` as a finite number.
 *
 * Accepts a finite `number` as-is, or a non-blank string that `Number()` parses to a finite value
 * (for example `"429"`). Missing keys, `NaN`, `Infinity`, non-numeric text, blank strings and any
 * other type yield `null`.
 *
 * @param value - Record to read from.
 * @param key - Property name to look up.
 * @returns The numeric value, or `null` when the field holds no usable number.
 */
function getNumericField(value: Record<string, unknown>, key: string): number | null {
  const field = value[key]
  if (typeof field === 'number') return Number.isFinite(field) ? field : null
  if (typeof field !== 'string') return null

  // `Number('')` and `Number('   ')` are both 0, so blank strings must be rejected explicitly;
  // otherwise an empty status field would be reported as status 0.
  const text = field.trim()
  if (text === '') return null

  const parsed = Number(text)
  return Number.isFinite(parsed) ? parsed : null
}
81
+
82
/**
 * Pulls an HTTP-style status code off an arbitrary thrown value.
 *
 * Only record-like values are inspected. `status` is preferred; `statusCode` is consulted when
 * `status` holds no usable number.
 *
 * @param error - The thrown value.
 * @returns The status code, or `null` when none could be read.
 */
function getErrorStatus(error: unknown): number | null {
  if (isRecord(error)) {
    const fromStatus = getNumericField(error, 'status')
    return fromStatus !== null ? fromStatus : getNumericField(error, 'statusCode')
  }

  return null
}
86
+
87
/**
 * Type guard for one `[name, value]` header tuple.
 *
 * The name must be a string and the value must be either a string or an array. Checking the
 * value here keeps the predicate honest: previously any second element (number, null, object)
 * was asserted to be `string | readonly string[]`.
 *
 * Elements of an array value are not inspected individually; callers that need a string must
 * still check each element.
 *
 * @param value - Candidate entry, typically one element of a headers array.
 * @returns `true` when `value` is a tuple of at least two elements with the shape described above.
 */
function isHeaderEntry(value: unknown): value is readonly [string, string | readonly string[]] {
  if (!Array.isArray(value) || value.length < 2) return false

  const [headerName, headerValue]: unknown[] = value
  return typeof headerName === 'string' && (typeof headerValue === 'string' || Array.isArray(headerValue))
}
90
+
91
/**
 * Looks up a single header in any of the shapes an error object may carry.
 *
 * Supported shapes:
 * - a `Headers` instance, queried with `get(name)`;
 * - an array of `[name, value]` entries (entries failing `isHeaderEntry` are ignored);
 * - a plain record of name → value.
 *
 * For arrays and records the name comparison is case-insensitive. A string value is returned
 * as-is. For an array value, the first string element is used if it is non-empty; otherwise
 * the search continues with the next matching entry.
 *
 * @param headers - Header container of unknown shape.
 * @param name - Header name to find.
 * @returns The header value, or `null` when it is absent or the container shape is unsupported.
 */
function readHeaderValue(headers: unknown, name: string): string | null {
  const wantedName = name.toLowerCase()

  if (headers instanceof Headers) return headers.get(name)

  // Reduce both remaining shapes to one list of [name, value] pairs so a single loop handles them.
  let candidates: ReadonlyArray<readonly [string, unknown]>
  if (Array.isArray(headers)) {
    candidates = headers.filter(isHeaderEntry)
  } else if (isRecord(headers)) {
    candidates = Object.entries(headers)
  } else {
    return null
  }

  for (const [candidateName, candidateValue] of candidates) {
    if (candidateName.toLowerCase() !== wantedName) continue
    if (typeof candidateValue === 'string') return candidateValue
    if (!Array.isArray(candidateValue)) continue

    const firstText = candidateValue.find((item): item is string => typeof item === 'string')
    if (firstText) return firstText
  }

  return null
}
127
+
128
/**
 * Finds a `retry-after` header value on a thrown error.
 *
 * The error itself is probed first, then — only if nothing truthy was found — its `cause`
 * (one level deep). On each object the lookup order is `responseHeaders`, `headers`, then
 * `response.headers`.
 *
 * @param error - The thrown value.
 * @returns The raw header value, or `undefined` when none is present.
 */
function extractRetryAfter(error: unknown): string | undefined {
  if (!isRecord(error)) return undefined

  // Checks the three places one object may carry response headers, in priority order.
  const probe = (holder: Record<string, unknown>): string | null => {
    const ownValue =
      readHeaderValue(holder.responseHeaders, 'retry-after') ?? readHeaderValue(holder.headers, 'retry-after')
    if (ownValue !== null) return ownValue
    return isRecord(holder.response) ? readHeaderValue(holder.response.headers, 'retry-after') : null
  }

  const onError = probe(error)
  if (onError) {
    return onError
  }

  const { cause } = error
  if (!isRecord(cause)) return undefined
  return probe(cause) ?? undefined
}
147
+
148
/**
 * Renders an arbitrary provider error field as a bounded string for diagnostics.
 *
 * - `undefined` and `null` mean "no value" and yield `undefined` (so callers do not emit `key=null`).
 * - Strings are used verbatim.
 * - `Error` instances render as `"<name>: <message>"`, followed by their JSON form when they
 *   carry enumerable own properties. Plain `JSON.stringify(error)` would give `{}` because
 *   `name` and `message` are not enumerable.
 * - Everything else goes through `JSON.stringify`. Values it cannot represent (functions,
 *   symbols, circular structures, BigInt) yield `undefined`.
 *
 * @param value - The field to render.
 * @param maxLength - Maximum number of characters kept before `...` is appended.
 * @returns The rendered text, or `undefined` when there is nothing representable.
 */
function stringifyProviderField(value: unknown, maxLength: number): string | undefined {
  if (value === undefined || value === null) return undefined

  let text: string | undefined
  try {
    if (typeof value === 'string') {
      text = value
    } else if (value instanceof Error) {
      const summary = `${value.name}: ${value.message}`
      let details: string | undefined
      try {
        details = JSON.stringify(value)
      } catch {
        // A non-serializable error still gets its name/message summary.
        details = undefined
      }
      text = details !== undefined && details !== '{}' ? `${summary} ${details}` : summary
    } else {
      // At runtime JSON.stringify returns undefined for functions and symbols.
      text = JSON.stringify(value)
    }
  } catch {
    return undefined
  }

  if (typeof text !== 'string') return undefined
  return text.length > maxLength ? `${text.slice(0, maxLength)}...` : text
}
157
+
158
/**
 * Normalizes any value thrown by a gateway/provider call into an `AiGenerationError`.
 *
 * An existing `AiGenerationError` is returned untouched. Otherwise the status code, the
 * `retry-after` hint and truncated provider payloads are read from the error and folded into
 * one space-separated diagnostic message of the form
 * `[source] status=… rate_limited <message> provider_data=… cause=… response_body=… url=…`,
 * where every part except `[source]` and the message is optional.
 *
 * @param source - Label of the call site; used as the message prefix and as the `source` field.
 * @param error - The thrown value: an `Error`, a plain record, or a primitive.
 * @returns The classified error. Optional fields are set only when a value was found.
 */
function classifyAiGatewayError(source: string, error: unknown): AiGenerationError {
  if (error instanceof AiGenerationError) {
    return error
  }

  const status = getErrorStatus(error)
  const rateLimited = status === 429
  const retryAfter = extractRetryAfter(error)
  const errorRecord = isRecord(error) ? error : null

  // Thrown plain objects (for example `{ message, status }`) are not `Error` instances. Read
  // their `message` field instead of letting `String(error)` collapse them to "[object Object]".
  const recordMessage = errorRecord !== null && typeof errorRecord.message === 'string' ? errorRecord.message : null
  const message = error instanceof Error ? error.message : (recordMessage ?? String(error))

  const providerData = errorRecord ? stringifyProviderField(errorRecord.data, 600) : undefined
  const cause = errorRecord ? stringifyProviderField(errorRecord.cause, 600) : undefined
  const responseBody = errorRecord ? stringifyProviderField(errorRecord.responseBody, 600) : undefined
  const url = errorRecord ? stringifyProviderField(errorRecord.url, 200) : undefined

  const parts = [`[${source}]`]
  if (status !== null) parts.push(`status=${status}`)
  if (rateLimited) parts.push('rate_limited')
  parts.push(message)
  if (providerData) parts.push(`provider_data=${providerData}`)
  if (cause) parts.push(`cause=${cause}`)
  if (responseBody) parts.push(`response_body=${responseBody}`)
  if (url) parts.push(`url=${url}`)

  // `cause` appears only in the message text; it is not passed on as a structured field.
  return new AiGenerationError({
    source,
    message: parts.join(' '),
    ...(status !== null ? { status } : {}),
    ...(rateLimited ? { rateLimited: true } : {}),
    ...(retryAfter ? { retryAfter } : {}),
    ...(providerData ? { providerData } : {}),
    ...(responseBody ? { responseBody } : {}),
    ...(url ? { url } : {}),
  })
}
193
+
194
/**
 * Decides whether a classified gateway error is worth retrying.
 *
 * Retryable cases: the error is flagged `rateLimited`, its status is 500 or above, its message
 * matches one of RETRYABLE_NETWORK_ERROR_PATTERNS, or its message contains one of
 * RETRYABLE_NETWORK_ERROR_CODES (compared case-insensitively).
 *
 * @param error - The classified error.
 * @returns `true` when another attempt may succeed.
 */
function isRetryableAiGatewayError(error: AiGenerationError): boolean {
  if (error.rateLimited || (typeof error.status === 'number' && error.status >= 500)) {
    return true
  }

  const haystack = error.message.toLowerCase()

  const matchesKnownPattern = RETRYABLE_NETWORK_ERROR_PATTERNS.some((pattern) => pattern.test(haystack))
  if (matchesKnownPattern) return true

  return Array.from(RETRYABLE_NETWORK_ERROR_CODES).some((code) => haystack.includes(code.toLowerCase()))
}
214
+
215
/**
 * Converts a raw `retry-after` value into a delay in milliseconds.
 *
 * The value is first read as a non-negative number of seconds; failing that, as a date parsed
 * with `Date.parse`, in which case the delay is the time remaining until that date (never
 * negative). Either way the result is capped at AI_GATEWAY_MAX_RETRY_DELAY_MS.
 *
 * @param retryAfter - Raw header value, if any.
 * @param nowMillis - Current time in epoch milliseconds, used for the date form.
 * @returns The delay in milliseconds, or `undefined` when the value is missing or unparseable.
 */
function parseRetryAfterDelayMs(retryAfter: string | undefined, nowMillis: number): number | undefined {
  const headerText = retryAfter?.trim()
  if (!headerText) return undefined

  let uncappedDelayMs: number
  const deltaSeconds = Number(headerText)
  if (Number.isFinite(deltaSeconds) && deltaSeconds >= 0) {
    uncappedDelayMs = Math.round(deltaSeconds * 1_000)
  } else {
    const retryAtMillis = Date.parse(headerText)
    if (Number.isNaN(retryAtMillis)) return undefined
    uncappedDelayMs = Math.max(0, retryAtMillis - nowMillis)
  }

  return Math.min(AI_GATEWAY_MAX_RETRY_DELAY_MS, uncappedDelayMs)
}
231
+
232
/**
 * Computes how long to wait before the next attempt.
 *
 * The baseline is exponential backoff — 500ms doubled per attempt — capped at
 * AI_GATEWAY_MAX_RETRY_DELAY_MS. When the error carries a parseable `retryAfter` hint, the
 * longer of the two delays is used.
 *
 * @param attempt - Zero-based index of the attempt that just failed.
 * @param error - The failure, consulted for its `retryAfter` hint.
 * @param nowMillis - Current time in epoch milliseconds.
 * @returns Delay in milliseconds.
 */
function computeRetryDelayMs(attempt: number, error: AiGenerationError, nowMillis: number): number {
  const backoffMs = Math.min(AI_GATEWAY_MAX_RETRY_DELAY_MS, 500 * 2 ** attempt)

  const hintedMs = parseRetryAfterDelayMs(error.retryAfter, nowMillis)
  if (hintedMs === undefined) {
    return backoffMs
  }

  return Math.max(backoffMs, hintedMs)
}
237
+
238
/**
 * Wraps a gateway effect with retries and an overall timeout.
 *
 * A failure with tag `AiGenerationError` is retried when `isRetryableAiGatewayError` accepts it
 * and attempts remain (AI_GATEWAY_MAX_RETRIES attempts in total). The pause before each retry
 * comes from `computeRetryDelayMs`. The whole sequence — attempts and pauses together — is bounded
 * by AI_GATEWAY_TIMEOUT_MS; hitting that bound fails with an `AiGenerationError` whose message
 * says "Timed out after …ms".
 *
 * @param source - Label used in the timeout error.
 * @param effect - The operation to run; it is re-executed for each attempt.
 * @returns An effect with the same success type that fails with `AiGenerationError`.
 */
function withAiGatewayResilience<A>(source: string, effect: Effect.Effect<A, AiGenerationError>) {
  const finalAttemptIndex = AI_GATEWAY_MAX_RETRIES - 1

  const withRetries = Effect.gen(function* () {
    const attemptFrom = (attemptIndex: number): Effect.Effect<A, AiGenerationError> =>
      effect.pipe(
        Effect.catchTag('AiGenerationError', (failure) =>
          Effect.gen(function* () {
            const outOfAttempts = attemptIndex >= finalAttemptIndex
            if (outOfAttempts || !isRetryableAiGatewayError(failure)) {
              // Re-raise the original failure unchanged.
              return yield* failure
            }

            const nowMillis = yield* Clock.currentTimeMillis
            yield* Effect.sleep(Duration.millis(computeRetryDelayMs(attemptIndex, failure, nowMillis)))
            return yield* attemptFrom(attemptIndex + 1)
          }),
        ),
      )

    return yield* attemptFrom(0)
  })

  const overallBudget = Duration.millis(AI_GATEWAY_TIMEOUT_MS)
  return withRetries.pipe(
    Effect.timeout(overallBudget),
    Effect.catchIf(Cause.isTimeoutError, () =>
      Effect.fail(new AiGenerationError({ source, message: `[${source}] Timed out after ${AI_GATEWAY_TIMEOUT_MS}ms` })),
    ),
  )
}
266
+
267
/**
 * Re-exposes `stream` as a new ReadableStream that fails when the source goes quiet.
 *
 * A pump fiber, started in `start`, reads from `stream` and forwards every chunk to the returned
 * stream. No `pull` handler is defined and `desiredSize` is never consulted, so reading does not
 * wait for consumer demand.
 *
 * An idle timer of AI_GATEWAY_STREAM_IDLE_TIMEOUT_MS is armed when the pump starts and re-armed
 * after each forwarded chunk. If it elapses, the returned stream is errored with an
 * `AiGenerationError` ("Stream stalled after …ms"), the pump is interrupted and the source reader
 * is cancelled with that error.
 *
 * `onFinalize` runs at most once: the first time the returned stream closes, errors, is cancelled,
 * or the pump terminates.
 *
 * @param stream - Source stream; it is locked by a reader for the lifetime of the wrapper.
 * @param source - Label used in the stall error.
 * @param onFinalize - Optional callback invoked once when the wrapper reaches a terminal state.
 * @returns The guarded stream.
 */
+ function withAiGatewayStreamIdleTimeout(
268
+ stream: ReadableStream<AiGatewayStreamPart>,
269
+ source: string,
270
+ onFinalize?: () => void,
271
+ ): ReadableStream<AiGatewayStreamPart> {
272
// Mutable state shared by the pump fiber, the idle-timer fiber and the stream callbacks.
// `closed` gates every interaction with the controller; `finalized` gates `onFinalize`.
+ let closed = false
273
+ let reader: ReadableStreamDefaultReader<AiGatewayStreamPart> | null = null
274
+ let idleTimeoutFiber: ReturnType<typeof Effect.runFork> | null = null
275
+ let bodyPumpFiber: ReturnType<typeof Effect.runFork> | null = null
276
+ let finalized = false
277
+
278
// Invokes `onFinalize` once; later calls are no-ops.
+ const finalize = () => {
279
+ if (finalized) return
280
+ finalized = true
281
+ onFinalize?.()
282
+ }
283
+
284
// Fire-and-forget interruption: the interrupt is forked and its completion is not awaited.
+ const interruptFiber = (fiber: ReturnType<typeof Effect.runFork> | null) => {
285
+ if (!fiber) return
286
+ void Effect.runFork(Fiber.interrupt(fiber))
287
+ }
288
+
289
// Both stop helpers clear the stored fiber reference before interrupting it, so a second call is a no-op.
+ const stopIdleTimeout = () => {
290
+ const fiber = idleTimeoutFiber
291
+ idleTimeoutFiber = null
292
+ interruptFiber(fiber)
293
+ }
294
+
295
+ const stopBodyPump = () => {
296
+ const fiber = bodyPumpFiber
297
+ bodyPumpFiber = null
298
+ interruptFiber(fiber)
299
+ }
300
+
301
// Releases the reader's lock on the source stream, ignoring any error thrown while doing so.
+ const releaseReader = (streamReader: ReadableStreamDefaultReader<AiGatewayStreamPart>) => {
302
+ try {
303
+ streamReader.releaseLock()
304
+ } catch {
305
+ // Best-effort cleanup.
306
+ }
307
+ }
308
+
309
// Forwards one chunk. Returns false when it was not delivered: either the wrapper is already
// closed, or `enqueue` threw — in which case the wrapper is marked closed.
+ const enqueueChunk = (
310
+ controller: ReadableStreamDefaultController<AiGatewayStreamPart>,
311
+ chunk: AiGatewayStreamPart,
312
+ ): boolean => {
313
+ if (closed) return false
314
+
315
+ try {
316
+ controller.enqueue(chunk)
317
+ return true
318
+ } catch {
319
+ closed = true
320
+ return false
321
+ }
322
+ }
323
+
324
// closeStream and errorStream are no-ops once `closed` is set, so only the first terminal
// transition reaches the controller. Both finalize before touching the controller.
+ const closeStream = (controller: ReadableStreamDefaultController<AiGatewayStreamPart>) => {
325
+ if (closed) return
326
+ closed = true
327
+ finalize()
328
+
329
+ try {
330
+ controller.close()
331
+ } catch {
332
+ // Best-effort cleanup.
333
+ }
334
+ }
335
+
336
+ const errorStream = (controller: ReadableStreamDefaultController<AiGatewayStreamPart>, error: unknown) => {
337
+ if (closed) return
338
+ closed = true
339
+ finalize()
340
+
341
+ try {
342
+ controller.error(error)
343
+ } catch {
344
+ // Best-effort cleanup.
345
+ }
346
+ }
347
+
348
// Replaces the current idle timer with a fresh one: the old fiber is interrupted and a new
// fiber sleeps for the idle window before running the stall handler below.
+ const resetIdleTimeout = (controller: ReadableStreamDefaultController<AiGatewayStreamPart>) => {
349
+ stopIdleTimeout()
350
+ idleTimeoutFiber = Effect.runFork(
351
+ Effect.sleep(Duration.millis(AI_GATEWAY_STREAM_IDLE_TIMEOUT_MS)).pipe(
352
+ Effect.flatMap(() =>
353
+ Effect.gen(function* () {
354
+ if (closed) return
355
+
356
+ const timeoutError = new AiGenerationError({
357
+ source,
358
+ message: `[${source}] Stream stalled after ${AI_GATEWAY_STREAM_IDLE_TIMEOUT_MS}ms`,
359
+ })
360
+
361
// Fail the consumer-facing stream first, then stop the pump and cancel the source.
+ yield* Effect.sync(() => errorStream(controller, timeoutError))
362
+ stopBodyPump()
363
+
364
// NOTE(review): the pump's `ensuring` finalizer calls stopIdleTimeout(), and `idleTimeoutFiber`
// still refers to this running fiber at that point. Whether the cancel below is always started
// before that interrupt lands depends on Effect's fiber scheduling — TODO confirm.
+ const streamReader = reader
365
+ reader = null
366
+ if (!streamReader) return
367
+
368
+ yield* Effect.tryPromise(() => streamReader.cancel(timeoutError)).pipe(Effect.catch(() => Effect.void))
369
+ }),
370
+ ),
371
+ ),
372
+ )
373
+ }
374
+
375
// The read loop: arm the timer, then read → forward → re-arm until the source is done, the
// wrapper is closed, or a chunk cannot be enqueued.
+ const pumpStreamEffect = (
376
+ streamReader: ReadableStreamDefaultReader<AiGatewayStreamPart>,
377
+ controller: ReadableStreamDefaultController<AiGatewayStreamPart>,
378
+ ): Effect.Effect<void> =>
379
+ Effect.gen(function* () {
380
+ resetIdleTimeout(controller)
381
+
382
+ for (;;) {
383
+ if (closed) return
384
+
385
+ const { done, value } = yield* Effect.tryPromise(() => streamReader.read())
386
+ if (done) {
387
+ stopIdleTimeout()
388
+ yield* Effect.sync(() => closeStream(controller))
389
+ return
390
+ }
391
+
392
+ if (!enqueueChunk(controller, value)) {
393
+ return
394
+ }
395
+
396
+ resetIdleTimeout(controller)
397
+ }
398
+ }).pipe(
399
// A failure in the loop (for example a rejected `read()`) is surfaced to the consumer as a stream error.
+ Effect.catch((error: unknown) => Effect.sync(() => errorStream(controller, error))),
400
// Cleanup attached with `Effect.ensuring`: mark the wrapper closed, finalize, stop the timer,
// drop the shared references and release the reader lock.
+ Effect.ensuring(
401
+ Effect.sync(() => {
402
+ closed = true
403
+ finalize()
404
+ stopIdleTimeout()
405
+ bodyPumpFiber = null
406
+ reader = null
407
+ releaseReader(streamReader)
408
+ }),
409
+ ),
410
+ )
411
+
412
+ return new ReadableStream<AiGatewayStreamPart>({
413
// Locks the source and starts the pump as soon as the wrapper is constructed.
+ start(controller) {
414
+ const streamReader = stream.getReader()
415
+ reader = streamReader
416
+ bodyPumpFiber = Effect.runFork(pumpStreamEffect(streamReader, controller))
417
+ },
418
// Consumer cancelled: tear down both fibers and pass the reason on to the source reader.
// A failing `cancel` on the source is swallowed.
+ cancel(reason) {
419
+ closed = true
420
+ finalize()
421
+ stopIdleTimeout()
422
+ stopBodyPump()
423
+
424
+ const streamReader = reader
425
+ reader = null
426
+ if (!streamReader) {
427
+ return
428
+ }
429
+
430
+ return Effect.runPromise(
431
+ Effect.tryPromise(() => streamReader.cancel(reason)).pipe(Effect.catch(() => Effect.void)),
432
+ )
433
+ },
434
+ })
435
+ }
31
436
 
32
437
  function mergeAiGatewayHeaders(
33
438
  existingHeaders: AiGatewayCallOptions['headers'] | undefined,
@@ -42,19 +447,6 @@ function mergeAiGatewayHeaders(
42
447
  return Object.fromEntries(merged.entries())
43
448
  }
44
449
 
45
- function parseAiGatewayJsonRequestBody(body: BodyInit | null | undefined): Record<string, unknown> | null {
46
- if (typeof body !== 'string') return null
47
-
48
- let parsed: unknown
49
- try {
50
- parsed = JSON.parse(body)
51
- } catch {
52
- return null
53
- }
54
-
55
- return isRecord(parsed) ? parsed : null
56
- }
57
-
58
450
/**
 * Returns a copy of `params` whose `headers` are the result of merging the caller's
 * headers with the cache headers built for the `lota-sdk` scope.
 */
function withDefaultAiGatewayCacheHeaders(params: AiGatewayCallOptions): AiGatewayCallOptions {
  const cacheHeaders = buildAiGatewayCacheHeaders('lota-sdk')
  const headers = mergeAiGatewayHeaders(params.headers, cacheHeaders)
  return { ...params, headers }
}
@@ -62,31 +454,80 @@ function withDefaultAiGatewayCacheHeaders(params: AiGatewayCallOptions): AiGatew
62
454
/**
 * Canonicalises an AI gateway base URL: surrounding whitespace is trimmed, trailing
 * slashes are dropped, and `/v1` is appended unless the URL already ends with it.
 *
 * @param value Raw URL as configured.
 * @returns The normalized URL, always ending in `/v1`.
 * @throws ConfigurationError when `value` is empty or whitespace-only.
 */
function normalizeAiGatewayUrl(value: string): string {
  const candidate = value.trim()
  if (candidate.length === 0) {
    throw new ConfigurationError({ message: '[ai-gateway] AI gateway URL is required.', key: 'aiGateway.url' })
  }

  const withoutTrailingSlashes = candidate.replace(/\/+$/, '')
  if (withoutTrailingSlashes.endsWith('/v1')) {
    return withoutTrailingSlashes
  }

  return `${withoutTrailingSlashes}/v1`
}
71
463
 
72
- function readDirectEnvAiGatewayConfig(): AiGatewayConfig {
73
- const apiKey = (process.env.AI_GATEWAY_KEY ?? '').trim()
74
- if (!apiKey) {
75
- throw new Error(
76
- '[ai-gateway] Missing AI gateway key. Set AI_GATEWAY_KEY, or configure createLotaRuntime({ aiGateway: { key } }).',
77
- )
78
- }
464
+ // ── AiGateway Effect Layer ────────────────────────────────────────────
465
/**
 * Effect service tag (key `'AiGateway'`) for the shared gateway state: the semaphore
 * used to gate gateway calls and the provider created by `createOpenAI`.
 */
export class AiGatewayTag extends Context.Service<
  AiGatewayTag,
  {
    /** Semaphore whose permits gate gateway calls. */
    readonly semaphore: Semaphore.Semaphore
    /** Provider instance returned by `createOpenAI`. */
    readonly provider: ReturnType<typeof createOpenAI>
  }
>()('AiGateway') {}
469
+
470
/**
 * Live layer for {@link AiGatewayTag}.
 *
 * Reads the runtime configuration, creates a semaphore sized by
 * `aiGateway.maxConcurrency`, validates the gateway key prefix, normalizes the gateway
 * URL and builds the provider with the key also sent as the virtual-key header.
 *
 * Both configuration problems (bad key prefix, empty/invalid URL) surface as typed
 * `ConfigurationError` failures of the layer rather than as defects, so callers can
 * handle them uniformly.
 */
export const AiGatewayLive = Layer.effect(
  AiGatewayTag,
  Effect.gen(function* () {
    const config = yield* RuntimeConfigServiceTag
    const semaphore = yield* Semaphore.make(config.aiGateway.maxConcurrency)

    const apiKey = config.aiGateway.key.trim()
    if (!apiKey.startsWith(EXPECTED_GATEWAY_KEY_PREFIX)) {
      return yield* new ConfigurationError({
        message: `[ai-gateway] Gateway keys must use the ${EXPECTED_GATEWAY_KEY_PREFIX}* format.`,
        key: 'aiGateway.key',
      })
    }

    // normalizeAiGatewayUrl throws synchronously; a bare call inside Effect.gen would turn
    // that into a defect. Effect.try keeps it in the typed error channel like the key check.
    const baseURL = yield* Effect.try({
      try: () => normalizeAiGatewayUrl(config.aiGateway.url),
      catch: (cause) =>
        cause instanceof ConfigurationError
          ? cause
          : new ConfigurationError({ message: '[ai-gateway] AI gateway URL is invalid.', key: 'aiGateway.url' }),
    })
    const provider = createOpenAI({ baseURL, apiKey, headers: { [AI_GATEWAY_VIRTUAL_KEY_HEADER]: apiKey } })

    return AiGatewayTag.of({ semaphore, provider })
  }),
)
489
+
490
/**
 * Synchronously resolves the service registered under `tag` from the SDK runtime
 * returned by `getLotaSdkRuntime()`.
 */
function resolveFromRuntime<I, T>(tag: Context.Key<I, T>): T {
  const runtime = getLotaSdkRuntime()
  const lookup = Effect.service(tag)
  return runtime.runSync(lookup)
}
82
493
 
83
- function readAiGatewayConfig(): AiGatewayConfig {
84
- try {
85
- const { aiGateway } = getRuntimeConfig()
86
- return { apiKey: aiGateway.key.trim(), baseURL: normalizeAiGatewayUrl(aiGateway.url) }
87
- } catch {
88
- return readDirectEnvAiGatewayConfig()
89
- }
494
/** Resolves the {@link AiGatewayTag} service (semaphore + provider) from the SDK runtime. */
function getAiGateway(): AiGatewayTag['Service'] {
  const gateway = resolveFromRuntime(AiGatewayTag)
  return gateway
}
497
+
498
/** Runs `effect` while holding one permit of the gateway semaphore. */
function withAiGatewayConcurrency<A>(effect: Effect.Effect<A, AiGenerationError>): Effect.Effect<A, AiGenerationError> {
  const { semaphore } = getAiGateway()
  return semaphore.withPermit(effect)
}
501
+
502
/**
 * Runs a streaming attempt while holding one gateway semaphore permit, and keeps the
 * permit held after the attempt succeeds: the `release` callback is handed to
 * `withAiGatewayStreamIdleTimeout` together with the returned stream.
 *
 * The permit is given back exactly once (`release` is idempotent):
 * - when the attempt ends in any failure — typed error, defect or interruption;
 * - when wrapping the stream throws;
 * - otherwise when `withAiGatewayStreamIdleTimeout` invokes `release`.
 *
 * @param effect The streaming attempt to guard.
 * @returns The attempt with its stream replaced by the idle-timeout-wrapped stream.
 */
function withAiGatewayStreamConcurrency(
  effect: Effect.Effect<AiGatewayAttemptResult<AiGatewayStreamResult>, AiGenerationError>,
): Effect.Effect<AiGatewayAttemptResult<AiGatewayStreamResult>, AiGenerationError> {
  return Effect.uninterruptibleMask((restore) =>
    Effect.gen(function* () {
      const { semaphore } = getAiGateway()
      const currentContext = yield* Effect.context<never>()
      yield* semaphore.take(1)

      let released = false
      const release = () => {
        if (released) return
        released = true
        // Release runs outside the current fiber (it may be triggered from stream callbacks).
        void Effect.runForkWith(currentContext)(semaphore.release(1))
      }

      // onError fires for every failure cause, not only AiGenerationError, so a defect or an
      // interruption of the restored attempt can no longer leak the permit.
      const attempt = yield* restore(effect).pipe(Effect.onError(() => Effect.sync(release)))

      try {
        return {
          ...attempt,
          result: {
            ...attempt.result,
            stream: withAiGatewayStreamIdleTimeout(attempt.result.stream, attempt.source, release),
          },
        }
      } catch (cause) {
        // Nobody owns `release` yet if wrapping the stream failed; give the permit back here.
        release()
        throw cause
      }
    }),
  )
}
91
532
 
92
533
  function readReasoningDetailsText(value: unknown): string | null {
@@ -142,19 +583,6 @@ export function extractAiGatewayChatReasoningDeltaText(rawChunk: unknown): strin
142
583
  return null
143
584
  }
144
585
 
145
- type AiGatewayResponsesReasoningDelta = { id: string; delta: string; itemId: string }
146
-
147
- export function extractAiGatewayResponsesReasoningDelta(rawChunk: unknown): AiGatewayResponsesReasoningDelta | null {
148
- if (!isRecord(rawChunk) || rawChunk.type !== 'response.reasoning_summary_text.delta') return null
149
- if ('summary_index' in rawChunk) return null
150
-
151
- const itemId = readString(rawChunk.item_id)
152
- const delta = readReasoningDeltaText(rawChunk.delta)
153
- if (!itemId || !delta) return null
154
-
155
- return { id: `${itemId}:0`, delta, itemId }
156
- }
157
-
158
586
  export function injectAiGatewayChatReasoningContent(
159
587
  content: readonly AiGatewayGeneratedContent[],
160
588
  response?: AiGatewayChatResponse,
@@ -178,8 +606,158 @@ function isReasoningEnabled(params: AiGatewayCallOptions): boolean {
178
606
  return typeof openaiOptions.reasoningEffort === 'string' && openaiOptions.reasoningEffort !== 'none'
179
607
  }
180
608
 
609
/**
 * Tells whether a model id is in the `openrouter/` namespace.
 * The check ignores surrounding whitespace and letter case.
 */
function isOpenRouterModel(modelId: string): boolean {
  const normalizedId = modelId.trim().toLowerCase()
  return normalizedId.startsWith('openrouter/')
}
612
+
613
/**
 * Tells whether a direct OpenRouter fallback can be used for `modelId`: the id must be
 * in the `openrouter/` namespace and a non-blank `aiGateway.openRouterApiKey` must be
 * configured.
 *
 * The prefix check runs first so that non-OpenRouter models never trigger a runtime
 * config lookup.
 */
function hasDirectOpenRouterFallback(modelId: string): boolean {
  if (!isOpenRouterModel(modelId)) {
    return false
  }

  const config = resolveFromRuntime(RuntimeConfigServiceTag)
  return Boolean(config.aiGateway.openRouterApiKey?.trim())
}
617
+
618
/**
 * Builds a chat model on the direct OpenRouter provider, using the
 * `aiGateway.openRouterApiKey` from the runtime config and the model id as rewritten by
 * `normalizeDirectOpenRouterModelId`.
 */
function getDirectOpenRouterChatModel(modelId: string): AiGatewayLanguageModel {
  const { aiGateway } = resolveFromRuntime(RuntimeConfigServiceTag)
  const directProvider = getDirectOpenRouterProvider(aiGateway.openRouterApiKey)
  return directProvider.chat(normalizeDirectOpenRouterModelId(modelId))
}
622
+
623
/**
 * A failed gateway attempt falls back to direct OpenRouter only when the fallback is
 * available for `modelId` and `isRetryableAiGatewayError` accepts the error.
 */
function shouldFallbackToDirectOpenRouter(modelId: string, error: AiGenerationError): boolean {
  if (!hasDirectOpenRouterFallback(modelId)) {
    return false
  }
  return isRetryableAiGatewayError(error)
}
626
+
627
/**
 * Describes one generate attempt as an Effect.
 *
 * `evaluate` is invoked through `Effect.tryPromise`; rejections are converted with
 * `classifyAiGatewayError(source, cause)`. The call is wrapped by
 * `withAiGatewayResilience`, and a successful result is paired with its `source` label.
 * The effect is traced under the `AiGateway.generateAttempt` span.
 *
 * @param source Label identifying which backend served the attempt.
 * @param evaluate Thunk that performs the actual generation.
 */
function attemptAiGatewayGenerate(
  source: string,
  evaluate: () => PromiseLike<AiGatewayGenerateResult>,
): Effect.Effect<AiGatewayAttemptResult<AiGatewayGenerateResult>, AiGenerationError> {
  const guardedCall = Effect.tryPromise({
    try: evaluate,
    catch: (cause) => classifyAiGatewayError(source, cause),
  })
  const resilientCall = withAiGatewayResilience(source, guardedCall)

  return resilientCall.pipe(
    Effect.map((result) => ({ source, result })),
    Effect.withSpan('AiGateway.generateAttempt'),
    Effect.annotateSpans({ gatewaySource: source }),
  )
}
640
+
641
/**
 * Describes one stream attempt as an Effect.
 *
 * `evaluate` is invoked through `Effect.tryPromise`; rejections are converted with
 * `classifyAiGatewayError(source, cause)`. The call is wrapped by
 * `withAiGatewayResilience`, and a successful result is paired with its `source` label.
 * The effect is traced under the `AiGateway.streamAttempt` span.
 *
 * @param source Label identifying which backend served the attempt.
 * @param evaluate Thunk that opens the stream.
 */
function attemptAiGatewayStream(
  source: string,
  evaluate: () => PromiseLike<AiGatewayStreamResult>,
): Effect.Effect<AiGatewayAttemptResult<AiGatewayStreamResult>, AiGenerationError> {
  const guardedCall = Effect.tryPromise({
    try: evaluate,
    catch: (cause) => classifyAiGatewayError(source, cause),
  })
  const resilientCall = withAiGatewayResilience(source, guardedCall)

  return resilientCall.pipe(
    Effect.map((result) => ({ source, result })),
    Effect.withSpan('AiGateway.streamAttempt'),
    Effect.annotateSpans({ gatewaySource: source }),
  )
}
654
+
655
/**
 * Generate attempt that bypasses the gateway and calls the direct OpenRouter chat model.
 * The model is created when this function is called; `doGenerate` only runs when the
 * returned effect is executed. Attempts are labelled `openrouter.generate`.
 */
function attemptDirectOpenRouterGenerate(
  modelId: string,
  params: AiGatewayCallOptions,
): Effect.Effect<AiGatewayAttemptResult<AiGatewayGenerateResult>, AiGenerationError> {
  const directModel = getDirectOpenRouterChatModel(modelId)
  const runDirect = () => directModel.doGenerate(params)
  return attemptAiGatewayGenerate('openrouter.generate', runDirect)
}
662
+
663
/**
 * Stream attempt that bypasses the gateway and calls the direct OpenRouter chat model.
 * The model is created when this function is called; `doStream` only runs when the
 * returned effect is executed. Attempts are labelled `openrouter.stream`.
 */
function attemptDirectOpenRouterStream(
  modelId: string,
  params: AiGatewayCallOptions,
): Effect.Effect<AiGatewayAttemptResult<AiGatewayStreamResult>, AiGenerationError> {
  const directModel = getDirectOpenRouterChatModel(modelId)
  const runDirect = () => directModel.doStream(params)
  return attemptAiGatewayStream('openrouter.stream', runDirect)
}
670
+
671
/**
 * Runs a generate call through the attempt service.
 *
 * The primary attempt (`ai-gateway.generate`) always wraps `doGenerate`. When no direct
 * OpenRouter fallback is available for `modelId`, the primary layer is provided
 * directly. Otherwise an execution plan is used whose second step provides the direct
 * OpenRouter attempt, gated by `shouldFallbackToDirectOpenRouter`.
 *
 * The result is traced under `AiGateway.executeGeneratePlan` and annotated with the
 * model id and whether the fallback was enabled.
 */
function executeGenerateAttemptPlan(
  modelId: string,
  params: AiGatewayCallOptions,
  doGenerate: () => PromiseLike<AiGatewayGenerateResult>,
): Effect.Effect<AiGatewayAttemptResult<AiGatewayGenerateResult>, AiGenerationError> {
  const primaryLayer = Layer.succeed(AiGatewayGenerateAttempt, {
    execute: attemptAiGatewayGenerate('ai-gateway.generate', doGenerate),
  })
  const runAttempt = Effect.gen(function* () {
    const { execute } = yield* AiGatewayGenerateAttempt
    return yield* execute
  })

  const fallbackAvailable = hasDirectOpenRouterFallback(modelId)
  if (!fallbackAvailable) {
    return runAttempt.pipe(
      Effect.provide(primaryLayer),
      Effect.withSpan('AiGateway.executeGeneratePlan'),
      Effect.annotateSpans({ modelId, directOpenRouterFallbackEnabled: false }),
    )
  }

  const fallbackLayer = Layer.succeed(AiGatewayGenerateAttempt, {
    execute: attemptDirectOpenRouterGenerate(modelId, params),
  })
  const plan = ExecutionPlan.make(
    { provide: primaryLayer },
    {
      provide: fallbackLayer,
      while: (error: AiGenerationError) => shouldFallbackToDirectOpenRouter(modelId, error),
    },
  )

  return runAttempt.pipe(
    Effect.withExecutionPlan(plan),
    Effect.withSpan('AiGateway.executeGeneratePlan'),
    Effect.annotateSpans({ modelId, directOpenRouterFallbackEnabled: true }),
  )
}
708
+
709
/**
 * Runs a stream call through the attempt service.
 *
 * The primary attempt (`ai-gateway.stream`) always wraps `doStream`. When no direct
 * OpenRouter fallback is available for `modelId`, the primary layer is provided
 * directly. Otherwise an execution plan is used whose second step provides the direct
 * OpenRouter attempt, gated by `shouldFallbackToDirectOpenRouter`.
 *
 * The result is traced under `AiGateway.executeStreamPlan` and annotated with the model
 * id and whether the fallback was enabled.
 */
function executeStreamAttemptPlan(
  modelId: string,
  params: AiGatewayCallOptions,
  doStream: () => PromiseLike<AiGatewayStreamResult>,
): Effect.Effect<AiGatewayAttemptResult<AiGatewayStreamResult>, AiGenerationError> {
  const primaryLayer = Layer.succeed(AiGatewayStreamAttempt, {
    execute: attemptAiGatewayStream('ai-gateway.stream', doStream),
  })
  const runAttempt = Effect.gen(function* () {
    const { execute } = yield* AiGatewayStreamAttempt
    return yield* execute
  })

  const fallbackAvailable = hasDirectOpenRouterFallback(modelId)
  if (!fallbackAvailable) {
    return runAttempt.pipe(
      Effect.provide(primaryLayer),
      Effect.withSpan('AiGateway.executeStreamPlan'),
      Effect.annotateSpans({ modelId, directOpenRouterFallbackEnabled: false }),
    )
  }

  const fallbackLayer = Layer.succeed(AiGatewayStreamAttempt, {
    execute: attemptDirectOpenRouterStream(modelId, params),
  })
  const plan = ExecutionPlan.make(
    { provide: primaryLayer },
    {
      provide: fallbackLayer,
      while: (error: AiGenerationError) => shouldFallbackToDirectOpenRouter(modelId, error),
    },
  )

  return runAttempt.pipe(
    Effect.withExecutionPlan(plan),
    Effect.withSpan('AiGateway.executeStreamPlan'),
    Effect.annotateSpans({ modelId, directOpenRouterFallbackEnabled: true }),
  )
}
744
+
745
/**
 * Tells whether a model id starts with `openrouter/openai/gpt-5`.
 * The check ignores surrounding whitespace and letter case.
 */
function isOpenRouterOpenAIReasoningModel(modelId: string): boolean {
  const normalizedId = modelId.trim().toLowerCase()
  return normalizedId.startsWith('openrouter/openai/gpt-5')
}
748
+
181
749
/**
 * Classifies a stream part by its `type`:
 * - `stream-start`, `response-metadata`, `raw` and `text-start` yield `false`;
 * - `text-delta` yields `true` only when its `delta` is non-empty;
 * - every other part type yields `true`.
 */
function shouldCloseInjectedReasoning(chunk: AiGatewayStreamPart): boolean {
  if (chunk.type === 'text-delta') {
    return chunk.delta.length > 0
  }

  const isPassThrough =
    chunk.type === 'stream-start' ||
    chunk.type === 'response-metadata' ||
    chunk.type === 'raw' ||
    chunk.type === 'text-start'

  return !isPassThrough
}
184
762
 
185
763
  export function injectAiGatewayChatReasoningStream(
@@ -232,30 +810,6 @@ export function injectAiGatewayChatReasoningStream(
232
810
  )
233
811
  }
234
812
 
235
- export function injectAiGatewayResponsesReasoningStream(
236
- stream: ReadableStream<AiGatewayStreamPart>,
237
- ): ReadableStream<AiGatewayStreamPart> {
238
- return stream.pipeThrough(
239
- new TransformStream<AiGatewayStreamPart, AiGatewayStreamPart>({
240
- transform(chunk, controller) {
241
- controller.enqueue(chunk)
242
-
243
- if (chunk.type !== 'raw') return
244
-
245
- const reasoningDelta = extractAiGatewayResponsesReasoningDelta(chunk.rawValue)
246
- if (!reasoningDelta) return
247
-
248
- controller.enqueue({
249
- type: 'reasoning-delta',
250
- id: reasoningDelta.id,
251
- delta: reasoningDelta.delta,
252
- providerMetadata: { openai: { itemId: reasoningDelta.itemId } },
253
- } satisfies AiGatewayStreamPart)
254
- },
255
- }),
256
- )
257
- }
258
-
259
813
  function addAiGatewayReasoningRawChunks(
260
814
  params: AiGatewayCallOptions,
261
815
  type: AiGatewayTransformParamsOptions['type'],
@@ -267,7 +821,49 @@ function addAiGatewayReasoningRawChunks(
267
821
  return { ...params, includeRawChunks: true }
268
822
  }
269
823
 
270
- export function normalizeAiGatewayChatProviderOptions(params: AiGatewayCallOptions): AiGatewayCallOptions {
824
/**
 * Builds the v3 language-model middleware shared by the gateway model factories.
 *
 * - `transformParams`: normalizes provider options for `modelId`, applies
 *   `addAiGatewayReasoningRawChunks`, then adds the default cache headers.
 * - `wrapGenerate`: runs the generate attempt plan under the gateway semaphore and
 *   passes the content through `injectAiGatewayChatReasoningContent`.
 * - `wrapStream`: runs the stream attempt plan under the stream concurrency guard; when
 *   reasoning is enabled the stream goes through `injectAiGatewayChatReasoningStream`.
 */
function createAiGatewayLanguageModelMiddleware(modelId: string): LanguageModelMiddleware {
  return {
    specificationVersion: 'v3',
    transformParams: ({ params, type }) => {
      const normalized = normalizeAiGatewayChatProviderOptions(params, modelId)
      const withRawChunks = addAiGatewayReasoningRawChunks(normalized, type)
      return Promise.resolve(withDefaultAiGatewayCacheHeaders(withRawChunks))
    },
    wrapGenerate: ({ doGenerate, params }) => {
      const generation = executeGenerateAttemptPlan(modelId, params, doGenerate).pipe(
        Effect.map(({ result }) => ({
          ...result,
          content: injectAiGatewayChatReasoningContent(
            result.content,
            result.response as AiGatewayChatResponse | undefined,
          ),
        })),
      )
      return Effect.runPromise(withAiGatewayConcurrency(generation))
    },
    wrapStream: ({ doStream, params }) => {
      const streaming = executeStreamAttemptPlan(modelId, params, doStream).pipe(
        Effect.map((attempt) => ({
          ...attempt,
          result: isReasoningEnabled(params)
            ? { ...attempt.result, stream: injectAiGatewayChatReasoningStream(attempt.result.stream) }
            : attempt.result,
        })),
      )
      const guarded = withAiGatewayStreamConcurrency(streaming).pipe(Effect.map(({ result }) => result))
      return Effect.runPromise(guarded)
    },
  }
}
862
+
863
+ export function normalizeAiGatewayChatProviderOptions(
864
+ params: AiGatewayCallOptions,
865
+ modelId?: string,
866
+ ): AiGatewayCallOptions {
271
867
  const providerOptions = isRecord(params.providerOptions)
272
868
  ? ({ ...params.providerOptions } as AiGatewayProviderOptions)
273
869
  : ({} as AiGatewayProviderOptions)
@@ -275,189 +871,108 @@ export function normalizeAiGatewayChatProviderOptions(params: AiGatewayCallOptio
275
871
  ? { ...providerOptions.openai }
276
872
  : ({} as Record<string, unknown>)
277
873
 
278
- if (openaiOptions.systemMessageMode === 'system') {
874
+ if (modelId && isOpenRouterOpenAIReasoningModel(modelId) && openaiOptions.forceReasoning === undefined) {
875
+ openaiOptions.forceReasoning = true
876
+ }
877
+
878
+ if (providerOptions.openai === openaiOptions || Object.keys(openaiOptions).length === 0) {
279
879
  return params
280
880
  }
281
881
 
282
882
  return {
283
883
  ...params,
284
- providerOptions: {
285
- ...providerOptions,
286
- openai: {
287
- ...openaiOptions,
288
- ...(openaiOptions.systemMessageMode === 'remove' ? {} : { systemMessageMode: 'system' }),
289
- },
290
- },
884
+ providerOptions: { ...providerOptions, openai: openaiOptions as AiGatewayProviderOptions['openai'] },
291
885
  }
292
886
  }
293
887
 
294
- export function injectAiGatewayExtraParamsRequestBody(
295
- body: BodyInit | null | undefined,
296
- extraParams: AiGatewayExtraParams,
297
- ): BodyInit | null | undefined {
298
- const parsed = parseAiGatewayJsonRequestBody(body)
299
- if (!parsed) return body
300
-
301
- const mergedExtraParams = isRecord(parsed.extra_params)
302
- ? { ...parsed.extra_params, ...extraParams }
303
- : { ...extraParams }
304
-
305
- return JSON.stringify({ ...parsed, extra_params: mergedExtraParams })
306
- }
307
-
308
- function createAiGatewayFetch(extraParams?: AiGatewayExtraParams): typeof fetch {
309
- const fetchWithMutations = (input: RequestInfo | URL, init?: RequestInit | BunFetchRequestInit) => {
310
- const parsedBody = parseAiGatewayJsonRequestBody(init?.body)
311
- let nextBody = init?.body
312
- let nextParsedBody = parsedBody
313
-
314
- if (
315
- nextParsedBody &&
316
- readString(nextParsedBody.model)?.startsWith('openai/') &&
317
- !readString(nextParsedBody.prompt_cache_retention)
318
- ) {
319
- nextParsedBody = { ...nextParsedBody, prompt_cache_retention: OPENAI_PROMPT_CACHE_RETENTION }
320
- nextBody = JSON.stringify(nextParsedBody)
321
- }
322
-
323
- if (nextParsedBody && extraParams !== undefined) {
324
- nextParsedBody = {
325
- ...nextParsedBody,
326
- extra_params: isRecord(nextParsedBody.extra_params)
327
- ? { ...nextParsedBody.extra_params, ...extraParams }
328
- : { ...extraParams },
329
- }
330
- nextBody = JSON.stringify(nextParsedBody)
331
- }
332
-
333
- const headers = new Headers(init?.headers)
334
- if (
335
- extraParams !== undefined ||
336
- (readString(nextParsedBody?.model)?.startsWith('openai/') &&
337
- readString(nextParsedBody?.prompt_cache_retention) !== null)
338
- ) {
339
- // Bifrost only forwards provider-specific extra params when passthrough is enabled.
340
- headers.set(AI_GATEWAY_EXTRA_PARAMS_HEADER, 'true')
341
- }
342
-
343
- return globalThis.fetch(input, { ...init, headers, body: nextBody })
344
- }
345
-
346
- const preconnect = globalThis.fetch.preconnect
347
-
348
- if (typeof preconnect !== 'function') {
349
- return fetchWithMutations as typeof fetch
888
/**
 * Wraps `model` with `devToolsMiddleware()` unless `Bun.env.NODE_ENV` is `'production'`,
 * in which case the model is returned untouched.
 */
function withAiGatewayDevTools<TModel extends AiGatewayLanguageModel>(model: TModel): TModel {
  const isProduction = Bun.env.NODE_ENV === 'production'
  return isProduction ? model : (wrapLanguageModel({ model, middleware: devToolsMiddleware() }) as TModel)
}
354
895
 
355
- function createAiGatewayProvider(extraParams?: AiGatewayExtraParams) {
356
- const { apiKey, baseURL } = readAiGatewayConfig()
357
- if (!apiKey.startsWith(EXPECTED_GATEWAY_KEY_PREFIX)) {
358
- throw new Error(`[ai-gateway] Gateway keys must use the ${EXPECTED_GATEWAY_KEY_PREFIX}* format.`)
896
/**
 * Creates a v3 language model facade that defers to `params.resolve()` on every
 * `doGenerate` / `doStream` call, so the real model is only looked up when it is used.
 * The facade reports `params.providerId` / `params.modelId` and an empty `supportedUrls`.
 */
function createLazyAiGatewayLanguageModel(params: {
  modelId: string
  providerId: string
  resolve: () => AiGatewayLanguageModel
}): AiGatewayLanguageModel {
  const lazyModel: AiGatewayLanguageModel = {
    specificationVersion: 'v3',
    provider: params.providerId,
    modelId: params.modelId,
    supportedUrls: {},
    doGenerate(options) {
      return params.resolve().doGenerate(options)
    },
    doStream(options) {
      return params.resolve().doStream(options)
    },
  }
  return lazyModel
}
368
910
 
369
- function withAiGatewayDevTools<TModel extends AiGatewayLanguageModel>(model: TModel): TModel {
370
- if (process.env.NODE_ENV === 'production') {
371
- return model
911
/**
 * Creates a v3 embedding model facade that looks up the gateway provider's embedding
 * model on every `doEmbed` call instead of at construction time.
 */
function createLazyAiGatewayEmbeddingModel(modelId: string): AiGatewayEmbeddingModel {
  const lazyModel: AiGatewayEmbeddingModel = {
    specificationVersion: 'v3',
    provider: OPENAI_EMBEDDING_PROVIDER_ID,
    modelId,
    maxEmbeddingsPerCall: OPENAI_EMBEDDING_MAX_PER_CALL,
    supportsParallelCalls: true,
    doEmbed(options) {
      return getAiGatewayProvider().embeddingModel(modelId).doEmbed(options)
    },
  }
  return lazyModel
}
376
921
 
377
- let provider: ReturnType<typeof createOpenAI> | null = null
378
- let openRouterResponseHealingProvider: ReturnType<typeof createOpenAI> | null = null
379
-
380
922
/** Returns the provider held by the {@link AiGatewayTag} service of the SDK runtime. */
export function getAiGatewayProvider() {
  const { provider } = getAiGateway()
  return provider
}
395
925
 
396
926
/**
 * Creates the gateway language model for `modelId`.
 *
 * OpenRouter model ids are delegated to `aiGatewayChatModel`. Every other id gets a
 * lazily resolved provider model wrapped with the shared gateway middleware and, outside
 * production, the dev-tools middleware.
 */
export function aiGatewayModel(modelId: string) {
  if (isOpenRouterModel(modelId)) {
    return aiGatewayChatModel(modelId)
  }

  const lazyModel = createLazyAiGatewayLanguageModel({
    modelId,
    providerId: OPENAI_RESPONSES_PROVIDER_ID,
    resolve: () => getAiGatewayProvider()(modelId),
  })
  const middleware = createAiGatewayLanguageModelMiddleware(modelId)

  return withAiGatewayDevTools(wrapLanguageModel({ model: lazyModel, middleware }))
}
414
942
 
415
943
/** Delegates to `aiGatewayChatModel`; the returned model is identical to calling it directly. */
export function aiGatewayOpenRouterResponseHealingModel(modelId: string) {
  const chatModel = aiGatewayChatModel(modelId)
  return chatModel
}
426
946
 
427
947
/**
 * Creates the gateway chat model for `modelId`: a lazily resolved `provider.chat(modelId)`
 * wrapped with the shared gateway middleware and, outside production, the dev-tools
 * middleware.
 */
export function aiGatewayChatModel(modelId: string) {
  const lazyModel = createLazyAiGatewayLanguageModel({
    modelId,
    providerId: OPENAI_CHAT_PROVIDER_ID,
    resolve: () => getAiGatewayProvider().chat(modelId),
  })
  const middleware = createAiGatewayLanguageModelMiddleware(modelId)

  return withAiGatewayDevTools(wrapLanguageModel({ model: lazyModel, middleware }))
}
458
959
 
459
960
/**
 * Creates the gateway embedding model for `modelId`.
 *
 * The lazily resolved embedding model is wrapped so that every `doEmbed` call goes
 * through `classifyAiGatewayError`, `withAiGatewayResilience` and the gateway semaphore,
 * and is traced under the `AiGateway.embed` span annotated with the model id.
 */
export function aiGatewayEmbeddingModel(modelId: string) {
  const source = 'ai-gateway.embed'

  return wrapEmbeddingModel({
    model: createLazyAiGatewayEmbeddingModel(modelId),
    middleware: {
      specificationVersion: 'v3',
      wrapEmbed: ({ doEmbed }) => {
        const guardedEmbed = Effect.tryPromise({
          try: doEmbed,
          catch: (cause) => classifyAiGatewayError(source, cause),
        })
        const limited = withAiGatewayConcurrency(withAiGatewayResilience(source, guardedEmbed))
        return Effect.runPromise(limited.pipe(Effect.withSpan('AiGateway.embed'), Effect.annotateSpans({ modelId })))
      },
    },
  })
}
462
977
 
463
978
  export { DEFAULT_AI_GATEWAY_URL, normalizeAiGatewayUrl }