@lota-sdk/core 0.4.10 → 0.4.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110)
  1. package/package.json +3 -3
  2. package/src/ai-gateway/ai-gateway.ts +214 -98
  3. package/src/ai-gateway/index.ts +16 -1
  4. package/src/config/agent-defaults.ts +4 -120
  5. package/src/config/logger.ts +18 -34
  6. package/src/config/model-constants.ts +1 -0
  7. package/src/config/thread-defaults.ts +1 -18
  8. package/src/create-runtime.ts +90 -28
  9. package/src/db/base.service.ts +30 -38
  10. package/src/db/service.ts +489 -545
  11. package/src/effect/index.ts +0 -2
  12. package/src/effect/layers.ts +6 -13
  13. package/src/embeddings/provider.ts +2 -7
  14. package/src/index.ts +4 -5
  15. package/src/queues/autonomous-job.queue.ts +159 -113
  16. package/src/queues/context-compaction.queue.ts +39 -25
  17. package/src/queues/delayed-node-promotion.queue.ts +56 -29
  18. package/src/queues/document-processor.queue.ts +5 -3
  19. package/src/queues/index.ts +1 -0
  20. package/src/queues/memory-consolidation.queue.ts +79 -53
  21. package/src/queues/organization-learning.queue.ts +63 -39
  22. package/src/queues/plan-agent-heartbeat.queue.ts +104 -79
  23. package/src/queues/plan-scheduler.queue.ts +100 -84
  24. package/src/queues/post-chat-memory.queue.ts +55 -33
  25. package/src/queues/queue-factory.ts +40 -41
  26. package/src/queues/queues.service.ts +61 -0
  27. package/src/queues/title-generation.queue.ts +42 -31
  28. package/src/redis/org-memory-lock.ts +24 -9
  29. package/src/redis/redis-lease-lock.ts +8 -1
  30. package/src/runtime/agent-identity-overrides.ts +7 -3
  31. package/src/runtime/agent-runtime-policy.ts +9 -4
  32. package/src/runtime/agent-stream-helpers.ts +9 -4
  33. package/src/runtime/context-compaction/context-compaction-runtime.ts +28 -32
  34. package/src/runtime/context-compaction/context-compaction.ts +9 -7
  35. package/src/runtime/domain-layer.ts +15 -4
  36. package/src/runtime/execution-plan-visibility.ts +5 -2
  37. package/src/runtime/graph-designer.ts +0 -22
  38. package/src/runtime/index.ts +2 -0
  39. package/src/runtime/indexed-repositories-policy.ts +2 -6
  40. package/src/runtime/live-turn-trace.ts +344 -0
  41. package/src/runtime/plugin-resolution.ts +29 -12
  42. package/src/runtime/post-turn-side-effects.ts +139 -141
  43. package/src/runtime/runtime-config.ts +0 -6
  44. package/src/runtime/runtime-extensions.ts +0 -54
  45. package/src/runtime/runtime-lifecycle.ts +4 -4
  46. package/src/runtime/runtime-services.ts +125 -53
  47. package/src/runtime/runtime-worker-registry.ts +113 -30
  48. package/src/runtime/social-chat/social-chat-agent-runner.ts +6 -3
  49. package/src/runtime/social-chat/social-chat-history.ts +3 -1
  50. package/src/runtime/social-chat/social-chat.ts +35 -20
  51. package/src/runtime/team-consultation/team-consultation-orchestrator.ts +6 -5
  52. package/src/runtime/team-consultation/team-consultation-prompts.ts +11 -6
  53. package/src/runtime/thread-chat-helpers.ts +18 -9
  54. package/src/runtime/thread-turn-context.ts +7 -47
  55. package/src/runtime/turn-lifecycle.ts +6 -14
  56. package/src/services/agent-activity.service.ts +168 -175
  57. package/src/services/agent-executor.service.ts +35 -16
  58. package/src/services/attachment.service.ts +4 -70
  59. package/src/services/autonomous-job.service.ts +53 -61
  60. package/src/services/context-compaction.service.ts +7 -9
  61. package/src/services/execution-plan/execution-plan-graph.ts +106 -115
  62. package/src/services/execution-plan/execution-plan-schedule.ts +1 -15
  63. package/src/services/execution-plan/execution-plan.service.ts +67 -50
  64. package/src/services/global-orchestrator.service.ts +18 -7
  65. package/src/services/graph-full-routing.ts +7 -6
  66. package/src/services/memory/memory-conversation.ts +10 -5
  67. package/src/services/memory/memory.service.ts +11 -8
  68. package/src/services/ownership-dispatcher.service.ts +16 -5
  69. package/src/services/plan/plan-agent-heartbeat.service.ts +29 -15
  70. package/src/services/plan/plan-agent-query.service.ts +12 -8
  71. package/src/services/plan/plan-completion-side-effects.ts +93 -101
  72. package/src/services/plan/plan-cycle.service.ts +7 -45
  73. package/src/services/plan/plan-deadline.service.ts +28 -17
  74. package/src/services/plan/plan-event-delivery.service.ts +47 -40
  75. package/src/services/plan/plan-executor-context.ts +2 -0
  76. package/src/services/plan/plan-executor-graph.ts +366 -391
  77. package/src/services/plan/plan-executor.service.ts +13 -91
  78. package/src/services/plan/plan-scheduler.service.ts +62 -49
  79. package/src/services/plan/plan-transaction-events.ts +1 -1
  80. package/src/services/recent-activity-title.service.ts +6 -2
  81. package/src/services/thread/thread-bootstrap.ts +11 -9
  82. package/src/services/thread/thread-message.service.ts +6 -5
  83. package/src/services/thread/thread-turn-execution.ts +86 -82
  84. package/src/services/thread/thread-turn-preparation.service.ts +92 -45
  85. package/src/services/thread/thread-turn-streaming.ts +60 -28
  86. package/src/services/thread/thread-turn.ts +212 -46
  87. package/src/services/thread/thread.service.ts +21 -6
  88. package/src/system-agents/recent-activity-title-refiner.agent.ts +8 -5
  89. package/src/system-agents/thread-router.agent.ts +23 -20
  90. package/src/tools/execution-plan.tool.ts +8 -3
  91. package/src/tools/fetch-webpage.tool.ts +10 -9
  92. package/src/tools/firecrawl-client.ts +0 -15
  93. package/src/tools/remember-memory.tool.ts +3 -6
  94. package/src/tools/research-topic.tool.ts +12 -3
  95. package/src/tools/search-web.tool.ts +10 -9
  96. package/src/tools/search.tool.ts +4 -5
  97. package/src/tools/team-think.tool.ts +139 -121
  98. package/src/workers/bootstrap.ts +9 -10
  99. package/src/workers/memory-consolidation.worker.ts +4 -1
  100. package/src/workers/organization-learning.worker.ts +15 -2
  101. package/src/workers/regular-chat-memory-digest.helpers.ts +3 -4
  102. package/src/workers/regular-chat-memory-digest.runner.ts +21 -14
  103. package/src/workers/skill-extraction.runner.ts +13 -15
  104. package/src/workers/worker-utils.ts +6 -18
  105. package/src/effect/awaitable-effect.ts +0 -96
  106. package/src/effect/runtime-ref.ts +0 -25
  107. package/src/effect/runtime.ts +0 -46
  108. package/src/redis/runtime-connection.ts +0 -20
  109. package/src/runtime/runtime-accessors.ts +0 -92
  110. package/src/runtime/runtime-token.ts +0 -47
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lota-sdk/core",
3
- "version": "0.4.10",
3
+ "version": "0.4.12",
4
4
  "type": "module",
5
5
  "main": "./src/index.ts",
6
6
  "types": "./src/index.ts",
@@ -31,10 +31,10 @@
31
31
  "@ai-sdk/openai": "^3.0.53",
32
32
  "@chat-adapter/slack": "^4.26.0",
33
33
  "@chat-adapter/state-ioredis": "^4.26.0",
34
- "@lota-sdk/shared": "0.4.10",
34
+ "@lota-sdk/shared": "0.4.12",
35
35
  "@mendable/firecrawl-js": "^4.18.3",
36
36
  "@surrealdb/node": "^3.0.3",
37
- "ai": "^6.0.167",
37
+ "ai": "^6.0.168",
38
38
  "bullmq": "^5.74.1",
39
39
  "chat": "^4.26.0",
40
40
  "effect": "^4.0.0-beta.50",
package/src/ai-gateway/ai-gateway.ts CHANGED
@@ -6,7 +6,6 @@ import { Cause, Clock, Context, Duration, Effect, ExecutionPlan, Fiber, Layer, S
6
6
 
7
7
  import { DEFAULT_AI_GATEWAY_URL } from '../config/constants'
8
8
  import { AiGenerationError, ConfigurationError } from '../effect/errors'
9
- import { resolveLotaService } from '../effect/runtime'
10
9
  import { RuntimeConfigServiceTag } from '../effect/services'
11
10
  import { getDirectOpenRouterProvider, normalizeDirectOpenRouterModelId } from '../openrouter/direct-provider'
12
11
  import { isRecord, readString } from '../utils/string'
@@ -24,6 +23,8 @@ type AiGatewayGeneratedContent = AiGatewayGenerateResult['content'][number]
24
23
  type AiGatewayStreamPart = AiGatewayStreamResult['stream'] extends ReadableStream<infer T> ? T : never
25
24
  type AiGatewayProviderOptions = NonNullable<AiGatewayCallOptions['providerOptions']>
26
25
  type AiGatewayAttemptResult<A> = { source: string; result: A }
26
+ // eslint-disable-next-line @typescript-eslint/no-redundant-type-constituents
27
+ type AiGatewayRunFork = <A, E>(effect: Effect.Effect<A, E, never>) => Fiber.Fiber<A, E | unknown>
27
28
 
28
29
  class AiGatewayGenerateAttempt extends Context.Service<
29
30
  AiGatewayGenerateAttempt,
@@ -271,12 +272,13 @@ function withAiGatewayResilience<A>(source: string, effect: Effect.Effect<A, AiG
271
272
  function withAiGatewayStreamIdleTimeout(
272
273
  stream: ReadableStream<AiGatewayStreamPart>,
273
274
  source: string,
275
+ runFork: AiGatewayRunFork,
274
276
  onFinalize?: () => void,
275
277
  ): ReadableStream<AiGatewayStreamPart> {
276
278
  let closed = false
277
279
  let reader: ReadableStreamDefaultReader<AiGatewayStreamPart> | null = null
278
- let idleTimeoutFiber: ReturnType<typeof Effect.runFork> | null = null
279
- let bodyPumpFiber: ReturnType<typeof Effect.runFork> | null = null
280
+ let idleTimeoutFiber: Fiber.Fiber<unknown, unknown> | null = null
281
+ let bodyPumpFiber: Fiber.Fiber<unknown, unknown> | null = null
280
282
  let finalized = false
281
283
 
282
284
  const finalize = () => {
@@ -285,9 +287,9 @@ function withAiGatewayStreamIdleTimeout(
285
287
  onFinalize?.()
286
288
  }
287
289
 
288
- const interruptFiber = (fiber: ReturnType<typeof Effect.runFork> | null) => {
290
+ const interruptFiber = (fiber: Fiber.Fiber<unknown, unknown> | null) => {
289
291
  if (!fiber) return
290
- void Effect.runFork(Fiber.interrupt(fiber))
292
+ void runFork(Fiber.interrupt(fiber))
291
293
  }
292
294
 
293
295
  const stopIdleTimeout = () => {
@@ -351,7 +353,7 @@ function withAiGatewayStreamIdleTimeout(
351
353
 
352
354
  const resetIdleTimeout = (controller: ReadableStreamDefaultController<AiGatewayStreamPart>) => {
353
355
  stopIdleTimeout()
354
- idleTimeoutFiber = Effect.runFork(
356
+ idleTimeoutFiber = runFork(
355
357
  Effect.sleep(Duration.millis(AI_GATEWAY_STREAM_IDLE_TIMEOUT_MS)).pipe(
356
358
  Effect.flatMap(() =>
357
359
  Effect.gen(function* () {
@@ -417,7 +419,7 @@ function withAiGatewayStreamIdleTimeout(
417
419
  start(controller) {
418
420
  const streamReader = stream.getReader()
419
421
  reader = streamReader
420
- bodyPumpFiber = Effect.runFork(pumpStreamEffect(streamReader, controller))
422
+ bodyPumpFiber = runFork(pumpStreamEffect(streamReader, controller))
421
423
  },
422
424
  cancel(reason) {
423
425
  closed = true
@@ -493,40 +495,22 @@ export const AiGatewayLive = Layer.effect(
493
495
 
494
496
  type AiGatewayRuntimeConfig = Context.Service.Shape<typeof RuntimeConfigServiceTag>
495
497
 
496
- let currentAiGateway: AiGatewayTag['Service'] | null = null
497
- let currentAiGatewayRuntimeConfig: AiGatewayRuntimeConfig | null = null
498
-
499
- export function configureAiGatewayRuntimeAccessors(params: {
500
- aiGateway: AiGatewayTag['Service']
501
- runtimeConfig: AiGatewayRuntimeConfig
502
- }): void {
503
- currentAiGateway = params.aiGateway
504
- currentAiGatewayRuntimeConfig = params.runtimeConfig
505
- }
506
-
507
- export function clearAiGatewayRuntimeAccessors(): void {
508
- currentAiGateway = null
509
- currentAiGatewayRuntimeConfig = null
510
- }
511
-
512
- function getAiGateway(): AiGatewayTag['Service'] {
513
- return currentAiGateway ?? resolveLotaService(AiGatewayTag)
514
- }
515
-
516
- function getAiGatewayRuntimeConfig(): AiGatewayRuntimeConfig {
517
- return currentAiGatewayRuntimeConfig ?? resolveLotaService(RuntimeConfigServiceTag)
518
- }
519
-
520
- function withAiGatewayConcurrency<A>(effect: Effect.Effect<A, AiGenerationError>): Effect.Effect<A, AiGenerationError> {
521
- return getAiGateway().semaphore.withPermit(effect)
498
+ function withAiGatewayConcurrency<A>(
499
+ effect: Effect.Effect<A, AiGenerationError>,
500
+ ): Effect.Effect<A, AiGenerationError, AiGatewayTag> {
501
+ return Effect.gen(function* () {
502
+ const gateway = yield* AiGatewayTag
503
+ return yield* gateway.semaphore.withPermit(effect)
504
+ })
522
505
  }
523
506
 
524
507
  function withAiGatewayStreamConcurrency(
525
508
  effect: Effect.Effect<AiGatewayAttemptResult<AiGatewayStreamResult>, AiGenerationError>,
526
- ): Effect.Effect<AiGatewayAttemptResult<AiGatewayStreamResult>, AiGenerationError> {
509
+ runFork: AiGatewayRunFork,
510
+ ): Effect.Effect<AiGatewayAttemptResult<AiGatewayStreamResult>, AiGenerationError, AiGatewayTag> {
527
511
  return Effect.uninterruptibleMask((restore) =>
528
512
  Effect.gen(function* () {
529
- const { semaphore } = getAiGateway()
513
+ const { semaphore } = yield* AiGatewayTag
530
514
  const currentContext = yield* Effect.context<never>()
531
515
  yield* semaphore.take(1)
532
516
 
@@ -551,7 +535,7 @@ function withAiGatewayStreamConcurrency(
551
535
  ...attempt,
552
536
  result: {
553
537
  ...attempt.result,
554
- stream: withAiGatewayStreamIdleTimeout(attempt.result.stream, attempt.source, release),
538
+ stream: withAiGatewayStreamIdleTimeout(attempt.result.stream, attempt.source, runFork, release),
555
539
  },
556
540
  }
557
541
  }),
@@ -611,6 +595,52 @@ export function extractAiGatewayChatReasoningDeltaText(rawChunk: unknown): strin
611
595
  return null
612
596
  }
613
597
 
598
+ function findAiGatewayChatReasoningOverlap(previousReasoningText: string, nextReasoningText: string): number {
599
+ const maxOverlap = Math.min(previousReasoningText.length, nextReasoningText.length)
600
+
601
+ for (let overlapLength = maxOverlap; overlapLength > 0; overlapLength -= 1) {
602
+ if (previousReasoningText.slice(-overlapLength) === nextReasoningText.slice(0, overlapLength)) {
603
+ return overlapLength
604
+ }
605
+ }
606
+
607
+ return 0
608
+ }
609
+
610
+ function deriveAiGatewayChatReasoningDeltaText(params: { previousReasoningText: string; rawChunk: unknown }): {
611
+ delta: string | null
612
+ nextReasoningText: string
613
+ } {
614
+ const extractedText = extractAiGatewayChatReasoningDeltaText(params.rawChunk)
615
+ if (!extractedText) {
616
+ return { delta: null, nextReasoningText: params.previousReasoningText }
617
+ }
618
+
619
+ if (params.previousReasoningText.length === 0) {
620
+ return { delta: extractedText, nextReasoningText: extractedText }
621
+ }
622
+
623
+ if (extractedText === params.previousReasoningText) {
624
+ return { delta: null, nextReasoningText: params.previousReasoningText }
625
+ }
626
+
627
+ if (extractedText.startsWith(params.previousReasoningText)) {
628
+ const delta = extractedText.slice(params.previousReasoningText.length)
629
+ return { delta: delta.length > 0 ? delta : null, nextReasoningText: extractedText }
630
+ }
631
+
632
+ const overlapLength = findAiGatewayChatReasoningOverlap(params.previousReasoningText, extractedText)
633
+ if (overlapLength > 0) {
634
+ const delta = extractedText.slice(overlapLength)
635
+ return { delta: delta.length > 0 ? delta : null, nextReasoningText: `${params.previousReasoningText}${delta}` }
636
+ }
637
+
638
+ // Some providers emit true deltas, others resend the full reasoning-so-far.
639
+ // If the chunk is not a prefix extension, treat it as a standalone delta and
640
+ // append it to the accumulated reasoning text.
641
+ return { delta: extractedText, nextReasoningText: `${params.previousReasoningText}${extractedText}` }
642
+ }
643
+
614
644
  export function injectAiGatewayChatReasoningContent(
615
645
  content: readonly AiGatewayGeneratedContent[],
616
646
  response?: AiGatewayChatResponse,
@@ -638,18 +668,20 @@ function isOpenRouterModel(modelId: string): boolean {
638
668
  return modelId.trim().toLowerCase().startsWith('openrouter/')
639
669
  }
640
670
 
641
- function hasDirectOpenRouterFallback(modelId: string): boolean {
642
- const config = getAiGatewayRuntimeConfig()
671
+ function hasDirectOpenRouterFallback(config: AiGatewayRuntimeConfig, modelId: string): boolean {
643
672
  return isOpenRouterModel(modelId) && Boolean(config.aiGateway.openRouterApiKey?.trim())
644
673
  }
645
674
 
646
- function getDirectOpenRouterChatModel(modelId: string): AiGatewayLanguageModel {
647
- const config = getAiGatewayRuntimeConfig()
675
+ function getDirectOpenRouterChatModel(config: AiGatewayRuntimeConfig, modelId: string): AiGatewayLanguageModel {
648
676
  return getDirectOpenRouterProvider(config.aiGateway.openRouterApiKey).chat(normalizeDirectOpenRouterModelId(modelId))
649
677
  }
650
678
 
651
- function shouldFallbackToDirectOpenRouter(modelId: string, error: AiGenerationError): boolean {
652
- return hasDirectOpenRouterFallback(modelId) && isRetryableAiGatewayError(error)
679
+ function shouldFallbackToDirectOpenRouter(
680
+ config: AiGatewayRuntimeConfig,
681
+ modelId: string,
682
+ error: AiGenerationError,
683
+ ): boolean {
684
+ return hasDirectOpenRouterFallback(config, modelId) && isRetryableAiGatewayError(error)
653
685
  }
654
686
 
655
687
  function attemptAiGatewayGenerate(
@@ -681,22 +713,25 @@ function attemptAiGatewayStream(
681
713
  }
682
714
 
683
715
  function attemptDirectOpenRouterGenerate(
716
+ config: AiGatewayRuntimeConfig,
684
717
  modelId: string,
685
718
  params: AiGatewayCallOptions,
686
719
  ): Effect.Effect<AiGatewayAttemptResult<AiGatewayGenerateResult>, AiGenerationError> {
687
- const model = getDirectOpenRouterChatModel(modelId)
720
+ const model = getDirectOpenRouterChatModel(config, modelId)
688
721
  return attemptAiGatewayGenerate('openrouter.generate', () => model.doGenerate(params))
689
722
  }
690
723
 
691
724
  function attemptDirectOpenRouterStream(
725
+ config: AiGatewayRuntimeConfig,
692
726
  modelId: string,
693
727
  params: AiGatewayCallOptions,
694
728
  ): Effect.Effect<AiGatewayAttemptResult<AiGatewayStreamResult>, AiGenerationError> {
695
- const model = getDirectOpenRouterChatModel(modelId)
729
+ const model = getDirectOpenRouterChatModel(config, modelId)
696
730
  return attemptAiGatewayStream('openrouter.stream', () => model.doStream(params))
697
731
  }
698
732
 
699
733
  function executeGenerateAttemptPlan(
734
+ config: AiGatewayRuntimeConfig,
700
735
  modelId: string,
701
736
  params: AiGatewayCallOptions,
702
737
  doGenerate: () => PromiseLike<AiGatewayGenerateResult>,
@@ -709,7 +744,7 @@ function executeGenerateAttemptPlan(
709
744
  return yield* attempt.execute
710
745
  })
711
746
 
712
- if (!hasDirectOpenRouterFallback(modelId)) {
747
+ if (!hasDirectOpenRouterFallback(config, modelId)) {
713
748
  return effect.pipe(
714
749
  Effect.provide(primary),
715
750
  Effect.withSpan('AiGateway.executeGeneratePlan'),
@@ -723,9 +758,9 @@ function executeGenerateAttemptPlan(
723
758
  { provide: primary },
724
759
  {
725
760
  provide: Layer.succeed(AiGatewayGenerateAttempt, {
726
- execute: attemptDirectOpenRouterGenerate(modelId, params),
761
+ execute: attemptDirectOpenRouterGenerate(config, modelId, params),
727
762
  }),
728
- while: (error: AiGenerationError) => shouldFallbackToDirectOpenRouter(modelId, error),
763
+ while: (error: AiGenerationError) => shouldFallbackToDirectOpenRouter(config, modelId, error),
729
764
  },
730
765
  ),
731
766
  ),
@@ -735,6 +770,7 @@ function executeGenerateAttemptPlan(
735
770
  }
736
771
 
737
772
  function executeStreamAttemptPlan(
773
+ config: AiGatewayRuntimeConfig,
738
774
  modelId: string,
739
775
  params: AiGatewayCallOptions,
740
776
  doStream: () => PromiseLike<AiGatewayStreamResult>,
@@ -747,7 +783,7 @@ function executeStreamAttemptPlan(
747
783
  return yield* attempt.execute
748
784
  })
749
785
 
750
- if (!hasDirectOpenRouterFallback(modelId)) {
786
+ if (!hasDirectOpenRouterFallback(config, modelId)) {
751
787
  return effect.pipe(
752
788
  Effect.provide(primary),
753
789
  Effect.withSpan('AiGateway.executeStreamPlan'),
@@ -760,8 +796,10 @@ function executeStreamAttemptPlan(
760
796
  ExecutionPlan.make(
761
797
  { provide: primary },
762
798
  {
763
- provide: Layer.succeed(AiGatewayStreamAttempt, { execute: attemptDirectOpenRouterStream(modelId, params) }),
764
- while: (error: AiGenerationError) => shouldFallbackToDirectOpenRouter(modelId, error),
799
+ provide: Layer.succeed(AiGatewayStreamAttempt, {
800
+ execute: attemptDirectOpenRouterStream(config, modelId, params),
801
+ }),
802
+ while: (error: AiGenerationError) => shouldFallbackToDirectOpenRouter(config, modelId, error),
765
803
  },
766
804
  ),
767
805
  ),
@@ -794,6 +832,8 @@ export function injectAiGatewayChatReasoningStream(
794
832
  const reasoningId = 'ai-gateway-reasoning-0'
795
833
  let reasoningOpen = false
796
834
  let reasoningClosed = false
835
+ let reasoningText = ''
836
+ let nativeReasoningSeen = false
797
837
 
798
838
  return stream.pipeThrough(
799
839
  new TransformStream<AiGatewayStreamPart, AiGatewayStreamPart>({
@@ -805,11 +845,25 @@ export function injectAiGatewayChatReasoningStream(
805
845
  reasoningClosed = true
806
846
  }
807
847
 
848
+ if (chunk.type === 'reasoning-start' || chunk.type === 'reasoning-delta' || chunk.type === 'reasoning-end') {
849
+ nativeReasoningSeen = true
850
+ closeReasoning()
851
+ controller.enqueue(chunk)
852
+ return
853
+ }
854
+
808
855
  if (chunk.type === 'raw') {
809
- const reasoningDelta = reasoningClosed ? null : extractAiGatewayChatReasoningDeltaText(chunk.rawValue)
856
+ const reasoningDeltaState =
857
+ reasoningClosed || nativeReasoningSeen
858
+ ? null
859
+ : deriveAiGatewayChatReasoningDeltaText({
860
+ previousReasoningText: reasoningText,
861
+ rawChunk: chunk.rawValue,
862
+ })
810
863
  controller.enqueue(chunk)
811
864
 
812
- if (reasoningDelta) {
865
+ if (reasoningDeltaState?.delta) {
866
+ reasoningText = reasoningDeltaState.nextReasoningText
813
867
  if (!reasoningOpen) {
814
868
  controller.enqueue({ type: 'reasoning-start', id: reasoningId } satisfies AiGatewayStreamPart)
815
869
  reasoningOpen = true
@@ -818,7 +872,7 @@ export function injectAiGatewayChatReasoningStream(
818
872
  controller.enqueue({
819
873
  type: 'reasoning-delta',
820
874
  id: reasoningId,
821
- delta: reasoningDelta,
875
+ delta: reasoningDeltaState.delta,
822
876
  } satisfies AiGatewayStreamPart)
823
877
  }
824
878
  return
@@ -849,7 +903,56 @@ function addAiGatewayReasoningRawChunks(
849
903
  return { ...params, includeRawChunks: true }
850
904
  }
851
905
 
852
- function createAiGatewayLanguageModelMiddleware(modelId: string): LanguageModelMiddleware {
906
+ function resolveProviderModel(
907
+ provider: ReturnType<typeof createOpenAI>,
908
+ modelId: string,
909
+ providerId: string,
910
+ ): AiGatewayLanguageModel {
911
+ return providerId === OPENAI_CHAT_PROVIDER_ID ? provider.chat(modelId) : provider(modelId)
912
+ }
913
+
914
+ // Module-level Promise slot that `createLotaRuntime` populates during boot.
915
+ // This is a legitimate per-process singleton (mirrors the worker bootstrap
916
+ // pattern in `workers/bootstrap.ts`): the AI gateway middleware is dispatched
917
+ // by AI SDK callers that live outside Effect context, so the middleware needs
918
+ // a way to run gateway Effects without capturing a `ManagedRuntime` through
919
+ // every `aiGatewayModel(modelId)` call site.
920
+ //
921
+ // Only `createLotaRuntime` writes to the slot; resetting on disconnect is a
922
+ // Phase 3b concern — for now it stays alive for the process lifetime.
923
+ let aiGatewayRuntimeReady: Promise<{
924
+ gateway: Context.Service.Shape<typeof AiGatewayTag>
925
+ runtimeConfig: Context.Service.Shape<typeof RuntimeConfigServiceTag>
926
+ runPromise: <A, E>(effect: Effect.Effect<A, E, never>) => Promise<A>
927
+ runFork: AiGatewayRunFork
928
+ }> | null = null
929
+
930
+ export function bindAiGatewayRuntime(params: {
931
+ gateway: Context.Service.Shape<typeof AiGatewayTag>
932
+ runtimeConfig: Context.Service.Shape<typeof RuntimeConfigServiceTag>
933
+ runPromise: <A, E>(effect: Effect.Effect<A, E, never>) => Promise<A>
934
+ runFork: AiGatewayRunFork
935
+ }): void {
936
+ aiGatewayRuntimeReady = Promise.resolve(params)
937
+ }
938
+
939
+ export function clearAiGatewayRuntime(): void {
940
+ aiGatewayRuntimeReady = null
941
+ }
942
+
943
+ async function getAiGatewayRuntime(): Promise<{
944
+ gateway: Context.Service.Shape<typeof AiGatewayTag>
945
+ runtimeConfig: Context.Service.Shape<typeof RuntimeConfigServiceTag>
946
+ runPromise: <A, E>(effect: Effect.Effect<A, E, never>) => Promise<A>
947
+ runFork: AiGatewayRunFork
948
+ }> {
949
+ if (!aiGatewayRuntimeReady) {
950
+ throw new Error('AI gateway runtime has not been initialized. Call createLotaRuntime() first.')
951
+ }
952
+ return aiGatewayRuntimeReady
953
+ }
954
+
955
+ function createAiGatewayLanguageModelMiddleware(modelId: string, providerId: string): LanguageModelMiddleware {
853
956
  return {
854
957
  specificationVersion: 'v3',
855
958
  transformParams: ({ params, type }) =>
@@ -858,10 +961,12 @@ function createAiGatewayLanguageModelMiddleware(modelId: string): LanguageModelM
858
961
  addAiGatewayReasoningRawChunks(normalizeAiGatewayChatProviderOptions(params, modelId), type),
859
962
  ),
860
963
  ),
861
- wrapGenerate: ({ doGenerate, params }) =>
862
- Effect.runPromise(
964
+ wrapGenerate: async ({ params }) => {
965
+ const { gateway, runtimeConfig, runPromise } = await getAiGatewayRuntime()
966
+ const model = resolveProviderModel(gateway.provider, modelId, providerId)
967
+ return runPromise(
863
968
  withAiGatewayConcurrency(
864
- executeGenerateAttemptPlan(modelId, params, doGenerate).pipe(
969
+ executeGenerateAttemptPlan(runtimeConfig, modelId, params, () => model.doGenerate(params)).pipe(
865
970
  Effect.map(({ result }) => ({
866
971
  ...result,
867
972
  content: injectAiGatewayChatReasoningContent(
@@ -870,12 +975,15 @@ function createAiGatewayLanguageModelMiddleware(modelId: string): LanguageModelM
870
975
  ),
871
976
  })),
872
977
  ),
873
- ),
874
- ),
875
- wrapStream: ({ doStream, params }) =>
876
- Effect.runPromise(
978
+ ).pipe(Effect.provideService(AiGatewayTag, gateway)),
979
+ )
980
+ },
981
+ wrapStream: async ({ params }) => {
982
+ const { gateway, runtimeConfig, runPromise, runFork } = await getAiGatewayRuntime()
983
+ const model = resolveProviderModel(gateway.provider, modelId, providerId)
984
+ return runPromise(
877
985
  withAiGatewayStreamConcurrency(
878
- executeStreamAttemptPlan(modelId, params, doStream).pipe(
986
+ executeStreamAttemptPlan(runtimeConfig, modelId, params, () => model.doStream(params)).pipe(
879
987
  Effect.map((attempt) => ({
880
988
  ...attempt,
881
989
  result: isReasoningEnabled(params)
@@ -883,8 +991,12 @@ function createAiGatewayLanguageModelMiddleware(modelId: string): LanguageModelM
883
991
  : attempt.result,
884
992
  })),
885
993
  ),
886
- ).pipe(Effect.map(({ result }) => result)),
887
- ),
994
+ runFork,
995
+ )
996
+ .pipe(Effect.map(({ result }) => result))
997
+ .pipe(Effect.provideService(AiGatewayTag, gateway)),
998
+ )
999
+ },
888
1000
  }
889
1001
  }
890
1002
 
@@ -921,36 +1033,42 @@ function withAiGatewayDevTools<TModel extends AiGatewayLanguageModel>(model: TMo
921
1033
  return wrapLanguageModel({ model, middleware: devToolsMiddleware() }) as TModel
922
1034
  }
923
1035
 
924
- function createLazyAiGatewayLanguageModel(params: {
925
- modelId: string
926
- providerId: string
927
- resolve: () => AiGatewayLanguageModel
928
- }): AiGatewayLanguageModel {
1036
+ function createAiGatewayLanguageModelPlaceholder(modelId: string, providerId: string): AiGatewayLanguageModel {
1037
+ const unreachable = (method: string) =>
1038
+ Promise.reject(
1039
+ new Error(
1040
+ `[ai-gateway] AiGateway language model ${modelId}.${method} was invoked without the gateway middleware; ` +
1041
+ 'this call path should be fully handled by createAiGatewayLanguageModelMiddleware.',
1042
+ ),
1043
+ )
1044
+
929
1045
  return {
930
1046
  specificationVersion: 'v3',
931
- provider: params.providerId,
932
- modelId: params.modelId,
1047
+ provider: providerId,
1048
+ modelId,
933
1049
  supportedUrls: {},
934
- doGenerate: (options) => params.resolve().doGenerate(options),
935
- doStream: (options) => params.resolve().doStream(options),
1050
+ doGenerate: () => unreachable('doGenerate'),
1051
+ doStream: () => unreachable('doStream'),
936
1052
  }
937
1053
  }
938
1054
 
939
- function createLazyAiGatewayEmbeddingModel(modelId: string): AiGatewayEmbeddingModel {
1055
+ function createAiGatewayEmbeddingModelPlaceholder(modelId: string): AiGatewayEmbeddingModel {
940
1056
  return {
941
1057
  specificationVersion: 'v3',
942
1058
  provider: OPENAI_EMBEDDING_PROVIDER_ID,
943
1059
  modelId,
944
1060
  maxEmbeddingsPerCall: OPENAI_EMBEDDING_MAX_PER_CALL,
945
1061
  supportsParallelCalls: true,
946
- doEmbed: (options) => getAiGatewayProvider().embeddingModel(modelId).doEmbed(options),
1062
+ doEmbed: () =>
1063
+ Promise.reject(
1064
+ new Error(
1065
+ `[ai-gateway] AiGateway embedding model ${modelId}.doEmbed was invoked without the gateway middleware; ` +
1066
+ 'this call path should be fully handled by aiGatewayEmbeddingModel middleware.',
1067
+ ),
1068
+ ),
947
1069
  }
948
1070
  }
949
1071
 
950
- export function getAiGatewayProvider() {
951
- return getAiGateway().provider
952
- }
953
-
954
1072
  export function aiGatewayModel(modelId: string) {
955
1073
  if (isOpenRouterModel(modelId)) {
956
1074
  return aiGatewayChatModel(modelId)
@@ -958,12 +1076,8 @@ export function aiGatewayModel(modelId: string) {
958
1076
 
959
1077
  return withAiGatewayDevTools(
960
1078
  wrapLanguageModel({
961
- model: createLazyAiGatewayLanguageModel({
962
- modelId,
963
- providerId: OPENAI_RESPONSES_PROVIDER_ID,
964
- resolve: () => getAiGatewayProvider()(modelId),
965
- }),
966
- middleware: createAiGatewayLanguageModelMiddleware(modelId),
1079
+ model: createAiGatewayLanguageModelPlaceholder(modelId, OPENAI_RESPONSES_PROVIDER_ID),
1080
+ middleware: createAiGatewayLanguageModelMiddleware(modelId, OPENAI_RESPONSES_PROVIDER_ID),
967
1081
  }),
968
1082
  )
969
1083
  }
@@ -975,30 +1089,32 @@ export function aiGatewayOpenRouterResponseHealingModel(modelId: string) {
975
1089
  export function aiGatewayChatModel(modelId: string) {
976
1090
  return withAiGatewayDevTools(
977
1091
  wrapLanguageModel({
978
- model: createLazyAiGatewayLanguageModel({
979
- modelId,
980
- providerId: OPENAI_CHAT_PROVIDER_ID,
981
- resolve: () => getAiGatewayProvider().chat(modelId),
982
- }),
983
- middleware: createAiGatewayLanguageModelMiddleware(modelId),
1092
+ model: createAiGatewayLanguageModelPlaceholder(modelId, OPENAI_CHAT_PROVIDER_ID),
1093
+ middleware: createAiGatewayLanguageModelMiddleware(modelId, OPENAI_CHAT_PROVIDER_ID),
984
1094
  }),
985
1095
  )
986
1096
  }
987
1097
 
988
1098
  export function aiGatewayEmbeddingModel(modelId: string) {
989
1099
  return wrapEmbeddingModel({
990
- model: createLazyAiGatewayEmbeddingModel(modelId),
1100
+ model: createAiGatewayEmbeddingModelPlaceholder(modelId),
991
1101
  middleware: {
992
1102
  specificationVersion: 'v3',
993
- wrapEmbed: ({ doEmbed }) =>
994
- Effect.runPromise(
1103
+ wrapEmbed: async ({ params }) => {
1104
+ const { gateway, runPromise } = await getAiGatewayRuntime()
1105
+ const embeddingModel = gateway.provider.embeddingModel(modelId)
1106
+ return runPromise(
995
1107
  withAiGatewayConcurrency(
996
1108
  withAiGatewayResilience(
997
1109
  'ai-gateway.embed',
998
- Effect.tryPromise({ try: doEmbed, catch: (cause) => classifyAiGatewayError('ai-gateway.embed', cause) }),
999
- ),
1000
- ).pipe(Effect.withSpan('AiGateway.embed'), Effect.annotateSpans({ modelId })),
1001
- ),
1110
+ Effect.tryPromise({
1111
+ try: () => embeddingModel.doEmbed(params),
1112
+ catch: (cause) => classifyAiGatewayError('ai-gateway.embed', cause),
1113
+ }),
1114
+ ).pipe(Effect.withSpan('AiGateway.embed'), Effect.annotateSpans({ modelId })),
1115
+ ).pipe(Effect.provideService(AiGatewayTag, gateway)),
1116
+ )
1117
+ },
1002
1118
  },
1003
1119
  })
1004
1120
  }
package/src/ai-gateway/index.ts CHANGED
@@ -1,2 +1,17 @@
1
- export * from './ai-gateway'
1
+ export {
2
+ AiGatewayLive,
3
+ AiGatewayTag,
4
+ DEFAULT_AI_GATEWAY_URL,
5
+ aiGatewayChatModel,
6
+ aiGatewayEmbeddingModel,
7
+ aiGatewayModel,
8
+ aiGatewayOpenRouterResponseHealingModel,
9
+ bindAiGatewayRuntime,
10
+ extractAiGatewayChatReasoningDeltaText,
11
+ extractAiGatewayChatReasoningText,
12
+ injectAiGatewayChatReasoningContent,
13
+ injectAiGatewayChatReasoningStream,
14
+ normalizeAiGatewayChatProviderOptions,
15
+ normalizeAiGatewayUrl,
16
+ } from './ai-gateway'
2
17
  export * from './cache-headers'