@lota-sdk/core 0.4.12 → 0.4.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139)
  1. package/package.json +4 -4
  2. package/src/ai/embedding-cache.ts +17 -11
  3. package/src/ai-gateway/ai-gateway.ts +164 -94
  4. package/src/ai-gateway/index.ts +4 -1
  5. package/src/config/agent-defaults.ts +2 -2
  6. package/src/config/agent-types.ts +1 -1
  7. package/src/create-runtime.ts +259 -200
  8. package/src/db/cursor-pagination.ts +2 -9
  9. package/src/db/memory-store.ts +194 -175
  10. package/src/db/memory.ts +125 -71
  11. package/src/db/schema-fingerprint.ts +5 -4
  12. package/src/db/service-normalization.ts +4 -3
  13. package/src/db/service.ts +3 -2
  14. package/src/db/startup.ts +15 -16
  15. package/src/effect/errors.ts +161 -21
  16. package/src/effect/index.ts +0 -1
  17. package/src/embeddings/provider.ts +15 -7
  18. package/src/queues/autonomous-job.queue.ts +10 -22
  19. package/src/queues/delayed-node-promotion.queue.ts +8 -14
  20. package/src/queues/document-processor.queue.ts +13 -4
  21. package/src/queues/memory-consolidation.queue.ts +26 -14
  22. package/src/queues/plan-agent-heartbeat.queue.ts +10 -9
  23. package/src/queues/plan-scheduler.queue.ts +37 -15
  24. package/src/queues/queue-factory.ts +59 -35
  25. package/src/queues/standalone-worker.ts +3 -2
  26. package/src/redis/connection.ts +10 -3
  27. package/src/redis/org-memory-lock.ts +1 -1
  28. package/src/redis/redis-lease-lock.ts +5 -5
  29. package/src/redis/stream-context.ts +1 -1
  30. package/src/runtime/chat-message.ts +64 -1
  31. package/src/runtime/chat-run-orchestration.ts +33 -20
  32. package/src/runtime/context-compaction/context-compaction-runtime.ts +14 -7
  33. package/src/runtime/context-compaction/context-compaction.ts +78 -66
  34. package/src/runtime/domain-layer.ts +13 -7
  35. package/src/runtime/execution-plan.ts +7 -3
  36. package/src/runtime/live-turn-trace.ts +6 -49
  37. package/src/runtime/memory/memory-block.ts +3 -9
  38. package/src/runtime/memory/memory-scope.ts +3 -1
  39. package/src/runtime/plugin-resolution.ts +2 -1
  40. package/src/runtime/post-turn-side-effects.ts +6 -5
  41. package/src/runtime/retrieval-adapters.ts +8 -20
  42. package/src/runtime/runtime-config.ts +3 -9
  43. package/src/runtime/runtime-extensions.ts +2 -4
  44. package/src/runtime/runtime-lifecycle.ts +56 -16
  45. package/src/runtime/runtime-services.ts +180 -102
  46. package/src/runtime/runtime-worker-registry.ts +3 -1
  47. package/src/runtime/social-chat/social-chat-agent-runner.ts +1 -1
  48. package/src/runtime/social-chat/social-chat-history.ts +21 -18
  49. package/src/runtime/social-chat/social-chat.ts +356 -223
  50. package/src/runtime/specialist-runner.ts +3 -1
  51. package/src/runtime/team-consultation/team-consultation-orchestrator.ts +3 -2
  52. package/src/runtime/thread-turn-context.ts +142 -102
  53. package/src/runtime/turn-lifecycle.ts +15 -46
  54. package/src/services/agent-activity.service.ts +1 -1
  55. package/src/services/agent-executor.service.ts +107 -77
  56. package/src/services/autonomous-job.service.ts +354 -293
  57. package/src/services/background-work.service.ts +3 -3
  58. package/src/services/context-compaction.service.ts +7 -2
  59. package/src/services/document-chunk.service.ts +50 -32
  60. package/src/services/execution-plan/execution-plan-schedule.ts +5 -3
  61. package/src/services/execution-plan/execution-plan.service.ts +162 -179
  62. package/src/services/feedback-loop.service.ts +5 -4
  63. package/src/services/graph-full-routing.ts +37 -36
  64. package/src/services/institutional-memory.service.ts +28 -30
  65. package/src/services/learned-skill.service.ts +107 -72
  66. package/src/services/memory/memory-errors.ts +4 -23
  67. package/src/services/memory/memory-org-memory.ts +10 -5
  68. package/src/services/memory/memory-rerank.ts +18 -6
  69. package/src/services/memory/memory.service.ts +170 -111
  70. package/src/services/memory/rerank.service.ts +29 -20
  71. package/src/services/organization-member.service.ts +1 -1
  72. package/src/services/organization.service.ts +69 -75
  73. package/src/services/ownership-dispatcher.service.ts +40 -39
  74. package/src/services/plan/plan-agent-heartbeat.service.ts +26 -23
  75. package/src/services/plan/plan-agent-query.service.ts +39 -31
  76. package/src/services/plan/plan-completion-side-effects.ts +13 -17
  77. package/src/services/plan/plan-coordination.service.ts +2 -1
  78. package/src/services/plan/plan-cycle.service.ts +6 -5
  79. package/src/services/plan/plan-deadline.service.ts +57 -54
  80. package/src/services/plan/plan-event-delivery.service.ts +5 -4
  81. package/src/services/plan/plan-executor-graph.ts +18 -15
  82. package/src/services/plan/plan-executor.service.ts +235 -262
  83. package/src/services/plan/plan-run.service.ts +169 -93
  84. package/src/services/plan/plan-scheduler.service.ts +192 -202
  85. package/src/services/plan/plan-template.service.ts +1 -1
  86. package/src/services/plan/plan-transaction-events.ts +1 -1
  87. package/src/services/plan/plan-workspace.service.ts +23 -14
  88. package/src/services/plugin-executor.service.ts +5 -9
  89. package/src/services/queue-job.service.ts +117 -59
  90. package/src/services/recent-activity-title.service.ts +13 -12
  91. package/src/services/recent-activity.service.ts +6 -1
  92. package/src/services/social-chat-history.service.ts +29 -25
  93. package/src/services/system-executor.service.ts +5 -9
  94. package/src/services/thread/thread-active-run.ts +2 -2
  95. package/src/services/thread/thread-listing.ts +61 -57
  96. package/src/services/thread/thread-memory-block.ts +73 -48
  97. package/src/services/thread/thread-message.service.ts +76 -65
  98. package/src/services/thread/thread-record-store.ts +8 -8
  99. package/src/services/thread/thread-title.service.ts +10 -4
  100. package/src/services/thread/thread-turn-execution.ts +43 -45
  101. package/src/services/thread/thread-turn-preparation.service.ts +257 -135
  102. package/src/services/thread/thread-turn-streaming.ts +82 -85
  103. package/src/services/thread/thread-turn.ts +8 -8
  104. package/src/services/thread/thread.service.ts +135 -100
  105. package/src/services/user.service.ts +45 -48
  106. package/src/storage/attachment-parser.ts +6 -2
  107. package/src/storage/attachment-storage.service.ts +5 -6
  108. package/src/storage/generated-document-storage.service.ts +1 -1
  109. package/src/system-agents/context-compaction.agent.ts +10 -9
  110. package/src/system-agents/delegated-agent-factory.ts +30 -6
  111. package/src/system-agents/memory-reranker.agent.ts +10 -9
  112. package/src/system-agents/memory.agent.ts +10 -9
  113. package/src/system-agents/recent-activity-title-refiner.agent.ts +13 -15
  114. package/src/system-agents/regular-chat-memory-digest.agent.ts +13 -12
  115. package/src/system-agents/skill-extractor.agent.ts +13 -12
  116. package/src/system-agents/skill-manager.agent.ts +13 -12
  117. package/src/system-agents/thread-router.agent.ts +10 -5
  118. package/src/system-agents/title-generator.agent.ts +13 -12
  119. package/src/tools/fetch-webpage.tool.ts +13 -13
  120. package/src/tools/memory-block.tool.ts +3 -1
  121. package/src/tools/plan-approval.tool.ts +4 -2
  122. package/src/tools/read-file-parts.tool.ts +10 -4
  123. package/src/tools/remember-memory.tool.ts +3 -1
  124. package/src/tools/research-topic.tool.ts +9 -5
  125. package/src/tools/search-web.tool.ts +16 -16
  126. package/src/tools/search.tool.ts +20 -5
  127. package/src/tools/team-think.tool.ts +61 -38
  128. package/src/utils/async.ts +5 -5
  129. package/src/utils/errors.ts +19 -18
  130. package/src/utils/sse-keepalive.ts +28 -25
  131. package/src/workers/bootstrap.ts +75 -11
  132. package/src/workers/memory-consolidation.worker.ts +82 -91
  133. package/src/workers/organization-learning.worker.ts +14 -4
  134. package/src/workers/regular-chat-memory-digest.runner.ts +105 -67
  135. package/src/workers/skill-extraction.runner.ts +97 -61
  136. package/src/workers/utils/repo-structure-extractor.ts +13 -8
  137. package/src/workers/utils/thread-message-query.ts +24 -24
  138. package/src/workers/worker-utils.ts +23 -4
  139. package/src/effect/helpers.ts +0 -123
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lota-sdk/core",
3
- "version": "0.4.12",
3
+ "version": "0.4.14",
4
4
  "type": "module",
5
5
  "main": "./src/index.ts",
6
6
  "types": "./src/index.ts",
@@ -31,13 +31,13 @@
31
31
  "@ai-sdk/openai": "^3.0.53",
32
32
  "@chat-adapter/slack": "^4.26.0",
33
33
  "@chat-adapter/state-ioredis": "^4.26.0",
34
- "@lota-sdk/shared": "0.4.12",
34
+ "@lota-sdk/shared": "0.4.14",
35
35
  "@mendable/firecrawl-js": "^4.18.3",
36
36
  "@surrealdb/node": "^3.0.3",
37
37
  "ai": "^6.0.168",
38
- "bullmq": "^5.74.1",
38
+ "bullmq": "^5.74.2",
39
39
  "chat": "^4.26.0",
40
- "effect": "^4.0.0-beta.50",
40
+ "effect": "^4.0.0-beta.52",
41
41
  "hono": "^4.12.14",
42
42
  "ioredis": "5.9.3",
43
43
  "mammoth": "^1.12.0",
package/src/ai/embedding-cache.ts CHANGED
@@ -1,7 +1,9 @@
1
1
  import { Context, Effect, Layer, Schema } from 'effect'
2
2
  import type IORedis from 'ioredis'
3
3
 
4
+ import { RuntimeBridgeTag } from '../ai-gateway/ai-gateway'
4
5
  import { aiLogger } from '../config/logger'
6
+ import { ERROR_TAGS } from '../effect/errors'
5
7
  import { RedisServiceTag, RuntimeConfigServiceTag } from '../effect/services'
6
8
  import { sha256Hex } from '../utils/crypto'
7
9
 
@@ -9,7 +11,7 @@ export const DEFAULT_EMBEDDING_CACHE_TTL_SECONDS = 7200
9
11
  const EMBEDDING_CACHE_KEY_PREFIX = 'emb'
10
12
  const EmbeddingCacheJsonSchema = Schema.fromJsonString(Schema.Array(Schema.Number))
11
13
 
12
- class EmbeddingCacheError extends Schema.TaggedErrorClass<EmbeddingCacheError>()('EmbeddingCacheError', {
14
+ class EmbeddingCacheError extends Schema.TaggedErrorClass<EmbeddingCacheError>()(ERROR_TAGS.EmbeddingCacheError, {
13
15
  message: Schema.String,
14
16
  operation: Schema.Literals(['get', 'set']),
15
17
  cause: Schema.optional(Schema.Defect),
@@ -29,7 +31,8 @@ export class EmbeddingCache {
29
31
 
30
32
  constructor(
31
33
  private redis: IORedis,
32
- private ttlSeconds: number = DEFAULT_EMBEDDING_CACHE_TTL_SECONDS,
34
+ private ttlSeconds: number,
35
+ private readonly runPromise: <A, E = never>(effect: Effect.Effect<A, E>) => Promise<A>,
33
36
  ) {}
34
37
 
35
38
  private buildKey(model: string, text: string): string {
@@ -37,10 +40,6 @@ export class EmbeddingCache {
37
40
  return `${EMBEDDING_CACHE_KEY_PREFIX}:${model}:${hash}`
38
41
  }
39
42
 
40
- private runEffect<A>(effect: Effect.Effect<A>): Promise<A> {
41
- return Effect.runPromise(effect)
42
- }
43
-
44
43
  private fetchFromRedisEffect(key: string): Effect.Effect<number[] | null> {
45
44
  const redis = this.redis
46
45
 
@@ -58,7 +57,7 @@ export class EmbeddingCache {
58
57
  new EmbeddingCacheError({ message: 'Embedding cache parse failed.', operation: 'get', cause }),
59
58
  })
60
59
  }).pipe(
61
- Effect.catchTag('EmbeddingCacheError', (error) => {
60
+ Effect.catchTag(ERROR_TAGS.EmbeddingCacheError, (error) => {
62
61
  aiLogger.warn`${error.message}: ${error.cause}`
63
62
  return Effect.succeed<number[] | null>(null)
64
63
  }),
@@ -71,7 +70,7 @@ export class EmbeddingCache {
71
70
  const pending = this.inflight.get(key)
72
71
  if (pending) return pending
73
72
 
74
- const promise = this.runEffect(this.fetchFromRedisEffect(key))
73
+ const promise = this.runPromise(this.fetchFromRedisEffect(key))
75
74
  this.inflight.set(key, promise)
76
75
  void promise.finally(() => this.inflight.delete(key))
77
76
 
@@ -95,7 +94,7 @@ export class EmbeddingCache {
95
94
  })
96
95
  }).pipe(
97
96
  Effect.asVoid,
98
- Effect.catchTag('EmbeddingCacheError', (error) => {
97
+ Effect.catchTag(ERROR_TAGS.EmbeddingCacheError, (error) => {
99
98
  aiLogger.warn`${error.message}: ${error.cause}`
100
99
  return Effect.void
101
100
  }),
@@ -103,7 +102,7 @@ export class EmbeddingCache {
103
102
  }
104
103
 
105
104
  set(model: string, text: string, embedding: number[]): Promise<void> {
106
- return this.runEffect(this.setEffect(model, text, embedding))
105
+ return this.runPromise(this.setEffect(model, text, embedding))
107
106
  }
108
107
  }
109
108
 
@@ -111,11 +110,18 @@ export class EmbeddingCacheTag extends Context.Service<EmbeddingCacheTag, Embedd
111
110
  '@lota-sdk/core/EmbeddingCache',
112
111
  ) {}
113
112
 
113
+ /**
114
+ * `EmbeddingCache` needs a `runPromise` to convert internal Effect chains into
115
+ * the Promise API that AI SDK `embed` / `embedMany` expect. The layer yields
116
+ * `RuntimeBridgeTag` once and captures it — callers never prop-drill
117
+ * `runPromise` through this module.
118
+ */
114
119
  export const EmbeddingCacheLive = Layer.effect(
115
120
  EmbeddingCacheTag,
116
121
  Effect.gen(function* () {
117
122
  const redis = yield* RedisServiceTag
118
123
  const config = yield* RuntimeConfigServiceTag
119
- return new EmbeddingCache(redis.getConnection(), config.memory.embeddingCacheTtlSeconds)
124
+ const bridge = yield* RuntimeBridgeTag
125
+ return new EmbeddingCache(redis.getConnection(), config.memory.embeddingCacheTtlSeconds, bridge.runPromise)
120
126
  }),
121
127
  )
package/src/ai-gateway/ai-gateway.ts CHANGED
@@ -5,7 +5,7 @@ import type { LanguageModelMiddleware } from 'ai'
5
5
  import { Cause, Clock, Context, Duration, Effect, ExecutionPlan, Fiber, Layer, Semaphore } from 'effect'
6
6
 
7
7
  import { DEFAULT_AI_GATEWAY_URL } from '../config/constants'
8
- import { AiGenerationError, ConfigurationError } from '../effect/errors'
8
+ import { ERROR_TAGS, AiGenerationError, ConfigurationError } from '../effect/errors'
9
9
  import { RuntimeConfigServiceTag } from '../effect/services'
10
10
  import { getDirectOpenRouterProvider, normalizeDirectOpenRouterModelId } from '../openrouter/direct-provider'
11
11
  import { isRecord, readString } from '../utils/string'
@@ -26,15 +26,15 @@ type AiGatewayAttemptResult<A> = { source: string; result: A }
26
26
  // eslint-disable-next-line @typescript-eslint/no-redundant-type-constituents
27
27
  type AiGatewayRunFork = <A, E>(effect: Effect.Effect<A, E, never>) => Fiber.Fiber<A, E | unknown>
28
28
 
29
- class AiGatewayGenerateAttempt extends Context.Service<
30
- AiGatewayGenerateAttempt,
29
+ class AiGatewayGenerateAttemptTag extends Context.Service<
30
+ AiGatewayGenerateAttemptTag,
31
31
  { readonly execute: Effect.Effect<AiGatewayAttemptResult<AiGatewayGenerateResult>, AiGenerationError> }
32
- >()('@lota-sdk/core/internal/AiGatewayGenerateAttempt') {}
32
+ >()('@lota-sdk/core/internal/AiGatewayGenerateAttemptTag') {}
33
33
 
34
- class AiGatewayStreamAttempt extends Context.Service<
35
- AiGatewayStreamAttempt,
34
+ class AiGatewayStreamAttemptTag extends Context.Service<
35
+ AiGatewayStreamAttemptTag,
36
36
  { readonly execute: Effect.Effect<AiGatewayAttemptResult<AiGatewayStreamResult>, AiGenerationError> }
37
- >()('@lota-sdk/core/internal/AiGatewayStreamAttempt') {}
37
+ >()('@lota-sdk/core/internal/AiGatewayStreamAttemptTag') {}
38
38
 
39
39
  const EXPECTED_GATEWAY_KEY_PREFIX = 'sk-bf-'
40
40
  const AI_GATEWAY_VIRTUAL_KEY_HEADER = 'x-bf-vk'
@@ -71,7 +71,7 @@ const RETRYABLE_NETWORK_ERROR_PATTERNS = [
71
71
  ]
72
72
 
73
73
  function isAiGenerationError(error: unknown): error is AiGenerationError {
74
- return isRecord(error) && error._tag === 'AiGenerationError'
74
+ return isRecord(error) && error._tag === ERROR_TAGS.AiGenerationError
75
75
  }
76
76
 
77
77
  function getNumericField(value: Record<string, unknown>, key: string): number | null {
@@ -244,7 +244,7 @@ function withAiGatewayResilience<A>(source: string, effect: Effect.Effect<A, AiG
244
244
  const retryEffect = Effect.gen(function* () {
245
245
  const runAttempt = (attempt: number): Effect.Effect<A, AiGenerationError> =>
246
246
  effect.pipe(
247
- Effect.catchTag('AiGenerationError', (error) =>
247
+ Effect.catchTag(ERROR_TAGS.AiGenerationError, (error) =>
248
248
  Effect.gen(function* () {
249
249
  if (!isRetryableAiGatewayError(error) || attempt >= AI_GATEWAY_MAX_RETRIES - 1) {
250
250
  return yield* error
@@ -371,7 +371,15 @@ function withAiGatewayStreamIdleTimeout(
371
371
  reader = null
372
372
  if (!streamReader) return
373
373
 
374
- yield* Effect.tryPromise(() => streamReader.cancel(timeoutError)).pipe(Effect.catch(() => Effect.void))
374
+ yield* Effect.tryPromise({
375
+ try: () => streamReader.cancel(timeoutError),
376
+ catch: (cause) =>
377
+ new AiGenerationError({
378
+ source,
379
+ message: `[${source}] Stream cancel failed.`,
380
+ ...(cause instanceof Error ? { providerData: cause.message } : {}),
381
+ }),
382
+ }).pipe(Effect.catch(() => Effect.void))
375
383
  }),
376
384
  ),
377
385
  ),
@@ -388,7 +396,15 @@ function withAiGatewayStreamIdleTimeout(
388
396
  for (;;) {
389
397
  if (closed) return
390
398
 
391
- const { done, value } = yield* Effect.tryPromise(() => streamReader.read())
399
+ const { done, value } = yield* Effect.tryPromise({
400
+ try: () => streamReader.read(),
401
+ catch: (cause) =>
402
+ new AiGenerationError({
403
+ source,
404
+ message: `[${source}] Stream read failed.`,
405
+ ...(cause instanceof Error ? { providerData: cause.message } : {}),
406
+ }),
407
+ })
392
408
  if (done) {
393
409
  stopIdleTimeout()
394
410
  yield* Effect.sync(() => closeStream(controller))
@@ -434,7 +450,15 @@ function withAiGatewayStreamIdleTimeout(
434
450
  }
435
451
 
436
452
  return Effect.runPromise(
437
- Effect.tryPromise(() => streamReader.cancel(reason)).pipe(Effect.catch(() => Effect.void)),
453
+ Effect.tryPromise({
454
+ try: () => streamReader.cancel(reason),
455
+ catch: (cause) =>
456
+ new AiGenerationError({
457
+ source,
458
+ message: `[${source}] Stream cancel failed.`,
459
+ ...(cause instanceof Error ? { providerData: cause.message } : {}),
460
+ }),
461
+ }).pipe(Effect.catch(() => Effect.void)),
438
462
  )
439
463
  },
440
464
  })
@@ -457,12 +481,22 @@ function withDefaultAiGatewayCacheHeaders(params: AiGatewayCallOptions): AiGatew
457
481
  return { ...params, headers: mergeAiGatewayHeaders(params.headers, buildAiGatewayCacheHeaders('lota-sdk')) }
458
482
  }
459
483
 
484
+ function normalizeAiGatewayUrlEffect(value: string): Effect.Effect<string, ConfigurationError> {
485
+ const trimmed = value.trim()
486
+ if (!trimmed) {
487
+ return Effect.fail(
488
+ new ConfigurationError({ message: '[ai-gateway] AI gateway URL is required.', key: 'aiGateway.url' }),
489
+ )
490
+ }
491
+ const normalized = trimmed.replace(/\/+$/, '')
492
+ return Effect.succeed(normalized.endsWith('/v1') ? normalized : `${normalized}/v1`)
493
+ }
494
+
460
495
  function normalizeAiGatewayUrl(value: string): string {
461
496
  const trimmed = value.trim()
462
497
  if (!trimmed) {
463
498
  throw new ConfigurationError({ message: '[ai-gateway] AI gateway URL is required.', key: 'aiGateway.url' })
464
499
  }
465
-
466
500
  const normalized = trimmed.replace(/\/+$/, '')
467
501
  return normalized.endsWith('/v1') ? normalized : `${normalized}/v1`
468
502
  }
@@ -473,10 +507,10 @@ export class AiGatewayTag extends Context.Service<
473
507
  { readonly semaphore: Semaphore.Semaphore; readonly provider: ReturnType<typeof createOpenAI> }
474
508
  >()('@lota-sdk/core/AiGateway') {}
475
509
 
476
- export const AiGatewayLive = Layer.effect(
477
- AiGatewayTag,
478
- Effect.gen(function* () {
479
- const config = yield* RuntimeConfigServiceTag
510
+ export function makeAiGatewayService(
511
+ config: AiGatewayRuntimeConfig,
512
+ ): Effect.Effect<Context.Service.Shape<typeof AiGatewayTag>, ConfigurationError> {
513
+ return Effect.gen(function* () {
480
514
  const semaphore = yield* Semaphore.make(config.aiGateway.maxConcurrency)
481
515
 
482
516
  const apiKey = config.aiGateway.key.trim()
@@ -486,10 +520,18 @@ export const AiGatewayLive = Layer.effect(
486
520
  key: 'aiGateway.key',
487
521
  })
488
522
  }
489
- const baseURL = normalizeAiGatewayUrl(config.aiGateway.url)
523
+ const baseURL = yield* normalizeAiGatewayUrlEffect(config.aiGateway.url)
490
524
  const provider = createOpenAI({ baseURL, apiKey, headers: { [AI_GATEWAY_VIRTUAL_KEY_HEADER]: apiKey } })
491
525
 
492
526
  return AiGatewayTag.of({ semaphore, provider })
527
+ })
528
+ }
529
+
530
+ export const AiGatewayLive = Layer.effect(
531
+ AiGatewayTag,
532
+ Effect.gen(function* () {
533
+ const config = yield* RuntimeConfigServiceTag
534
+ return yield* makeAiGatewayService(config)
493
535
  }),
494
536
  )
495
537
 
@@ -514,11 +556,26 @@ function withAiGatewayStreamConcurrency(
514
556
  const currentContext = yield* Effect.context<never>()
515
557
  yield* semaphore.take(1)
516
558
 
517
- // NOTE: manual release intentional permit outlives Effect scope for the
518
- // stream lifetime. The stream consumer drains asynchronously after this
519
- // Effect resolves; the permit is released by either the idle-timeout
520
- // finalize callback or the error path below. The `released` guard makes
521
- // the release idempotent across those paths.
559
+ // Permit-release impedance bridge. The semaphore permit MUST outlive
560
+ // the surrounding Effect because the returned ReadableStream is drained
561
+ // by an external consumer (AI SDK middleware fetch consumer) on a
562
+ // promise/callback timeline that has no Effect scope to attach to.
563
+ //
564
+ // We considered Layer.scoped + Effect.acquireRelease here. It does not
565
+ // fit: the stream's close/cancel/idle-timeout callbacks fire from
566
+ // outside any Effect, so a Scope-bound finalizer cannot observe them
567
+ // without running the close itself through `Effect.runFork(Scope.close)`
568
+ // — which is the same imperative shape as the latch below, just more
569
+ // ceremony.
570
+ //
571
+ // The `released = false` latch keeps `release()` idempotent across the
572
+ // three callback edges that can fire it:
573
+ // 1. error path — Effect.catchTag + Effect.sync(release)
574
+ // 2. interrupt path — Effect.onInterrupt + Effect.sync(release)
575
+ // 3. idle-timeout path — withAiGatewayStreamIdleTimeout's onFinalize
576
+ //
577
+ // `Effect.runForkWith(currentContext)(semaphore.release(1))` preserves
578
+ // span/logger context across the boundary so observability stays intact.
522
579
  let released = false
523
580
  const release = () => {
524
581
  if (released) return
@@ -527,7 +584,9 @@ function withAiGatewayStreamConcurrency(
527
584
  }
528
585
 
529
586
  const attempt = yield* restore(effect).pipe(
530
- Effect.catchTag('AiGenerationError', (error) => Effect.sync(release).pipe(Effect.andThen(Effect.fail(error)))),
587
+ Effect.catchTag(ERROR_TAGS.AiGenerationError, (error) =>
588
+ Effect.sync(release).pipe(Effect.andThen(Effect.fail(error))),
589
+ ),
531
590
  Effect.onInterrupt(() => Effect.sync(release)),
532
591
  )
533
592
 
@@ -736,11 +795,11 @@ function executeGenerateAttemptPlan(
736
795
  params: AiGatewayCallOptions,
737
796
  doGenerate: () => PromiseLike<AiGatewayGenerateResult>,
738
797
  ): Effect.Effect<AiGatewayAttemptResult<AiGatewayGenerateResult>, AiGenerationError> {
739
- const primary = Layer.succeed(AiGatewayGenerateAttempt, {
798
+ const primary = Layer.succeed(AiGatewayGenerateAttemptTag, {
740
799
  execute: attemptAiGatewayGenerate('ai-gateway.generate', doGenerate),
741
800
  })
742
801
  const effect = Effect.gen(function* () {
743
- const attempt = yield* AiGatewayGenerateAttempt
802
+ const attempt = yield* AiGatewayGenerateAttemptTag
744
803
  return yield* attempt.execute
745
804
  })
746
805
 
@@ -757,7 +816,7 @@ function executeGenerateAttemptPlan(
757
816
  ExecutionPlan.make(
758
817
  { provide: primary },
759
818
  {
760
- provide: Layer.succeed(AiGatewayGenerateAttempt, {
819
+ provide: Layer.succeed(AiGatewayGenerateAttemptTag, {
761
820
  execute: attemptDirectOpenRouterGenerate(config, modelId, params),
762
821
  }),
763
822
  while: (error: AiGenerationError) => shouldFallbackToDirectOpenRouter(config, modelId, error),
@@ -775,11 +834,11 @@ function executeStreamAttemptPlan(
775
834
  params: AiGatewayCallOptions,
776
835
  doStream: () => PromiseLike<AiGatewayStreamResult>,
777
836
  ): Effect.Effect<AiGatewayAttemptResult<AiGatewayStreamResult>, AiGenerationError> {
778
- const primary = Layer.succeed(AiGatewayStreamAttempt, {
837
+ const primary = Layer.succeed(AiGatewayStreamAttemptTag, {
779
838
  execute: attemptAiGatewayStream('ai-gateway.stream', doStream),
780
839
  })
781
840
  const effect = Effect.gen(function* () {
782
- const attempt = yield* AiGatewayStreamAttempt
841
+ const attempt = yield* AiGatewayStreamAttemptTag
783
842
  return yield* attempt.execute
784
843
  })
785
844
 
@@ -796,7 +855,7 @@ function executeStreamAttemptPlan(
796
855
  ExecutionPlan.make(
797
856
  { provide: primary },
798
857
  {
799
- provide: Layer.succeed(AiGatewayStreamAttempt, {
858
+ provide: Layer.succeed(AiGatewayStreamAttemptTag, {
800
859
  execute: attemptDirectOpenRouterStream(config, modelId, params),
801
860
  }),
802
861
  while: (error: AiGenerationError) => shouldFallbackToDirectOpenRouter(config, modelId, error),
@@ -911,48 +970,18 @@ function resolveProviderModel(
911
970
  return providerId === OPENAI_CHAT_PROVIDER_ID ? provider.chat(modelId) : provider(modelId)
912
971
  }
913
972
 
914
- // Module-level Promise slot that `createLotaRuntime` populates during boot.
915
- // This is a legitimate per-process singleton (mirrors the worker bootstrap
916
- // pattern in `workers/bootstrap.ts`): the AI gateway middleware is dispatched
917
- // by AI SDK callers that live outside Effect context, so the middleware needs
918
- // a way to run gateway Effects without capturing a `ManagedRuntime` through
919
- // every `aiGatewayModel(modelId)` call site.
920
- //
921
- // Only `createLotaRuntime` writes to the slot; resetting on disconnect is a
922
- // Phase 3b concern — for now it stays alive for the process lifetime.
923
- let aiGatewayRuntimeReady: Promise<{
924
- gateway: Context.Service.Shape<typeof AiGatewayTag>
925
- runtimeConfig: Context.Service.Shape<typeof RuntimeConfigServiceTag>
926
- runPromise: <A, E>(effect: Effect.Effect<A, E, never>) => Promise<A>
927
- runFork: AiGatewayRunFork
928
- }> | null = null
929
-
930
- export function bindAiGatewayRuntime(params: {
931
- gateway: Context.Service.Shape<typeof AiGatewayTag>
932
- runtimeConfig: Context.Service.Shape<typeof RuntimeConfigServiceTag>
933
- runPromise: <A, E>(effect: Effect.Effect<A, E, never>) => Promise<A>
934
- runFork: AiGatewayRunFork
935
- }): void {
936
- aiGatewayRuntimeReady = Promise.resolve(params)
937
- }
938
-
939
- export function clearAiGatewayRuntime(): void {
940
- aiGatewayRuntimeReady = null
941
- }
942
-
943
- async function getAiGatewayRuntime(): Promise<{
973
+ export type AiGatewayDeps = {
944
974
  gateway: Context.Service.Shape<typeof AiGatewayTag>
945
975
  runtimeConfig: Context.Service.Shape<typeof RuntimeConfigServiceTag>
946
976
  runPromise: <A, E>(effect: Effect.Effect<A, E, never>) => Promise<A>
947
977
  runFork: AiGatewayRunFork
948
- }> {
949
- if (!aiGatewayRuntimeReady) {
950
- throw new Error('AI gateway runtime has not been initialized. Call createLotaRuntime() first.')
951
- }
952
- return aiGatewayRuntimeReady
953
978
  }
954
979
 
955
- function createAiGatewayLanguageModelMiddleware(modelId: string, providerId: string): LanguageModelMiddleware {
980
+ function createAiGatewayLanguageModelMiddleware(
981
+ modelId: string,
982
+ providerId: string,
983
+ deps: AiGatewayDeps,
984
+ ): LanguageModelMiddleware {
956
985
  return {
957
986
  specificationVersion: 'v3',
958
987
  transformParams: ({ params, type }) =>
@@ -961,12 +990,11 @@ function createAiGatewayLanguageModelMiddleware(modelId: string, providerId: str
961
990
  addAiGatewayReasoningRawChunks(normalizeAiGatewayChatProviderOptions(params, modelId), type),
962
991
  ),
963
992
  ),
964
- wrapGenerate: async ({ params }) => {
965
- const { gateway, runtimeConfig, runPromise } = await getAiGatewayRuntime()
966
- const model = resolveProviderModel(gateway.provider, modelId, providerId)
967
- return runPromise(
993
+ wrapGenerate: ({ params }) => {
994
+ const model = resolveProviderModel(deps.gateway.provider, modelId, providerId)
995
+ return deps.runPromise(
968
996
  withAiGatewayConcurrency(
969
- executeGenerateAttemptPlan(runtimeConfig, modelId, params, () => model.doGenerate(params)).pipe(
997
+ executeGenerateAttemptPlan(deps.runtimeConfig, modelId, params, () => model.doGenerate(params)).pipe(
970
998
  Effect.map(({ result }) => ({
971
999
  ...result,
972
1000
  content: injectAiGatewayChatReasoningContent(
@@ -975,15 +1003,14 @@ function createAiGatewayLanguageModelMiddleware(modelId: string, providerId: str
975
1003
  ),
976
1004
  })),
977
1005
  ),
978
- ).pipe(Effect.provideService(AiGatewayTag, gateway)),
1006
+ ).pipe(Effect.provideService(AiGatewayTag, deps.gateway)),
979
1007
  )
980
1008
  },
981
- wrapStream: async ({ params }) => {
982
- const { gateway, runtimeConfig, runPromise, runFork } = await getAiGatewayRuntime()
983
- const model = resolveProviderModel(gateway.provider, modelId, providerId)
984
- return runPromise(
1009
+ wrapStream: ({ params }) => {
1010
+ const model = resolveProviderModel(deps.gateway.provider, modelId, providerId)
1011
+ return deps.runPromise(
985
1012
  withAiGatewayStreamConcurrency(
986
- executeStreamAttemptPlan(runtimeConfig, modelId, params, () => model.doStream(params)).pipe(
1013
+ executeStreamAttemptPlan(deps.runtimeConfig, modelId, params, () => model.doStream(params)).pipe(
987
1014
  Effect.map((attempt) => ({
988
1015
  ...attempt,
989
1016
  result: isReasoningEnabled(params)
@@ -991,10 +1018,11 @@ function createAiGatewayLanguageModelMiddleware(modelId: string, providerId: str
991
1018
  : attempt.result,
992
1019
  })),
993
1020
  ),
994
- runFork,
995
- )
996
- .pipe(Effect.map(({ result }) => result))
997
- .pipe(Effect.provideService(AiGatewayTag, gateway)),
1021
+ deps.runFork,
1022
+ ).pipe(
1023
+ Effect.map(({ result }) => result),
1024
+ Effect.provideService(AiGatewayTag, deps.gateway),
1025
+ ),
998
1026
  )
999
1027
  },
1000
1028
  }
@@ -1069,41 +1097,40 @@ function createAiGatewayEmbeddingModelPlaceholder(modelId: string): AiGatewayEmb
1069
1097
  }
1070
1098
  }
1071
1099
 
1072
- export function aiGatewayModel(modelId: string) {
1100
+ export function aiGatewayModel(modelId: string, deps: AiGatewayDeps) {
1073
1101
  if (isOpenRouterModel(modelId)) {
1074
- return aiGatewayChatModel(modelId)
1102
+ return aiGatewayChatModel(modelId, deps)
1075
1103
  }
1076
1104
 
1077
1105
  return withAiGatewayDevTools(
1078
1106
  wrapLanguageModel({
1079
1107
  model: createAiGatewayLanguageModelPlaceholder(modelId, OPENAI_RESPONSES_PROVIDER_ID),
1080
- middleware: createAiGatewayLanguageModelMiddleware(modelId, OPENAI_RESPONSES_PROVIDER_ID),
1108
+ middleware: createAiGatewayLanguageModelMiddleware(modelId, OPENAI_RESPONSES_PROVIDER_ID, deps),
1081
1109
  }),
1082
1110
  )
1083
1111
  }
1084
1112
 
1085
- export function aiGatewayOpenRouterResponseHealingModel(modelId: string) {
1086
- return aiGatewayChatModel(modelId)
1113
+ export function aiGatewayOpenRouterResponseHealingModel(modelId: string, deps: AiGatewayDeps) {
1114
+ return aiGatewayChatModel(modelId, deps)
1087
1115
  }
1088
1116
 
1089
- export function aiGatewayChatModel(modelId: string) {
1117
+ export function aiGatewayChatModel(modelId: string, deps: AiGatewayDeps) {
1090
1118
  return withAiGatewayDevTools(
1091
1119
  wrapLanguageModel({
1092
1120
  model: createAiGatewayLanguageModelPlaceholder(modelId, OPENAI_CHAT_PROVIDER_ID),
1093
- middleware: createAiGatewayLanguageModelMiddleware(modelId, OPENAI_CHAT_PROVIDER_ID),
1121
+ middleware: createAiGatewayLanguageModelMiddleware(modelId, OPENAI_CHAT_PROVIDER_ID, deps),
1094
1122
  }),
1095
1123
  )
1096
1124
  }
1097
1125
 
1098
- export function aiGatewayEmbeddingModel(modelId: string) {
1126
+ export function aiGatewayEmbeddingModel(modelId: string, deps: AiGatewayDeps) {
1099
1127
  return wrapEmbeddingModel({
1100
1128
  model: createAiGatewayEmbeddingModelPlaceholder(modelId),
1101
1129
  middleware: {
1102
1130
  specificationVersion: 'v3',
1103
- wrapEmbed: async ({ params }) => {
1104
- const { gateway, runPromise } = await getAiGatewayRuntime()
1105
- const embeddingModel = gateway.provider.embeddingModel(modelId)
1106
- return runPromise(
1131
+ wrapEmbed: ({ params }) => {
1132
+ const embeddingModel = deps.gateway.provider.embeddingModel(modelId)
1133
+ return deps.runPromise(
1107
1134
  withAiGatewayConcurrency(
1108
1135
  withAiGatewayResilience(
1109
1136
  'ai-gateway.embed',
@@ -1112,11 +1139,54 @@ export function aiGatewayEmbeddingModel(modelId: string) {
1112
1139
  catch: (cause) => classifyAiGatewayError('ai-gateway.embed', cause),
1113
1140
  }),
1114
1141
  ).pipe(Effect.withSpan('AiGateway.embed'), Effect.annotateSpans({ modelId })),
1115
- ).pipe(Effect.provideService(AiGatewayTag, gateway)),
1142
+ ).pipe(Effect.provideService(AiGatewayTag, deps.gateway)),
1116
1143
  )
1117
1144
  },
1118
1145
  },
1119
1146
  })
1120
1147
  }
1121
1148
 
1149
+ /**
1150
+ * Pre-bound AI gateway model surface. Produced once per `LotaRuntime` in
1151
+ * `createLotaRuntime` and provided via `AiGatewayModelsTag` so services and
1152
+ * system-agent factories consume it through standard DI rather than an
1153
+ * ambient module slot.
1154
+ */
1155
+ export type AiGatewayModels = {
1156
+ model(modelId: string): ReturnType<typeof aiGatewayModel>
1157
+ chatModel(modelId: string): ReturnType<typeof aiGatewayChatModel>
1158
+ embeddingModel(modelId: string): ReturnType<typeof aiGatewayEmbeddingModel>
1159
+ openRouterResponseHealingModel(modelId: string): ReturnType<typeof aiGatewayOpenRouterResponseHealingModel>
1160
+ }
1161
+
1162
+ export function createAiGatewayModels(deps: AiGatewayDeps): AiGatewayModels {
1163
+ return {
1164
+ model: (modelId: string) => aiGatewayModel(modelId, deps),
1165
+ chatModel: (modelId: string) => aiGatewayChatModel(modelId, deps),
1166
+ embeddingModel: (modelId: string) => aiGatewayEmbeddingModel(modelId, deps),
1167
+ openRouterResponseHealingModel: (modelId: string) => aiGatewayOpenRouterResponseHealingModel(modelId, deps),
1168
+ }
1169
+ }
1170
+
1171
+ export class AiGatewayModelsTag extends Context.Service<AiGatewayModelsTag, AiGatewayModels>()(
1172
+ '@lota-sdk/core/AiGatewayModels',
1173
+ ) {}
1174
+
1175
+ /**
1176
+ * Runtime bridge surface. Services and tool factories that need to convert
1177
+ * Effects to Promises at a host boundary (AI SDK tool callbacks, BullMQ worker
1178
+ * processors, etc.) yield this tag to obtain the `runPromise` and `runFork`
1179
+ * bound to the outer `ManagedRuntime`. Provided once in `createLotaRuntime`
1180
+ * via a function-local holder so services DI their runtime bridge rather than
1181
+ * capturing context with `Effect.runPromiseWith`.
1182
+ */
1183
+ export type RuntimeBridge = {
1184
+ runPromise: <A, E>(effect: Effect.Effect<A, E, never>, options?: { signal?: AbortSignal }) => Promise<A>
1185
+ runFork: AiGatewayRunFork
1186
+ }
1187
+
1188
+ export class RuntimeBridgeTag extends Context.Service<RuntimeBridgeTag, RuntimeBridge>()(
1189
+ '@lota-sdk/core/RuntimeBridge',
1190
+ ) {}
1191
+
1122
1192
  export { DEFAULT_AI_GATEWAY_URL, normalizeAiGatewayUrl }
@@ -1,12 +1,14 @@
1
1
  export {
2
2
  AiGatewayLive,
3
+ AiGatewayModelsTag,
3
4
  AiGatewayTag,
4
5
  DEFAULT_AI_GATEWAY_URL,
6
+ RuntimeBridgeTag,
5
7
  aiGatewayChatModel,
6
8
  aiGatewayEmbeddingModel,
7
9
  aiGatewayModel,
8
10
  aiGatewayOpenRouterResponseHealingModel,
9
- bindAiGatewayRuntime,
11
+ createAiGatewayModels,
10
12
  extractAiGatewayChatReasoningDeltaText,
11
13
  extractAiGatewayChatReasoningText,
12
14
  injectAiGatewayChatReasoningContent,
@@ -14,4 +16,5 @@ export {
14
16
  normalizeAiGatewayChatProviderOptions,
15
17
  normalizeAiGatewayUrl,
16
18
  } from './ai-gateway'
19
+ export type { AiGatewayDeps, AiGatewayModels, RuntimeBridge } from './ai-gateway'
17
20
  export * from './cache-headers'
@@ -8,8 +8,8 @@ import type {
8
8
  AgentToolBuilder,
9
9
  } from './agent-types'
10
10
 
11
- function defaultBuildAgentTools(): ToolSet {
12
- return {}
11
+ function defaultBuildAgentTools(): Promise<ToolSet> {
12
+ return Promise.resolve({})
13
13
  }
14
14
 
15
15
  function defaultGetAgentRuntimeConfig(params: AgentRuntimeConfigParams) {
@@ -55,7 +55,7 @@ export type AgentCreator = <TTools extends ToolSet>(
55
55
  export type AgentFactory = Partial<Record<string, AgentCreator>>
56
56
 
57
57
  /** Builds the tool set for a given agent. */
58
- export type AgentToolBuilder = (params: AgentToolBuilderParams) => ToolSet | Promise<ToolSet>
58
+ export type AgentToolBuilder = (params: AgentToolBuilderParams) => Promise<ToolSet>
59
59
 
60
60
  /** Returns runtime configuration for a given agent. */
61
61
  export type AgentRuntimeConfigProvider = (params: AgentRuntimeConfigParams) => AgentRuntimeConfig<string>