@lota-sdk/core 0.4.19 → 0.4.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lota-sdk/core",
3
- "version": "0.4.19",
3
+ "version": "0.4.20",
4
4
  "type": "module",
5
5
  "main": "./src/index.ts",
6
6
  "types": "./src/index.ts",
@@ -31,7 +31,7 @@
31
31
  "@ai-sdk/openai": "^3.0.53",
32
32
  "@chat-adapter/slack": "^4.26.0",
33
33
  "@chat-adapter/state-ioredis": "^4.26.0",
34
- "@lota-sdk/shared": "0.4.19",
34
+ "@lota-sdk/shared": "0.4.20",
35
35
  "@mendable/firecrawl-js": "^4.18.3",
36
36
  "@surrealdb/node": "^3.0.3",
37
37
  "ai": "^6.0.168",
@@ -2,12 +2,11 @@ import { devToolsMiddleware } from '@ai-sdk/devtools'
2
2
  import { createOpenAI } from '@ai-sdk/openai'
3
3
  import { wrapEmbeddingModel, wrapLanguageModel } from 'ai'
4
4
  import type { LanguageModelMiddleware } from 'ai'
5
- import { Cause, Clock, Context, Duration, Effect, ExecutionPlan, Fiber, Layer, Semaphore } from 'effect'
5
+ import { Cause, Clock, Context, Duration, Effect, Fiber, Layer, Semaphore } from 'effect'
6
6
 
7
7
  import { DEFAULT_AI_GATEWAY_URL } from '../config/constants'
8
8
  import { ERROR_TAGS, AiGenerationError, ConfigurationError } from '../effect/errors'
9
9
  import { RuntimeConfigServiceTag } from '../effect/services'
10
- import { getDirectOpenRouterProvider, normalizeDirectOpenRouterModelId } from '../openrouter/direct-provider'
11
10
  import { isRecord, readString } from '../utils/string'
12
11
  import { buildAiGatewayCacheHeaders } from './cache-headers'
13
12
 
@@ -37,7 +36,7 @@ class AiGatewayStreamAttemptTag extends Context.Service<
37
36
 
38
37
  const EXPECTED_GATEWAY_KEY_PREFIX = 'sk-bf-'
39
38
  const AI_GATEWAY_VIRTUAL_KEY_HEADER = 'x-bf-vk'
40
- const AI_GATEWAY_TIMEOUT_MS = 30_000
39
+ const AI_GATEWAY_TIMEOUT_MS = 180_000
41
40
  const AI_GATEWAY_STREAM_IDLE_TIMEOUT_MS = 30_000
42
41
  const AI_GATEWAY_MAX_RETRIES = 4
43
42
  const AI_GATEWAY_MAX_RETRY_DELAY_MS = 15_000
@@ -726,22 +725,6 @@ function isOpenRouterModel(modelId: string): boolean {
726
725
  return modelId.trim().toLowerCase().startsWith('openrouter/')
727
726
  }
728
727
 
729
- function hasDirectOpenRouterFallback(config: AiGatewayRuntimeConfig, modelId: string): boolean {
730
- return isOpenRouterModel(modelId) && Boolean(config.aiGateway.openRouterApiKey?.trim())
731
- }
732
-
733
- function getDirectOpenRouterChatModel(config: AiGatewayRuntimeConfig, modelId: string): AiGatewayLanguageModel {
734
- return getDirectOpenRouterProvider(config.aiGateway.openRouterApiKey).chat(normalizeDirectOpenRouterModelId(modelId))
735
- }
736
-
737
- function shouldFallbackToDirectOpenRouter(
738
- config: AiGatewayRuntimeConfig,
739
- modelId: string,
740
- error: AiGenerationError,
741
- ): boolean {
742
- return hasDirectOpenRouterFallback(config, modelId) && isRetryableAiGatewayError(error)
743
- }
744
-
745
728
  function attemptAiGatewayGenerate(
746
729
  source: string,
747
730
  evaluate: () => PromiseLike<AiGatewayGenerateResult>,
@@ -770,28 +753,8 @@ function attemptAiGatewayStream(
770
753
  )
771
754
  }
772
755
 
773
- function attemptDirectOpenRouterGenerate(
774
- config: AiGatewayRuntimeConfig,
775
- modelId: string,
776
- params: AiGatewayCallOptions,
777
- ): Effect.Effect<AiGatewayAttemptResult<AiGatewayGenerateResult>, AiGenerationError> {
778
- const model = getDirectOpenRouterChatModel(config, modelId)
779
- return attemptAiGatewayGenerate('openrouter.generate', () => model.doGenerate(params))
780
- }
781
-
782
- function attemptDirectOpenRouterStream(
783
- config: AiGatewayRuntimeConfig,
784
- modelId: string,
785
- params: AiGatewayCallOptions,
786
- ): Effect.Effect<AiGatewayAttemptResult<AiGatewayStreamResult>, AiGenerationError> {
787
- const model = getDirectOpenRouterChatModel(config, modelId)
788
- return attemptAiGatewayStream('openrouter.stream', () => model.doStream(params))
789
- }
790
-
791
756
  function executeGenerateAttemptPlan(
792
- config: AiGatewayRuntimeConfig,
793
757
  modelId: string,
794
- params: AiGatewayCallOptions,
795
758
  doGenerate: () => PromiseLike<AiGatewayGenerateResult>,
796
759
  ): Effect.Effect<AiGatewayAttemptResult<AiGatewayGenerateResult>, AiGenerationError> {
797
760
  const primary = Layer.succeed(AiGatewayGenerateAttemptTag, {
@@ -802,35 +765,15 @@ function executeGenerateAttemptPlan(
802
765
  return yield* attempt.execute
803
766
  })
804
767
 
805
- if (!hasDirectOpenRouterFallback(config, modelId)) {
806
- return effect.pipe(
807
- Effect.provide(primary),
808
- Effect.withSpan('AiGateway.executeGeneratePlan'),
809
- Effect.annotateSpans({ modelId, directOpenRouterFallbackEnabled: false }),
810
- )
811
- }
812
-
813
768
  return effect.pipe(
814
- Effect.withExecutionPlan(
815
- ExecutionPlan.make(
816
- { provide: primary },
817
- {
818
- provide: Layer.succeed(AiGatewayGenerateAttemptTag, {
819
- execute: attemptDirectOpenRouterGenerate(config, modelId, params),
820
- }),
821
- while: (error: AiGenerationError) => shouldFallbackToDirectOpenRouter(config, modelId, error),
822
- },
823
- ),
824
- ),
769
+ Effect.provide(primary),
825
770
  Effect.withSpan('AiGateway.executeGeneratePlan'),
826
- Effect.annotateSpans({ modelId, directOpenRouterFallbackEnabled: true }),
771
+ Effect.annotateSpans({ modelId }),
827
772
  )
828
773
  }
829
774
 
830
775
  function executeStreamAttemptPlan(
831
- config: AiGatewayRuntimeConfig,
832
776
  modelId: string,
833
- params: AiGatewayCallOptions,
834
777
  doStream: () => PromiseLike<AiGatewayStreamResult>,
835
778
  ): Effect.Effect<AiGatewayAttemptResult<AiGatewayStreamResult>, AiGenerationError> {
836
779
  const primary = Layer.succeed(AiGatewayStreamAttemptTag, {
@@ -841,28 +784,10 @@ function executeStreamAttemptPlan(
841
784
  return yield* attempt.execute
842
785
  })
843
786
 
844
- if (!hasDirectOpenRouterFallback(config, modelId)) {
845
- return effect.pipe(
846
- Effect.provide(primary),
847
- Effect.withSpan('AiGateway.executeStreamPlan'),
848
- Effect.annotateSpans({ modelId, directOpenRouterFallbackEnabled: false }),
849
- )
850
- }
851
-
852
787
  return effect.pipe(
853
- Effect.withExecutionPlan(
854
- ExecutionPlan.make(
855
- { provide: primary },
856
- {
857
- provide: Layer.succeed(AiGatewayStreamAttemptTag, {
858
- execute: attemptDirectOpenRouterStream(config, modelId, params),
859
- }),
860
- while: (error: AiGenerationError) => shouldFallbackToDirectOpenRouter(config, modelId, error),
861
- },
862
- ),
863
- ),
788
+ Effect.provide(primary),
864
789
  Effect.withSpan('AiGateway.executeStreamPlan'),
865
- Effect.annotateSpans({ modelId, directOpenRouterFallbackEnabled: true }),
790
+ Effect.annotateSpans({ modelId }),
866
791
  )
867
792
  }
868
793
 
@@ -1010,7 +935,7 @@ function createAiGatewayLanguageModelMiddleware(
1010
935
  const model = resolveProviderModel(resolvedDeps.gateway.provider, modelId, providerId)
1011
936
  return resolvedDeps.runPromise(
1012
937
  withAiGatewayConcurrency(
1013
- executeGenerateAttemptPlan(resolvedDeps.runtimeConfig, modelId, params, () => model.doGenerate(params)).pipe(
938
+ executeGenerateAttemptPlan(modelId, () => model.doGenerate(params)).pipe(
1014
939
  Effect.map(({ result }) => ({
1015
940
  ...result,
1016
941
  content: injectAiGatewayChatReasoningContent(
@@ -1027,7 +952,7 @@ function createAiGatewayLanguageModelMiddleware(
1027
952
  const model = resolveProviderModel(resolvedDeps.gateway.provider, modelId, providerId)
1028
953
  return resolvedDeps.runPromise(
1029
954
  withAiGatewayStreamConcurrency(
1030
- executeStreamAttemptPlan(resolvedDeps.runtimeConfig, modelId, params, () => model.doStream(params)).pipe(
955
+ executeStreamAttemptPlan(modelId, () => model.doStream(params)).pipe(
1031
956
  Effect.map((attempt) => ({
1032
957
  ...attempt,
1033
958
  result: isReasoningEnabled(params)
@@ -1264,20 +1264,12 @@ export class SurrealMemoryStore {
1264
1264
 
1265
1265
  export function createMemoryStore(
1266
1266
  db: SurrealDBService,
1267
- options: {
1268
- embeddingModel: string
1269
- openRouterApiKey?: string
1270
- runPromise: <A, E>(effect: Effect.Effect<A, E>) => Promise<A>
1271
- },
1267
+ options: { embeddingModel: string; runPromise: <A, E>(effect: Effect.Effect<A, E>) => Promise<A> },
1272
1268
  background: BackgroundWorker,
1273
1269
  ): SurrealMemoryStore {
1274
1270
  return new SurrealMemoryStore(
1275
1271
  db,
1276
- new ProviderEmbeddings({
1277
- modelId: options.embeddingModel,
1278
- openRouterApiKey: options.openRouterApiKey,
1279
- runPromise: options.runPromise,
1280
- }),
1272
+ new ProviderEmbeddings({ modelId: options.embeddingModel, runPromise: options.runPromise }),
1281
1273
  background,
1282
1274
  )
1283
1275
  }
@@ -151,13 +151,13 @@ const MemoryDeltaRelationSchema = z
151
151
  targetMemoryId: z
152
152
  .string()
153
153
  .min(1)
154
- .optional()
154
+ .nullable()
155
155
  .describe('Target existing memory id when relation points to existing memory.'),
156
156
  targetFactIndex: z
157
157
  .number()
158
158
  .int()
159
159
  .min(0)
160
- .optional()
160
+ .nullable()
161
161
  .describe('Target newFacts index when relation points to another newly provided fact.'),
162
162
  })
163
163
  .strict()
@@ -179,17 +179,12 @@ const MemoryDeltaItemSchema = z
179
179
  classification: MemoryDeltaClassificationSchema.describe(
180
180
  'How this fact relates to existing memories: new, supersedes, contradicts, enriches, duplicate.',
181
181
  ),
182
- targetMemoryIds: z
183
- .array(z.string().min(1))
184
- .default([])
185
- .describe('Existing memory IDs that are directly related to this fact.'),
182
+ targetMemoryIds: z.array(z.string().min(1)).describe('Existing memory IDs that are directly related to this fact.'),
186
183
  invalidateTargetIds: z
187
184
  .array(z.string().min(1))
188
- .default([])
189
185
  .describe('Subset of targetMemoryIds that should be deleted as obsolete/invalidated.'),
190
186
  relations: z
191
187
  .array(MemoryDeltaRelationSchema)
192
- .default([])
193
188
  .describe('Explicit semantic relations from this fact to existing memories and/or other new facts by index.'),
194
189
  rationale: z.string().min(1).describe('Short rationale for the classification decision.'),
195
190
  })
package/src/db/memory.ts CHANGED
@@ -79,11 +79,7 @@ export class Memory {
79
79
  ) {
80
80
  this.store = createMemoryStore(
81
81
  deps.db,
82
- {
83
- embeddingModel: deps.runtimeConfig.aiGateway.embeddingModel,
84
- openRouterApiKey: deps.runtimeConfig.aiGateway.openRouterApiKey,
85
- runPromise: deps.runPromise,
86
- },
82
+ { embeddingModel: deps.runtimeConfig.aiGateway.embeddingModel, runPromise: deps.runPromise },
87
83
  deps.background,
88
84
  )
89
85
  this.runtimeConfig = deps.runtimeConfig
@@ -1,8 +1,8 @@
1
1
  import { embed, embedMany } from 'ai'
2
2
  import { Schema, Effect } from 'effect'
3
3
 
4
+ import { aiGatewayEmbeddingModel } from '../ai-gateway/ai-gateway'
4
5
  import { ERROR_TAGS, ConfigurationError } from '../effect/errors'
5
- import { getDirectOpenRouterProvider, normalizeDirectOpenRouterModelId } from '../openrouter/direct-provider'
6
6
 
7
7
  const SUPPORTED_EMBEDDING_PREFIXES = ['openai/', 'openrouter/'] as const
8
8
 
@@ -16,7 +16,6 @@ type ProviderEmbeddingsOptions = {
16
16
  embedManyFn?: typeof embedMany
17
17
  getCache?: () => SharedEmbeddingCache | null
18
18
  modelId: string
19
- openRouterApiKey?: string
20
19
  /**
21
20
  * `runPromise` is required: callers must yield `RuntimeBridgeTag` in their
22
21
  * `Layer.effect` (or accept a `RuntimeBridge` dep) and pass its `runPromise`
@@ -25,7 +24,7 @@ type ProviderEmbeddingsOptions = {
25
24
  runPromise: <A, E>(effect: Effect.Effect<A, E>) => Promise<A>
26
25
  }
27
26
 
28
- function resolveEmbeddingModel(modelId: string, openRouterApiKey?: string) {
27
+ function resolveEmbeddingModel(modelId: string) {
29
28
  const normalized = modelId.trim()
30
29
  if (!normalized) {
31
30
  throw new ConfigurationError({ message: '[embeddings-provider] Model id is required.', key: 'embeddingModelId' })
@@ -38,7 +37,7 @@ function resolveEmbeddingModel(modelId: string, openRouterApiKey?: string) {
38
37
  })
39
38
  }
40
39
 
41
- return getDirectOpenRouterProvider(openRouterApiKey).embeddingModel(normalizeDirectOpenRouterModelId(normalized))
40
+ return aiGatewayEmbeddingModel(normalized)
42
41
  }
43
42
 
44
43
  function normalizeEmbedding(embedding: readonly number[]): number[] {
@@ -69,7 +68,6 @@ export class ProviderEmbeddings {
69
68
  /** In-flight dedup: concurrent embedQuery calls for the same text share one API round-trip. */
70
69
  private readonly inflightEmbeddings = new Map<string, Promise<number[]>>()
71
70
 
72
- private readonly openRouterApiKey: string | undefined
73
71
  private readonly runPromise: <A, E>(effect: Effect.Effect<A, E>) => Promise<A>
74
72
 
75
73
  constructor(options: ProviderEmbeddingsOptions) {
@@ -77,7 +75,6 @@ export class ProviderEmbeddings {
77
75
  this.embedManyFn = options.embedManyFn ?? embedMany
78
76
  this.getCache = options.getCache ?? (() => null)
79
77
  this.resolvedModelId = options.modelId
80
- this.openRouterApiKey = options.openRouterApiKey
81
78
  this.runPromise = options.runPromise
82
79
  }
83
80
 
@@ -87,7 +84,7 @@ export class ProviderEmbeddings {
87
84
 
88
85
  private getModel() {
89
86
  if (!this._model) {
90
- this._model = resolveEmbeddingModel(this.getModelId(), this.openRouterApiKey)
87
+ this._model = resolveEmbeddingModel(this.getModelId())
91
88
  }
92
89
  return this._model
93
90
  }
@@ -72,8 +72,8 @@ interface MemoryDeltaOutputLike {
72
72
 
73
73
  interface MemoryDeltaRelationLike<TRelation extends string = string> {
74
74
  relation: TRelation
75
- targetMemoryId?: string
76
- targetFactIndex?: number
75
+ targetMemoryId?: string | null
76
+ targetFactIndex?: number | null
77
77
  }
78
78
 
79
79
  interface MemoryActionAdd {
@@ -14,9 +14,10 @@ Decide one classification per fact:
14
14
  Rules:
15
15
  - Return exactly one delta item per new fact, preserving order.
16
16
  - fact must match the corresponding newFacts entry verbatim.
17
+ - Always include targetMemoryIds, invalidateTargetIds, and relations. Use [] when there are no values.
17
18
  - targetMemoryIds and invalidateTargetIds may only contain ids from existingMemories.
18
19
  - invalidateTargetIds must be a subset of targetMemoryIds.
19
- - In each relation item, set exactly one target: targetMemoryId or targetFactIndex.
20
+ - In each relation item, return both target fields. Set exactly one target to a real value and set the other target field to null.
20
21
  - targetFactIndex must be valid, must not point to the same fact index, and is only for relations to other new facts.
21
22
  - For supersedes/contradicts, include target memories when evidence exists.
22
23
  - If uncertain, prefer conservative output: classify as new with no targets.
@@ -215,7 +215,6 @@ export const LotaRuntimeConfigSchema = z.object({
215
215
  url: nonEmptyStringSchema,
216
216
  key: nonEmptyStringSchema,
217
217
  embeddingModel: nonEmptyStringSchema.default('openai/text-embedding-3-small'),
218
- openRouterApiKey: nonEmptyStringSchema.optional(),
219
218
  maxConcurrency: z.coerce.number().int().positive().default(8),
220
219
  }),
221
220
  s3: z.object({
@@ -293,7 +292,6 @@ export const LOTA_RUNTIME_ENV_KEYS = Object.freeze([
293
292
  'AI_GATEWAY_URL',
294
293
  'AI_GATEWAY_KEY',
295
294
  'AI_EMBEDDING_MODEL',
296
- 'OPENROUTER_API_KEY',
297
295
  'AI_GATEWAY_MAX_CONCURRENCY',
298
296
  'S3_ENDPOINT',
299
297
  'S3_BUCKET',
@@ -330,7 +328,6 @@ export const lotaRuntimeEnvConfig = Config.all({
330
328
  aiGatewayUrl: Config.string('AI_GATEWAY_URL').pipe(Config.withDefault(DEFAULT_AI_GATEWAY_URL)),
331
329
  aiGatewayKey: Config.redacted('AI_GATEWAY_KEY'),
332
330
  aiEmbeddingModel: Config.string('AI_EMBEDDING_MODEL').pipe(Config.withDefault('openai/text-embedding-3-small')),
333
- openRouterApiKey: Config.redacted('OPENROUTER_API_KEY').pipe(Config.option),
334
331
  aiGatewayMaxConcurrency: Config.number('AI_GATEWAY_MAX_CONCURRENCY').pipe(Config.withDefault(8)),
335
332
  s3Endpoint: Config.string('S3_ENDPOINT'),
336
333
  s3Bucket: Config.string('S3_BUCKET'),
@@ -379,9 +376,6 @@ export function loadLotaRuntimeConfigFromEnv(
379
376
  key: Redacted.value(env.aiGatewayKey),
380
377
  embeddingModel: env.aiEmbeddingModel,
381
378
  maxConcurrency: env.aiGatewayMaxConcurrency,
382
- ...(Option.isSome(env.openRouterApiKey)
383
- ? { openRouterApiKey: Redacted.value(env.openRouterApiKey.value) }
384
- : {}),
385
379
  },
386
380
  s3: {
387
381
  endpoint: env.s3Endpoint,
@@ -17,9 +17,8 @@ type DocumentChunkEmbeddings = {
17
17
  function createDocumentChunkEmbeddings(
18
18
  embeddingModel: string,
19
19
  runPromise: RuntimeBridge['runPromise'],
20
- openRouterApiKey?: string,
21
20
  ): DocumentChunkEmbeddings {
22
- const embeddings = new ProviderEmbeddings({ modelId: embeddingModel, openRouterApiKey, runPromise })
21
+ const embeddings = new ProviderEmbeddings({ modelId: embeddingModel, runPromise })
23
22
 
24
23
  return {
25
24
  embedDocuments: (documents) => embeddings.embedDocuments(documents),
@@ -213,11 +212,7 @@ export const DocumentChunkServiceLive = Layer.effect(
213
212
  const runtimeConfig = yield* RuntimeConfigServiceTag
214
213
  const bridge = yield* RuntimeBridgeTag
215
214
  return makeDocumentChunkService(
216
- createDocumentChunkEmbeddings(
217
- runtimeConfig.aiGateway.embeddingModel,
218
- bridge.runPromise,
219
- runtimeConfig.aiGateway.openRouterApiKey,
220
- ),
215
+ createDocumentChunkEmbeddings(runtimeConfig.aiGateway.embeddingModel, bridge.runPromise),
221
216
  )
222
217
  }),
223
218
  )
@@ -111,15 +111,11 @@ interface RetrieveForTurnParams {
111
111
 
112
112
  export function makeLearnedSkillService(
113
113
  db: SurrealDBService,
114
- options: { embeddingModel: string; openRouterApiKey?: string; runPromise: RuntimeBridge['runPromise'] },
114
+ options: { embeddingModel: string; runPromise: RuntimeBridge['runPromise'] },
115
115
  skillExistsCache: Cache.Cache<string, boolean, LearnedSkillServiceError>,
116
116
  background: Context.Service.Shape<typeof BackgroundWorkServiceTag>,
117
117
  ) {
118
- const embeddings = new ProviderEmbeddings({
119
- modelId: options.embeddingModel,
120
- openRouterApiKey: options.openRouterApiKey,
121
- runPromise: options.runPromise,
122
- })
118
+ const embeddings = new ProviderEmbeddings({ modelId: options.embeddingModel, runPromise: options.runPromise })
123
119
 
124
120
  const hasSkillsForAgent = (orgId: string, agentId: string) => Cache.get(skillExistsCache, `${orgId}:${agentId}`)
125
121
 
@@ -517,11 +513,7 @@ export const LearnedSkillServiceLive = Layer.effect(
517
513
  })
518
514
  return makeLearnedSkillService(
519
515
  db,
520
- {
521
- embeddingModel: runtimeConfig.aiGateway.embeddingModel,
522
- openRouterApiKey: runtimeConfig.aiGateway.openRouterApiKey,
523
- runPromise: bridge.runPromise,
524
- },
516
+ { embeddingModel: runtimeConfig.aiGateway.embeddingModel, runPromise: bridge.runPromise },
525
517
  skillExistsCache,
526
518
  background,
527
519
  )
@@ -3,18 +3,19 @@ import * as Schema from 'effect/Schema'
3
3
  import { z } from 'zod'
4
4
 
5
5
  import { OPENROUTER_FAST_RERANK_MODEL_ID } from '../../config/model-constants'
6
- import { ERROR_TAGS } from '../../effect/errors'
6
+ import { ConfigurationError, ERROR_TAGS } from '../../effect/errors'
7
7
  import { RuntimeConfigServiceTag } from '../../effect/services'
8
8
  import { toValidationError } from '../../effect/zod'
9
- import { normalizeDirectOpenRouterModelId, resolveOpenRouterApiKey } from '../../openrouter/direct-provider'
10
9
  import type { ResolvedLotaRuntimeConfig } from '../../runtime/runtime-config'
11
10
 
12
- const OPENROUTER_RERANK_URL = 'https://openrouter.ai/api/v1/rerank' as const
11
+ const AI_GATEWAY_VIRTUAL_KEY_HEADER = 'x-bf-vk' as const
12
+ const EXPECTED_GATEWAY_KEY_PREFIX = 'sk-bf-' as const
13
+ const OPENROUTER_MODEL_PREFIX = 'openrouter/' as const
13
14
 
14
15
  const RerankRequestBodySchema = Schema.Struct({
15
16
  model: Schema.String,
16
17
  query: Schema.String,
17
- documents: Schema.Array(Schema.String),
18
+ documents: Schema.Array(Schema.Struct({ text: Schema.String })),
18
19
  top_n: Schema.Number,
19
20
  })
20
21
 
@@ -30,6 +31,36 @@ function toRerankServiceError(operation: string, message: string, cause: unknown
30
31
  return new RerankServiceError({ operation, message, cause })
31
32
  }
32
33
 
34
+ function resolveAiGatewayRerankUrl(config: ResolvedLotaRuntimeConfig): string {
35
+ const trimmed = config.aiGateway.url.trim()
36
+ if (!trimmed) {
37
+ throw new ConfigurationError({ message: '[ai-gateway] AI gateway URL is required.', key: 'aiGateway.url' })
38
+ }
39
+ const normalized = trimmed.replace(/\/+$/, '')
40
+ const v1BaseUrl = normalized.endsWith('/v1') ? normalized : `${normalized}/v1`
41
+ return `${v1BaseUrl}/rerank`
42
+ }
43
+
44
+ function resolveAiGatewayKey(config: ResolvedLotaRuntimeConfig): string {
45
+ const key = config.aiGateway.key.trim()
46
+ if (!key.startsWith(EXPECTED_GATEWAY_KEY_PREFIX)) {
47
+ throw new ConfigurationError({
48
+ message: `[ai-gateway] Gateway keys must use the ${EXPECTED_GATEWAY_KEY_PREFIX}* format.`,
49
+ key: 'aiGateway.key',
50
+ })
51
+ }
52
+ return key
53
+ }
54
+
55
+ function normalizeRerankModelId(modelId: string): string {
56
+ const normalized = modelId.trim()
57
+ if (!normalized) {
58
+ throw new ConfigurationError({ message: 'Rerank model id is required.', key: 'rerankModelId' })
59
+ }
60
+
61
+ return normalized.startsWith(OPENROUTER_MODEL_PREFIX) ? normalized.slice(OPENROUTER_MODEL_PREFIX.length) : normalized
62
+ }
63
+
33
64
  const RerankResponseSchema = z
34
65
  .object({
35
66
  model: z.string().optional(),
@@ -101,10 +132,10 @@ export function makeRerankService(config: ResolvedLotaRuntimeConfig) {
101
132
 
102
133
  function resolveRerankModelId(modelId?: string): string {
103
134
  const explicit = modelId?.trim()
104
- if (explicit) return normalizeDirectOpenRouterModelId(explicit)
135
+ if (explicit) return normalizeRerankModelId(explicit)
105
136
 
106
137
  const configured = readConfiguredRerankModelId()
107
- if (configured) return normalizeDirectOpenRouterModelId(configured)
138
+ if (configured) return normalizeRerankModelId(configured)
108
139
 
109
140
  return OPENROUTER_FAST_RERANK_MODEL_ID
110
141
  }
@@ -116,21 +147,26 @@ export function makeRerankService(config: ResolvedLotaRuntimeConfig) {
116
147
  return { modelId: resolveRerankModelId(params.modelId), results: [] as RerankResultItem[] }
117
148
  }
118
149
 
119
- const apiKey = resolveOpenRouterApiKey(config.aiGateway.openRouterApiKey)
150
+ const gatewayKey = resolveAiGatewayKey(config)
151
+ const rerankUrl = resolveAiGatewayRerankUrl(config)
120
152
  const modelId = resolveRerankModelId(params.modelId)
121
153
  const topN = clampTopN(params.topN, params.documents.length)
122
154
  const requestBody = encodeRerankRequestBody({
123
155
  model: modelId,
124
156
  query: params.query,
125
- documents: params.documents.map((document) => document.text),
157
+ documents: params.documents.map((document) => ({ text: document.text })),
126
158
  top_n: topN,
127
159
  })
128
160
 
129
161
  const response = yield* Effect.tryPromise({
130
162
  try: () =>
131
- Bun.fetch(OPENROUTER_RERANK_URL, {
163
+ Bun.fetch(rerankUrl, {
132
164
  method: 'POST',
133
- headers: { Authorization: `Bearer ${apiKey}`, 'Content-Type': 'application/json' },
165
+ headers: {
166
+ Authorization: `Bearer ${gatewayKey}`,
167
+ [AI_GATEWAY_VIRTUAL_KEY_HEADER]: gatewayKey,
168
+ 'Content-Type': 'application/json',
169
+ },
134
170
  body: requestBody,
135
171
  }),
136
172
  catch: (cause) => toRerankServiceError('fetch-rerank', 'Failed to fetch rerank results.', cause),
@@ -144,7 +180,7 @@ export function makeRerankService(config: ResolvedLotaRuntimeConfig) {
144
180
  if (!response.ok) {
145
181
  return yield* new RerankServiceError({
146
182
  operation: 'fetch-rerank',
147
- message: `OpenRouter rerank failed (${response.status}): ${responseText}`,
183
+ message: `AI gateway rerank failed (${response.status}): ${responseText}`,
148
184
  cause: responseText,
149
185
  })
150
186
  }
@@ -6,6 +6,8 @@ interface HelperAgentOptionOverrides {
6
6
  temperature?: number
7
7
  }
8
8
 
9
+ const DEFAULT_HELPER_AGENT_MAX_RETRIES = 2
10
+
9
11
  export function resolveHelperAgentOptions(
10
12
  options: CreateHelperToolLoopAgentOptions,
11
13
  overrides?: HelperAgentOptionOverrides,
@@ -15,6 +17,6 @@ export function resolveHelperAgentOptions(
15
17
  maxOutputTokens: overrides?.maxOutputTokens ?? options.maxOutputTokens,
16
18
  temperature: overrides?.temperature ?? options.temperature,
17
19
  output: options.output,
18
- maxRetries: options.maxRetries,
20
+ maxRetries: options.maxRetries ?? DEFAULT_HELPER_AGENT_MAX_RETRIES,
19
21
  }
20
22
  }
@@ -50,9 +50,8 @@ function buildSandboxedWorkerRuntimeConfigEffect() {
50
50
  aiGateway: {
51
51
  url: env.aiGatewayUrl,
52
52
  key: Redacted.value(env.aiGatewayKey),
53
- ...(Option.isSome(env.openRouterApiKey)
54
- ? { openRouterApiKey: Redacted.value(env.openRouterApiKey.value) }
55
- : {}),
53
+ embeddingModel: env.aiEmbeddingModel,
54
+ maxConcurrency: env.aiGatewayMaxConcurrency,
56
55
  },
57
56
  s3: {
58
57
  endpoint: env.s3Endpoint,
@@ -47,7 +47,6 @@ const skillExtractionServices: SkillExtractionServices = {
47
47
  socialChatHistoryService: await resolve(SocialChatHistoryServiceTag),
48
48
  runtimeAdapters: await resolve(RuntimeAdaptersServiceTag),
49
49
  embeddingModel: workerRuntimeConfig.aiGateway.embeddingModel,
50
- openRouterApiKey: workerRuntimeConfig.aiGateway.openRouterApiKey,
51
50
  runPromise: (effect) => runtime.runPromise(effect),
52
51
  }
53
52
  const organizationLearningQueueJobService = await resolve(QueueJobServiceTag)
@@ -36,7 +36,6 @@ export interface SkillExtractionServices {
36
36
  socialChatHistoryService: Context.Service.Shape<typeof SocialChatHistoryServiceTag>
37
37
  runtimeAdapters: LotaRuntimeAdapters
38
38
  embeddingModel: string
39
- openRouterApiKey?: string
40
39
  runPromise: <A, E>(effect: Effect.Effect<A, E>) => Promise<A>
41
40
  }
42
41
 
@@ -351,11 +350,7 @@ export function runSkillExtraction(
351
350
  return Promise.resolve({ skipped: true, processedMessages: 0, extractedSkills: 0 })
352
351
  }
353
352
 
354
- const embeddings = new ProviderEmbeddings({
355
- modelId: services.embeddingModel,
356
- openRouterApiKey: services.openRouterApiKey,
357
- runPromise: services.runPromise,
358
- })
353
+ const embeddings = new ProviderEmbeddings({ modelId: services.embeddingModel, runPromise: services.runPromise })
359
354
  const withMemoryLock = runtimeAdapters.withWorkspaceMemoryLock
360
355
  const runExtraction = () =>
361
356
  services.runPromise(
@@ -1,29 +0,0 @@
1
- import { createOpenAI } from '@ai-sdk/openai'
2
-
3
- import { ConfigurationError } from '../effect/errors'
4
-
5
- const DIRECT_OPENROUTER_BASE_URL = 'https://openrouter.ai/api/v1' as const
6
- const OPENROUTER_MODEL_PREFIX = 'openrouter/' as const
7
-
8
- export function resolveOpenRouterApiKey(openRouterApiKey: string | undefined): string {
9
- const key = openRouterApiKey?.trim()
10
- if (key) return key
11
-
12
- throw new ConfigurationError({
13
- message: 'Missing OpenRouter API key. Configure createLotaRuntime({ aiGateway: { openRouterApiKey } }).',
14
- key: 'aiGateway.openRouterApiKey',
15
- })
16
- }
17
-
18
- export function normalizeDirectOpenRouterModelId(modelId: string): string {
19
- const normalized = modelId.trim()
20
- if (!normalized) {
21
- throw new ConfigurationError({ message: 'OpenRouter model id is required.', key: 'openRouterModelId' })
22
- }
23
-
24
- return normalized.startsWith(OPENROUTER_MODEL_PREFIX) ? normalized.slice(OPENROUTER_MODEL_PREFIX.length) : normalized
25
- }
26
-
27
- export function getDirectOpenRouterProvider(openRouterApiKey?: string) {
28
- return createOpenAI({ baseURL: DIRECT_OPENROUTER_BASE_URL, apiKey: resolveOpenRouterApiKey(openRouterApiKey) })
29
- }