@lota-sdk/core 0.4.19 → 0.4.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,13 +1,13 @@
1
1
  {
2
2
  "name": "@lota-sdk/core",
3
- "version": "0.4.19",
4
- "type": "module",
5
- "main": "./src/index.ts",
6
- "types": "./src/index.ts",
3
+ "version": "0.4.21",
7
4
  "files": [
8
5
  "src",
9
6
  "infrastructure/schema"
10
7
  ],
8
+ "type": "module",
9
+ "main": "./src/index.ts",
10
+ "types": "./src/index.ts",
11
11
  "exports": {
12
12
  ".": {
13
13
  "bun": "./src/index.ts",
@@ -15,6 +15,10 @@
15
15
  "types": "./src/index.ts"
16
16
  }
17
17
  },
18
+ "publishConfig": {
19
+ "access": "public",
20
+ "registry": "https://registry.npmjs.org/"
21
+ },
18
22
  "scripts": {
19
23
  "lint": "bunx oxlint --fix -c ../oxlint.config.ts src",
20
24
  "format": "bunx oxfmt src",
@@ -22,23 +26,19 @@
22
26
  "test:unit": "bun test --max-concurrency=1 ../tests/unit/core",
23
27
  "test:coverage": "bun test --coverage ../tests/unit/core"
24
28
  },
25
- "publishConfig": {
26
- "access": "public",
27
- "registry": "https://registry.npmjs.org/"
28
- },
29
29
  "dependencies": {
30
- "@ai-sdk/devtools": "^0.0.15",
31
- "@ai-sdk/openai": "^3.0.53",
30
+ "@ai-sdk/devtools": "^0.0.16",
31
+ "@ai-sdk/openai": "^3.0.54",
32
32
  "@chat-adapter/slack": "^4.26.0",
33
33
  "@chat-adapter/state-ioredis": "^4.26.0",
34
- "@lota-sdk/shared": "0.4.19",
35
- "@mendable/firecrawl-js": "^4.18.3",
34
+ "@lota-sdk/shared": "0.4.21",
35
+ "@mendable/firecrawl-js": "^4.20.0",
36
36
  "@surrealdb/node": "^3.0.3",
37
- "ai": "^6.0.168",
38
- "bullmq": "^5.74.2",
37
+ "ai": "^6.0.170",
38
+ "bullmq": "^5.76.4",
39
39
  "chat": "^4.26.0",
40
- "effect": "^4.0.0-beta.52",
41
- "hono": "^4.12.14",
40
+ "effect": "^4.0.0-beta.59",
41
+ "hono": "^4.12.15",
42
42
  "ioredis": "5.9.3",
43
43
  "mammoth": "^1.12.0",
44
44
  "pdf-parse": "^2.4.5",
@@ -2,12 +2,11 @@ import { devToolsMiddleware } from '@ai-sdk/devtools'
2
2
  import { createOpenAI } from '@ai-sdk/openai'
3
3
  import { wrapEmbeddingModel, wrapLanguageModel } from 'ai'
4
4
  import type { LanguageModelMiddleware } from 'ai'
5
- import { Cause, Clock, Context, Duration, Effect, ExecutionPlan, Fiber, Layer, Semaphore } from 'effect'
5
+ import { Cause, Clock, Context, Duration, Effect, Fiber, Layer, Semaphore } from 'effect'
6
6
 
7
7
  import { DEFAULT_AI_GATEWAY_URL } from '../config/constants'
8
8
  import { ERROR_TAGS, AiGenerationError, ConfigurationError } from '../effect/errors'
9
9
  import { RuntimeConfigServiceTag } from '../effect/services'
10
- import { getDirectOpenRouterProvider, normalizeDirectOpenRouterModelId } from '../openrouter/direct-provider'
11
10
  import { isRecord, readString } from '../utils/string'
12
11
  import { buildAiGatewayCacheHeaders } from './cache-headers'
13
12
 
@@ -37,7 +36,7 @@ class AiGatewayStreamAttemptTag extends Context.Service<
37
36
 
38
37
  const EXPECTED_GATEWAY_KEY_PREFIX = 'sk-bf-'
39
38
  const AI_GATEWAY_VIRTUAL_KEY_HEADER = 'x-bf-vk'
40
- const AI_GATEWAY_TIMEOUT_MS = 30_000
39
+ const AI_GATEWAY_TIMEOUT_MS = 180_000
41
40
  const AI_GATEWAY_STREAM_IDLE_TIMEOUT_MS = 30_000
42
41
  const AI_GATEWAY_MAX_RETRIES = 4
43
42
  const AI_GATEWAY_MAX_RETRY_DELAY_MS = 15_000
@@ -722,24 +721,18 @@ function isReasoningEnabled(params: AiGatewayCallOptions): boolean {
722
721
  return typeof openaiOptions.reasoningEffort === 'string' && openaiOptions.reasoningEffort !== 'none'
723
722
  }
724
723
 
725
- function isOpenRouterModel(modelId: string): boolean {
726
- return modelId.trim().toLowerCase().startsWith('openrouter/')
727
- }
728
-
729
- function hasDirectOpenRouterFallback(config: AiGatewayRuntimeConfig, modelId: string): boolean {
730
- return isOpenRouterModel(modelId) && Boolean(config.aiGateway.openRouterApiKey?.trim())
724
+ function hasFunctionTools(params: AiGatewayCallOptions): boolean {
725
+ return Array.isArray(params.tools) && params.tools.some((tool) => isRecord(tool) && tool.type === 'function')
731
726
  }
732
727
 
733
- function getDirectOpenRouterChatModel(config: AiGatewayRuntimeConfig, modelId: string): AiGatewayLanguageModel {
734
- return getDirectOpenRouterProvider(config.aiGateway.openRouterApiKey).chat(normalizeDirectOpenRouterModelId(modelId))
728
+ function isGpt55Model(modelId: string): boolean {
729
+ const normalized = modelId.trim().toLowerCase()
730
+ const modelName = normalized.split('/').at(-1) ?? normalized
731
+ return modelName === 'gpt-5.5' || modelName.startsWith('gpt-5.5-')
735
732
  }
736
733
 
737
- function shouldFallbackToDirectOpenRouter(
738
- config: AiGatewayRuntimeConfig,
739
- modelId: string,
740
- error: AiGenerationError,
741
- ): boolean {
742
- return hasDirectOpenRouterFallback(config, modelId) && isRetryableAiGatewayError(error)
734
+ function isOpenRouterModel(modelId: string): boolean {
735
+ return modelId.trim().toLowerCase().startsWith('openrouter/')
743
736
  }
744
737
 
745
738
  function attemptAiGatewayGenerate(
@@ -770,28 +763,8 @@ function attemptAiGatewayStream(
770
763
  )
771
764
  }
772
765
 
773
- function attemptDirectOpenRouterGenerate(
774
- config: AiGatewayRuntimeConfig,
775
- modelId: string,
776
- params: AiGatewayCallOptions,
777
- ): Effect.Effect<AiGatewayAttemptResult<AiGatewayGenerateResult>, AiGenerationError> {
778
- const model = getDirectOpenRouterChatModel(config, modelId)
779
- return attemptAiGatewayGenerate('openrouter.generate', () => model.doGenerate(params))
780
- }
781
-
782
- function attemptDirectOpenRouterStream(
783
- config: AiGatewayRuntimeConfig,
784
- modelId: string,
785
- params: AiGatewayCallOptions,
786
- ): Effect.Effect<AiGatewayAttemptResult<AiGatewayStreamResult>, AiGenerationError> {
787
- const model = getDirectOpenRouterChatModel(config, modelId)
788
- return attemptAiGatewayStream('openrouter.stream', () => model.doStream(params))
789
- }
790
-
791
766
  function executeGenerateAttemptPlan(
792
- config: AiGatewayRuntimeConfig,
793
767
  modelId: string,
794
- params: AiGatewayCallOptions,
795
768
  doGenerate: () => PromiseLike<AiGatewayGenerateResult>,
796
769
  ): Effect.Effect<AiGatewayAttemptResult<AiGatewayGenerateResult>, AiGenerationError> {
797
770
  const primary = Layer.succeed(AiGatewayGenerateAttemptTag, {
@@ -802,35 +775,15 @@ function executeGenerateAttemptPlan(
802
775
  return yield* attempt.execute
803
776
  })
804
777
 
805
- if (!hasDirectOpenRouterFallback(config, modelId)) {
806
- return effect.pipe(
807
- Effect.provide(primary),
808
- Effect.withSpan('AiGateway.executeGeneratePlan'),
809
- Effect.annotateSpans({ modelId, directOpenRouterFallbackEnabled: false }),
810
- )
811
- }
812
-
813
778
  return effect.pipe(
814
- Effect.withExecutionPlan(
815
- ExecutionPlan.make(
816
- { provide: primary },
817
- {
818
- provide: Layer.succeed(AiGatewayGenerateAttemptTag, {
819
- execute: attemptDirectOpenRouterGenerate(config, modelId, params),
820
- }),
821
- while: (error: AiGenerationError) => shouldFallbackToDirectOpenRouter(config, modelId, error),
822
- },
823
- ),
824
- ),
779
+ Effect.provide(primary),
825
780
  Effect.withSpan('AiGateway.executeGeneratePlan'),
826
- Effect.annotateSpans({ modelId, directOpenRouterFallbackEnabled: true }),
781
+ Effect.annotateSpans({ modelId }),
827
782
  )
828
783
  }
829
784
 
830
785
  function executeStreamAttemptPlan(
831
- config: AiGatewayRuntimeConfig,
832
786
  modelId: string,
833
- params: AiGatewayCallOptions,
834
787
  doStream: () => PromiseLike<AiGatewayStreamResult>,
835
788
  ): Effect.Effect<AiGatewayAttemptResult<AiGatewayStreamResult>, AiGenerationError> {
836
789
  const primary = Layer.succeed(AiGatewayStreamAttemptTag, {
@@ -841,28 +794,10 @@ function executeStreamAttemptPlan(
841
794
  return yield* attempt.execute
842
795
  })
843
796
 
844
- if (!hasDirectOpenRouterFallback(config, modelId)) {
845
- return effect.pipe(
846
- Effect.provide(primary),
847
- Effect.withSpan('AiGateway.executeStreamPlan'),
848
- Effect.annotateSpans({ modelId, directOpenRouterFallbackEnabled: false }),
849
- )
850
- }
851
-
852
797
  return effect.pipe(
853
- Effect.withExecutionPlan(
854
- ExecutionPlan.make(
855
- { provide: primary },
856
- {
857
- provide: Layer.succeed(AiGatewayStreamAttemptTag, {
858
- execute: attemptDirectOpenRouterStream(config, modelId, params),
859
- }),
860
- while: (error: AiGenerationError) => shouldFallbackToDirectOpenRouter(config, modelId, error),
861
- },
862
- ),
863
- ),
798
+ Effect.provide(primary),
864
799
  Effect.withSpan('AiGateway.executeStreamPlan'),
865
- Effect.annotateSpans({ modelId, directOpenRouterFallbackEnabled: true }),
800
+ Effect.annotateSpans({ modelId }),
866
801
  )
867
802
  }
868
803
 
@@ -962,7 +897,11 @@ function resolveProviderModel(
962
897
  modelId: string,
963
898
  providerId: string,
964
899
  ): AiGatewayLanguageModel {
965
- return providerId === OPENAI_CHAT_PROVIDER_ID ? provider.chat(modelId) : provider(modelId)
900
+ if (providerId !== OPENAI_CHAT_PROVIDER_ID) {
901
+ return provider(modelId)
902
+ }
903
+
904
+ return provider.chat(modelId)
966
905
  }
967
906
 
968
907
  export type AiGatewayDeps = {
@@ -1002,7 +941,10 @@ function createAiGatewayLanguageModelMiddleware(
1002
941
  transformParams: ({ params, type }) =>
1003
942
  Promise.resolve(
1004
943
  withDefaultAiGatewayCacheHeaders(
1005
- addAiGatewayReasoningRawChunks(normalizeAiGatewayChatProviderOptions(params, modelId), type),
944
+ addAiGatewayReasoningRawChunks(
945
+ providerId === OPENAI_CHAT_PROVIDER_ID ? normalizeAiGatewayChatProviderOptions(params, modelId) : params,
946
+ type,
947
+ ),
1006
948
  ),
1007
949
  ),
1008
950
  wrapGenerate: ({ params }) => {
@@ -1010,7 +952,7 @@ function createAiGatewayLanguageModelMiddleware(
1010
952
  const model = resolveProviderModel(resolvedDeps.gateway.provider, modelId, providerId)
1011
953
  return resolvedDeps.runPromise(
1012
954
  withAiGatewayConcurrency(
1013
- executeGenerateAttemptPlan(resolvedDeps.runtimeConfig, modelId, params, () => model.doGenerate(params)).pipe(
955
+ executeGenerateAttemptPlan(modelId, () => model.doGenerate(params)).pipe(
1014
956
  Effect.map(({ result }) => ({
1015
957
  ...result,
1016
958
  content: injectAiGatewayChatReasoningContent(
@@ -1027,7 +969,7 @@ function createAiGatewayLanguageModelMiddleware(
1027
969
  const model = resolveProviderModel(resolvedDeps.gateway.provider, modelId, providerId)
1028
970
  return resolvedDeps.runPromise(
1029
971
  withAiGatewayStreamConcurrency(
1030
- executeStreamAttemptPlan(resolvedDeps.runtimeConfig, modelId, params, () => model.doStream(params)).pipe(
972
+ executeStreamAttemptPlan(modelId, () => model.doStream(params)).pipe(
1031
973
  Effect.map((attempt) => ({
1032
974
  ...attempt,
1033
975
  result: isReasoningEnabled(params)
@@ -1047,9 +989,38 @@ function createAiGatewayLanguageModelMiddleware(
1047
989
 
1048
990
  export function normalizeAiGatewayChatProviderOptions(
1049
991
  params: AiGatewayCallOptions,
1050
- _modelId?: string,
992
+ modelId?: string,
1051
993
  ): AiGatewayCallOptions {
1052
- return params
994
+ if (!modelId || !isGpt55Model(modelId) || !hasFunctionTools(params)) {
995
+ return params
996
+ }
997
+
998
+ if (!isRecord(params.providerOptions) || !isRecord(params.providerOptions.openai)) {
999
+ return params
1000
+ }
1001
+
1002
+ const openaiOptions = { ...params.providerOptions.openai }
1003
+ const strippedReasoningOptions =
1004
+ 'forceReasoning' in openaiOptions || 'reasoningEffort' in openaiOptions || 'reasoningSummary' in openaiOptions
1005
+ if (!strippedReasoningOptions) {
1006
+ return params
1007
+ }
1008
+
1009
+ delete openaiOptions.forceReasoning
1010
+ delete openaiOptions.reasoningEffort
1011
+ delete openaiOptions.reasoningSummary
1012
+
1013
+ const providerOptions = { ...params.providerOptions }
1014
+ delete providerOptions.openai
1015
+ const nextProviderOptions =
1016
+ Object.keys(openaiOptions).length === 0 ? providerOptions : { ...providerOptions, openai: openaiOptions }
1017
+
1018
+ if (Object.keys(nextProviderOptions).length === 0) {
1019
+ const { providerOptions: _providerOptions, ...nextParams } = params
1020
+ return nextParams
1021
+ }
1022
+
1023
+ return { ...params, providerOptions: nextProviderOptions as AiGatewayCallOptions['providerOptions'] }
1053
1024
  }
1054
1025
 
1055
1026
  function withAiGatewayDevTools<TModel extends AiGatewayLanguageModel>(model: TModel): TModel {
@@ -1264,20 +1264,12 @@ export class SurrealMemoryStore {
1264
1264
 
1265
1265
  export function createMemoryStore(
1266
1266
  db: SurrealDBService,
1267
- options: {
1268
- embeddingModel: string
1269
- openRouterApiKey?: string
1270
- runPromise: <A, E>(effect: Effect.Effect<A, E>) => Promise<A>
1271
- },
1267
+ options: { embeddingModel: string; runPromise: <A, E>(effect: Effect.Effect<A, E>) => Promise<A> },
1272
1268
  background: BackgroundWorker,
1273
1269
  ): SurrealMemoryStore {
1274
1270
  return new SurrealMemoryStore(
1275
1271
  db,
1276
- new ProviderEmbeddings({
1277
- modelId: options.embeddingModel,
1278
- openRouterApiKey: options.openRouterApiKey,
1279
- runPromise: options.runPromise,
1280
- }),
1272
+ new ProviderEmbeddings({ modelId: options.embeddingModel, runPromise: options.runPromise }),
1281
1273
  background,
1282
1274
  )
1283
1275
  }
@@ -151,13 +151,13 @@ const MemoryDeltaRelationSchema = z
151
151
  targetMemoryId: z
152
152
  .string()
153
153
  .min(1)
154
- .optional()
154
+ .nullable()
155
155
  .describe('Target existing memory id when relation points to existing memory.'),
156
156
  targetFactIndex: z
157
157
  .number()
158
158
  .int()
159
159
  .min(0)
160
- .optional()
160
+ .nullable()
161
161
  .describe('Target newFacts index when relation points to another newly provided fact.'),
162
162
  })
163
163
  .strict()
@@ -179,17 +179,12 @@ const MemoryDeltaItemSchema = z
179
179
  classification: MemoryDeltaClassificationSchema.describe(
180
180
  'How this fact relates to existing memories: new, supersedes, contradicts, enriches, duplicate.',
181
181
  ),
182
- targetMemoryIds: z
183
- .array(z.string().min(1))
184
- .default([])
185
- .describe('Existing memory IDs that are directly related to this fact.'),
182
+ targetMemoryIds: z.array(z.string().min(1)).describe('Existing memory IDs that are directly related to this fact.'),
186
183
  invalidateTargetIds: z
187
184
  .array(z.string().min(1))
188
- .default([])
189
185
  .describe('Subset of targetMemoryIds that should be deleted as obsolete/invalidated.'),
190
186
  relations: z
191
187
  .array(MemoryDeltaRelationSchema)
192
- .default([])
193
188
  .describe('Explicit semantic relations from this fact to existing memories and/or other new facts by index.'),
194
189
  rationale: z.string().min(1).describe('Short rationale for the classification decision.'),
195
190
  })
package/src/db/memory.ts CHANGED
@@ -79,11 +79,7 @@ export class Memory {
79
79
  ) {
80
80
  this.store = createMemoryStore(
81
81
  deps.db,
82
- {
83
- embeddingModel: deps.runtimeConfig.aiGateway.embeddingModel,
84
- openRouterApiKey: deps.runtimeConfig.aiGateway.openRouterApiKey,
85
- runPromise: deps.runPromise,
86
- },
82
+ { embeddingModel: deps.runtimeConfig.aiGateway.embeddingModel, runPromise: deps.runPromise },
87
83
  deps.background,
88
84
  )
89
85
  this.runtimeConfig = deps.runtimeConfig
@@ -1,8 +1,8 @@
1
1
  import { embed, embedMany } from 'ai'
2
2
  import { Schema, Effect } from 'effect'
3
3
 
4
+ import { aiGatewayEmbeddingModel } from '../ai-gateway/ai-gateway'
4
5
  import { ERROR_TAGS, ConfigurationError } from '../effect/errors'
5
- import { getDirectOpenRouterProvider, normalizeDirectOpenRouterModelId } from '../openrouter/direct-provider'
6
6
 
7
7
  const SUPPORTED_EMBEDDING_PREFIXES = ['openai/', 'openrouter/'] as const
8
8
 
@@ -16,7 +16,6 @@ type ProviderEmbeddingsOptions = {
16
16
  embedManyFn?: typeof embedMany
17
17
  getCache?: () => SharedEmbeddingCache | null
18
18
  modelId: string
19
- openRouterApiKey?: string
20
19
  /**
21
20
  * `runPromise` is required: callers must yield `RuntimeBridgeTag` in their
22
21
  * `Layer.effect` (or accept a `RuntimeBridge` dep) and pass its `runPromise`
@@ -25,7 +24,7 @@ type ProviderEmbeddingsOptions = {
25
24
  runPromise: <A, E>(effect: Effect.Effect<A, E>) => Promise<A>
26
25
  }
27
26
 
28
- function resolveEmbeddingModel(modelId: string, openRouterApiKey?: string) {
27
+ function resolveEmbeddingModel(modelId: string) {
29
28
  const normalized = modelId.trim()
30
29
  if (!normalized) {
31
30
  throw new ConfigurationError({ message: '[embeddings-provider] Model id is required.', key: 'embeddingModelId' })
@@ -38,7 +37,7 @@ function resolveEmbeddingModel(modelId: string, openRouterApiKey?: string) {
38
37
  })
39
38
  }
40
39
 
41
- return getDirectOpenRouterProvider(openRouterApiKey).embeddingModel(normalizeDirectOpenRouterModelId(normalized))
40
+ return aiGatewayEmbeddingModel(normalized)
42
41
  }
43
42
 
44
43
  function normalizeEmbedding(embedding: readonly number[]): number[] {
@@ -69,7 +68,6 @@ export class ProviderEmbeddings {
69
68
  /** In-flight dedup: concurrent embedQuery calls for the same text share one API round-trip. */
70
69
  private readonly inflightEmbeddings = new Map<string, Promise<number[]>>()
71
70
 
72
- private readonly openRouterApiKey: string | undefined
73
71
  private readonly runPromise: <A, E>(effect: Effect.Effect<A, E>) => Promise<A>
74
72
 
75
73
  constructor(options: ProviderEmbeddingsOptions) {
@@ -77,7 +75,6 @@ export class ProviderEmbeddings {
77
75
  this.embedManyFn = options.embedManyFn ?? embedMany
78
76
  this.getCache = options.getCache ?? (() => null)
79
77
  this.resolvedModelId = options.modelId
80
- this.openRouterApiKey = options.openRouterApiKey
81
78
  this.runPromise = options.runPromise
82
79
  }
83
80
 
@@ -87,7 +84,7 @@ export class ProviderEmbeddings {
87
84
 
88
85
  private getModel() {
89
86
  if (!this._model) {
90
- this._model = resolveEmbeddingModel(this.getModelId(), this.openRouterApiKey)
87
+ this._model = resolveEmbeddingModel(this.getModelId())
91
88
  }
92
89
  return this._model
93
90
  }
@@ -72,8 +72,8 @@ interface MemoryDeltaOutputLike {
72
72
 
73
73
  interface MemoryDeltaRelationLike<TRelation extends string = string> {
74
74
  relation: TRelation
75
- targetMemoryId?: string
76
- targetFactIndex?: number
75
+ targetMemoryId?: string | null
76
+ targetFactIndex?: number | null
77
77
  }
78
78
 
79
79
  interface MemoryActionAdd {
@@ -14,9 +14,10 @@ Decide one classification per fact:
14
14
  Rules:
15
15
  - Return exactly one delta item per new fact, preserving order.
16
16
  - fact must match the corresponding newFacts entry verbatim.
17
+ - Always include targetMemoryIds, invalidateTargetIds, and relations. Use [] when there are no values.
17
18
  - targetMemoryIds and invalidateTargetIds may only contain ids from existingMemories.
18
19
  - invalidateTargetIds must be a subset of targetMemoryIds.
19
- - In each relation item, set exactly one target: targetMemoryId or targetFactIndex.
20
+ - In each relation item, return both target fields. Set exactly one target to a real value and set the other target field to null.
20
21
  - targetFactIndex must be valid, must not point to the same fact index, and is only for relations to other new facts.
21
22
  - For supersedes/contradicts, include target memories when evidence exists.
22
23
  - If uncertain, prefer conservative output: classify as new with no targets.
@@ -215,7 +215,6 @@ export const LotaRuntimeConfigSchema = z.object({
215
215
  url: nonEmptyStringSchema,
216
216
  key: nonEmptyStringSchema,
217
217
  embeddingModel: nonEmptyStringSchema.default('openai/text-embedding-3-small'),
218
- openRouterApiKey: nonEmptyStringSchema.optional(),
219
218
  maxConcurrency: z.coerce.number().int().positive().default(8),
220
219
  }),
221
220
  s3: z.object({
@@ -293,7 +292,6 @@ export const LOTA_RUNTIME_ENV_KEYS = Object.freeze([
293
292
  'AI_GATEWAY_URL',
294
293
  'AI_GATEWAY_KEY',
295
294
  'AI_EMBEDDING_MODEL',
296
- 'OPENROUTER_API_KEY',
297
295
  'AI_GATEWAY_MAX_CONCURRENCY',
298
296
  'S3_ENDPOINT',
299
297
  'S3_BUCKET',
@@ -330,7 +328,6 @@ export const lotaRuntimeEnvConfig = Config.all({
330
328
  aiGatewayUrl: Config.string('AI_GATEWAY_URL').pipe(Config.withDefault(DEFAULT_AI_GATEWAY_URL)),
331
329
  aiGatewayKey: Config.redacted('AI_GATEWAY_KEY'),
332
330
  aiEmbeddingModel: Config.string('AI_EMBEDDING_MODEL').pipe(Config.withDefault('openai/text-embedding-3-small')),
333
- openRouterApiKey: Config.redacted('OPENROUTER_API_KEY').pipe(Config.option),
334
331
  aiGatewayMaxConcurrency: Config.number('AI_GATEWAY_MAX_CONCURRENCY').pipe(Config.withDefault(8)),
335
332
  s3Endpoint: Config.string('S3_ENDPOINT'),
336
333
  s3Bucket: Config.string('S3_BUCKET'),
@@ -379,9 +376,6 @@ export function loadLotaRuntimeConfigFromEnv(
379
376
  key: Redacted.value(env.aiGatewayKey),
380
377
  embeddingModel: env.aiEmbeddingModel,
381
378
  maxConcurrency: env.aiGatewayMaxConcurrency,
382
- ...(Option.isSome(env.openRouterApiKey)
383
- ? { openRouterApiKey: Redacted.value(env.openRouterApiKey.value) }
384
- : {}),
385
379
  },
386
380
  s3: {
387
381
  endpoint: env.s3Endpoint,
@@ -82,15 +82,15 @@ type PromisifiedService<T> = {
82
82
 
83
83
  type HostSvc<T extends { readonly Service: object }> = PromisifiedService<T['Service']>
84
84
 
85
- export type PromisifiedThreadService = HostSvc<typeof ThreadServiceTag>
86
- export type PromisifiedDocumentChunkService = HostSvc<typeof DocumentChunkServiceTag>
85
+ type PromisifiedThreadService = HostSvc<typeof ThreadServiceTag>
86
+ type PromisifiedDocumentChunkService = HostSvc<typeof DocumentChunkServiceTag>
87
87
 
88
- export type ArchiveSdkThread = (
88
+ type ArchiveSdkThread = (
89
89
  threadId: Parameters<Svc<typeof ThreadServiceTag>['updateStatus']>[0],
90
90
  status?: 'archived',
91
91
  ) => ReturnType<Svc<typeof ThreadServiceTag>['updateStatus']>
92
92
 
93
- export type UnarchiveSdkThread = (
93
+ type UnarchiveSdkThread = (
94
94
  threadId: Parameters<Svc<typeof ThreadServiceTag>['updateStatus']>[0],
95
95
  status?: 'active',
96
96
  ) => ReturnType<Svc<typeof ThreadServiceTag>['updateStatus']>
@@ -5,7 +5,7 @@ import { nowEpochMillis } from '../utils/date-time'
5
5
  import { buildCompletionCheckStructuredOutputHints } from './agent-runtime-policy'
6
6
  import { mergeInstructionSections } from './instruction-sections'
7
7
 
8
- export interface PlanTurnUpstreamHandoff {
8
+ interface PlanTurnUpstreamHandoff {
9
9
  nodeId: string
10
10
  label: string
11
11
  ownerRef: string
@@ -17,9 +17,8 @@ type DocumentChunkEmbeddings = {
17
17
  function createDocumentChunkEmbeddings(
18
18
  embeddingModel: string,
19
19
  runPromise: RuntimeBridge['runPromise'],
20
- openRouterApiKey?: string,
21
20
  ): DocumentChunkEmbeddings {
22
- const embeddings = new ProviderEmbeddings({ modelId: embeddingModel, openRouterApiKey, runPromise })
21
+ const embeddings = new ProviderEmbeddings({ modelId: embeddingModel, runPromise })
23
22
 
24
23
  return {
25
24
  embedDocuments: (documents) => embeddings.embedDocuments(documents),
@@ -213,11 +212,7 @@ export const DocumentChunkServiceLive = Layer.effect(
213
212
  const runtimeConfig = yield* RuntimeConfigServiceTag
214
213
  const bridge = yield* RuntimeBridgeTag
215
214
  return makeDocumentChunkService(
216
- createDocumentChunkEmbeddings(
217
- runtimeConfig.aiGateway.embeddingModel,
218
- bridge.runPromise,
219
- runtimeConfig.aiGateway.openRouterApiKey,
220
- ),
215
+ createDocumentChunkEmbeddings(runtimeConfig.aiGateway.embeddingModel, bridge.runPromise),
221
216
  )
222
217
  }),
223
218
  )
@@ -111,15 +111,11 @@ interface RetrieveForTurnParams {
111
111
 
112
112
  export function makeLearnedSkillService(
113
113
  db: SurrealDBService,
114
- options: { embeddingModel: string; openRouterApiKey?: string; runPromise: RuntimeBridge['runPromise'] },
114
+ options: { embeddingModel: string; runPromise: RuntimeBridge['runPromise'] },
115
115
  skillExistsCache: Cache.Cache<string, boolean, LearnedSkillServiceError>,
116
116
  background: Context.Service.Shape<typeof BackgroundWorkServiceTag>,
117
117
  ) {
118
- const embeddings = new ProviderEmbeddings({
119
- modelId: options.embeddingModel,
120
- openRouterApiKey: options.openRouterApiKey,
121
- runPromise: options.runPromise,
122
- })
118
+ const embeddings = new ProviderEmbeddings({ modelId: options.embeddingModel, runPromise: options.runPromise })
123
119
 
124
120
  const hasSkillsForAgent = (orgId: string, agentId: string) => Cache.get(skillExistsCache, `${orgId}:${agentId}`)
125
121
 
@@ -517,11 +513,7 @@ export const LearnedSkillServiceLive = Layer.effect(
517
513
  })
518
514
  return makeLearnedSkillService(
519
515
  db,
520
- {
521
- embeddingModel: runtimeConfig.aiGateway.embeddingModel,
522
- openRouterApiKey: runtimeConfig.aiGateway.openRouterApiKey,
523
- runPromise: bridge.runPromise,
524
- },
516
+ { embeddingModel: runtimeConfig.aiGateway.embeddingModel, runPromise: bridge.runPromise },
525
517
  skillExistsCache,
526
518
  background,
527
519
  )
@@ -3,18 +3,19 @@ import * as Schema from 'effect/Schema'
3
3
  import { z } from 'zod'
4
4
 
5
5
  import { OPENROUTER_FAST_RERANK_MODEL_ID } from '../../config/model-constants'
6
- import { ERROR_TAGS } from '../../effect/errors'
6
+ import { ConfigurationError, ERROR_TAGS } from '../../effect/errors'
7
7
  import { RuntimeConfigServiceTag } from '../../effect/services'
8
8
  import { toValidationError } from '../../effect/zod'
9
- import { normalizeDirectOpenRouterModelId, resolveOpenRouterApiKey } from '../../openrouter/direct-provider'
10
9
  import type { ResolvedLotaRuntimeConfig } from '../../runtime/runtime-config'
11
10
 
12
- const OPENROUTER_RERANK_URL = 'https://openrouter.ai/api/v1/rerank' as const
11
+ const AI_GATEWAY_VIRTUAL_KEY_HEADER = 'x-bf-vk' as const
12
+ const EXPECTED_GATEWAY_KEY_PREFIX = 'sk-bf-' as const
13
+ const OPENROUTER_MODEL_PREFIX = 'openrouter/' as const
13
14
 
14
15
  const RerankRequestBodySchema = Schema.Struct({
15
16
  model: Schema.String,
16
17
  query: Schema.String,
17
- documents: Schema.Array(Schema.String),
18
+ documents: Schema.Array(Schema.Struct({ text: Schema.String })),
18
19
  top_n: Schema.Number,
19
20
  })
20
21
 
@@ -30,6 +31,36 @@ function toRerankServiceError(operation: string, message: string, cause: unknown
30
31
  return new RerankServiceError({ operation, message, cause })
31
32
  }
32
33
 
34
+ function resolveAiGatewayRerankUrl(config: ResolvedLotaRuntimeConfig): string {
35
+ const trimmed = config.aiGateway.url.trim()
36
+ if (!trimmed) {
37
+ throw new ConfigurationError({ message: '[ai-gateway] AI gateway URL is required.', key: 'aiGateway.url' })
38
+ }
39
+ const normalized = trimmed.replace(/\/+$/, '')
40
+ const v1BaseUrl = normalized.endsWith('/v1') ? normalized : `${normalized}/v1`
41
+ return `${v1BaseUrl}/rerank`
42
+ }
43
+
44
+ function resolveAiGatewayKey(config: ResolvedLotaRuntimeConfig): string {
45
+ const key = config.aiGateway.key.trim()
46
+ if (!key.startsWith(EXPECTED_GATEWAY_KEY_PREFIX)) {
47
+ throw new ConfigurationError({
48
+ message: `[ai-gateway] Gateway keys must use the ${EXPECTED_GATEWAY_KEY_PREFIX}* format.`,
49
+ key: 'aiGateway.key',
50
+ })
51
+ }
52
+ return key
53
+ }
54
+
55
+ function normalizeRerankModelId(modelId: string): string {
56
+ const normalized = modelId.trim()
57
+ if (!normalized) {
58
+ throw new ConfigurationError({ message: 'Rerank model id is required.', key: 'rerankModelId' })
59
+ }
60
+
61
+ return normalized.startsWith(OPENROUTER_MODEL_PREFIX) ? normalized.slice(OPENROUTER_MODEL_PREFIX.length) : normalized
62
+ }
63
+
33
64
  const RerankResponseSchema = z
34
65
  .object({
35
66
  model: z.string().optional(),
@@ -101,10 +132,10 @@ export function makeRerankService(config: ResolvedLotaRuntimeConfig) {
101
132
 
102
133
  function resolveRerankModelId(modelId?: string): string {
103
134
  const explicit = modelId?.trim()
104
- if (explicit) return normalizeDirectOpenRouterModelId(explicit)
135
+ if (explicit) return normalizeRerankModelId(explicit)
105
136
 
106
137
  const configured = readConfiguredRerankModelId()
107
- if (configured) return normalizeDirectOpenRouterModelId(configured)
138
+ if (configured) return normalizeRerankModelId(configured)
108
139
 
109
140
  return OPENROUTER_FAST_RERANK_MODEL_ID
110
141
  }
@@ -116,21 +147,26 @@ export function makeRerankService(config: ResolvedLotaRuntimeConfig) {
116
147
  return { modelId: resolveRerankModelId(params.modelId), results: [] as RerankResultItem[] }
117
148
  }
118
149
 
119
- const apiKey = resolveOpenRouterApiKey(config.aiGateway.openRouterApiKey)
150
+ const gatewayKey = resolveAiGatewayKey(config)
151
+ const rerankUrl = resolveAiGatewayRerankUrl(config)
120
152
  const modelId = resolveRerankModelId(params.modelId)
121
153
  const topN = clampTopN(params.topN, params.documents.length)
122
154
  const requestBody = encodeRerankRequestBody({
123
155
  model: modelId,
124
156
  query: params.query,
125
- documents: params.documents.map((document) => document.text),
157
+ documents: params.documents.map((document) => ({ text: document.text })),
126
158
  top_n: topN,
127
159
  })
128
160
 
129
161
  const response = yield* Effect.tryPromise({
130
162
  try: () =>
131
- Bun.fetch(OPENROUTER_RERANK_URL, {
163
+ Bun.fetch(rerankUrl, {
132
164
  method: 'POST',
133
- headers: { Authorization: `Bearer ${apiKey}`, 'Content-Type': 'application/json' },
165
+ headers: {
166
+ Authorization: `Bearer ${gatewayKey}`,
167
+ [AI_GATEWAY_VIRTUAL_KEY_HEADER]: gatewayKey,
168
+ 'Content-Type': 'application/json',
169
+ },
134
170
  body: requestBody,
135
171
  }),
136
172
  catch: (cause) => toRerankServiceError('fetch-rerank', 'Failed to fetch rerank results.', cause),
@@ -144,7 +180,7 @@ export function makeRerankService(config: ResolvedLotaRuntimeConfig) {
144
180
  if (!response.ok) {
145
181
  return yield* new RerankServiceError({
146
182
  operation: 'fetch-rerank',
147
- message: `OpenRouter rerank failed (${response.status}): ${responseText}`,
183
+ message: `AI gateway rerank failed (${response.status}): ${responseText}`,
148
184
  cause: responseText,
149
185
  })
150
186
  }
@@ -6,6 +6,8 @@ interface HelperAgentOptionOverrides {
6
6
  temperature?: number
7
7
  }
8
8
 
9
+ const DEFAULT_HELPER_AGENT_MAX_RETRIES = 2
10
+
9
11
  export function resolveHelperAgentOptions(
10
12
  options: CreateHelperToolLoopAgentOptions,
11
13
  overrides?: HelperAgentOptionOverrides,
@@ -15,6 +17,6 @@ export function resolveHelperAgentOptions(
15
17
  maxOutputTokens: overrides?.maxOutputTokens ?? options.maxOutputTokens,
16
18
  temperature: overrides?.temperature ?? options.temperature,
17
19
  output: options.output,
18
- maxRetries: options.maxRetries,
20
+ maxRetries: options.maxRetries ?? DEFAULT_HELPER_AGENT_MAX_RETRIES,
19
21
  }
20
22
  }
@@ -50,9 +50,8 @@ function buildSandboxedWorkerRuntimeConfigEffect() {
50
50
  aiGateway: {
51
51
  url: env.aiGatewayUrl,
52
52
  key: Redacted.value(env.aiGatewayKey),
53
- ...(Option.isSome(env.openRouterApiKey)
54
- ? { openRouterApiKey: Redacted.value(env.openRouterApiKey.value) }
55
- : {}),
53
+ embeddingModel: env.aiEmbeddingModel,
54
+ maxConcurrency: env.aiGatewayMaxConcurrency,
56
55
  },
57
56
  s3: {
58
57
  endpoint: env.s3Endpoint,
@@ -47,7 +47,6 @@ const skillExtractionServices: SkillExtractionServices = {
47
47
  socialChatHistoryService: await resolve(SocialChatHistoryServiceTag),
48
48
  runtimeAdapters: await resolve(RuntimeAdaptersServiceTag),
49
49
  embeddingModel: workerRuntimeConfig.aiGateway.embeddingModel,
50
- openRouterApiKey: workerRuntimeConfig.aiGateway.openRouterApiKey,
51
50
  runPromise: (effect) => runtime.runPromise(effect),
52
51
  }
53
52
  const organizationLearningQueueJobService = await resolve(QueueJobServiceTag)
@@ -36,7 +36,6 @@ export interface SkillExtractionServices {
36
36
  socialChatHistoryService: Context.Service.Shape<typeof SocialChatHistoryServiceTag>
37
37
  runtimeAdapters: LotaRuntimeAdapters
38
38
  embeddingModel: string
39
- openRouterApiKey?: string
40
39
  runPromise: <A, E>(effect: Effect.Effect<A, E>) => Promise<A>
41
40
  }
42
41
 
@@ -351,11 +350,7 @@ export function runSkillExtraction(
351
350
  return Promise.resolve({ skipped: true, processedMessages: 0, extractedSkills: 0 })
352
351
  }
353
352
 
354
- const embeddings = new ProviderEmbeddings({
355
- modelId: services.embeddingModel,
356
- openRouterApiKey: services.openRouterApiKey,
357
- runPromise: services.runPromise,
358
- })
353
+ const embeddings = new ProviderEmbeddings({ modelId: services.embeddingModel, runPromise: services.runPromise })
359
354
  const withMemoryLock = runtimeAdapters.withWorkspaceMemoryLock
360
355
  const runExtraction = () =>
361
356
  services.runPromise(
@@ -16,7 +16,7 @@ import type { LotaRuntimeBackgroundCursor } from '../../runtime/runtime-extensio
16
16
  import type { SocialChatHistoryMessage } from '../../services/social-chat-history.service'
17
17
  import { unsafeDateFrom } from '../../utils/date-time'
18
18
 
19
- export interface ThreadDigestMessage {
19
+ interface ThreadDigestMessage {
20
20
  source: 'thread'
21
21
  sourceId: string
22
22
  role: 'system' | 'user' | 'assistant'
@@ -25,7 +25,7 @@ export interface ThreadDigestMessage {
25
25
  cursor: LotaRuntimeBackgroundCursor
26
26
  }
27
27
 
28
- export type SocialDigestMessage = Pick<
28
+ type SocialDigestMessage = Pick<
29
29
  SocialChatHistoryMessage,
30
30
  'source' | 'sourceId' | 'role' | 'parts' | 'metadata' | 'cursor'
31
31
  >
@@ -1,29 +0,0 @@
1
- import { createOpenAI } from '@ai-sdk/openai'
2
-
3
- import { ConfigurationError } from '../effect/errors'
4
-
5
- const DIRECT_OPENROUTER_BASE_URL = 'https://openrouter.ai/api/v1' as const
6
- const OPENROUTER_MODEL_PREFIX = 'openrouter/' as const
7
-
8
- export function resolveOpenRouterApiKey(openRouterApiKey: string | undefined): string {
9
- const key = openRouterApiKey?.trim()
10
- if (key) return key
11
-
12
- throw new ConfigurationError({
13
- message: 'Missing OpenRouter API key. Configure createLotaRuntime({ aiGateway: { openRouterApiKey } }).',
14
- key: 'aiGateway.openRouterApiKey',
15
- })
16
- }
17
-
18
- export function normalizeDirectOpenRouterModelId(modelId: string): string {
19
- const normalized = modelId.trim()
20
- if (!normalized) {
21
- throw new ConfigurationError({ message: 'OpenRouter model id is required.', key: 'openRouterModelId' })
22
- }
23
-
24
- return normalized.startsWith(OPENROUTER_MODEL_PREFIX) ? normalized.slice(OPENROUTER_MODEL_PREFIX.length) : normalized
25
- }
26
-
27
- export function getDirectOpenRouterProvider(openRouterApiKey?: string) {
28
- return createOpenAI({ baseURL: DIRECT_OPENROUTER_BASE_URL, apiKey: resolveOpenRouterApiKey(openRouterApiKey) })
29
- }