@lota-sdk/core 0.4.18 → 0.4.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/src/ai-gateway/ai-gateway.ts +10 -108
- package/src/db/memory-store.ts +2 -10
- package/src/db/memory-types.ts +3 -8
- package/src/db/memory.ts +1 -5
- package/src/embeddings/provider.ts +4 -7
- package/src/runtime/memory/memory-pipeline.ts +2 -2
- package/src/runtime/memory/memory-prompts-update.ts +2 -1
- package/src/runtime/runtime-config.ts +0 -6
- package/src/services/document-chunk.service.ts +2 -7
- package/src/services/learned-skill.service.ts +3 -11
- package/src/services/memory/rerank.service.ts +47 -11
- package/src/system-agents/helper-agent-options.ts +3 -1
- package/src/workers/bootstrap.ts +2 -3
- package/src/workers/organization-learning.worker.ts +0 -1
- package/src/workers/skill-extraction.runner.ts +1 -6
- package/src/openrouter/direct-provider.ts +0 -29
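Reading the hunks below as a whole: this release deletes the direct-OpenRouter fallback path (package/src/openrouter/direct-provider.ts is removed, and the openRouterApiKey option and OPENROUTER_API_KEY env key disappear from runtime config and every embeddings call site), routes reranking through the AI gateway's /v1/rerank endpoint instead of OpenRouter, tightens the memory-delta schema (relation targets become explicit nullables and array fields lose their implicit [] defaults, with matching prompt-rule updates), and gives helper agents a default of 2 retries.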
package/package.json
CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@lota-sdk/core",
-  "version": "0.4.18",
+  "version": "0.4.20",
   "type": "module",
   "main": "./src/index.ts",
   "types": "./src/index.ts",
@@ -31,7 +31,7 @@
   "@ai-sdk/openai": "^3.0.53",
   "@chat-adapter/slack": "^4.26.0",
   "@chat-adapter/state-ioredis": "^4.26.0",
-  "@lota-sdk/shared": "0.4.18",
+  "@lota-sdk/shared": "0.4.20",
   "@mendable/firecrawl-js": "^4.18.3",
   "@surrealdb/node": "^3.0.3",
   "ai": "^6.0.168",
package/src/ai-gateway/ai-gateway.ts
CHANGED

@@ -2,12 +2,11 @@ import { devToolsMiddleware } from '@ai-sdk/devtools'
 import { createOpenAI } from '@ai-sdk/openai'
 import { wrapEmbeddingModel, wrapLanguageModel } from 'ai'
 import type { LanguageModelMiddleware } from 'ai'
-import { Cause, Clock, Context, Duration, Effect,
+import { Cause, Clock, Context, Duration, Effect, Fiber, Layer, Semaphore } from 'effect'
 
 import { DEFAULT_AI_GATEWAY_URL } from '../config/constants'
 import { ERROR_TAGS, AiGenerationError, ConfigurationError } from '../effect/errors'
 import { RuntimeConfigServiceTag } from '../effect/services'
-import { getDirectOpenRouterProvider, normalizeDirectOpenRouterModelId } from '../openrouter/direct-provider'
 import { isRecord, readString } from '../utils/string'
 import { buildAiGatewayCacheHeaders } from './cache-headers'
 
@@ -21,7 +20,6 @@ type AiGatewayGenerateResult = Awaited<ReturnType<WrapStreamOptions['doGenerate'
 type AiGatewayStreamResult = Awaited<ReturnType<WrapStreamOptions['doStream']>>
 type AiGatewayGeneratedContent = AiGatewayGenerateResult['content'][number]
 type AiGatewayStreamPart = AiGatewayStreamResult['stream'] extends ReadableStream<infer T> ? T : never
-type AiGatewayProviderOptions = NonNullable<AiGatewayCallOptions['providerOptions']>
 type AiGatewayAttemptResult<A> = { source: string; result: A }
 // eslint-disable-next-line @typescript-eslint/no-redundant-type-constituents
 type AiGatewayRunFork = <A, E>(effect: Effect.Effect<A, E, never>) => Fiber.Fiber<A, E | unknown>
@@ -38,7 +36,7 @@ class AiGatewayStreamAttemptTag extends Context.Service<
 
 const EXPECTED_GATEWAY_KEY_PREFIX = 'sk-bf-'
 const AI_GATEWAY_VIRTUAL_KEY_HEADER = 'x-bf-vk'
-const AI_GATEWAY_TIMEOUT_MS =
+const AI_GATEWAY_TIMEOUT_MS = 180_000
 const AI_GATEWAY_STREAM_IDLE_TIMEOUT_MS = 30_000
 const AI_GATEWAY_MAX_RETRIES = 4
 const AI_GATEWAY_MAX_RETRY_DELAY_MS = 15_000
@@ -727,22 +725,6 @@ function isOpenRouterModel(modelId: string): boolean {
   return modelId.trim().toLowerCase().startsWith('openrouter/')
 }
 
-function hasDirectOpenRouterFallback(config: AiGatewayRuntimeConfig, modelId: string): boolean {
-  return isOpenRouterModel(modelId) && Boolean(config.aiGateway.openRouterApiKey?.trim())
-}
-
-function getDirectOpenRouterChatModel(config: AiGatewayRuntimeConfig, modelId: string): AiGatewayLanguageModel {
-  return getDirectOpenRouterProvider(config.aiGateway.openRouterApiKey).chat(normalizeDirectOpenRouterModelId(modelId))
-}
-
-function shouldFallbackToDirectOpenRouter(
-  config: AiGatewayRuntimeConfig,
-  modelId: string,
-  error: AiGenerationError,
-): boolean {
-  return hasDirectOpenRouterFallback(config, modelId) && isRetryableAiGatewayError(error)
-}
-
 function attemptAiGatewayGenerate(
   source: string,
   evaluate: () => PromiseLike<AiGatewayGenerateResult>,
@@ -771,28 +753,8 @@ function attemptAiGatewayStream(
   )
 }
 
-function attemptDirectOpenRouterGenerate(
-  config: AiGatewayRuntimeConfig,
-  modelId: string,
-  params: AiGatewayCallOptions,
-): Effect.Effect<AiGatewayAttemptResult<AiGatewayGenerateResult>, AiGenerationError> {
-  const model = getDirectOpenRouterChatModel(config, modelId)
-  return attemptAiGatewayGenerate('openrouter.generate', () => model.doGenerate(params))
-}
-
-function attemptDirectOpenRouterStream(
-  config: AiGatewayRuntimeConfig,
-  modelId: string,
-  params: AiGatewayCallOptions,
-): Effect.Effect<AiGatewayAttemptResult<AiGatewayStreamResult>, AiGenerationError> {
-  const model = getDirectOpenRouterChatModel(config, modelId)
-  return attemptAiGatewayStream('openrouter.stream', () => model.doStream(params))
-}
-
 function executeGenerateAttemptPlan(
-  config: AiGatewayRuntimeConfig,
   modelId: string,
-  params: AiGatewayCallOptions,
   doGenerate: () => PromiseLike<AiGatewayGenerateResult>,
 ): Effect.Effect<AiGatewayAttemptResult<AiGatewayGenerateResult>, AiGenerationError> {
   const primary = Layer.succeed(AiGatewayGenerateAttemptTag, {
@@ -803,35 +765,15 @@ function executeGenerateAttemptPlan(
     return yield* attempt.execute
   })
 
-  if (!hasDirectOpenRouterFallback(config, modelId)) {
-    return effect.pipe(
-      Effect.provide(primary),
-      Effect.withSpan('AiGateway.executeGeneratePlan'),
-      Effect.annotateSpans({ modelId, directOpenRouterFallbackEnabled: false }),
-    )
-  }
-
   return effect.pipe(
-    Effect.
-      ExecutionPlan.make(
-        { provide: primary },
-        {
-          provide: Layer.succeed(AiGatewayGenerateAttemptTag, {
-            execute: attemptDirectOpenRouterGenerate(config, modelId, params),
-          }),
-          while: (error: AiGenerationError) => shouldFallbackToDirectOpenRouter(config, modelId, error),
-        },
-      ),
-    ),
+    Effect.provide(primary),
     Effect.withSpan('AiGateway.executeGeneratePlan'),
-    Effect.annotateSpans({ modelId
+    Effect.annotateSpans({ modelId }),
   )
 }
 
 function executeStreamAttemptPlan(
-  config: AiGatewayRuntimeConfig,
   modelId: string,
-  params: AiGatewayCallOptions,
   doStream: () => PromiseLike<AiGatewayStreamResult>,
 ): Effect.Effect<AiGatewayAttemptResult<AiGatewayStreamResult>, AiGenerationError> {
   const primary = Layer.succeed(AiGatewayStreamAttemptTag, {
@@ -842,35 +784,13 @@ function executeStreamAttemptPlan(
     return yield* attempt.execute
   })
 
-  if (!hasDirectOpenRouterFallback(config, modelId)) {
-    return effect.pipe(
-      Effect.provide(primary),
-      Effect.withSpan('AiGateway.executeStreamPlan'),
-      Effect.annotateSpans({ modelId, directOpenRouterFallbackEnabled: false }),
-    )
-  }
-
   return effect.pipe(
-    Effect.
-      ExecutionPlan.make(
-        { provide: primary },
-        {
-          provide: Layer.succeed(AiGatewayStreamAttemptTag, {
-            execute: attemptDirectOpenRouterStream(config, modelId, params),
-          }),
-          while: (error: AiGenerationError) => shouldFallbackToDirectOpenRouter(config, modelId, error),
-        },
-      ),
-    ),
+    Effect.provide(primary),
     Effect.withSpan('AiGateway.executeStreamPlan'),
-    Effect.annotateSpans({ modelId
+    Effect.annotateSpans({ modelId }),
   )
 }
 
-function isOpenRouterOpenAIReasoningModel(modelId: string): boolean {
-  return modelId.trim().toLowerCase().startsWith('openrouter/openai/gpt-5')
-}
-
 function shouldCloseInjectedReasoning(chunk: AiGatewayStreamPart): boolean {
   switch (chunk.type) {
     case 'stream-start':
@@ -1015,7 +935,7 @@ function createAiGatewayLanguageModelMiddleware(
     const model = resolveProviderModel(resolvedDeps.gateway.provider, modelId, providerId)
     return resolvedDeps.runPromise(
       withAiGatewayConcurrency(
-        executeGenerateAttemptPlan(
+        executeGenerateAttemptPlan(modelId, () => model.doGenerate(params)).pipe(
           Effect.map(({ result }) => ({
             ...result,
             content: injectAiGatewayChatReasoningContent(
@@ -1032,7 +952,7 @@ function createAiGatewayLanguageModelMiddleware(
     const model = resolveProviderModel(resolvedDeps.gateway.provider, modelId, providerId)
     return resolvedDeps.runPromise(
       withAiGatewayStreamConcurrency(
-        executeStreamAttemptPlan(
+        executeStreamAttemptPlan(modelId, () => model.doStream(params)).pipe(
          Effect.map((attempt) => ({
            ...attempt,
            result: isReasoningEnabled(params)
@@ -1052,27 +972,9 @@ function createAiGatewayLanguageModelMiddleware(
 
 export function normalizeAiGatewayChatProviderOptions(
   params: AiGatewayCallOptions,
-  modelId?: string,
+  _modelId?: string,
 ): AiGatewayCallOptions {
-  const providerOptions = isRecord(params.providerOptions)
-    ? ({ ...params.providerOptions } as AiGatewayProviderOptions)
-    : ({} as AiGatewayProviderOptions)
-  const openaiOptions = isRecord(providerOptions.openai)
-    ? { ...providerOptions.openai }
-    : ({} as Record<string, unknown>)
-
-  if (modelId && isOpenRouterOpenAIReasoningModel(modelId) && openaiOptions.forceReasoning === undefined) {
-    openaiOptions.forceReasoning = true
-  }
-
-  if (providerOptions.openai === openaiOptions || Object.keys(openaiOptions).length === 0) {
-    return params
-  }
-
-  return {
-    ...params,
-    providerOptions: { ...providerOptions, openai: openaiOptions as AiGatewayProviderOptions['openai'] },
-  }
+  return params
 }
 
 function withAiGatewayDevTools<TModel extends AiGatewayLanguageModel>(model: TModel): TModel {
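With the direct-OpenRouter branch gone, executeGenerateAttemptPlan and executeStreamAttemptPlan provide a single attempt layer unconditionally rather than building an ExecutionPlan with a fallback step. A minimal sketch of that single-attempt shape, using hypothetical names rather than the package's real tags:

import { Context, Effect, Layer } from 'effect'

// Hypothetical stand-ins for AiGatewayGenerateAttemptTag and its layer.
class AttemptTag extends Context.Tag('AttemptTag')<AttemptTag, { execute: Effect.Effect<string> }>() {}

const plan = Effect.gen(function* () {
  const attempt = yield* AttemptTag
  return yield* attempt.execute
}).pipe(
  // Single attempt, provided unconditionally: no ExecutionPlan, no fallback step.
  Effect.provide(Layer.succeed(AttemptTag, { execute: Effect.succeed('gateway result') })),
  Effect.withSpan('AiGateway.executeGeneratePlan'),
  Effect.annotateSpans({ modelId: 'openrouter/openai/gpt-5' }), // illustrative model id
)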
package/src/db/memory-store.ts
CHANGED

@@ -1264,20 +1264,12 @@ export class SurrealMemoryStore {
 
 export function createMemoryStore(
   db: SurrealDBService,
-  options: {
-    embeddingModel: string
-    openRouterApiKey?: string
-    runPromise: <A, E>(effect: Effect.Effect<A, E>) => Promise<A>
-  },
+  options: { embeddingModel: string; runPromise: <A, E>(effect: Effect.Effect<A, E>) => Promise<A> },
   background: BackgroundWorker,
 ): SurrealMemoryStore {
   return new SurrealMemoryStore(
     db,
-    new ProviderEmbeddings({
-      modelId: options.embeddingModel,
-      openRouterApiKey: options.openRouterApiKey,
-      runPromise: options.runPromise,
-    }),
+    new ProviderEmbeddings({ modelId: options.embeddingModel, runPromise: options.runPromise }),
     background,
   )
 }
package/src/db/memory-types.ts
CHANGED

@@ -151,13 +151,13 @@ const MemoryDeltaRelationSchema = z
     targetMemoryId: z
       .string()
       .min(1)
-      .optional()
+      .nullable()
      .describe('Target existing memory id when relation points to existing memory.'),
     targetFactIndex: z
       .number()
       .int()
       .min(0)
-      .optional()
+      .nullable()
      .describe('Target newFacts index when relation points to another newly provided fact.'),
   })
   .strict()
@@ -179,17 +179,12 @@ const MemoryDeltaItemSchema = z
     classification: MemoryDeltaClassificationSchema.describe(
       'How this fact relates to existing memories: new, supersedes, contradicts, enriches, duplicate.',
     ),
-    targetMemoryIds: z
-      .array(z.string().min(1))
-      .default([])
-      .describe('Existing memory IDs that are directly related to this fact.'),
+    targetMemoryIds: z.array(z.string().min(1)).describe('Existing memory IDs that are directly related to this fact.'),
     invalidateTargetIds: z
       .array(z.string().min(1))
-      .default([])
      .describe('Subset of targetMemoryIds that should be deleted as obsolete/invalidated.'),
     relations: z
       .array(MemoryDeltaRelationSchema)
-      .default([])
      .describe('Explicit semantic relations from this fact to existing memories and/or other new facts by index.'),
     rationale: z.string().min(1).describe('Short rationale for the classification decision.'),
   })
package/src/db/memory.ts
CHANGED

@@ -79,11 +79,7 @@ export class Memory {
   ) {
     this.store = createMemoryStore(
       deps.db,
-      {
-        embeddingModel: deps.runtimeConfig.aiGateway.embeddingModel,
-        openRouterApiKey: deps.runtimeConfig.aiGateway.openRouterApiKey,
-        runPromise: deps.runPromise,
-      },
+      { embeddingModel: deps.runtimeConfig.aiGateway.embeddingModel, runPromise: deps.runPromise },
       deps.background,
     )
     this.runtimeConfig = deps.runtimeConfig
package/src/embeddings/provider.ts
CHANGED

@@ -1,8 +1,8 @@
 import { embed, embedMany } from 'ai'
 import { Schema, Effect } from 'effect'
 
+import { aiGatewayEmbeddingModel } from '../ai-gateway/ai-gateway'
 import { ERROR_TAGS, ConfigurationError } from '../effect/errors'
-import { getDirectOpenRouterProvider, normalizeDirectOpenRouterModelId } from '../openrouter/direct-provider'
 
 const SUPPORTED_EMBEDDING_PREFIXES = ['openai/', 'openrouter/'] as const
 
@@ -16,7 +16,6 @@ type ProviderEmbeddingsOptions = {
   embedManyFn?: typeof embedMany
   getCache?: () => SharedEmbeddingCache | null
   modelId: string
-  openRouterApiKey?: string
   /**
    * `runPromise` is required: callers must yield `RuntimeBridgeTag` in their
    * `Layer.effect` (or accept a `RuntimeBridge` dep) and pass its `runPromise`
@@ -25,7 +24,7 @@ type ProviderEmbeddingsOptions = {
   runPromise: <A, E>(effect: Effect.Effect<A, E>) => Promise<A>
 }
 
-function resolveEmbeddingModel(modelId: string, openRouterApiKey?: string) {
+function resolveEmbeddingModel(modelId: string) {
   const normalized = modelId.trim()
   if (!normalized) {
     throw new ConfigurationError({ message: '[embeddings-provider] Model id is required.', key: 'embeddingModelId' })
@@ -38,7 +37,7 @@ function resolveEmbeddingModel(modelId: string, openRouterApiKey?: string) {
     })
   }
 
-  return
+  return aiGatewayEmbeddingModel(normalized)
 }
 
 function normalizeEmbedding(embedding: readonly number[]): number[] {
@@ -69,7 +68,6 @@ export class ProviderEmbeddings {
   /** In-flight dedup: concurrent embedQuery calls for the same text share one API round-trip. */
   private readonly inflightEmbeddings = new Map<string, Promise<number[]>>()
 
-  private readonly openRouterApiKey: string | undefined
   private readonly runPromise: <A, E>(effect: Effect.Effect<A, E>) => Promise<A>
 
   constructor(options: ProviderEmbeddingsOptions) {
@@ -77,7 +75,6 @@ export class ProviderEmbeddings {
     this.embedManyFn = options.embedManyFn ?? embedMany
     this.getCache = options.getCache ?? (() => null)
     this.resolvedModelId = options.modelId
-    this.openRouterApiKey = options.openRouterApiKey
     this.runPromise = options.runPromise
   }
 
@@ -87,7 +84,7 @@ export class ProviderEmbeddings {
 
   private getModel() {
     if (!this._model) {
-      this._model = resolveEmbeddingModel(this.getModelId(), this.openRouterApiKey)
+      this._model = resolveEmbeddingModel(this.getModelId())
    }
    return this._model
  }
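Every ProviderEmbeddings call site in this diff converges on the same two-field options object: openRouterApiKey is gone, and embeddings resolve through the AI gateway via aiGatewayEmbeddingModel. A usage sketch, assuming the class is exported from the package root (it lives in src/embeddings/provider.ts) and letting Effect.runPromise stand in for the runtime's RuntimeBridge:

import { Effect } from 'effect'
import { ProviderEmbeddings } from '@lota-sdk/core' // export path assumed

const embeddings = new ProviderEmbeddings({
  modelId: 'openai/text-embedding-3-small', // the runtime-config default
  // The runtime normally supplies this via RuntimeBridgeTag; Effect.runPromise
  // is a stand-in so the sketch is self-contained.
  runPromise: (effect) => Effect.runPromise(effect),
})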
package/src/runtime/memory/memory-pipeline.ts
CHANGED

@@ -72,8 +72,8 @@ interface MemoryDeltaOutputLike {
 
 interface MemoryDeltaRelationLike<TRelation extends string = string> {
   relation: TRelation
-  targetMemoryId?: string
-  targetFactIndex?: number
+  targetMemoryId?: string | null
+  targetFactIndex?: number | null
 }
 
 interface MemoryActionAdd {
package/src/runtime/memory/memory-prompts-update.ts
CHANGED

@@ -14,9 +14,10 @@ Decide one classification per fact:
 Rules:
 - Return exactly one delta item per new fact, preserving order.
 - fact must match the corresponding newFacts entry verbatim.
+- Always include targetMemoryIds, invalidateTargetIds, and relations. Use [] when there are no values.
 - targetMemoryIds and invalidateTargetIds may only contain ids from existingMemories.
 - invalidateTargetIds must be a subset of targetMemoryIds.
-- In each relation item,
+- In each relation item, return both target fields. Set exactly one target to a real value and set the other target field to null.
 - targetFactIndex must be valid, must not point to the same fact index, and is only for relations to other new facts.
 - For supersedes/contradicts, include target memories when evidence exists.
 - If uncertain, prefer conservative output: classify as new with no targets.
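The schema and prompt changes in the last three files work together: array fields are always emitted (no implicit [] defaults) and each relation carries both target fields, one real and one null. A hypothetical delta item in that shape (all values, including the relation name, are illustrative):

const exampleDeltaItem = {
  fact: 'User prefers dark mode.', // must match the newFacts entry verbatim
  classification: 'enriches',
  targetMemoryIds: ['memory:abc123'], // always present; [] when there are no targets
  invalidateTargetIds: [], // subset of targetMemoryIds
  relations: [
    // both target fields present; exactly one is non-null
    { relation: 'enriches', targetMemoryId: 'memory:abc123', targetFactIndex: null },
  ],
  rationale: 'Adds detail to an existing preference memory.',
}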
package/src/runtime/runtime-config.ts
CHANGED

@@ -215,7 +215,6 @@ export const LotaRuntimeConfigSchema = z.object({
     url: nonEmptyStringSchema,
     key: nonEmptyStringSchema,
     embeddingModel: nonEmptyStringSchema.default('openai/text-embedding-3-small'),
-    openRouterApiKey: nonEmptyStringSchema.optional(),
     maxConcurrency: z.coerce.number().int().positive().default(8),
   }),
   s3: z.object({
@@ -293,7 +292,6 @@ export const LOTA_RUNTIME_ENV_KEYS = Object.freeze([
   'AI_GATEWAY_URL',
   'AI_GATEWAY_KEY',
   'AI_EMBEDDING_MODEL',
-  'OPENROUTER_API_KEY',
   'AI_GATEWAY_MAX_CONCURRENCY',
   'S3_ENDPOINT',
   'S3_BUCKET',
@@ -330,7 +328,6 @@ export const lotaRuntimeEnvConfig = Config.all({
   aiGatewayUrl: Config.string('AI_GATEWAY_URL').pipe(Config.withDefault(DEFAULT_AI_GATEWAY_URL)),
   aiGatewayKey: Config.redacted('AI_GATEWAY_KEY'),
   aiEmbeddingModel: Config.string('AI_EMBEDDING_MODEL').pipe(Config.withDefault('openai/text-embedding-3-small')),
-  openRouterApiKey: Config.redacted('OPENROUTER_API_KEY').pipe(Config.option),
   aiGatewayMaxConcurrency: Config.number('AI_GATEWAY_MAX_CONCURRENCY').pipe(Config.withDefault(8)),
   s3Endpoint: Config.string('S3_ENDPOINT'),
   s3Bucket: Config.string('S3_BUCKET'),
@@ -379,9 +376,6 @@ export function loadLotaRuntimeConfigFromEnv(
       key: Redacted.value(env.aiGatewayKey),
       embeddingModel: env.aiEmbeddingModel,
       maxConcurrency: env.aiGatewayMaxConcurrency,
-      ...(Option.isSome(env.openRouterApiKey)
-        ? { openRouterApiKey: Redacted.value(env.openRouterApiKey.value) }
-        : {}),
     },
     s3: {
       endpoint: env.s3Endpoint,
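Upgrade note implied by these hunks: OPENROUTER_API_KEY is dropped from LOTA_RUNTIME_ENV_KEYS and lotaRuntimeEnvConfig no longer reads it, so setting that variable no longer feeds aiGateway config. AI_GATEWAY_URL, AI_GATEWAY_KEY, AI_EMBEDDING_MODEL, and AI_GATEWAY_MAX_CONCURRENCY are the gateway-related keys that remain.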
package/src/services/document-chunk.service.ts
CHANGED

@@ -17,9 +17,8 @@ type DocumentChunkEmbeddings = {
 function createDocumentChunkEmbeddings(
   embeddingModel: string,
   runPromise: RuntimeBridge['runPromise'],
-  openRouterApiKey?: string,
 ): DocumentChunkEmbeddings {
-  const embeddings = new ProviderEmbeddings({ modelId: embeddingModel,
+  const embeddings = new ProviderEmbeddings({ modelId: embeddingModel, runPromise })
 
   return {
     embedDocuments: (documents) => embeddings.embedDocuments(documents),
@@ -213,11 +212,7 @@ export const DocumentChunkServiceLive = Layer.effect(
     const runtimeConfig = yield* RuntimeConfigServiceTag
     const bridge = yield* RuntimeBridgeTag
     return makeDocumentChunkService(
-      createDocumentChunkEmbeddings(
-        runtimeConfig.aiGateway.embeddingModel,
-        bridge.runPromise,
-        runtimeConfig.aiGateway.openRouterApiKey,
-      ),
+      createDocumentChunkEmbeddings(runtimeConfig.aiGateway.embeddingModel, bridge.runPromise),
     )
   }),
 )
package/src/services/learned-skill.service.ts
CHANGED

@@ -111,15 +111,11 @@ interface RetrieveForTurnParams {
 
 export function makeLearnedSkillService(
   db: SurrealDBService,
-  options: { embeddingModel: string; openRouterApiKey?: string; runPromise: RuntimeBridge['runPromise'] },
+  options: { embeddingModel: string; runPromise: RuntimeBridge['runPromise'] },
   skillExistsCache: Cache.Cache<string, boolean, LearnedSkillServiceError>,
   background: Context.Service.Shape<typeof BackgroundWorkServiceTag>,
 ) {
-  const embeddings = new ProviderEmbeddings({
-    modelId: options.embeddingModel,
-    openRouterApiKey: options.openRouterApiKey,
-    runPromise: options.runPromise,
-  })
+  const embeddings = new ProviderEmbeddings({ modelId: options.embeddingModel, runPromise: options.runPromise })
 
   const hasSkillsForAgent = (orgId: string, agentId: string) => Cache.get(skillExistsCache, `${orgId}:${agentId}`)
 
@@ -517,11 +513,7 @@ export const LearnedSkillServiceLive = Layer.effect(
     })
     return makeLearnedSkillService(
       db,
-      {
-        embeddingModel: runtimeConfig.aiGateway.embeddingModel,
-        openRouterApiKey: runtimeConfig.aiGateway.openRouterApiKey,
-        runPromise: bridge.runPromise,
-      },
+      { embeddingModel: runtimeConfig.aiGateway.embeddingModel, runPromise: bridge.runPromise },
       skillExistsCache,
       background,
     )
package/src/services/memory/rerank.service.ts
CHANGED

@@ -3,18 +3,19 @@ import * as Schema from 'effect/Schema'
 import { z } from 'zod'
 
 import { OPENROUTER_FAST_RERANK_MODEL_ID } from '../../config/model-constants'
-import { ERROR_TAGS } from '../../effect/errors'
+import { ConfigurationError, ERROR_TAGS } from '../../effect/errors'
 import { RuntimeConfigServiceTag } from '../../effect/services'
 import { toValidationError } from '../../effect/zod'
-import { normalizeDirectOpenRouterModelId, resolveOpenRouterApiKey } from '../../openrouter/direct-provider'
 import type { ResolvedLotaRuntimeConfig } from '../../runtime/runtime-config'
 
-const
+const AI_GATEWAY_VIRTUAL_KEY_HEADER = 'x-bf-vk' as const
+const EXPECTED_GATEWAY_KEY_PREFIX = 'sk-bf-' as const
+const OPENROUTER_MODEL_PREFIX = 'openrouter/' as const
 
 const RerankRequestBodySchema = Schema.Struct({
   model: Schema.String,
   query: Schema.String,
-  documents: Schema.Array(Schema.String),
+  documents: Schema.Array(Schema.Struct({ text: Schema.String })),
   top_n: Schema.Number,
 })
 
@@ -30,6 +31,36 @@ function toRerankServiceError(operation: string, message: string, cause: unknown
   return new RerankServiceError({ operation, message, cause })
 }
 
+function resolveAiGatewayRerankUrl(config: ResolvedLotaRuntimeConfig): string {
+  const trimmed = config.aiGateway.url.trim()
+  if (!trimmed) {
+    throw new ConfigurationError({ message: '[ai-gateway] AI gateway URL is required.', key: 'aiGateway.url' })
+  }
+  const normalized = trimmed.replace(/\/+$/, '')
+  const v1BaseUrl = normalized.endsWith('/v1') ? normalized : `${normalized}/v1`
+  return `${v1BaseUrl}/rerank`
+}
+
+function resolveAiGatewayKey(config: ResolvedLotaRuntimeConfig): string {
+  const key = config.aiGateway.key.trim()
+  if (!key.startsWith(EXPECTED_GATEWAY_KEY_PREFIX)) {
+    throw new ConfigurationError({
+      message: `[ai-gateway] Gateway keys must use the ${EXPECTED_GATEWAY_KEY_PREFIX}* format.`,
+      key: 'aiGateway.key',
+    })
+  }
+  return key
+}
+
+function normalizeRerankModelId(modelId: string): string {
+  const normalized = modelId.trim()
+  if (!normalized) {
+    throw new ConfigurationError({ message: 'Rerank model id is required.', key: 'rerankModelId' })
+  }
+
+  return normalized.startsWith(OPENROUTER_MODEL_PREFIX) ? normalized.slice(OPENROUTER_MODEL_PREFIX.length) : normalized
+}
+
 const RerankResponseSchema = z
   .object({
     model: z.string().optional(),
@@ -101,10 +132,10 @@ export function makeRerankService(config: ResolvedLotaRuntimeConfig) {
 
   function resolveRerankModelId(modelId?: string): string {
     const explicit = modelId?.trim()
-    if (explicit) return
+    if (explicit) return normalizeRerankModelId(explicit)
 
     const configured = readConfiguredRerankModelId()
-    if (configured) return
+    if (configured) return normalizeRerankModelId(configured)
 
     return OPENROUTER_FAST_RERANK_MODEL_ID
   }
@@ -116,21 +147,26 @@ export function makeRerankService(config: ResolvedLotaRuntimeConfig) {
       return { modelId: resolveRerankModelId(params.modelId), results: [] as RerankResultItem[] }
     }
 
-    const
+    const gatewayKey = resolveAiGatewayKey(config)
+    const rerankUrl = resolveAiGatewayRerankUrl(config)
     const modelId = resolveRerankModelId(params.modelId)
     const topN = clampTopN(params.topN, params.documents.length)
     const requestBody = encodeRerankRequestBody({
       model: modelId,
      query: params.query,
-      documents: params.documents.map((document) => document.text),
+      documents: params.documents.map((document) => ({ text: document.text })),
      top_n: topN,
    })
 
    const response = yield* Effect.tryPromise({
      try: () =>
-        Bun.fetch(
+        Bun.fetch(rerankUrl, {
          method: 'POST',
-          headers: {
+          headers: {
+            Authorization: `Bearer ${gatewayKey}`,
+            [AI_GATEWAY_VIRTUAL_KEY_HEADER]: gatewayKey,
+            'Content-Type': 'application/json',
+          },
          body: requestBody,
        }),
      catch: (cause) => toRerankServiceError('fetch-rerank', 'Failed to fetch rerank results.', cause),
@@ -144,7 +180,7 @@ export function makeRerankService(config: ResolvedLotaRuntimeConfig) {
     if (!response.ok) {
       return yield* new RerankServiceError({
         operation: 'fetch-rerank',
-        message: `
+        message: `AI gateway rerank failed (${response.status}): ${responseText}`,
        cause: responseText,
      })
    }
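The reworked service now talks to the AI gateway's rerank endpoint rather than OpenRouter directly. Roughly the request it produces, sketched with plain fetch and assumed config values (the real code uses Bun.fetch and pulls these from ResolvedLotaRuntimeConfig):

// Assumed values; in the service these come from config.aiGateway.
const gatewayUrl = 'https://gateway.example.com'
const gatewayKey = 'sk-bf-example' // keys must use the sk-bf-* prefix

// URL normalization mirrors resolveAiGatewayRerankUrl: strip trailing
// slashes, append /v1 when missing, then /rerank.
const base = gatewayUrl.trim().replace(/\/+$/, '')
const rerankUrl = `${base.endsWith('/v1') ? base : `${base}/v1`}/rerank`

const response = await fetch(rerankUrl, {
  method: 'POST',
  headers: {
    Authorization: `Bearer ${gatewayKey}`,
    'x-bf-vk': gatewayKey, // AI_GATEWAY_VIRTUAL_KEY_HEADER
    'Content-Type': 'application/json',
  },
  body: JSON.stringify({
    model: 'rerank-model-id', // hypothetical; an 'openrouter/' prefix would be stripped
    query: 'how do I reset my password?',
    documents: [{ text: 'Reset it from the account page.' }, { text: 'Unrelated text.' }], // objects now, not bare strings
    top_n: 1,
  }),
})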
package/src/system-agents/helper-agent-options.ts
CHANGED

@@ -6,6 +6,8 @@ interface HelperAgentOptionOverrides {
   temperature?: number
 }
 
+const DEFAULT_HELPER_AGENT_MAX_RETRIES = 2
+
 export function resolveHelperAgentOptions(
   options: CreateHelperToolLoopAgentOptions,
   overrides?: HelperAgentOptionOverrides,
@@ -15,6 +17,6 @@ export function resolveHelperAgentOptions(
     maxOutputTokens: overrides?.maxOutputTokens ?? options.maxOutputTokens,
     temperature: overrides?.temperature ?? options.temperature,
     output: options.output,
-    maxRetries: options.maxRetries,
+    maxRetries: options.maxRetries ?? DEFAULT_HELPER_AGENT_MAX_RETRIES,
   }
 }
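Worth noting on the helper-agent change: `??` only substitutes the new default of 2 when options.maxRetries is undefined or null, so an explicit `maxRetries: 0` still disables retries rather than being coerced to the default.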
package/src/workers/bootstrap.ts
CHANGED

@@ -50,9 +50,8 @@ function buildSandboxedWorkerRuntimeConfigEffect() {
     aiGateway: {
       url: env.aiGatewayUrl,
       key: Redacted.value(env.aiGatewayKey),
-
-
-        : {}),
+      embeddingModel: env.aiEmbeddingModel,
+      maxConcurrency: env.aiGatewayMaxConcurrency,
     },
     s3: {
       endpoint: env.s3Endpoint,
package/src/workers/organization-learning.worker.ts
CHANGED

@@ -47,7 +47,6 @@ const skillExtractionServices: SkillExtractionServices = {
   socialChatHistoryService: await resolve(SocialChatHistoryServiceTag),
   runtimeAdapters: await resolve(RuntimeAdaptersServiceTag),
   embeddingModel: workerRuntimeConfig.aiGateway.embeddingModel,
-  openRouterApiKey: workerRuntimeConfig.aiGateway.openRouterApiKey,
   runPromise: (effect) => runtime.runPromise(effect),
 }
 const organizationLearningQueueJobService = await resolve(QueueJobServiceTag)
package/src/workers/skill-extraction.runner.ts
CHANGED

@@ -36,7 +36,6 @@ export interface SkillExtractionServices {
   socialChatHistoryService: Context.Service.Shape<typeof SocialChatHistoryServiceTag>
   runtimeAdapters: LotaRuntimeAdapters
   embeddingModel: string
-  openRouterApiKey?: string
   runPromise: <A, E>(effect: Effect.Effect<A, E>) => Promise<A>
 }
 
@@ -351,11 +350,7 @@ export function runSkillExtraction(
     return Promise.resolve({ skipped: true, processedMessages: 0, extractedSkills: 0 })
   }
 
-  const embeddings = new ProviderEmbeddings({
-    modelId: services.embeddingModel,
-    openRouterApiKey: services.openRouterApiKey,
-    runPromise: services.runPromise,
-  })
+  const embeddings = new ProviderEmbeddings({ modelId: services.embeddingModel, runPromise: services.runPromise })
   const withMemoryLock = runtimeAdapters.withWorkspaceMemoryLock
   const runExtraction = () =>
     services.runPromise(
package/src/openrouter/direct-provider.ts
DELETED

@@ -1,29 +0,0 @@
-import { createOpenAI } from '@ai-sdk/openai'
-
-import { ConfigurationError } from '../effect/errors'
-
-const DIRECT_OPENROUTER_BASE_URL = 'https://openrouter.ai/api/v1' as const
-const OPENROUTER_MODEL_PREFIX = 'openrouter/' as const
-
-export function resolveOpenRouterApiKey(openRouterApiKey: string | undefined): string {
-  const key = openRouterApiKey?.trim()
-  if (key) return key
-
-  throw new ConfigurationError({
-    message: 'Missing OpenRouter API key. Configure createLotaRuntime({ aiGateway: { openRouterApiKey } }).',
-    key: 'aiGateway.openRouterApiKey',
-  })
-}
-
-export function normalizeDirectOpenRouterModelId(modelId: string): string {
-  const normalized = modelId.trim()
-  if (!normalized) {
-    throw new ConfigurationError({ message: 'OpenRouter model id is required.', key: 'openRouterModelId' })
-  }
-
-  return normalized.startsWith(OPENROUTER_MODEL_PREFIX) ? normalized.slice(OPENROUTER_MODEL_PREFIX.length) : normalized
-}
-
-export function getDirectOpenRouterProvider(openRouterApiKey?: string) {
-  return createOpenAI({ baseURL: DIRECT_OPENROUTER_BASE_URL, apiKey: resolveOpenRouterApiKey(openRouterApiKey) })
-}