@lota-sdk/core 0.4.41 → 0.4.43
- package/package.json +2 -2
- package/src/ai/index.ts +0 -1
- package/src/ai-gateway/ai-gateway.ts +4 -24
- package/src/ai-gateway/index.ts +0 -1
- package/src/config/model-constants.ts +0 -1
- package/src/effect/errors.ts +1 -1
- package/src/embeddings/provider.ts +24 -65
- package/src/redis/index.ts +1 -0
- package/src/redis/redis-cache.ts +176 -0
- package/src/runtime/domain-layer.ts +2 -2
- package/src/runtime/runtime-config.ts +1 -7
- package/src/services/plan/plan-deadline.service.ts +4 -4
- package/src/system-agents/context-compaction.agent.ts +0 -2
- package/src/system-agents/memory-reranker.agent.ts +0 -2
- package/src/system-agents/memory.agent.ts +0 -2
- package/src/system-agents/recent-activity-title-refiner.agent.ts +0 -2
- package/src/system-agents/regular-chat-memory-digest.agent.ts +0 -2
- package/src/system-agents/skill-extractor.agent.ts +0 -2
- package/src/system-agents/skill-manager.agent.ts +0 -2
- package/src/system-agents/thread-router.agent.ts +0 -2
- package/src/system-agents/title-generator.agent.ts +0 -2
- package/src/tools/research-topic.tool.ts +0 -2
- package/src/ai/embedding-cache.ts +0 -127
- package/src/ai-gateway/cache-headers.ts +0 -42
package/package.json
CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@lota-sdk/core",
-  "version": "0.4.41",
+  "version": "0.4.43",
   "files": [
     "src",
     "infrastructure/schema"

@@ -32,7 +32,7 @@
     "@ai-sdk/provider": "^3.0.9",
     "@chat-adapter/slack": "^4.26.0",
     "@chat-adapter/state-ioredis": "^4.26.0",
-    "@lota-sdk/shared": "0.4.41",
+    "@lota-sdk/shared": "0.4.43",
     "@mendable/firecrawl-js": "^4.20.0",
     "@surrealdb/node": "^3.0.3",
     "ai": "^6.0.170",
package/src/ai/index.ts
CHANGED

package/src/ai-gateway/ai-gateway.ts
CHANGED

@@ -10,7 +10,6 @@ import { ERROR_TAGS, AiGenerationError, ConfigurationError } from '../effect/errors'
 import { RuntimeConfigServiceTag } from '../effect/services'
 import { openRouterEmbeddingModel } from '../embeddings/openrouter'
 import { isRecord, readString } from '../utils/string'
-import { buildAiGatewayCacheHeaders } from './cache-headers'
 
 type AiGatewayChatResponse = { body?: unknown }
 type AiGatewayTransformParamsOptions = Parameters<NonNullable<LanguageModelMiddleware['transformParams']>>[0]

@@ -473,23 +472,6 @@ function withAiGatewayStreamIdleTimeout(
   })
 }
 
-function mergeAiGatewayHeaders(
-  existingHeaders: AiGatewayCallOptions['headers'] | undefined,
-  additionalHeaders: Record<string, string>,
-): Record<string, string> {
-  const merged = new Headers(existingHeaders as HeadersInit | undefined)
-  for (const [key, value] of Object.entries(additionalHeaders)) {
-    if (!merged.has(key)) {
-      merged.set(key, value)
-    }
-  }
-  return Object.fromEntries(merged.entries())
-}
-
-function withDefaultAiGatewayCacheHeaders(params: AiGatewayCallOptions): AiGatewayCallOptions {
-  return { ...params, headers: mergeAiGatewayHeaders(params.headers, buildAiGatewayCacheHeaders('lota-sdk')) }
-}
-
 function normalizeAiGatewayUrlEffect(value: string): Effect.Effect<string, ConfigurationError> {
   const trimmed = value.trim()
   if (!trimmed) {

@@ -1122,13 +1104,11 @@ function createAiGatewayLanguageModelMiddleware(
     specificationVersion: 'v3',
     transformParams: ({ params, type }) =>
       Promise.resolve(
-        withDefaultAiGatewayCacheHeaders(
-          addAiGatewayReasoningRawChunks(
-            normalizeAiGatewayJsonSchemas(
-              providerId === OPENAI_CHAT_PROVIDER_ID ? normalizeAiGatewayChatProviderOptions(params, modelId) : params,
-            ),
-            type,
+        addAiGatewayReasoningRawChunks(
+          normalizeAiGatewayJsonSchemas(
+            providerId === OPENAI_CHAT_PROVIDER_ID ? normalizeAiGatewayChatProviderOptions(params, modelId) : params,
           ),
+          type,
         ),
       ),
     wrapGenerate: ({ params }) => {
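A note on the removed merge helper: it leaned on the WHATWG Headers class for case-insensitive matching, so a caller-supplied `X-BF-Cache-Key` blocked the default `x-bf-cache-key`. A minimal standalone sketch of the same pattern (the `mergeIfAbsent` name is illustrative, not part of the package):

const mergeIfAbsent = (existing: HeadersInit | undefined, defaults: Record<string, string>): Record<string, string> => {
  const merged = new Headers(existing)
  for (const [key, value] of Object.entries(defaults)) {
    // Headers.has/set compare header names case-insensitively.
    if (!merged.has(key)) merged.set(key, value)
  }
  return Object.fromEntries(merged.entries())
}

// mergeIfAbsent({ 'X-Custom': 'a' }, { 'x-custom': 'b', 'x-extra': 'c' })
// => { 'x-custom': 'a', 'x-extra': 'c' }
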
package/src/ai-gateway/index.ts
CHANGED
package/src/effect/errors.ts
CHANGED

@@ -32,7 +32,6 @@ export const ERROR_TAGS = {
   CursorPaginationError: '@lota-sdk/core/CursorPaginationError',
   DatabaseError: '@lota-sdk/core/DatabaseError',
   DelegatedAgentError: '@lota-sdk/core/DelegatedAgentError',
-  EmbeddingCacheError: '@lota-sdk/core/EmbeddingCacheError',
   EmbeddingProviderError: '@lota-sdk/core/EmbeddingProviderError',
   ExecutionPlanCacheError: '@lota-sdk/core/ExecutionPlanCacheError',
   FeedbackLoopServiceError: '@lota-sdk/core/FeedbackLoopServiceError',

@@ -73,6 +72,7 @@ export const ERROR_TAGS = {
   QueueWorkerError: '@lota-sdk/core/QueueWorkerError',
   ReadFilePartsError: '@lota-sdk/core/ReadFilePartsError',
   RedisError: '@lota-sdk/core/RedisError',
+  RedisJsonCacheError: '@lota-sdk/core/RedisJsonCacheError',
   RerankServiceError: '@lota-sdk/core/RerankServiceError',
   RuntimeLifecycleError: '@lota-sdk/core/RuntimeLifecycleError',
   SandboxedWorkerBootstrapError: '@lota-sdk/core/SandboxedWorkerBootstrapError',
package/src/embeddings/provider.ts
CHANGED

@@ -7,15 +7,18 @@ import { openRouterEmbeddingModel } from './openrouter'
 const SUPPORTED_EMBEDDING_PREFIXES = ['openai/', 'openrouter/'] as const
 const SUPPORTED_BARE_EMBEDDING_MODEL_IDS = ['text-embedding-3-small'] as const
 
-…
-…
-…
-…
+/**
+ * Bound on the in-flight Promise dedup map. Concurrent embedQuery calls for
+ * the same text share a single API round-trip; this cap keeps the map from
+ * growing without bound under sustained load. FIFO eviction by insertion
+ * order — the oldest pending key is evicted before insertion when full. The
+ * pending Promise is not aborted; only its dedup slot is freed.
+ */
+const MAX_INFLIGHT = 1000
 
 type ProviderEmbeddingsOptions = {
   embedFn?: typeof embed
   embedManyFn?: typeof embedMany
-  getCache?: () => SharedEmbeddingCache | null
   modelId: string
   /**
    * `runPromise` is required: callers must yield `RuntimeBridgeTag` in their

@@ -67,7 +70,6 @@ function tryEmbeddingPromise<A>(
 export class ProviderEmbeddings {
   private readonly embedFn: typeof embed
   private readonly embedManyFn: typeof embedMany
-  private readonly getCache: () => SharedEmbeddingCache | null
   private readonly resolvedModelId: string
   private _model: ReturnType<typeof resolveEmbeddingModel> | null = null
   /** In-flight dedup: concurrent embedQuery calls for the same text share one API round-trip. */

@@ -78,7 +80,6 @@ export class ProviderEmbeddings {
   constructor(options: ProviderEmbeddingsOptions) {
     this.embedFn = options.embedFn ?? embed
     this.embedManyFn = options.embedManyFn ?? embedMany
-    this.getCache = options.getCache ?? (() => null)
     this.resolvedModelId = options.modelId
     this.runPromise = options.runPromise
   }

@@ -94,11 +95,12 @@ export class ProviderEmbeddings {
     return this._model
   }
 
-  private …
-  …
-  …
-  …
-  …
+  private rememberInflight(key: string, promise: Promise<number[]>): void {
+    if (this.inflightEmbeddings.size >= MAX_INFLIGHT) {
+      const oldest = this.inflightEmbeddings.keys().next().value
+      if (oldest !== undefined) this.inflightEmbeddings.delete(oldest)
+    }
+    this.inflightEmbeddings.set(key, promise)
   }
 
   embedQuery(text: string): Promise<number[]> {

@@ -110,7 +112,11 @@ export class ProviderEmbeddings {
     if (pending) return pending
 
     const promise = this.runPromise(this.executeEmbedQueryEffect(input))
-    this.…
+    this.rememberInflight(dedupKey, promise)
+    // `.then(_, _)` (not `.finally`): the dual-handler form swallows the
+    // rejection on the cleanup branch so it never surfaces as an unhandled
+    // rejection. The original promise is still returned to the caller, who
+    // is responsible for reacting to its rejection.
     void promise.then(
       () => this.inflightEmbeddings.delete(dedupKey),
       () => this.inflightEmbeddings.delete(dedupKey),

@@ -122,24 +128,10 @@ export class ProviderEmbeddings {
   private executeEmbedQueryEffect(input: string): Effect.Effect<number[], EmbeddingProviderError> {
     return Effect.gen(
       function* (this: ProviderEmbeddings) {
-        const cached = yield* tryEmbeddingPromise('Failed to load cached query embedding.', () =>
-          this.loadCachedEmbedding(input),
-        )
-        if (cached) {
-          return cached
-        }
-
         const result = yield* tryEmbeddingPromise('Failed to generate query embedding.', () =>
           this.embedFn({ model: this.getModel(), value: input, maxRetries: 2 }),
         )
-
-        const embedding = normalizeEmbedding(result.embedding)
-        const redisCache = this.getCache()
-        if (redisCache) {
-          void redisCache.set(this.getModelId(), input, embedding)
-        }
-
-        return embedding
+        return normalizeEmbedding(result.embedding)
       }.bind(this),
     ).pipe(Effect.withSpan('ProviderEmbeddings.executeEmbedQuery'))
   }

@@ -166,45 +158,12 @@
   ): Effect.Effect<number[][], EmbeddingProviderError> {
     return Effect.gen(
       function* (this: ProviderEmbeddings) {
-        const embeddingsByText = new Map<string, number[]>()
-        let missingTexts = [...uniqueTexts]
-        const redisCache = this.getCache()
-        const redisResults =
-          redisCache && missingTexts.length > 0
-            ? yield* Effect.all(
-                missingTexts.map((text) =>
-                  tryEmbeddingPromise('Failed to load cached document embedding.', () =>
-                    redisCache.get(this.getModelId(), text),
-                  ).pipe(Effect.map((embedding) => ({ text, embedding }))),
-                ),
-              )
-            : ([] as Array<{ text: string; embedding: number[] | null }>)
-
-        if (redisCache && missingTexts.length > 0) {
-          missingTexts = []
-          for (const result of redisResults) {
-            if (!result.embedding) {
-              missingTexts.push(result.text)
-              continue
-            }
-
-            embeddingsByText.set(result.text, result.embedding)
-          }
-        }
-
-        if (missingTexts.length === 0) {
-          return normalized.map((text) => (text ? (embeddingsByText.get(text) ?? []) : []))
-        }
-
         const result = yield* tryEmbeddingPromise('Failed to generate document embeddings.', () =>
-          this.embedManyFn({ model: this.getModel(), values: missingTexts, maxRetries: 2 }),
+          this.embedManyFn({ model: this.getModel(), values: uniqueTexts, maxRetries: 2 }),
         )
-
-        …
-        embeddingsByText.set(text, …
-        if (redisCache) {
-          void redisCache.set(this.getModelId(), text, embedding)
-        }
+        const embeddingsByText = new Map<string, number[]>()
+        uniqueTexts.forEach((text, index) => {
+          embeddingsByText.set(text, normalizeEmbedding(result.embeddings[index] ?? []))
         })
 
         return normalized.map((text) => (text ? (embeddingsByText.get(text) ?? []) : []))
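The bounded dedup map used by `rememberInflight` generalizes beyond embeddings. A minimal standalone sketch of the same pattern (the `InflightDedup` class is illustrative, not exported by the package):

class InflightDedup<V> {
  private readonly inflight = new Map<string, Promise<V>>()
  constructor(private readonly maxEntries = 1000) {}

  run(key: string, start: () => Promise<V>): Promise<V> {
    const pending = this.inflight.get(key)
    if (pending) return pending // concurrent callers share one round-trip

    const promise = start()
    if (this.inflight.size >= this.maxEntries) {
      // FIFO eviction: a Map preserves insertion order, so the first key is the oldest.
      const oldest = this.inflight.keys().next().value
      if (oldest !== undefined) this.inflight.delete(oldest)
    }
    this.inflight.set(key, promise)
    // Dual-handler cleanup so a rejected load never surfaces here as an unhandled rejection.
    void promise.then(
      () => this.inflight.delete(key),
      () => this.inflight.delete(key),
    )
    return promise
  }
}
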
package/src/redis/index.ts
CHANGED

@@ -2,6 +2,7 @@ import { createRedisConnectionManager } from './connection'
 import type { RedisConnectionManager } from './connection'
 export { DEFAULT_REDIS_OPTIONS, type RedisConnectionLogger } from './connection'
 export { withOrgMemoryLock, withOrgMemoryLockEffect } from './org-memory-lock'
+export { RedisJsonCache, RedisJsonCacheError, RedisJsonCacheLive, RedisJsonCacheTag } from './redis-cache'
 export { withLeaseLock } from './redis-lease-lock'
 export {
   createThreadResumableContext,
package/src/redis/redis-cache.ts
ADDED

@@ -0,0 +1,176 @@
+import { Context, Effect, Layer, Schema } from 'effect'
+import type IORedis from 'ioredis'
+
+import { RuntimeBridgeTag } from '../ai-gateway/ai-gateway'
+import { ERROR_TAGS } from '../effect/errors'
+import { RedisServiceTag } from '../effect/services'
+
+/**
+ * Bound on the in-flight Promise dedup map. Keeps memory usage predictable
+ * under sustained load — concurrent identical loads still share their
+ * Promise, but the map cannot grow without bound when ttls are long.
+ */
+const MAX_INFLIGHT = 1000
+
+export class RedisJsonCacheError extends Schema.TaggedErrorClass<RedisJsonCacheError>()(
+  ERROR_TAGS.RedisJsonCacheError,
+  {
+    message: Schema.String,
+    operation: Schema.Literals(['get', 'set', 'del', 'load']),
+    cause: Schema.optional(Schema.Defect),
+  },
+) {}
+
+/**
+ * Schema accepted by `RedisJsonCache` methods. The codec must be fully
+ * self-contained — no remaining decoding/encoding services — so that
+ * `Schema.fromJsonString(...)` resolves at runtime without additional
+ * context. `Schema.Codec<T, E, never, never>` is the canonical "ready to
+ * run" form.
+ */
+type RedisJsonCacheCodec<T> = Schema.Codec<T, unknown, never, never>
+
+/**
+ * Generic Redis-backed JSON cache keyed by string. The codec is supplied per
+ * call so a single cache instance can hold values of multiple shapes (the
+ * caller is responsible for namespacing keys correctly). Errors propagate as
+ * `RedisJsonCacheError`; this cache deliberately does not fail-soft.
+ */
+export class RedisJsonCache {
+  /** In-flight dedup: concurrent loads for the same key share a single round-trip. */
+  private readonly inflight = new Map<string, Promise<unknown>>()
+
+  constructor(
+    private readonly redis: IORedis,
+    private readonly runPromise: <A, E>(effect: Effect.Effect<A, E>) => Promise<A>,
+  ) {}
+
+  get<T>(key: string, schema: RedisJsonCacheCodec<T>): Promise<T | null> {
+    return this.runPromise(this.getEffect(key, schema))
+  }
+
+  set<T>(key: string, value: T, ttlSeconds: number, schema: RedisJsonCacheCodec<T>): Promise<void> {
+    return this.runPromise(this.setEffect(key, value, ttlSeconds, schema))
+  }
+
+  del(key: string): Promise<void> {
+    return this.runPromise(this.delEffect(key))
+  }
+
+  /**
+   * Read-through cache. On miss, invokes `loader` and stores the result with
+   * the supplied TTL. Concurrent calls with the same key share a single
+   * loader Promise via the in-flight map. The map is bounded — when full,
+   * the oldest entry (FIFO insertion order) is evicted before insert.
+   */
+  getOrSet<T>(key: string, ttlSeconds: number, schema: RedisJsonCacheCodec<T>, loader: () => Promise<T>): Promise<T> {
+    const existing = this.inflight.get(key) as Promise<T> | undefined
+    if (existing) return existing
+
+    const promise = this.runPromise(this.getOrSetEffect(key, ttlSeconds, schema, loader))
+    if (this.inflight.size >= MAX_INFLIGHT) {
+      const oldest = this.inflight.keys().next().value
+      if (oldest !== undefined) this.inflight.delete(oldest)
+    }
+    this.inflight.set(key, promise)
+    // `.then(_, _)` (not `.finally`): the dual-handler form swallows the
+    // rejection on the cleanup branch so it never surfaces as an unhandled
+    // rejection. The original promise is still returned to the caller, who
+    // is responsible for reacting to its rejection.
+    void promise.then(
+      () => this.inflight.delete(key),
+      () => this.inflight.delete(key),
+    )
+
+    return promise
+  }
+
+  private getOrSetEffect<T>(
+    key: string,
+    ttlSeconds: number,
+    schema: RedisJsonCacheCodec<T>,
+    loader: () => Promise<T>,
+  ): Effect.Effect<T, RedisJsonCacheError> {
+    return Effect.gen(
+      function* (this: RedisJsonCache) {
+        const cached = yield* this.getEffect(key, schema)
+        if (cached !== null) return cached
+
+        const fresh = yield* Effect.tryPromise({
+          try: () => loader(),
+          catch: (cause) =>
+            new RedisJsonCacheError({ message: 'Redis JSON cache loader failed.', operation: 'load', cause }),
+        })
+        yield* this.setEffect(key, fresh, ttlSeconds, schema)
+        return fresh
+      }.bind(this),
+    )
+  }
+
+  private getEffect<T>(key: string, schema: RedisJsonCacheCodec<T>): Effect.Effect<T | null, RedisJsonCacheError> {
+    const redis = this.redis
+    const decode = Schema.decodeUnknownSync(Schema.fromJsonString(schema)) as (raw: string) => T
+    return Effect.gen(function* () {
+      const cached = yield* Effect.tryPromise({
+        try: () => redis.getBuffer(key),
+        catch: (cause) => new RedisJsonCacheError({ message: 'Redis JSON cache get failed.', operation: 'get', cause }),
+      })
+
+      if (!cached) return null
+
+      return yield* Effect.try({
+        try: () => decode(cached.toString()),
+        catch: (cause) =>
+          new RedisJsonCacheError({ message: 'Redis JSON cache parse failed.', operation: 'get', cause }),
+      })
+    })
+  }
+
+  private setEffect<T>(
+    key: string,
+    value: T,
+    ttlSeconds: number,
+    schema: RedisJsonCacheCodec<T>,
+  ): Effect.Effect<void, RedisJsonCacheError> {
+    const redis = this.redis
+    const encode = Schema.encodeSync(Schema.fromJsonString(schema)) as (input: T) => string
+    return Effect.gen(function* () {
+      const serialized = yield* Effect.try({
+        try: () => encode(value),
+        catch: (cause) =>
+          new RedisJsonCacheError({ message: 'Redis JSON cache serialization failed.', operation: 'set', cause }),
+      })
+      yield* Effect.tryPromise({
+        try: () => redis.set(key, serialized, 'EX', ttlSeconds),
+        catch: (cause) => new RedisJsonCacheError({ message: 'Redis JSON cache set failed.', operation: 'set', cause }),
+      })
+    }).pipe(Effect.asVoid)
+  }
+
+  private delEffect(key: string): Effect.Effect<void, RedisJsonCacheError> {
+    const redis = this.redis
+    return Effect.tryPromise({
+      try: () => redis.del(key),
+      catch: (cause) => new RedisJsonCacheError({ message: 'Redis JSON cache del failed.', operation: 'del', cause }),
+    }).pipe(Effect.asVoid)
+  }
+}
+
+export class RedisJsonCacheTag extends Context.Service<RedisJsonCacheTag, RedisJsonCache>()(
+  '@lota-sdk/core/RedisJsonCache',
+) {}
+
+/**
+ * `RedisJsonCache` needs a `runPromise` so its Promise-returning surface stays
+ * grounded in the host runtime. We yield `RuntimeBridgeTag` once at layer
+ * construction and capture the bridged `runPromise`; consumers never
+ * prop-drill it.
+ */
+export const RedisJsonCacheLive = Layer.effect(
+  RedisJsonCacheTag,
+  Effect.gen(function* () {
+    const redis = yield* RedisServiceTag
+    const bridge = yield* RuntimeBridgeTag
+    return new RedisJsonCache(redis.getConnection(), bridge.runPromise)
+  }),
+)
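A hypothetical read-through call against the new cache might look like this. The `UserPrefs` shape, key, and `fetchPrefsFromDb` loader are invented for illustration; `Schema.Struct` and `Schema.String` come from the same effect Schema module the file already uses:

import { Schema } from 'effect'

// Invented value shape; any fully self-contained codec works.
const UserPrefs = Schema.Struct({ theme: Schema.String, locale: Schema.String })

declare const cache: RedisJsonCache // obtained by yielding RedisJsonCacheTag
declare function fetchPrefsFromDb(id: string): Promise<{ theme: string; locale: string }>

const prefs = await cache.getOrSet(
  'user-prefs:user_123', // caller owns key namespacing
  3600,                  // TTL in seconds, applied on the write-back
  UserPrefs,
  () => fetchPrefsFromDb('user_123'), // invoked only on a cache miss
)
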
package/src/runtime/domain-layer.ts
CHANGED

@@ -11,9 +11,9 @@ import type { Layer as LayerType } from 'effect'
 import { Layer } from 'effect'
 
 import type { AiGatewayModelsTag, AiGatewayTag, RuntimeBridgeTag } from '../ai-gateway/ai-gateway'
-import { EmbeddingCacheLive } from '../ai/embedding-cache'
 import type { buildInfrastructureLayer } from '../effect/layers'
 import { LotaQueuesLive } from '../queues/queues.service'
+import { RedisJsonCacheLive } from '../redis/redis-cache'
 import { SharedThreadStreamSubscriberLive } from '../redis/stream-context'
 import { AgentActivityServiceLive } from '../services/agent-activity.service'
 import { AgentExecutorServiceLive } from '../services/agent-executor.service'

@@ -113,7 +113,7 @@ export function buildDomainServiceLayer(infrastructureLayer: InfrastructureLayer
   )
   const ctx0 = Layer.mergeAll(
     baseCtx,
-    provide(Layer.mergeAll(FirecrawlLive, HelperModelLive, EmbeddingCacheLive), baseCtx),
+    provide(Layer.mergeAll(FirecrawlLive, HelperModelLive, RedisJsonCacheLive), baseCtx),
     tier0,
   )
package/src/runtime/runtime-config.ts
CHANGED

@@ -237,16 +237,10 @@ export const LotaRuntimeConfigSchema = z.object({
   memory: z
     .object({
       searchK: z.coerce.number().int().positive().default(6),
-      embeddingCacheTtlSeconds: z.coerce.number().int().positive().default(7200),
       rerankerStrategy: MemoryRerankerStrategySchema.default('rerank'),
       rerankerModelId: z.string().trim().min(1).default(AI_GATEWAY_FAST_RERANK_MODEL_ID),
     })
-    .default({
-      searchK: 6,
-      embeddingCacheTtlSeconds: 7200,
-      rerankerStrategy: 'rerank',
-      rerankerModelId: AI_GATEWAY_FAST_RERANK_MODEL_ID,
-    }),
+    .default({ searchK: 6, rerankerStrategy: 'rerank', rerankerModelId: AI_GATEWAY_FAST_RERANK_MODEL_ID }),
   threads: threadConfigSchema.default({}),
   agents: agentsConfigSchema,
   toolProviders: z.custom<ToolSet>(isToolSet, { error: 'toolProviders must be a tool registry object' }).optional(),
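Note that removing embeddingCacheTtlSeconds from the object also forces it out of the `.default(...)` literal, since zod type-checks the whole-object default against the schema's input. A minimal zod sketch of that pattern (not the package's schema):

import { z } from 'zod'

const memory = z
  .object({ searchK: z.coerce.number().int().positive().default(6) })
  .default({ searchK: 6 })

memory.parse(undefined) // => { searchK: 6 } (the whole-object fallback applies)
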
package/src/services/plan/plan-deadline.service.ts
CHANGED

@@ -393,7 +393,7 @@ export function makePlanDeadlineService(deps: PlanDeadlineDeps) {
       return
     }
 
-    const …
+    const runLookup = new Map<string, PlanRunRecord>()
 
     const handleEntry = (entry: (typeof sweep.entries)[number]): Effect.Effect<void, PlanDeadlineError> =>
       Effect.gen(function* () {

@@ -403,9 +403,9 @@ export function makePlanDeadlineService(deps: PlanDeadlineDeps) {
         }
 
         const runIdStr = recordIdToString(entry.nodeRun.runId, TABLES.PLAN_RUN)
-        const …
+        const existing = runLookup.get(runIdStr)
         const run =
-          …
+          existing ??
           (yield* db
             .findOne(TABLES.PLAN_RUN, { id: ensureRecordId(entry.nodeRun.runId, TABLES.PLAN_RUN) }, PlanRunSchema)
             .pipe(

@@ -414,7 +414,7 @@ export function makePlanDeadlineService(deps: PlanDeadlineDeps) {
         if (!run) {
          return
         }
-        …
+        runLookup.set(runIdStr, run)
 
         const dedupeKeyBase = `plan-deadline:${runIdStr}:${entry.nodeRun.nodeId}`
         const actionEffect =
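The `runLookup` map is a per-sweep memo so repeated entries for the same plan run hit the database once. The same idiom in plain form (the `PlanRun` type and names are illustrative):

type PlanRun = { id: string; deadlineAt?: string }

const memo = new Map<string, PlanRun>()

async function getRun(id: string, load: (id: string) => Promise<PlanRun | null>): Promise<PlanRun | null> {
  const hit = memo.get(id)
  if (hit) return hit
  const run = await load(id)
  if (run) memo.set(id, run) // only successful lookups are memoized
  return run
}
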
package/src/system-agents/context-compaction.agent.ts
CHANGED

@@ -2,7 +2,6 @@ import type { CreateHelperToolLoopAgentOptions } from '@lota-sdk/shared'
 import { ToolLoopAgent } from 'ai'
 
 import type { AiGatewayModels } from '../ai-gateway/ai-gateway'
-import { buildAiGatewayDirectCacheHeaders } from '../ai-gateway/cache-headers'
 import { OPENROUTER_STRUCTURED_HELPER_MODEL_ID } from '../config/model-constants'
 import { resolveHelperAgentOptions } from './helper-agent-options'
 

@@ -33,7 +32,6 @@ export function makeContextCompactionAgentFactory(models: AiGatewayModels) {
     new ToolLoopAgent({
       id: 'context-compaction',
       model: models.chatModel(OPENROUTER_STRUCTURED_HELPER_MODEL_ID),
-      headers: buildAiGatewayDirectCacheHeaders('lota-sdk'),
       ...resolveHelperAgentOptions(options, { instructions: CONTEXT_COMPACTION_PROMPT }),
     })
 }

package/src/system-agents/memory-reranker.agent.ts
CHANGED

@@ -2,7 +2,6 @@ import type { CreateHelperToolLoopAgentOptions } from '@lota-sdk/shared'
 import { ToolLoopAgent } from 'ai'
 
 import type { AiGatewayModels } from '../ai-gateway/ai-gateway'
-import { buildAiGatewayDirectCacheHeaders } from '../ai-gateway/cache-headers'
 import {
   OPENROUTER_LOW_REASONING_PROVIDER_OPTIONS,
   OPENROUTER_STRUCTURED_HELPER_MODEL_ID,

@@ -34,7 +33,6 @@ export function makeMemoryRerankerAgentFactory(models: AiGatewayModels) {
     new ToolLoopAgent({
       id: 'memory-reranker',
       model: models.chatModel(OPENROUTER_STRUCTURED_HELPER_MODEL_ID),
-      headers: buildAiGatewayDirectCacheHeaders('lota-sdk'),
       providerOptions: OPENROUTER_LOW_REASONING_PROVIDER_OPTIONS,
       ...resolveHelperAgentOptions(options),
     })

package/src/system-agents/memory.agent.ts
CHANGED

@@ -2,7 +2,6 @@ import type { CreateHelperToolLoopAgentOptions } from '@lota-sdk/shared'
 import { ToolLoopAgent } from 'ai'
 
 import type { AiGatewayModels } from '../ai-gateway/ai-gateway'
-import { buildAiGatewayDirectCacheHeaders } from '../ai-gateway/cache-headers'
 import {
   OPENROUTER_STRUCTURED_HELPER_MODEL_ID,
   OPENROUTER_HIGH_REASONING_PROVIDER_OPTIONS,

@@ -54,7 +53,6 @@ export function makeOrgMemoryAgentFactory(models: AiGatewayModels) {
     new ToolLoopAgent({
       id: 'org-memory',
       model: models.chatModel(OPENROUTER_STRUCTURED_HELPER_MODEL_ID),
-      headers: buildAiGatewayDirectCacheHeaders('lota-sdk'),
       providerOptions: OPENROUTER_HIGH_REASONING_PROVIDER_OPTIONS,
       ...resolveHelperAgentOptions(options),
     })

package/src/system-agents/recent-activity-title-refiner.agent.ts
CHANGED

@@ -2,7 +2,6 @@ import type { CreateHelperToolLoopAgentOptions } from '@lota-sdk/shared'
 import { ToolLoopAgent } from 'ai'
 
 import type { AiGatewayModels } from '../ai-gateway/ai-gateway'
-import { buildAiGatewayDirectCacheHeaders } from '../ai-gateway/cache-headers'
 import type { ResolvedAgentConfig } from '../config/agent-defaults'
 import { OPENROUTER_STRUCTURED_HELPER_MODEL_ID } from '../config/model-constants'
 import { resolveHelperAgentOptions } from './helper-agent-options'

@@ -78,7 +77,6 @@ export function makeRecentActivityTitleRefinerAgentFactory(models: AiGatewayModels)
     new ToolLoopAgent({
       id: 'recent-activity-title-refiner',
       model: models.chatModel(OPENROUTER_STRUCTURED_HELPER_MODEL_ID),
-      headers: buildAiGatewayDirectCacheHeaders('lota-sdk'),
       ...resolveHelperAgentOptions(options, {
         instructions: buildRecentActivityTitleRefinerPrompt(agentConfig),
         maxOutputTokens: RECENT_ACTIVITY_TITLE_MAX_TOKENS,

package/src/system-agents/regular-chat-memory-digest.agent.ts
CHANGED

@@ -2,7 +2,6 @@ import type { CreateHelperToolLoopAgentOptions } from '@lota-sdk/shared'
 import { ToolLoopAgent } from 'ai'
 
 import type { AiGatewayModels } from '../ai-gateway/ai-gateway'
-import { buildAiGatewayDirectCacheHeaders } from '../ai-gateway/cache-headers'
 import {
   OPENROUTER_STRUCTURED_HELPER_MODEL_ID,
   OPENROUTER_HIGH_REASONING_PROVIDER_OPTIONS,

@@ -29,7 +28,6 @@ export function makeRegularChatMemoryDigestAgentFactory(models: AiGatewayModels)
     new ToolLoopAgent({
       id: 'regular-chat-memory-digest',
       model: models.chatModel(OPENROUTER_STRUCTURED_HELPER_MODEL_ID),
-      headers: buildAiGatewayDirectCacheHeaders('lota-sdk'),
       providerOptions: OPENROUTER_HIGH_REASONING_PROVIDER_OPTIONS,
       ...resolveHelperAgentOptions(options, {
         instructions: regularChatMemoryDigestPrompt,

package/src/system-agents/skill-extractor.agent.ts
CHANGED

@@ -3,7 +3,6 @@ import { ToolLoopAgent } from 'ai'
 import { z } from 'zod'
 
 import type { AiGatewayModels } from '../ai-gateway/ai-gateway'
-import { buildAiGatewayDirectCacheHeaders } from '../ai-gateway/cache-headers'
 import {
   OPENROUTER_STRUCTURED_HELPER_MODEL_ID,
   OPENROUTER_HIGH_REASONING_PROVIDER_OPTIONS,

@@ -47,7 +46,6 @@ export function makeSkillExtractorAgentFactory(models: AiGatewayModels) {
     new ToolLoopAgent({
       id: 'skill-extractor',
       model: models.chatModel(OPENROUTER_STRUCTURED_HELPER_MODEL_ID),
-      headers: buildAiGatewayDirectCacheHeaders('lota-sdk'),
       providerOptions: OPENROUTER_HIGH_REASONING_PROVIDER_OPTIONS,
       ...resolveHelperAgentOptions(options, {
         instructions: skillExtractorPrompt,

package/src/system-agents/skill-manager.agent.ts
CHANGED

@@ -3,7 +3,6 @@ import { ToolLoopAgent } from 'ai'
 import { z } from 'zod'
 
 import type { AiGatewayModels } from '../ai-gateway/ai-gateway'
-import { buildAiGatewayDirectCacheHeaders } from '../ai-gateway/cache-headers'
 import {
   OPENROUTER_STRUCTURED_HELPER_MODEL_ID,
   OPENROUTER_HIGH_REASONING_PROVIDER_OPTIONS,

@@ -71,7 +70,6 @@ export function makeSkillManagerAgentFactory(models: AiGatewayModels) {
     new ToolLoopAgent({
       id: 'skill-manager',
       model: models.chatModel(OPENROUTER_STRUCTURED_HELPER_MODEL_ID),
-      headers: buildAiGatewayDirectCacheHeaders('lota-sdk'),
       providerOptions: OPENROUTER_HIGH_REASONING_PROVIDER_OPTIONS,
       ...resolveHelperAgentOptions(options, {
         instructions: skillManagerPrompt,

package/src/system-agents/thread-router.agent.ts
CHANGED

@@ -3,7 +3,6 @@ import { Effect } from 'effect'
 import { z } from 'zod'
 
 import type { AiGatewayModels } from '../ai-gateway/ai-gateway'
-import { buildAiGatewayDirectCacheHeaders } from '../ai-gateway/cache-headers'
 import type { ResolvedAgentConfig } from '../config/agent-defaults'
 import { chatLogger } from '../config/logger'
 import type { ValidationError } from '../effect/errors'

@@ -182,7 +181,6 @@ function generateRouterObjectEffect<TSchema extends z.ZodTypeAny>(params: {
     try: () =>
       generateObject({
         model: params.aiGatewayModels.chatModel(modelId),
-        headers: buildAiGatewayDirectCacheHeaders('lota-sdk'),
         schema: params.schema,
         system: params.system,
         prompt: params.prompt,

package/src/system-agents/title-generator.agent.ts
CHANGED

@@ -2,7 +2,6 @@ import type { CreateHelperToolLoopAgentOptions } from '@lota-sdk/shared'
 import { ToolLoopAgent } from 'ai'
 
 import type { AiGatewayModels } from '../ai-gateway/ai-gateway'
-import { buildAiGatewayDirectCacheHeaders } from '../ai-gateway/cache-headers'
 import {
   OPENROUTER_FAST_REASONING_MODEL_ID,
   OPENROUTER_MINIMAL_REASONING_PROVIDER_OPTIONS,

@@ -35,7 +34,6 @@ export function makeThreadTitleGeneratorAgentFactory(models: AiGatewayModels) {
     new ToolLoopAgent({
       id: 'thread-title-generator',
       model: models.chatModel(OPENROUTER_FAST_REASONING_MODEL_ID),
-      headers: buildAiGatewayDirectCacheHeaders('lota-sdk'),
       providerOptions: OPENROUTER_MINIMAL_REASONING_PROVIDER_OPTIONS,
       ...resolveHelperAgentOptions(options, {
         instructions: THREAD_TITLE_GENERATOR_PROMPT,

package/src/tools/research-topic.tool.ts
CHANGED

@@ -3,7 +3,6 @@ import type { Effect } from 'effect'
 
 import type { AiGatewayModels } from '../ai-gateway/ai-gateway'
 import { aiGatewayChatModel, getDefaultAiGatewayRunPromise } from '../ai-gateway/ai-gateway'
-import { buildAiGatewayStrictSemanticCacheHeaders } from '../ai-gateway/cache-headers'
 import {
   OPENROUTER_FAST_REASONING_MODEL_ID,
   OPENROUTER_LOW_REASONING_PROVIDER_OPTIONS,

@@ -28,7 +27,6 @@ export const researchTopicTool = createDelegatedAgentToolWithContext<ResearchTop
       ? aiGatewayModels.chatModel(OPENROUTER_FAST_REASONING_MODEL_ID)
       : aiGatewayChatModel(OPENROUTER_FAST_REASONING_MODEL_ID),
     providerOptions: OPENROUTER_LOW_REASONING_PROVIDER_OPTIONS,
-    headers: buildAiGatewayStrictSemanticCacheHeaders('researchTopic'),
     instructions: RESEARCHER_PROMPT,
     createTools: ({ firecrawl, runPromise }) => ({
      searchWeb: searchWebTool.create({ firecrawl, runPromise }),
package/src/ai/embedding-cache.ts
REMOVED

@@ -1,127 +0,0 @@
-import { Context, Effect, Layer, Schema } from 'effect'
-import type IORedis from 'ioredis'
-
-import { RuntimeBridgeTag } from '../ai-gateway/ai-gateway'
-import { aiLogger } from '../config/logger'
-import { ERROR_TAGS } from '../effect/errors'
-import { RedisServiceTag, RuntimeConfigServiceTag } from '../effect/services'
-import { sha256Hex } from '../utils/crypto'
-
-export const DEFAULT_EMBEDDING_CACHE_TTL_SECONDS = 7200
-const EMBEDDING_CACHE_KEY_PREFIX = 'emb'
-const EmbeddingCacheJsonSchema = Schema.fromJsonString(Schema.Array(Schema.Number))
-
-class EmbeddingCacheError extends Schema.TaggedErrorClass<EmbeddingCacheError>()(ERROR_TAGS.EmbeddingCacheError, {
-  message: Schema.String,
-  operation: Schema.Literals(['get', 'set']),
-  cause: Schema.optional(Schema.Defect),
-}) {}
-
-function decodeEmbeddingCacheValue(raw: string): number[] {
-  return [...Schema.decodeUnknownSync(EmbeddingCacheJsonSchema)(raw)]
-}
-
-function encodeEmbeddingCacheValue(embedding: number[]): string {
-  return Schema.encodeSync(EmbeddingCacheJsonSchema)(embedding)
-}
-
-export class EmbeddingCache {
-  /** In-flight dedup: concurrent gets for the same key share a single Redis+API round-trip. */
-  private readonly inflight = new Map<string, Promise<number[] | null>>()
-
-  constructor(
-    private redis: IORedis,
-    private ttlSeconds: number,
-    private readonly runPromise: <A, E = never>(effect: Effect.Effect<A, E>) => Promise<A>,
-  ) {}
-
-  private buildKey(model: string, text: string): string {
-    const hash = sha256Hex(text)
-    return `${EMBEDDING_CACHE_KEY_PREFIX}:${model}:${hash}`
-  }
-
-  private fetchFromRedisEffect(key: string): Effect.Effect<number[] | null> {
-    const redis = this.redis
-
-    return Effect.gen(function* () {
-      const cached = yield* Effect.tryPromise({
-        try: () => redis.getBuffer(key),
-        catch: (cause) => new EmbeddingCacheError({ message: 'Embedding cache get failed.', operation: 'get', cause }),
-      })
-
-      if (!cached) return null
-
-      return yield* Effect.try({
-        try: () => decodeEmbeddingCacheValue(cached.toString()),
-        catch: (cause) =>
-          new EmbeddingCacheError({ message: 'Embedding cache parse failed.', operation: 'get', cause }),
-      })
-    }).pipe(
-      Effect.catchTag(ERROR_TAGS.EmbeddingCacheError, (error) => {
-        aiLogger.warn`${error.message}: ${error.cause}`
-        return Effect.succeed<number[] | null>(null)
-      }),
-    )
-  }
-
-  get(model: string, text: string): Promise<number[] | null> {
-    const key = this.buildKey(model, text)
-
-    const pending = this.inflight.get(key)
-    if (pending) return pending
-
-    const promise = this.runPromise(this.fetchFromRedisEffect(key))
-    this.inflight.set(key, promise)
-    void promise.finally(() => this.inflight.delete(key))
-
-    return promise
-  }
-
-  private setEffect(model: string, text: string, embedding: number[]): Effect.Effect<void> {
-    const redis = this.redis
-    const ttlSeconds = this.ttlSeconds
-    const key = this.buildKey(model, text)
-
-    return Effect.gen(function* () {
-      const serialized = yield* Effect.try({
-        try: () => encodeEmbeddingCacheValue(embedding),
-        catch: (cause) =>
-          new EmbeddingCacheError({ message: 'Embedding cache serialization failed.', operation: 'set', cause }),
-      })
-      yield* Effect.tryPromise({
-        try: () => redis.set(key, serialized, 'EX', ttlSeconds),
-        catch: (cause) => new EmbeddingCacheError({ message: 'Embedding cache set failed.', operation: 'set', cause }),
-      })
-    }).pipe(
-      Effect.asVoid,
-      Effect.catchTag(ERROR_TAGS.EmbeddingCacheError, (error) => {
-        aiLogger.warn`${error.message}: ${error.cause}`
-        return Effect.void
-      }),
-    )
-  }
-
-  set(model: string, text: string, embedding: number[]): Promise<void> {
-    return this.runPromise(this.setEffect(model, text, embedding))
-  }
-}
-
-export class EmbeddingCacheTag extends Context.Service<EmbeddingCacheTag, EmbeddingCache>()(
-  '@lota-sdk/core/EmbeddingCache',
-) {}
-
-/**
- * `EmbeddingCache` needs a `runPromise` to convert internal Effect chains into
- * the Promise API that AI SDK `embed` / `embedMany` expect. The layer yields
- * `RuntimeBridgeTag` once and captures it — callers never prop-drill
- * `runPromise` through this module.
- */
-export const EmbeddingCacheLive = Layer.effect(
-  EmbeddingCacheTag,
-  Effect.gen(function* () {
-    const redis = yield* RedisServiceTag
-    const config = yield* RuntimeConfigServiceTag
-    const bridge = yield* RuntimeBridgeTag
-    return new EmbeddingCache(redis.getConnection(), config.memory.embeddingCacheTtlSeconds, bridge.runPromise)
-  }),
-)
package/src/ai-gateway/cache-headers.ts
REMOVED

@@ -1,42 +0,0 @@
-const AI_GATEWAY_CACHE_KEY_HEADER = 'x-bf-cache-key'
-const AI_GATEWAY_CACHE_TTL_HEADER = 'x-bf-cache-ttl'
-const AI_GATEWAY_CACHE_THRESHOLD_HEADER = 'x-bf-cache-threshold'
-const AI_GATEWAY_CACHE_TYPE_HEADER = 'x-bf-cache-type'
-
-export const AI_GATEWAY_STRICT_SEMANTIC_CACHE_THRESHOLD = 0.975
-
-export type AiGatewayCacheType = 'direct' | 'semantic'
-
-export function toAiGatewayCacheKeyPart(value: string): string {
-  const normalized = value
-    .trim()
-    .toLowerCase()
-    .replace(/[^a-z0-9:_-]+/g, '-')
-    .replace(/-+/g, '-')
-  return normalized.replace(/^-+|-+$/g, '') || 'request'
-}
-
-export function buildAiGatewayCacheHeaders(
-  cacheKey: string,
-  ttl?: string,
-  threshold?: number,
-  cacheType?: AiGatewayCacheType,
-): Record<string, string> {
-  const headers: Record<string, string> = { [AI_GATEWAY_CACHE_KEY_HEADER]: cacheKey }
-  if (ttl) headers[AI_GATEWAY_CACHE_TTL_HEADER] = ttl
-  if (typeof threshold === 'number') headers[AI_GATEWAY_CACHE_THRESHOLD_HEADER] = String(threshold)
-  if (cacheType) headers[AI_GATEWAY_CACHE_TYPE_HEADER] = cacheType
-  return headers
-}
-
-export function buildAiGatewayDirectCacheHeaders(cacheKey: string, ttl?: string): Record<string, string> {
-  return buildAiGatewayCacheHeaders(cacheKey, ttl, undefined, 'direct')
-}
-
-export function buildAiGatewayStrictSemanticCacheHeaders(
-  cacheKey: string,
-  ttl?: string,
-  threshold = AI_GATEWAY_STRICT_SEMANTIC_CACHE_THRESHOLD,
-): Record<string, string> {
-  return buildAiGatewayCacheHeaders(cacheKey, ttl, threshold, 'semantic')
-}
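For readers tracking this removal: as read directly off the deleted source above, the builders produced plain header records and the key normalizer was lossy but deterministic. For example:

toAiGatewayCacheKeyPart('  Research: Topic!! ')
// => 'research:-topic' (lowercased, disallowed runs collapsed to '-', edges trimmed)

buildAiGatewayDirectCacheHeaders('lota-sdk')
// => { 'x-bf-cache-key': 'lota-sdk', 'x-bf-cache-type': 'direct' }

buildAiGatewayStrictSemanticCacheHeaders('researchTopic')
// => { 'x-bf-cache-key': 'researchTopic', 'x-bf-cache-threshold': '0.975', 'x-bf-cache-type': 'semantic' }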