@lota-sdk/core 0.4.42 → 0.4.43

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lota-sdk/core",
3
- "version": "0.4.42",
3
+ "version": "0.4.43",
4
4
  "files": [
5
5
  "src",
6
6
  "infrastructure/schema"
@@ -32,7 +32,7 @@
32
32
  "@ai-sdk/provider": "^3.0.9",
33
33
  "@chat-adapter/slack": "^4.26.0",
34
34
  "@chat-adapter/state-ioredis": "^4.26.0",
35
- "@lota-sdk/shared": "0.4.42",
35
+ "@lota-sdk/shared": "0.4.43",
36
36
  "@mendable/firecrawl-js": "^4.20.0",
37
37
  "@surrealdb/node": "^3.0.3",
38
38
  "ai": "^6.0.170",
package/src/ai/index.ts CHANGED
@@ -1,2 +1 @@
1
1
  export * from './definitions'
2
- export * from './embedding-cache'
@@ -4,7 +4,6 @@ export {
4
4
  OPENROUTER_GEMINI_PRO_MODEL_ID,
5
5
  OPENAI_HIGH_REASONING_PROVIDER_OPTIONS,
6
6
  OPENAI_REASONING_MODEL_ID,
7
- OPENROUTER_FAST_RERANK_MODEL_ID,
8
7
  OPENROUTER_FAST_REASONING_MODEL_ID,
9
8
  OPENROUTER_GEMINI_FLASH_MODEL_ID,
10
9
  OPENROUTER_HIGH_REASONING_PROVIDER_OPTIONS,
@@ -32,7 +32,6 @@ export const ERROR_TAGS = {
32
32
  CursorPaginationError: '@lota-sdk/core/CursorPaginationError',
33
33
  DatabaseError: '@lota-sdk/core/DatabaseError',
34
34
  DelegatedAgentError: '@lota-sdk/core/DelegatedAgentError',
35
- EmbeddingCacheError: '@lota-sdk/core/EmbeddingCacheError',
36
35
  EmbeddingProviderError: '@lota-sdk/core/EmbeddingProviderError',
37
36
  ExecutionPlanCacheError: '@lota-sdk/core/ExecutionPlanCacheError',
38
37
  FeedbackLoopServiceError: '@lota-sdk/core/FeedbackLoopServiceError',
@@ -73,6 +72,7 @@ export const ERROR_TAGS = {
73
72
  QueueWorkerError: '@lota-sdk/core/QueueWorkerError',
74
73
  ReadFilePartsError: '@lota-sdk/core/ReadFilePartsError',
75
74
  RedisError: '@lota-sdk/core/RedisError',
75
+ RedisJsonCacheError: '@lota-sdk/core/RedisJsonCacheError',
76
76
  RerankServiceError: '@lota-sdk/core/RerankServiceError',
77
77
  RuntimeLifecycleError: '@lota-sdk/core/RuntimeLifecycleError',
78
78
  SandboxedWorkerBootstrapError: '@lota-sdk/core/SandboxedWorkerBootstrapError',
@@ -7,15 +7,18 @@ import { openRouterEmbeddingModel } from './openrouter'
7
7
  const SUPPORTED_EMBEDDING_PREFIXES = ['openai/', 'openrouter/'] as const
8
8
  const SUPPORTED_BARE_EMBEDDING_MODEL_IDS = ['text-embedding-3-small'] as const
9
9
 
10
- type SharedEmbeddingCache = {
11
- get(model: string, text: string): Promise<number[] | null>
12
- set(model: string, text: string, embedding: number[]): Promise<void>
13
- }
10
+ /**
11
+ * Bound on the in-flight Promise dedup map. Concurrent embedQuery calls for
12
+ * the same text share a single API round-trip; this cap keeps the map from
13
+ * growing without bound under sustained load. FIFO eviction by insertion
14
+ * order — the oldest pending key is evicted before insertion when full. The
15
+ * pending Promise is not aborted; only its dedup slot is freed.
16
+ */
17
+ const MAX_INFLIGHT = 1000
14
18
 
15
19
  type ProviderEmbeddingsOptions = {
16
20
  embedFn?: typeof embed
17
21
  embedManyFn?: typeof embedMany
18
- getCache?: () => SharedEmbeddingCache | null
19
22
  modelId: string
20
23
  /**
21
24
  * `runPromise` is required: callers must yield `RuntimeBridgeTag` in their
@@ -67,7 +70,6 @@ function tryEmbeddingPromise<A>(
67
70
  export class ProviderEmbeddings {
68
71
  private readonly embedFn: typeof embed
69
72
  private readonly embedManyFn: typeof embedMany
70
- private readonly getCache: () => SharedEmbeddingCache | null
71
73
  private readonly resolvedModelId: string
72
74
  private _model: ReturnType<typeof resolveEmbeddingModel> | null = null
73
75
  /** In-flight dedup: concurrent embedQuery calls for the same text share one API round-trip. */
@@ -78,7 +80,6 @@ export class ProviderEmbeddings {
78
80
  constructor(options: ProviderEmbeddingsOptions) {
79
81
  this.embedFn = options.embedFn ?? embed
80
82
  this.embedManyFn = options.embedManyFn ?? embedMany
81
- this.getCache = options.getCache ?? (() => null)
82
83
  this.resolvedModelId = options.modelId
83
84
  this.runPromise = options.runPromise
84
85
  }
@@ -94,11 +95,12 @@ export class ProviderEmbeddings {
94
95
  return this._model
95
96
  }
96
97
 
97
- private loadCachedEmbedding(text: string): Promise<number[] | null> {
98
- const redisCache = this.getCache()
99
- if (!redisCache) return Promise.resolve(null)
100
-
101
- return redisCache.get(this.getModelId(), text)
98
+ private rememberInflight(key: string, promise: Promise<number[]>): void {
99
+ if (this.inflightEmbeddings.size >= MAX_INFLIGHT) {
100
+ const oldest = this.inflightEmbeddings.keys().next().value
101
+ if (oldest !== undefined) this.inflightEmbeddings.delete(oldest)
102
+ }
103
+ this.inflightEmbeddings.set(key, promise)
102
104
  }
103
105
 
104
106
  embedQuery(text: string): Promise<number[]> {
@@ -110,7 +112,11 @@ export class ProviderEmbeddings {
110
112
  if (pending) return pending
111
113
 
112
114
  const promise = this.runPromise(this.executeEmbedQueryEffect(input))
113
- this.inflightEmbeddings.set(dedupKey, promise)
115
+ this.rememberInflight(dedupKey, promise)
116
+ // `.then(_, _)` (not `.finally`): the dual-handler form swallows the
117
+ // rejection on the cleanup branch so it never surfaces as an unhandled
118
+ // rejection. The original promise is still returned to the caller, who
119
+ // is responsible for reacting to its rejection.
114
120
  void promise.then(
115
121
  () => this.inflightEmbeddings.delete(dedupKey),
116
122
  () => this.inflightEmbeddings.delete(dedupKey),
@@ -122,24 +128,10 @@ export class ProviderEmbeddings {
122
128
  private executeEmbedQueryEffect(input: string): Effect.Effect<number[], EmbeddingProviderError> {
123
129
  return Effect.gen(
124
130
  function* (this: ProviderEmbeddings) {
125
- const cached = yield* tryEmbeddingPromise('Failed to load cached query embedding.', () =>
126
- this.loadCachedEmbedding(input),
127
- )
128
- if (cached) {
129
- return cached
130
- }
131
-
132
131
  const result = yield* tryEmbeddingPromise('Failed to generate query embedding.', () =>
133
132
  this.embedFn({ model: this.getModel(), value: input, maxRetries: 2 }),
134
133
  )
135
- const embedding = normalizeEmbedding(result.embedding)
136
-
137
- const redisCache = this.getCache()
138
- if (redisCache) {
139
- void redisCache.set(this.getModelId(), input, embedding)
140
- }
141
-
142
- return embedding
134
+ return normalizeEmbedding(result.embedding)
143
135
  }.bind(this),
144
136
  ).pipe(Effect.withSpan('ProviderEmbeddings.executeEmbedQuery'))
145
137
  }
@@ -166,45 +158,12 @@ export class ProviderEmbeddings {
166
158
  ): Effect.Effect<number[][], EmbeddingProviderError> {
167
159
  return Effect.gen(
168
160
  function* (this: ProviderEmbeddings) {
169
- const embeddingsByText = new Map<string, number[]>()
170
- let missingTexts = [...uniqueTexts]
171
- const redisCache = this.getCache()
172
- const redisResults =
173
- redisCache && missingTexts.length > 0
174
- ? yield* Effect.all(
175
- missingTexts.map((text) =>
176
- tryEmbeddingPromise('Failed to load cached document embedding.', () =>
177
- redisCache.get(this.getModelId(), text),
178
- ).pipe(Effect.map((embedding) => ({ text, embedding }))),
179
- ),
180
- )
181
- : ([] as Array<{ text: string; embedding: number[] | null }>)
182
-
183
- if (redisCache && missingTexts.length > 0) {
184
- missingTexts = []
185
- for (const result of redisResults) {
186
- if (!result.embedding) {
187
- missingTexts.push(result.text)
188
- continue
189
- }
190
-
191
- embeddingsByText.set(result.text, result.embedding)
192
- }
193
- }
194
-
195
- if (missingTexts.length === 0) {
196
- return normalized.map((text) => (text ? (embeddingsByText.get(text) ?? []) : []))
197
- }
198
-
199
161
  const result = yield* tryEmbeddingPromise('Failed to generate document embeddings.', () =>
200
- this.embedManyFn({ model: this.getModel(), values: missingTexts, maxRetries: 2 }),
162
+ this.embedManyFn({ model: this.getModel(), values: uniqueTexts, maxRetries: 2 }),
201
163
  )
202
- missingTexts.forEach((text, index) => {
203
- const embedding = normalizeEmbedding(result.embeddings[index] ?? [])
204
- embeddingsByText.set(text, embedding)
205
- if (redisCache) {
206
- void redisCache.set(this.getModelId(), text, embedding)
207
- }
164
+ const embeddingsByText = new Map<string, number[]>()
165
+ uniqueTexts.forEach((text, index) => {
166
+ embeddingsByText.set(text, normalizeEmbedding(result.embeddings[index] ?? []))
208
167
  })
209
168
 
210
169
  return normalized.map((text) => (text ? (embeddingsByText.get(text) ?? []) : []))
@@ -2,6 +2,7 @@ import { createRedisConnectionManager } from './connection'
2
2
  import type { RedisConnectionManager } from './connection'
3
3
  export { DEFAULT_REDIS_OPTIONS, type RedisConnectionLogger } from './connection'
4
4
  export { withOrgMemoryLock, withOrgMemoryLockEffect } from './org-memory-lock'
5
+ export { RedisJsonCache, RedisJsonCacheError, RedisJsonCacheLive, RedisJsonCacheTag } from './redis-cache'
5
6
  export { withLeaseLock } from './redis-lease-lock'
6
7
  export {
7
8
  createThreadResumableContext,
@@ -0,0 +1,176 @@
1
+ import { Context, Effect, Layer, Schema } from 'effect'
2
+ import type IORedis from 'ioredis'
3
+
4
+ import { RuntimeBridgeTag } from '../ai-gateway/ai-gateway'
5
+ import { ERROR_TAGS } from '../effect/errors'
6
+ import { RedisServiceTag } from '../effect/services'
7
+
8
+ /**
9
+ * Bound on the in-flight Promise dedup map. Keeps memory usage predictable
10
+ * under sustained load — concurrent identical loads still share their
11
+ * Promise, but the map cannot grow without bound when ttls are long.
12
+ */
13
+ const MAX_INFLIGHT = 1000
14
+
15
+ export class RedisJsonCacheError extends Schema.TaggedErrorClass<RedisJsonCacheError>()(
16
+ ERROR_TAGS.RedisJsonCacheError,
17
+ {
18
+ message: Schema.String,
19
+ operation: Schema.Literals(['get', 'set', 'del', 'load']),
20
+ cause: Schema.optional(Schema.Defect),
21
+ },
22
+ ) {}
23
+
24
+ /**
25
+ * Schema accepted by `RedisJsonCache` methods. The codec must be fully
26
+ * self-contained — no remaining decoding/encoding services — so that
27
+ * `Schema.fromJsonString(...)` resolves at runtime without additional
28
+ * context. `Schema.Codec<T, E, never, never>` is the canonical "ready to
29
+ * run" form.
30
+ */
31
+ type RedisJsonCacheCodec<T> = Schema.Codec<T, unknown, never, never>
32
+
33
+ /**
34
+ * Generic Redis-backed JSON cache keyed by string. The codec is supplied per
35
+ * call so a single cache instance can hold values of multiple shapes (the
36
+ * caller is responsible for namespacing keys correctly). Errors propagate as
37
+ * `RedisJsonCacheError`; this cache deliberately does not fail-soft.
38
+ */
39
+ export class RedisJsonCache {
40
+ /** In-flight dedup: concurrent loads for the same key share a single round-trip. */
41
+ private readonly inflight = new Map<string, Promise<unknown>>()
42
+
43
+ constructor(
44
+ private readonly redis: IORedis,
45
+ private readonly runPromise: <A, E>(effect: Effect.Effect<A, E>) => Promise<A>,
46
+ ) {}
47
+
48
+ get<T>(key: string, schema: RedisJsonCacheCodec<T>): Promise<T | null> {
49
+ return this.runPromise(this.getEffect(key, schema))
50
+ }
51
+
52
+ set<T>(key: string, value: T, ttlSeconds: number, schema: RedisJsonCacheCodec<T>): Promise<void> {
53
+ return this.runPromise(this.setEffect(key, value, ttlSeconds, schema))
54
+ }
55
+
56
+ del(key: string): Promise<void> {
57
+ return this.runPromise(this.delEffect(key))
58
+ }
59
+
60
+ /**
61
+ * Read-through cache. On miss, invokes `loader` and stores the result with
62
+ * the supplied TTL. Concurrent calls with the same key share a single
63
+ * loader Promise via the in-flight map. The map is bounded — when full,
64
+ * the oldest entry (FIFO insertion order) is evicted before insert.
65
+ */
66
+ getOrSet<T>(key: string, ttlSeconds: number, schema: RedisJsonCacheCodec<T>, loader: () => Promise<T>): Promise<T> {
67
+ const existing = this.inflight.get(key) as Promise<T> | undefined
68
+ if (existing) return existing
69
+
70
+ const promise = this.runPromise(this.getOrSetEffect(key, ttlSeconds, schema, loader))
71
+ if (this.inflight.size >= MAX_INFLIGHT) {
72
+ const oldest = this.inflight.keys().next().value
73
+ if (oldest !== undefined) this.inflight.delete(oldest)
74
+ }
75
+ this.inflight.set(key, promise)
76
+ // `.then(_, _)` (not `.finally`): the dual-handler form swallows the
77
+ // rejection on the cleanup branch so it never surfaces as an unhandled
78
+ // rejection. The original promise is still returned to the caller, who
79
+ // is responsible for reacting to its rejection.
80
+ void promise.then(
81
+ () => this.inflight.delete(key),
82
+ () => this.inflight.delete(key),
83
+ )
84
+
85
+ return promise
86
+ }
87
+
88
+ private getOrSetEffect<T>(
89
+ key: string,
90
+ ttlSeconds: number,
91
+ schema: RedisJsonCacheCodec<T>,
92
+ loader: () => Promise<T>,
93
+ ): Effect.Effect<T, RedisJsonCacheError> {
94
+ return Effect.gen(
95
+ function* (this: RedisJsonCache) {
96
+ const cached = yield* this.getEffect(key, schema)
97
+ if (cached !== null) return cached
98
+
99
+ const fresh = yield* Effect.tryPromise({
100
+ try: () => loader(),
101
+ catch: (cause) =>
102
+ new RedisJsonCacheError({ message: 'Redis JSON cache loader failed.', operation: 'load', cause }),
103
+ })
104
+ yield* this.setEffect(key, fresh, ttlSeconds, schema)
105
+ return fresh
106
+ }.bind(this),
107
+ )
108
+ }
109
+
110
+ private getEffect<T>(key: string, schema: RedisJsonCacheCodec<T>): Effect.Effect<T | null, RedisJsonCacheError> {
111
+ const redis = this.redis
112
+ const decode = Schema.decodeUnknownSync(Schema.fromJsonString(schema)) as (raw: string) => T
113
+ return Effect.gen(function* () {
114
+ const cached = yield* Effect.tryPromise({
115
+ try: () => redis.getBuffer(key),
116
+ catch: (cause) => new RedisJsonCacheError({ message: 'Redis JSON cache get failed.', operation: 'get', cause }),
117
+ })
118
+
119
+ if (!cached) return null
120
+
121
+ return yield* Effect.try({
122
+ try: () => decode(cached.toString()),
123
+ catch: (cause) =>
124
+ new RedisJsonCacheError({ message: 'Redis JSON cache parse failed.', operation: 'get', cause }),
125
+ })
126
+ })
127
+ }
128
+
129
+ private setEffect<T>(
130
+ key: string,
131
+ value: T,
132
+ ttlSeconds: number,
133
+ schema: RedisJsonCacheCodec<T>,
134
+ ): Effect.Effect<void, RedisJsonCacheError> {
135
+ const redis = this.redis
136
+ const encode = Schema.encodeSync(Schema.fromJsonString(schema)) as (input: T) => string
137
+ return Effect.gen(function* () {
138
+ const serialized = yield* Effect.try({
139
+ try: () => encode(value),
140
+ catch: (cause) =>
141
+ new RedisJsonCacheError({ message: 'Redis JSON cache serialization failed.', operation: 'set', cause }),
142
+ })
143
+ yield* Effect.tryPromise({
144
+ try: () => redis.set(key, serialized, 'EX', ttlSeconds),
145
+ catch: (cause) => new RedisJsonCacheError({ message: 'Redis JSON cache set failed.', operation: 'set', cause }),
146
+ })
147
+ }).pipe(Effect.asVoid)
148
+ }
149
+
150
+ private delEffect(key: string): Effect.Effect<void, RedisJsonCacheError> {
151
+ const redis = this.redis
152
+ return Effect.tryPromise({
153
+ try: () => redis.del(key),
154
+ catch: (cause) => new RedisJsonCacheError({ message: 'Redis JSON cache del failed.', operation: 'del', cause }),
155
+ }).pipe(Effect.asVoid)
156
+ }
157
+ }
158
+
159
+ export class RedisJsonCacheTag extends Context.Service<RedisJsonCacheTag, RedisJsonCache>()(
160
+ '@lota-sdk/core/RedisJsonCache',
161
+ ) {}
162
+
163
+ /**
164
+ * `RedisJsonCache` needs a `runPromise` so its Promise-returning surface stays
165
+ * grounded in the host runtime. We yield `RuntimeBridgeTag` once at layer
166
+ * construction and capture the bridged `runPromise`; consumers never
167
+ * prop-drill it.
168
+ */
169
+ export const RedisJsonCacheLive = Layer.effect(
170
+ RedisJsonCacheTag,
171
+ Effect.gen(function* () {
172
+ const redis = yield* RedisServiceTag
173
+ const bridge = yield* RuntimeBridgeTag
174
+ return new RedisJsonCache(redis.getConnection(), bridge.runPromise)
175
+ }),
176
+ )
@@ -11,9 +11,9 @@ import type { Layer as LayerType } from 'effect'
11
11
  import { Layer } from 'effect'
12
12
 
13
13
  import type { AiGatewayModelsTag, AiGatewayTag, RuntimeBridgeTag } from '../ai-gateway/ai-gateway'
14
- import { EmbeddingCacheLive } from '../ai/embedding-cache'
15
14
  import type { buildInfrastructureLayer } from '../effect/layers'
16
15
  import { LotaQueuesLive } from '../queues/queues.service'
16
+ import { RedisJsonCacheLive } from '../redis/redis-cache'
17
17
  import { SharedThreadStreamSubscriberLive } from '../redis/stream-context'
18
18
  import { AgentActivityServiceLive } from '../services/agent-activity.service'
19
19
  import { AgentExecutorServiceLive } from '../services/agent-executor.service'
@@ -113,7 +113,7 @@ export function buildDomainServiceLayer(infrastructureLayer: InfrastructureLayer
113
113
  )
114
114
  const ctx0 = Layer.mergeAll(
115
115
  baseCtx,
116
- provide(Layer.mergeAll(EmbeddingCacheLive, FirecrawlLive, HelperModelLive), baseCtx),
116
+ provide(Layer.mergeAll(FirecrawlLive, HelperModelLive, RedisJsonCacheLive), baseCtx),
117
117
  tier0,
118
118
  )
119
119
 
@@ -237,16 +237,10 @@ export const LotaRuntimeConfigSchema = z.object({
237
237
  memory: z
238
238
  .object({
239
239
  searchK: z.coerce.number().int().positive().default(6),
240
- embeddingCacheTtlSeconds: z.coerce.number().int().positive().default(7200),
241
240
  rerankerStrategy: MemoryRerankerStrategySchema.default('rerank'),
242
241
  rerankerModelId: z.string().trim().min(1).default(AI_GATEWAY_FAST_RERANK_MODEL_ID),
243
242
  })
244
- .default({
245
- searchK: 6,
246
- embeddingCacheTtlSeconds: 7200,
247
- rerankerStrategy: 'rerank',
248
- rerankerModelId: AI_GATEWAY_FAST_RERANK_MODEL_ID,
249
- }),
243
+ .default({ searchK: 6, rerankerStrategy: 'rerank', rerankerModelId: AI_GATEWAY_FAST_RERANK_MODEL_ID }),
250
244
  threads: threadConfigSchema.default({}),
251
245
  agents: agentsConfigSchema,
252
246
  toolProviders: z.custom<ToolSet>(isToolSet, { error: 'toolProviders must be a tool registry object' }).optional(),
@@ -393,7 +393,7 @@ export function makePlanDeadlineService(deps: PlanDeadlineDeps) {
393
393
  return
394
394
  }
395
395
 
396
- const runCache = new Map<string, PlanRunRecord>()
396
+ const runLookup = new Map<string, PlanRunRecord>()
397
397
 
398
398
  const handleEntry = (entry: (typeof sweep.entries)[number]): Effect.Effect<void, PlanDeadlineError> =>
399
399
  Effect.gen(function* () {
@@ -403,9 +403,9 @@ export function makePlanDeadlineService(deps: PlanDeadlineDeps) {
403
403
  }
404
404
 
405
405
  const runIdStr = recordIdToString(entry.nodeRun.runId, TABLES.PLAN_RUN)
406
- const cachedRun = runCache.get(runIdStr)
406
+ const existing = runLookup.get(runIdStr)
407
407
  const run =
408
- cachedRun ??
408
+ existing ??
409
409
  (yield* db
410
410
  .findOne(TABLES.PLAN_RUN, { id: ensureRecordId(entry.nodeRun.runId, TABLES.PLAN_RUN) }, PlanRunSchema)
411
411
  .pipe(
@@ -414,7 +414,7 @@ export function makePlanDeadlineService(deps: PlanDeadlineDeps) {
414
414
  if (!run) {
415
415
  return
416
416
  }
417
- runCache.set(runIdStr, run)
417
+ runLookup.set(runIdStr, run)
418
418
 
419
419
  const dedupeKeyBase = `plan-deadline:${runIdStr}:${entry.nodeRun.nodeId}`
420
420
  const actionEffect =
@@ -1,127 +0,0 @@
1
- import { Context, Effect, Layer, Schema } from 'effect'
2
- import type IORedis from 'ioredis'
3
-
4
- import { RuntimeBridgeTag } from '../ai-gateway/ai-gateway'
5
- import { aiLogger } from '../config/logger'
6
- import { ERROR_TAGS } from '../effect/errors'
7
- import { RedisServiceTag, RuntimeConfigServiceTag } from '../effect/services'
8
- import { sha256Hex } from '../utils/crypto'
9
-
10
- export const DEFAULT_EMBEDDING_CACHE_TTL_SECONDS = 7200
11
- const EMBEDDING_CACHE_KEY_PREFIX = 'emb'
12
- const EmbeddingCacheJsonSchema = Schema.fromJsonString(Schema.Array(Schema.Number))
13
-
14
- class EmbeddingCacheError extends Schema.TaggedErrorClass<EmbeddingCacheError>()(ERROR_TAGS.EmbeddingCacheError, {
15
- message: Schema.String,
16
- operation: Schema.Literals(['get', 'set']),
17
- cause: Schema.optional(Schema.Defect),
18
- }) {}
19
-
20
- function decodeEmbeddingCacheValue(raw: string): number[] {
21
- return [...Schema.decodeUnknownSync(EmbeddingCacheJsonSchema)(raw)]
22
- }
23
-
24
- function encodeEmbeddingCacheValue(embedding: number[]): string {
25
- return Schema.encodeSync(EmbeddingCacheJsonSchema)(embedding)
26
- }
27
-
28
- export class EmbeddingCache {
29
- /** In-flight dedup: concurrent gets for the same key share a single Redis+API round-trip. */
30
- private readonly inflight = new Map<string, Promise<number[] | null>>()
31
-
32
- constructor(
33
- private redis: IORedis,
34
- private ttlSeconds: number,
35
- private readonly runPromise: <A, E = never>(effect: Effect.Effect<A, E>) => Promise<A>,
36
- ) {}
37
-
38
- private buildKey(model: string, text: string): string {
39
- const hash = sha256Hex(text)
40
- return `${EMBEDDING_CACHE_KEY_PREFIX}:${model}:${hash}`
41
- }
42
-
43
- private fetchFromRedisEffect(key: string): Effect.Effect<number[] | null> {
44
- const redis = this.redis
45
-
46
- return Effect.gen(function* () {
47
- const cached = yield* Effect.tryPromise({
48
- try: () => redis.getBuffer(key),
49
- catch: (cause) => new EmbeddingCacheError({ message: 'Embedding cache get failed.', operation: 'get', cause }),
50
- })
51
-
52
- if (!cached) return null
53
-
54
- return yield* Effect.try({
55
- try: () => decodeEmbeddingCacheValue(cached.toString()),
56
- catch: (cause) =>
57
- new EmbeddingCacheError({ message: 'Embedding cache parse failed.', operation: 'get', cause }),
58
- })
59
- }).pipe(
60
- Effect.catchTag(ERROR_TAGS.EmbeddingCacheError, (error) => {
61
- aiLogger.warn`${error.message}: ${error.cause}`
62
- return Effect.succeed<number[] | null>(null)
63
- }),
64
- )
65
- }
66
-
67
- get(model: string, text: string): Promise<number[] | null> {
68
- const key = this.buildKey(model, text)
69
-
70
- const pending = this.inflight.get(key)
71
- if (pending) return pending
72
-
73
- const promise = this.runPromise(this.fetchFromRedisEffect(key))
74
- this.inflight.set(key, promise)
75
- void promise.finally(() => this.inflight.delete(key))
76
-
77
- return promise
78
- }
79
-
80
- private setEffect(model: string, text: string, embedding: number[]): Effect.Effect<void> {
81
- const redis = this.redis
82
- const ttlSeconds = this.ttlSeconds
83
- const key = this.buildKey(model, text)
84
-
85
- return Effect.gen(function* () {
86
- const serialized = yield* Effect.try({
87
- try: () => encodeEmbeddingCacheValue(embedding),
88
- catch: (cause) =>
89
- new EmbeddingCacheError({ message: 'Embedding cache serialization failed.', operation: 'set', cause }),
90
- })
91
- yield* Effect.tryPromise({
92
- try: () => redis.set(key, serialized, 'EX', ttlSeconds),
93
- catch: (cause) => new EmbeddingCacheError({ message: 'Embedding cache set failed.', operation: 'set', cause }),
94
- })
95
- }).pipe(
96
- Effect.asVoid,
97
- Effect.catchTag(ERROR_TAGS.EmbeddingCacheError, (error) => {
98
- aiLogger.warn`${error.message}: ${error.cause}`
99
- return Effect.void
100
- }),
101
- )
102
- }
103
-
104
- set(model: string, text: string, embedding: number[]): Promise<void> {
105
- return this.runPromise(this.setEffect(model, text, embedding))
106
- }
107
- }
108
-
109
- export class EmbeddingCacheTag extends Context.Service<EmbeddingCacheTag, EmbeddingCache>()(
110
- '@lota-sdk/core/EmbeddingCache',
111
- ) {}
112
-
113
- /**
114
- * `EmbeddingCache` needs a `runPromise` to convert internal Effect chains into
115
- * the Promise API that AI SDK `embed` / `embedMany` expect. The layer yields
116
- * `RuntimeBridgeTag` once and captures it — callers never prop-drill
117
- * `runPromise` through this module.
118
- */
119
- export const EmbeddingCacheLive = Layer.effect(
120
- EmbeddingCacheTag,
121
- Effect.gen(function* () {
122
- const redis = yield* RedisServiceTag
123
- const config = yield* RuntimeConfigServiceTag
124
- const bridge = yield* RuntimeBridgeTag
125
- return new EmbeddingCache(redis.getConnection(), config.memory.embeddingCacheTtlSeconds, bridge.runPromise)
126
- }),
127
- )