@lota-sdk/core 0.4.41 → 0.4.43
- package/package.json +2 -2
- package/src/ai/index.ts +0 -1
- package/src/ai-gateway/ai-gateway.ts +4 -24
- package/src/ai-gateway/index.ts +0 -1
- package/src/config/model-constants.ts +0 -1
- package/src/effect/errors.ts +1 -1
- package/src/embeddings/provider.ts +24 -65
- package/src/redis/index.ts +1 -0
- package/src/redis/redis-cache.ts +176 -0
- package/src/runtime/domain-layer.ts +2 -2
- package/src/runtime/runtime-config.ts +1 -7
- package/src/services/plan/plan-deadline.service.ts +4 -4
- package/src/system-agents/context-compaction.agent.ts +0 -2
- package/src/system-agents/memory-reranker.agent.ts +0 -2
- package/src/system-agents/memory.agent.ts +0 -2
- package/src/system-agents/recent-activity-title-refiner.agent.ts +0 -2
- package/src/system-agents/regular-chat-memory-digest.agent.ts +0 -2
- package/src/system-agents/skill-extractor.agent.ts +0 -2
- package/src/system-agents/skill-manager.agent.ts +0 -2
- package/src/system-agents/thread-router.agent.ts +0 -2
- package/src/system-agents/title-generator.agent.ts +0 -2
- package/src/tools/research-topic.tool.ts +0 -2
- package/src/ai/embedding-cache.ts +0 -127
- package/src/ai-gateway/cache-headers.ts +0 -42
package/package.json
CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@lota-sdk/core",
-  "version": "0.4.41",
+  "version": "0.4.43",
   "files": [
     "src",
     "infrastructure/schema"

@@ -32,7 +32,7 @@
     "@ai-sdk/provider": "^3.0.9",
     "@chat-adapter/slack": "^4.26.0",
     "@chat-adapter/state-ioredis": "^4.26.0",
-    "@lota-sdk/shared": "0.4.41",
+    "@lota-sdk/shared": "0.4.43",
     "@mendable/firecrawl-js": "^4.20.0",
     "@surrealdb/node": "^3.0.3",
     "ai": "^6.0.170",
package/src/ai/index.ts
CHANGED

package/src/ai-gateway/ai-gateway.ts
CHANGED

@@ -10,7 +10,6 @@ import { ERROR_TAGS, AiGenerationError, ConfigurationError } from '../effect/errors'
 import { RuntimeConfigServiceTag } from '../effect/services'
 import { openRouterEmbeddingModel } from '../embeddings/openrouter'
 import { isRecord, readString } from '../utils/string'
-import { buildAiGatewayCacheHeaders } from './cache-headers'
 
 type AiGatewayChatResponse = { body?: unknown }
 type AiGatewayTransformParamsOptions = Parameters<NonNullable<LanguageModelMiddleware['transformParams']>>[0]

@@ -473,23 +472,6 @@ function withAiGatewayStreamIdleTimeout(
   })
 }
 
-function mergeAiGatewayHeaders(
-  existingHeaders: AiGatewayCallOptions['headers'] | undefined,
-  additionalHeaders: Record<string, string>,
-): Record<string, string> {
-  const merged = new Headers(existingHeaders as HeadersInit | undefined)
-  for (const [key, value] of Object.entries(additionalHeaders)) {
-    if (!merged.has(key)) {
-      merged.set(key, value)
-    }
-  }
-  return Object.fromEntries(merged.entries())
-}
-
-function withDefaultAiGatewayCacheHeaders(params: AiGatewayCallOptions): AiGatewayCallOptions {
-  return { ...params, headers: mergeAiGatewayHeaders(params.headers, buildAiGatewayCacheHeaders('lota-sdk')) }
-}
-
 function normalizeAiGatewayUrlEffect(value: string): Effect.Effect<string, ConfigurationError> {
   const trimmed = value.trim()
   if (!trimmed) {

@@ -1122,13 +1104,11 @@ function createAiGatewayLanguageModelMiddleware(
     specificationVersion: 'v3',
     transformParams: ({ params, type }) =>
       Promise.resolve(
-        withDefaultAiGatewayCacheHeaders(
-          addAiGatewayReasoningRawChunks(
-            normalizeAiGatewayJsonSchemas(
-              providerId === OPENAI_CHAT_PROVIDER_ID ? normalizeAiGatewayChatProviderOptions(params, modelId) : params,
-            ),
-            type,
+        addAiGatewayReasoningRawChunks(
+          normalizeAiGatewayJsonSchemas(
+            providerId === OPENAI_CHAT_PROVIDER_ID ? normalizeAiGatewayChatProviderOptions(params, modelId) : params,
           ),
+          type,
         ),
       ),
     wrapGenerate: ({ params }) => {
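A note on the removed merge helper: it leaned on the WHATWG Headers class for case-insensitive matching, so a caller-supplied `X-BF-Cache-Key` blocked the default `x-bf-cache-key`. A minimal standalone sketch of the same pattern (the `mergeIfAbsent` name is illustrative, not part of the package):

const mergeIfAbsent = (existing: HeadersInit | undefined, defaults: Record<string, string>): Record<string, string> => {
  const merged = new Headers(existing)
  for (const [key, value] of Object.entries(defaults)) {
    // Headers.has/set compare header names case-insensitively.
    if (!merged.has(key)) merged.set(key, value)
  }
  return Object.fromEntries(merged.entries())
}

// mergeIfAbsent({ 'X-Custom': 'a' }, { 'x-custom': 'b', 'x-extra': 'c' })
// => { 'x-custom': 'a', 'x-extra': 'c' }
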
package/src/ai-gateway/index.ts
CHANGED
package/src/effect/errors.ts
CHANGED

@@ -32,7 +32,6 @@ export const ERROR_TAGS = {
   CursorPaginationError: '@lota-sdk/core/CursorPaginationError',
   DatabaseError: '@lota-sdk/core/DatabaseError',
   DelegatedAgentError: '@lota-sdk/core/DelegatedAgentError',
-  EmbeddingCacheError: '@lota-sdk/core/EmbeddingCacheError',
   EmbeddingProviderError: '@lota-sdk/core/EmbeddingProviderError',
   ExecutionPlanCacheError: '@lota-sdk/core/ExecutionPlanCacheError',
   FeedbackLoopServiceError: '@lota-sdk/core/FeedbackLoopServiceError',

@@ -73,6 +72,7 @@ export const ERROR_TAGS = {
   QueueWorkerError: '@lota-sdk/core/QueueWorkerError',
   ReadFilePartsError: '@lota-sdk/core/ReadFilePartsError',
   RedisError: '@lota-sdk/core/RedisError',
+  RedisJsonCacheError: '@lota-sdk/core/RedisJsonCacheError',
   RerankServiceError: '@lota-sdk/core/RerankServiceError',
   RuntimeLifecycleError: '@lota-sdk/core/RuntimeLifecycleError',
   SandboxedWorkerBootstrapError: '@lota-sdk/core/SandboxedWorkerBootstrapError',
package/src/embeddings/provider.ts
CHANGED

@@ -7,15 +7,18 @@ import { openRouterEmbeddingModel } from './openrouter'
 const SUPPORTED_EMBEDDING_PREFIXES = ['openai/', 'openrouter/'] as const
 const SUPPORTED_BARE_EMBEDDING_MODEL_IDS = ['text-embedding-3-small'] as const
 
-…
-…
-…
-…
+/**
+ * Bound on the in-flight Promise dedup map. Concurrent embedQuery calls for
+ * the same text share a single API round-trip; this cap keeps the map from
+ * growing without bound under sustained load. FIFO eviction by insertion
+ * order — the oldest pending key is evicted before insertion when full. The
+ * pending Promise is not aborted; only its dedup slot is freed.
+ */
+const MAX_INFLIGHT = 1000
 
 type ProviderEmbeddingsOptions = {
   embedFn?: typeof embed
   embedManyFn?: typeof embedMany
-  getCache?: () => SharedEmbeddingCache | null
   modelId: string
   /**
    * `runPromise` is required: callers must yield `RuntimeBridgeTag` in their

@@ -67,7 +70,6 @@ function tryEmbeddingPromise<A>(
 export class ProviderEmbeddings {
   private readonly embedFn: typeof embed
   private readonly embedManyFn: typeof embedMany
-  private readonly getCache: () => SharedEmbeddingCache | null
   private readonly resolvedModelId: string
   private _model: ReturnType<typeof resolveEmbeddingModel> | null = null
   /** In-flight dedup: concurrent embedQuery calls for the same text share one API round-trip. */

@@ -78,7 +80,6 @@ export class ProviderEmbeddings {
   constructor(options: ProviderEmbeddingsOptions) {
     this.embedFn = options.embedFn ?? embed
     this.embedManyFn = options.embedManyFn ?? embedMany
-    this.getCache = options.getCache ?? (() => null)
     this.resolvedModelId = options.modelId
     this.runPromise = options.runPromise
   }

@@ -94,11 +95,12 @@ export class ProviderEmbeddings {
     return this._model
   }
 
-  private …
-  …
-  …
-  …
-  …
+  private rememberInflight(key: string, promise: Promise<number[]>): void {
+    if (this.inflightEmbeddings.size >= MAX_INFLIGHT) {
+      const oldest = this.inflightEmbeddings.keys().next().value
+      if (oldest !== undefined) this.inflightEmbeddings.delete(oldest)
+    }
+    this.inflightEmbeddings.set(key, promise)
   }
 
   embedQuery(text: string): Promise<number[]> {

@@ -110,7 +112,11 @@ export class ProviderEmbeddings {
     if (pending) return pending
 
     const promise = this.runPromise(this.executeEmbedQueryEffect(input))
-    this.…
+    this.rememberInflight(dedupKey, promise)
+    // `.then(_, _)` (not `.finally`): the dual-handler form swallows the
+    // rejection on the cleanup branch so it never surfaces as an unhandled
+    // rejection. The original promise is still returned to the caller, who
+    // is responsible for reacting to its rejection.
     void promise.then(
       () => this.inflightEmbeddings.delete(dedupKey),
       () => this.inflightEmbeddings.delete(dedupKey),

@@ -122,24 +128,10 @@ export class ProviderEmbeddings {
   private executeEmbedQueryEffect(input: string): Effect.Effect<number[], EmbeddingProviderError> {
     return Effect.gen(
       function* (this: ProviderEmbeddings) {
-        const cached = yield* tryEmbeddingPromise('Failed to load cached query embedding.', () =>
-          this.loadCachedEmbedding(input),
-        )
-        if (cached) {
-          return cached
-        }
-
         const result = yield* tryEmbeddingPromise('Failed to generate query embedding.', () =>
           this.embedFn({ model: this.getModel(), value: input, maxRetries: 2 }),
         )
-
-        const embedding = normalizeEmbedding(result.embedding)
-        const redisCache = this.getCache()
-        if (redisCache) {
-          void redisCache.set(this.getModelId(), input, embedding)
-        }
-
-        return embedding
+        return normalizeEmbedding(result.embedding)
       }.bind(this),
     ).pipe(Effect.withSpan('ProviderEmbeddings.executeEmbedQuery'))
   }

@@ -166,45 +158,12 @@
   ): Effect.Effect<number[][], EmbeddingProviderError> {
     return Effect.gen(
       function* (this: ProviderEmbeddings) {
-        const embeddingsByText = new Map<string, number[]>()
-        let missingTexts = [...uniqueTexts]
-        const redisCache = this.getCache()
-        const redisResults =
-          redisCache && missingTexts.length > 0
-            ? yield* Effect.all(
-                missingTexts.map((text) =>
-                  tryEmbeddingPromise('Failed to load cached document embedding.', () =>
-                    redisCache.get(this.getModelId(), text),
-                  ).pipe(Effect.map((embedding) => ({ text, embedding }))),
-                ),
-              )
-            : ([] as Array<{ text: string; embedding: number[] | null }>)
-
-        if (redisCache && missingTexts.length > 0) {
-          missingTexts = []
-          for (const result of redisResults) {
-            if (!result.embedding) {
-              missingTexts.push(result.text)
-              continue
-            }
-
-            embeddingsByText.set(result.text, result.embedding)
-          }
-        }
-
-        if (missingTexts.length === 0) {
-          return normalized.map((text) => (text ? (embeddingsByText.get(text) ?? []) : []))
-        }
-
         const result = yield* tryEmbeddingPromise('Failed to generate document embeddings.', () =>
-          this.embedManyFn({ model: this.getModel(), values: missingTexts, maxRetries: 2 }),
+          this.embedManyFn({ model: this.getModel(), values: uniqueTexts, maxRetries: 2 }),
         )
-
-        …
-        embeddingsByText.set(text, …
-        if (redisCache) {
-          void redisCache.set(this.getModelId(), text, embedding)
-        }
+        const embeddingsByText = new Map<string, number[]>()
+        uniqueTexts.forEach((text, index) => {
+          embeddingsByText.set(text, normalizeEmbedding(result.embeddings[index] ?? []))
         })
 
         return normalized.map((text) => (text ? (embeddingsByText.get(text) ?? []) : []))
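The bounded dedup map used by `rememberInflight` generalizes beyond embeddings. A minimal standalone sketch of the same pattern (the `InflightDedup` class is illustrative, not exported by the package):

class InflightDedup<V> {
  private readonly inflight = new Map<string, Promise<V>>()
  constructor(private readonly maxEntries = 1000) {}

  run(key: string, start: () => Promise<V>): Promise<V> {
    const pending = this.inflight.get(key)
    if (pending) return pending // concurrent callers share one round-trip

    const promise = start()
    if (this.inflight.size >= this.maxEntries) {
      // FIFO eviction: a Map preserves insertion order, so the first key is the oldest.
      const oldest = this.inflight.keys().next().value
      if (oldest !== undefined) this.inflight.delete(oldest)
    }
    this.inflight.set(key, promise)
    // Dual-handler cleanup so a rejected load never surfaces here as an unhandled rejection.
    void promise.then(
      () => this.inflight.delete(key),
      () => this.inflight.delete(key),
    )
    return promise
  }
}
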
package/src/redis/index.ts
CHANGED

@@ -2,6 +2,7 @@ import { createRedisConnectionManager } from './connection'
 import type { RedisConnectionManager } from './connection'
 export { DEFAULT_REDIS_OPTIONS, type RedisConnectionLogger } from './connection'
 export { withOrgMemoryLock, withOrgMemoryLockEffect } from './org-memory-lock'
+export { RedisJsonCache, RedisJsonCacheError, RedisJsonCacheLive, RedisJsonCacheTag } from './redis-cache'
 export { withLeaseLock } from './redis-lease-lock'
 export {
   createThreadResumableContext,
package/src/redis/redis-cache.ts
ADDED

@@ -0,0 +1,176 @@
+import { Context, Effect, Layer, Schema } from 'effect'
+import type IORedis from 'ioredis'
+
+import { RuntimeBridgeTag } from '../ai-gateway/ai-gateway'
+import { ERROR_TAGS } from '../effect/errors'
+import { RedisServiceTag } from '../effect/services'
+
+/**
+ * Bound on the in-flight Promise dedup map. Keeps memory usage predictable
+ * under sustained load — concurrent identical loads still share their
+ * Promise, but the map cannot grow without bound when ttls are long.
+ */
+const MAX_INFLIGHT = 1000
+
+export class RedisJsonCacheError extends Schema.TaggedErrorClass<RedisJsonCacheError>()(
+  ERROR_TAGS.RedisJsonCacheError,
+  {
+    message: Schema.String,
+    operation: Schema.Literals(['get', 'set', 'del', 'load']),
+    cause: Schema.optional(Schema.Defect),
+  },
+) {}
+
+/**
+ * Schema accepted by `RedisJsonCache` methods. The codec must be fully
+ * self-contained — no remaining decoding/encoding services — so that
+ * `Schema.fromJsonString(...)` resolves at runtime without additional
+ * context. `Schema.Codec<T, E, never, never>` is the canonical "ready to
+ * run" form.
+ */
+type RedisJsonCacheCodec<T> = Schema.Codec<T, unknown, never, never>
+
+/**
+ * Generic Redis-backed JSON cache keyed by string. The codec is supplied per
+ * call so a single cache instance can hold values of multiple shapes (the
+ * caller is responsible for namespacing keys correctly). Errors propagate as
+ * `RedisJsonCacheError`; this cache deliberately does not fail-soft.
+ */
+export class RedisJsonCache {
+  /** In-flight dedup: concurrent loads for the same key share a single round-trip. */
+  private readonly inflight = new Map<string, Promise<unknown>>()
+
+  constructor(
+    private readonly redis: IORedis,
+    private readonly runPromise: <A, E>(effect: Effect.Effect<A, E>) => Promise<A>,
+  ) {}
+
+  get<T>(key: string, schema: RedisJsonCacheCodec<T>): Promise<T | null> {
+    return this.runPromise(this.getEffect(key, schema))
+  }
+
+  set<T>(key: string, value: T, ttlSeconds: number, schema: RedisJsonCacheCodec<T>): Promise<void> {
+    return this.runPromise(this.setEffect(key, value, ttlSeconds, schema))
+  }
+
+  del(key: string): Promise<void> {
+    return this.runPromise(this.delEffect(key))
+  }
+
+  /**
+   * Read-through cache. On miss, invokes `loader` and stores the result with
+   * the supplied TTL. Concurrent calls with the same key share a single
+   * loader Promise via the in-flight map. The map is bounded — when full,
+   * the oldest entry (FIFO insertion order) is evicted before insert.
+   */
+  getOrSet<T>(key: string, ttlSeconds: number, schema: RedisJsonCacheCodec<T>, loader: () => Promise<T>): Promise<T> {
+    const existing = this.inflight.get(key) as Promise<T> | undefined
+    if (existing) return existing
+
+    const promise = this.runPromise(this.getOrSetEffect(key, ttlSeconds, schema, loader))
+    if (this.inflight.size >= MAX_INFLIGHT) {
+      const oldest = this.inflight.keys().next().value
+      if (oldest !== undefined) this.inflight.delete(oldest)
+    }
+    this.inflight.set(key, promise)
+    // `.then(_, _)` (not `.finally`): the dual-handler form swallows the
+    // rejection on the cleanup branch so it never surfaces as an unhandled
+    // rejection. The original promise is still returned to the caller, who
+    // is responsible for reacting to its rejection.
+    void promise.then(
+      () => this.inflight.delete(key),
+      () => this.inflight.delete(key),
+    )
+
+    return promise
+  }
+
+  private getOrSetEffect<T>(
+    key: string,
+    ttlSeconds: number,
+    schema: RedisJsonCacheCodec<T>,
+    loader: () => Promise<T>,
+  ): Effect.Effect<T, RedisJsonCacheError> {
+    return Effect.gen(
+      function* (this: RedisJsonCache) {
+        const cached = yield* this.getEffect(key, schema)
+        if (cached !== null) return cached
+
+        const fresh = yield* Effect.tryPromise({
+          try: () => loader(),
+          catch: (cause) =>
+            new RedisJsonCacheError({ message: 'Redis JSON cache loader failed.', operation: 'load', cause }),
+        })
+        yield* this.setEffect(key, fresh, ttlSeconds, schema)
+        return fresh
+      }.bind(this),
+    )
+  }
+
+  private getEffect<T>(key: string, schema: RedisJsonCacheCodec<T>): Effect.Effect<T | null, RedisJsonCacheError> {
+    const redis = this.redis
+    const decode = Schema.decodeUnknownSync(Schema.fromJsonString(schema)) as (raw: string) => T
+    return Effect.gen(function* () {
+      const cached = yield* Effect.tryPromise({
+        try: () => redis.getBuffer(key),
+        catch: (cause) => new RedisJsonCacheError({ message: 'Redis JSON cache get failed.', operation: 'get', cause }),
+      })
+
+      if (!cached) return null
+
+      return yield* Effect.try({
+        try: () => decode(cached.toString()),
+        catch: (cause) =>
+          new RedisJsonCacheError({ message: 'Redis JSON cache parse failed.', operation: 'get', cause }),
+      })
+    })
+  }
+
+  private setEffect<T>(
+    key: string,
+    value: T,
+    ttlSeconds: number,
+    schema: RedisJsonCacheCodec<T>,
+  ): Effect.Effect<void, RedisJsonCacheError> {
+    const redis = this.redis
+    const encode = Schema.encodeSync(Schema.fromJsonString(schema)) as (input: T) => string
+    return Effect.gen(function* () {
+      const serialized = yield* Effect.try({
+        try: () => encode(value),
+        catch: (cause) =>
+          new RedisJsonCacheError({ message: 'Redis JSON cache serialization failed.', operation: 'set', cause }),
+      })
+      yield* Effect.tryPromise({
+        try: () => redis.set(key, serialized, 'EX', ttlSeconds),
+        catch: (cause) => new RedisJsonCacheError({ message: 'Redis JSON cache set failed.', operation: 'set', cause }),
+      })
+    }).pipe(Effect.asVoid)
+  }
+
+  private delEffect(key: string): Effect.Effect<void, RedisJsonCacheError> {
+    const redis = this.redis
+    return Effect.tryPromise({
+      try: () => redis.del(key),
+      catch: (cause) => new RedisJsonCacheError({ message: 'Redis JSON cache del failed.', operation: 'del', cause }),
+    }).pipe(Effect.asVoid)
+  }
+}
+
+export class RedisJsonCacheTag extends Context.Service<RedisJsonCacheTag, RedisJsonCache>()(
+  '@lota-sdk/core/RedisJsonCache',
+) {}
+
+/**
+ * `RedisJsonCache` needs a `runPromise` so its Promise-returning surface stays
+ * grounded in the host runtime. We yield `RuntimeBridgeTag` once at layer
+ * construction and capture the bridged `runPromise`; consumers never
+ * prop-drill it.
+ */
+export const RedisJsonCacheLive = Layer.effect(
+  RedisJsonCacheTag,
+  Effect.gen(function* () {
+    const redis = yield* RedisServiceTag
+    const bridge = yield* RuntimeBridgeTag
+    return new RedisJsonCache(redis.getConnection(), bridge.runPromise)
+  }),
+)
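A hypothetical read-through call against the new cache might look like this. The `UserPrefs` shape, key, and `fetchPrefsFromDb` loader are invented for illustration; `Schema.Struct` and `Schema.String` come from the same effect Schema module the file already uses:

import { Schema } from 'effect'

// Invented value shape; any fully self-contained codec works.
const UserPrefs = Schema.Struct({ theme: Schema.String, locale: Schema.String })

declare const cache: RedisJsonCache // obtained by yielding RedisJsonCacheTag
declare function fetchPrefsFromDb(id: string): Promise<{ theme: string; locale: string }>

const prefs = await cache.getOrSet(
  'user-prefs:user_123', // caller owns key namespacing
  3600,                  // TTL in seconds, applied on the write-back
  UserPrefs,
  () => fetchPrefsFromDb('user_123'), // invoked only on a cache miss
)
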
package/src/runtime/domain-layer.ts
CHANGED

@@ -11,9 +11,9 @@ import type { Layer as LayerType } from 'effect'
 import { Layer } from 'effect'
 
 import type { AiGatewayModelsTag, AiGatewayTag, RuntimeBridgeTag } from '../ai-gateway/ai-gateway'
-import { EmbeddingCacheLive } from '../ai/embedding-cache'
 import type { buildInfrastructureLayer } from '../effect/layers'
 import { LotaQueuesLive } from '../queues/queues.service'
+import { RedisJsonCacheLive } from '../redis/redis-cache'
 import { SharedThreadStreamSubscriberLive } from '../redis/stream-context'
 import { AgentActivityServiceLive } from '../services/agent-activity.service'
 import { AgentExecutorServiceLive } from '../services/agent-executor.service'

@@ -113,7 +113,7 @@ export function buildDomainServiceLayer(infrastructureLayer: InfrastructureLayer
   )
   const ctx0 = Layer.mergeAll(
     baseCtx,
-    provide(Layer.mergeAll(FirecrawlLive, HelperModelLive, EmbeddingCacheLive), baseCtx),
+    provide(Layer.mergeAll(FirecrawlLive, HelperModelLive, RedisJsonCacheLive), baseCtx),
     tier0,
   )
package/src/runtime/runtime-config.ts
CHANGED

@@ -237,16 +237,10 @@ export const LotaRuntimeConfigSchema = z.object({
   memory: z
     .object({
       searchK: z.coerce.number().int().positive().default(6),
-      embeddingCacheTtlSeconds: z.coerce.number().int().positive().default(7200),
       rerankerStrategy: MemoryRerankerStrategySchema.default('rerank'),
       rerankerModelId: z.string().trim().min(1).default(AI_GATEWAY_FAST_RERANK_MODEL_ID),
     })
-    .default({
-      searchK: 6,
-      embeddingCacheTtlSeconds: 7200,
-      rerankerStrategy: 'rerank',
-      rerankerModelId: AI_GATEWAY_FAST_RERANK_MODEL_ID,
-    }),
+    .default({ searchK: 6, rerankerStrategy: 'rerank', rerankerModelId: AI_GATEWAY_FAST_RERANK_MODEL_ID }),
   threads: threadConfigSchema.default({}),
   agents: agentsConfigSchema,
   toolProviders: z.custom<ToolSet>(isToolSet, { error: 'toolProviders must be a tool registry object' }).optional(),
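Note that removing embeddingCacheTtlSeconds from the object also forces it out of the `.default(...)` literal, since zod type-checks the whole-object default against the schema's input. A minimal zod sketch of that pattern (not the package's schema):

import { z } from 'zod'

const memory = z
  .object({ searchK: z.coerce.number().int().positive().default(6) })
  .default({ searchK: 6 })

memory.parse(undefined) // => { searchK: 6 } (the whole-object fallback applies)
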
package/src/services/plan/plan-deadline.service.ts
CHANGED

@@ -393,7 +393,7 @@ export function makePlanDeadlineService(deps: PlanDeadlineDeps) {
       return
     }
 
-    const …
+    const runLookup = new Map<string, PlanRunRecord>()
 
     const handleEntry = (entry: (typeof sweep.entries)[number]): Effect.Effect<void, PlanDeadlineError> =>
       Effect.gen(function* () {

@@ -403,9 +403,9 @@ export function makePlanDeadlineService(deps: PlanDeadlineDeps) {
         }
 
         const runIdStr = recordIdToString(entry.nodeRun.runId, TABLES.PLAN_RUN)
-        const …
+        const existing = runLookup.get(runIdStr)
         const run =
-          …
+          existing ??
           (yield* db
             .findOne(TABLES.PLAN_RUN, { id: ensureRecordId(entry.nodeRun.runId, TABLES.PLAN_RUN) }, PlanRunSchema)
             .pipe(

@@ -414,7 +414,7 @@ export function makePlanDeadlineService(deps: PlanDeadlineDeps) {
         if (!run) {
          return
         }
-        …
+        runLookup.set(runIdStr, run)
 
         const dedupeKeyBase = `plan-deadline:${runIdStr}:${entry.nodeRun.nodeId}`
         const actionEffect =
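The `runLookup` map is a per-sweep memo so repeated entries for the same plan run hit the database once. The same idiom in plain form (the `PlanRun` type and names are illustrative):

type PlanRun = { id: string; deadlineAt?: string }

const memo = new Map<string, PlanRun>()

async function getRun(id: string, load: (id: string) => Promise<PlanRun | null>): Promise<PlanRun | null> {
  const hit = memo.get(id)
  if (hit) return hit
  const run = await load(id)
  if (run) memo.set(id, run) // only successful lookups are memoized
  return run
}
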
package/src/system-agents/context-compaction.agent.ts
CHANGED

@@ -2,7 +2,6 @@ import type { CreateHelperToolLoopAgentOptions } from '@lota-sdk/shared'
 import { ToolLoopAgent } from 'ai'
 
 import type { AiGatewayModels } from '../ai-gateway/ai-gateway'
-import { buildAiGatewayDirectCacheHeaders } from '../ai-gateway/cache-headers'
 import { OPENROUTER_STRUCTURED_HELPER_MODEL_ID } from '../config/model-constants'
 import { resolveHelperAgentOptions } from './helper-agent-options'
 

@@ -33,7 +32,6 @@ export function makeContextCompactionAgentFactory(models: AiGatewayModels) {
     new ToolLoopAgent({
       id: 'context-compaction',
       model: models.chatModel(OPENROUTER_STRUCTURED_HELPER_MODEL_ID),
-      headers: buildAiGatewayDirectCacheHeaders('lota-sdk'),
       ...resolveHelperAgentOptions(options, { instructions: CONTEXT_COMPACTION_PROMPT }),
     })
 }

package/src/system-agents/memory-reranker.agent.ts
CHANGED

@@ -2,7 +2,6 @@ import type { CreateHelperToolLoopAgentOptions } from '@lota-sdk/shared'
 import { ToolLoopAgent } from 'ai'
 
 import type { AiGatewayModels } from '../ai-gateway/ai-gateway'
-import { buildAiGatewayDirectCacheHeaders } from '../ai-gateway/cache-headers'
 import {
   OPENROUTER_LOW_REASONING_PROVIDER_OPTIONS,
   OPENROUTER_STRUCTURED_HELPER_MODEL_ID,

@@ -34,7 +33,6 @@ export function makeMemoryRerankerAgentFactory(models: AiGatewayModels) {
     new ToolLoopAgent({
       id: 'memory-reranker',
       model: models.chatModel(OPENROUTER_STRUCTURED_HELPER_MODEL_ID),
-      headers: buildAiGatewayDirectCacheHeaders('lota-sdk'),
       providerOptions: OPENROUTER_LOW_REASONING_PROVIDER_OPTIONS,
       ...resolveHelperAgentOptions(options),
     })

package/src/system-agents/memory.agent.ts
CHANGED

@@ -2,7 +2,6 @@ import type { CreateHelperToolLoopAgentOptions } from '@lota-sdk/shared'
 import { ToolLoopAgent } from 'ai'
 
 import type { AiGatewayModels } from '../ai-gateway/ai-gateway'
-import { buildAiGatewayDirectCacheHeaders } from '../ai-gateway/cache-headers'
 import {
   OPENROUTER_STRUCTURED_HELPER_MODEL_ID,
   OPENROUTER_HIGH_REASONING_PROVIDER_OPTIONS,

@@ -54,7 +53,6 @@ export function makeOrgMemoryAgentFactory(models: AiGatewayModels) {
     new ToolLoopAgent({
       id: 'org-memory',
       model: models.chatModel(OPENROUTER_STRUCTURED_HELPER_MODEL_ID),
-      headers: buildAiGatewayDirectCacheHeaders('lota-sdk'),
       providerOptions: OPENROUTER_HIGH_REASONING_PROVIDER_OPTIONS,
       ...resolveHelperAgentOptions(options),
     })

package/src/system-agents/recent-activity-title-refiner.agent.ts
CHANGED

@@ -2,7 +2,6 @@ import type { CreateHelperToolLoopAgentOptions } from '@lota-sdk/shared'
 import { ToolLoopAgent } from 'ai'
 
 import type { AiGatewayModels } from '../ai-gateway/ai-gateway'
-import { buildAiGatewayDirectCacheHeaders } from '../ai-gateway/cache-headers'
 import type { ResolvedAgentConfig } from '../config/agent-defaults'
 import { OPENROUTER_STRUCTURED_HELPER_MODEL_ID } from '../config/model-constants'
 import { resolveHelperAgentOptions } from './helper-agent-options'

@@ -78,7 +77,6 @@ export function makeRecentActivityTitleRefinerAgentFactory(models: AiGatewayModels)
     new ToolLoopAgent({
       id: 'recent-activity-title-refiner',
       model: models.chatModel(OPENROUTER_STRUCTURED_HELPER_MODEL_ID),
-      headers: buildAiGatewayDirectCacheHeaders('lota-sdk'),
       ...resolveHelperAgentOptions(options, {
         instructions: buildRecentActivityTitleRefinerPrompt(agentConfig),
         maxOutputTokens: RECENT_ACTIVITY_TITLE_MAX_TOKENS,

package/src/system-agents/regular-chat-memory-digest.agent.ts
CHANGED

@@ -2,7 +2,6 @@ import type { CreateHelperToolLoopAgentOptions } from '@lota-sdk/shared'
 import { ToolLoopAgent } from 'ai'
 
 import type { AiGatewayModels } from '../ai-gateway/ai-gateway'
-import { buildAiGatewayDirectCacheHeaders } from '../ai-gateway/cache-headers'
 import {
   OPENROUTER_STRUCTURED_HELPER_MODEL_ID,
   OPENROUTER_HIGH_REASONING_PROVIDER_OPTIONS,

@@ -29,7 +28,6 @@ export function makeRegularChatMemoryDigestAgentFactory(models: AiGatewayModels)
     new ToolLoopAgent({
       id: 'regular-chat-memory-digest',
       model: models.chatModel(OPENROUTER_STRUCTURED_HELPER_MODEL_ID),
-      headers: buildAiGatewayDirectCacheHeaders('lota-sdk'),
       providerOptions: OPENROUTER_HIGH_REASONING_PROVIDER_OPTIONS,
       ...resolveHelperAgentOptions(options, {
         instructions: regularChatMemoryDigestPrompt,

package/src/system-agents/skill-extractor.agent.ts
CHANGED

@@ -3,7 +3,6 @@ import { ToolLoopAgent } from 'ai'
 import { z } from 'zod'
 
 import type { AiGatewayModels } from '../ai-gateway/ai-gateway'
-import { buildAiGatewayDirectCacheHeaders } from '../ai-gateway/cache-headers'
 import {
   OPENROUTER_STRUCTURED_HELPER_MODEL_ID,
   OPENROUTER_HIGH_REASONING_PROVIDER_OPTIONS,

@@ -47,7 +46,6 @@ export function makeSkillExtractorAgentFactory(models: AiGatewayModels) {
     new ToolLoopAgent({
       id: 'skill-extractor',
       model: models.chatModel(OPENROUTER_STRUCTURED_HELPER_MODEL_ID),
-      headers: buildAiGatewayDirectCacheHeaders('lota-sdk'),
       providerOptions: OPENROUTER_HIGH_REASONING_PROVIDER_OPTIONS,
       ...resolveHelperAgentOptions(options, {
         instructions: skillExtractorPrompt,

package/src/system-agents/skill-manager.agent.ts
CHANGED

@@ -3,7 +3,6 @@ import { ToolLoopAgent } from 'ai'
 import { z } from 'zod'
 
 import type { AiGatewayModels } from '../ai-gateway/ai-gateway'
-import { buildAiGatewayDirectCacheHeaders } from '../ai-gateway/cache-headers'
 import {
   OPENROUTER_STRUCTURED_HELPER_MODEL_ID,
   OPENROUTER_HIGH_REASONING_PROVIDER_OPTIONS,

@@ -71,7 +70,6 @@ export function makeSkillManagerAgentFactory(models: AiGatewayModels) {
     new ToolLoopAgent({
       id: 'skill-manager',
       model: models.chatModel(OPENROUTER_STRUCTURED_HELPER_MODEL_ID),
-      headers: buildAiGatewayDirectCacheHeaders('lota-sdk'),
       providerOptions: OPENROUTER_HIGH_REASONING_PROVIDER_OPTIONS,
       ...resolveHelperAgentOptions(options, {
         instructions: skillManagerPrompt,

package/src/system-agents/thread-router.agent.ts
CHANGED

@@ -3,7 +3,6 @@ import { Effect } from 'effect'
 import { z } from 'zod'
 
 import type { AiGatewayModels } from '../ai-gateway/ai-gateway'
-import { buildAiGatewayDirectCacheHeaders } from '../ai-gateway/cache-headers'
 import type { ResolvedAgentConfig } from '../config/agent-defaults'
 import { chatLogger } from '../config/logger'
 import type { ValidationError } from '../effect/errors'

@@ -182,7 +181,6 @@ function generateRouterObjectEffect<TSchema extends z.ZodTypeAny>(params: {
     try: () =>
       generateObject({
         model: params.aiGatewayModels.chatModel(modelId),
-        headers: buildAiGatewayDirectCacheHeaders('lota-sdk'),
         schema: params.schema,
         system: params.system,
         prompt: params.prompt,

package/src/system-agents/title-generator.agent.ts
CHANGED

@@ -2,7 +2,6 @@ import type { CreateHelperToolLoopAgentOptions } from '@lota-sdk/shared'
 import { ToolLoopAgent } from 'ai'
 
 import type { AiGatewayModels } from '../ai-gateway/ai-gateway'
-import { buildAiGatewayDirectCacheHeaders } from '../ai-gateway/cache-headers'
 import {
   OPENROUTER_FAST_REASONING_MODEL_ID,
   OPENROUTER_MINIMAL_REASONING_PROVIDER_OPTIONS,

@@ -35,7 +34,6 @@ export function makeThreadTitleGeneratorAgentFactory(models: AiGatewayModels) {
     new ToolLoopAgent({
       id: 'thread-title-generator',
       model: models.chatModel(OPENROUTER_FAST_REASONING_MODEL_ID),
-      headers: buildAiGatewayDirectCacheHeaders('lota-sdk'),
       providerOptions: OPENROUTER_MINIMAL_REASONING_PROVIDER_OPTIONS,
       ...resolveHelperAgentOptions(options, {
         instructions: THREAD_TITLE_GENERATOR_PROMPT,

package/src/tools/research-topic.tool.ts
CHANGED

@@ -3,7 +3,6 @@ import type { Effect } from 'effect'
 
 import type { AiGatewayModels } from '../ai-gateway/ai-gateway'
 import { aiGatewayChatModel, getDefaultAiGatewayRunPromise } from '../ai-gateway/ai-gateway'
-import { buildAiGatewayStrictSemanticCacheHeaders } from '../ai-gateway/cache-headers'
 import {
   OPENROUTER_FAST_REASONING_MODEL_ID,
   OPENROUTER_LOW_REASONING_PROVIDER_OPTIONS,

@@ -28,7 +27,6 @@ export const researchTopicTool = createDelegatedAgentToolWithContext<ResearchTop
       ? aiGatewayModels.chatModel(OPENROUTER_FAST_REASONING_MODEL_ID)
       : aiGatewayChatModel(OPENROUTER_FAST_REASONING_MODEL_ID),
     providerOptions: OPENROUTER_LOW_REASONING_PROVIDER_OPTIONS,
-    headers: buildAiGatewayStrictSemanticCacheHeaders('researchTopic'),
     instructions: RESEARCHER_PROMPT,
     createTools: ({ firecrawl, runPromise }) => ({
      searchWeb: searchWebTool.create({ firecrawl, runPromise }),
package/src/ai/embedding-cache.ts
REMOVED

@@ -1,127 +0,0 @@
-import { Context, Effect, Layer, Schema } from 'effect'
-import type IORedis from 'ioredis'
-
-import { RuntimeBridgeTag } from '../ai-gateway/ai-gateway'
-import { aiLogger } from '../config/logger'
-import { ERROR_TAGS } from '../effect/errors'
-import { RedisServiceTag, RuntimeConfigServiceTag } from '../effect/services'
-import { sha256Hex } from '../utils/crypto'
-
-export const DEFAULT_EMBEDDING_CACHE_TTL_SECONDS = 7200
-const EMBEDDING_CACHE_KEY_PREFIX = 'emb'
-const EmbeddingCacheJsonSchema = Schema.fromJsonString(Schema.Array(Schema.Number))
-
-class EmbeddingCacheError extends Schema.TaggedErrorClass<EmbeddingCacheError>()(ERROR_TAGS.EmbeddingCacheError, {
-  message: Schema.String,
-  operation: Schema.Literals(['get', 'set']),
-  cause: Schema.optional(Schema.Defect),
-}) {}
-
-function decodeEmbeddingCacheValue(raw: string): number[] {
-  return [...Schema.decodeUnknownSync(EmbeddingCacheJsonSchema)(raw)]
-}
-
-function encodeEmbeddingCacheValue(embedding: number[]): string {
-  return Schema.encodeSync(EmbeddingCacheJsonSchema)(embedding)
-}
-
-export class EmbeddingCache {
-  /** In-flight dedup: concurrent gets for the same key share a single Redis+API round-trip. */
-  private readonly inflight = new Map<string, Promise<number[] | null>>()
-
-  constructor(
-    private redis: IORedis,
-    private ttlSeconds: number,
-    private readonly runPromise: <A, E = never>(effect: Effect.Effect<A, E>) => Promise<A>,
-  ) {}
-
-  private buildKey(model: string, text: string): string {
-    const hash = sha256Hex(text)
-    return `${EMBEDDING_CACHE_KEY_PREFIX}:${model}:${hash}`
-  }
-
-  private fetchFromRedisEffect(key: string): Effect.Effect<number[] | null> {
-    const redis = this.redis
-
-    return Effect.gen(function* () {
-      const cached = yield* Effect.tryPromise({
-        try: () => redis.getBuffer(key),
-        catch: (cause) => new EmbeddingCacheError({ message: 'Embedding cache get failed.', operation: 'get', cause }),
-      })
-
-      if (!cached) return null
-
-      return yield* Effect.try({
-        try: () => decodeEmbeddingCacheValue(cached.toString()),
-        catch: (cause) =>
-          new EmbeddingCacheError({ message: 'Embedding cache parse failed.', operation: 'get', cause }),
-      })
-    }).pipe(
-      Effect.catchTag(ERROR_TAGS.EmbeddingCacheError, (error) => {
-        aiLogger.warn`${error.message}: ${error.cause}`
-        return Effect.succeed<number[] | null>(null)
-      }),
-    )
-  }
-
-  get(model: string, text: string): Promise<number[] | null> {
-    const key = this.buildKey(model, text)
-
-    const pending = this.inflight.get(key)
-    if (pending) return pending
-
-    const promise = this.runPromise(this.fetchFromRedisEffect(key))
-    this.inflight.set(key, promise)
-    void promise.finally(() => this.inflight.delete(key))
-
-    return promise
-  }
-
-  private setEffect(model: string, text: string, embedding: number[]): Effect.Effect<void> {
-    const redis = this.redis
-    const ttlSeconds = this.ttlSeconds
-    const key = this.buildKey(model, text)
-
-    return Effect.gen(function* () {
-      const serialized = yield* Effect.try({
-        try: () => encodeEmbeddingCacheValue(embedding),
-        catch: (cause) =>
-          new EmbeddingCacheError({ message: 'Embedding cache serialization failed.', operation: 'set', cause }),
-      })
-      yield* Effect.tryPromise({
-        try: () => redis.set(key, serialized, 'EX', ttlSeconds),
-        catch: (cause) => new EmbeddingCacheError({ message: 'Embedding cache set failed.', operation: 'set', cause }),
-      })
-    }).pipe(
-      Effect.asVoid,
-      Effect.catchTag(ERROR_TAGS.EmbeddingCacheError, (error) => {
-        aiLogger.warn`${error.message}: ${error.cause}`
-        return Effect.void
-      }),
-    )
-  }
-
-  set(model: string, text: string, embedding: number[]): Promise<void> {
-    return this.runPromise(this.setEffect(model, text, embedding))
-  }
-}
-
-export class EmbeddingCacheTag extends Context.Service<EmbeddingCacheTag, EmbeddingCache>()(
-  '@lota-sdk/core/EmbeddingCache',
-) {}
-
-/**
- * `EmbeddingCache` needs a `runPromise` to convert internal Effect chains into
- * the Promise API that AI SDK `embed` / `embedMany` expect. The layer yields
- * `RuntimeBridgeTag` once and captures it — callers never prop-drill
- * `runPromise` through this module.
- */
-export const EmbeddingCacheLive = Layer.effect(
-  EmbeddingCacheTag,
-  Effect.gen(function* () {
-    const redis = yield* RedisServiceTag
-    const config = yield* RuntimeConfigServiceTag
-    const bridge = yield* RuntimeBridgeTag
-    return new EmbeddingCache(redis.getConnection(), config.memory.embeddingCacheTtlSeconds, bridge.runPromise)
-  }),
-)
package/src/ai-gateway/cache-headers.ts
REMOVED

@@ -1,42 +0,0 @@
-const AI_GATEWAY_CACHE_KEY_HEADER = 'x-bf-cache-key'
-const AI_GATEWAY_CACHE_TTL_HEADER = 'x-bf-cache-ttl'
-const AI_GATEWAY_CACHE_THRESHOLD_HEADER = 'x-bf-cache-threshold'
-const AI_GATEWAY_CACHE_TYPE_HEADER = 'x-bf-cache-type'
-
-export const AI_GATEWAY_STRICT_SEMANTIC_CACHE_THRESHOLD = 0.975
-
-export type AiGatewayCacheType = 'direct' | 'semantic'
-
-export function toAiGatewayCacheKeyPart(value: string): string {
-  const normalized = value
-    .trim()
-    .toLowerCase()
-    .replace(/[^a-z0-9:_-]+/g, '-')
-    .replace(/-+/g, '-')
-  return normalized.replace(/^-+|-+$/g, '') || 'request'
-}
-
-export function buildAiGatewayCacheHeaders(
-  cacheKey: string,
-  ttl?: string,
-  threshold?: number,
-  cacheType?: AiGatewayCacheType,
-): Record<string, string> {
-  const headers: Record<string, string> = { [AI_GATEWAY_CACHE_KEY_HEADER]: cacheKey }
-  if (ttl) headers[AI_GATEWAY_CACHE_TTL_HEADER] = ttl
-  if (typeof threshold === 'number') headers[AI_GATEWAY_CACHE_THRESHOLD_HEADER] = String(threshold)
-  if (cacheType) headers[AI_GATEWAY_CACHE_TYPE_HEADER] = cacheType
-  return headers
-}
-
-export function buildAiGatewayDirectCacheHeaders(cacheKey: string, ttl?: string): Record<string, string> {
-  return buildAiGatewayCacheHeaders(cacheKey, ttl, undefined, 'direct')
-}
-
-export function buildAiGatewayStrictSemanticCacheHeaders(
-  cacheKey: string,
-  ttl?: string,
-  threshold = AI_GATEWAY_STRICT_SEMANTIC_CACHE_THRESHOLD,
-): Record<string, string> {
-  return buildAiGatewayCacheHeaders(cacheKey, ttl, threshold, 'semantic')
-}
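For readers tracking this removal: as read directly off the deleted source above, the builders produced plain header records and the key normalizer was lossy but deterministic. For example:

toAiGatewayCacheKeyPart('  Research: Topic!! ')
// => 'research:-topic' (lowercased, disallowed runs collapsed to '-', edges trimmed)

buildAiGatewayDirectCacheHeaders('lota-sdk')
// => { 'x-bf-cache-key': 'lota-sdk', 'x-bf-cache-type': 'direct' }

buildAiGatewayStrictSemanticCacheHeaders('researchTopic')
// => { 'x-bf-cache-key': 'researchTopic', 'x-bf-cache-threshold': '0.975', 'x-bf-cache-type': 'semantic' }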