@lota-sdk/core 0.4.18 → 0.4.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/src/ai-gateway/ai-gateway.ts +10 -108
- package/src/db/memory-store.ts +2 -10
- package/src/db/memory-types.ts +3 -8
- package/src/db/memory.ts +1 -5
- package/src/embeddings/provider.ts +4 -7
- package/src/runtime/memory/memory-pipeline.ts +2 -2
- package/src/runtime/memory/memory-prompts-update.ts +2 -1
- package/src/runtime/runtime-config.ts +0 -6
- package/src/services/document-chunk.service.ts +2 -7
- package/src/services/learned-skill.service.ts +3 -11
- package/src/services/memory/rerank.service.ts +47 -11
- package/src/system-agents/helper-agent-options.ts +3 -1
- package/src/workers/bootstrap.ts +2 -3
- package/src/workers/organization-learning.worker.ts +0 -1
- package/src/workers/skill-extraction.runner.ts +1 -6
- package/src/openrouter/direct-provider.ts +0 -29
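Reading the hunks below as a whole: this release deletes the direct-OpenRouter fallback path (package/src/openrouter/direct-provider.ts is removed, and the openRouterApiKey option and OPENROUTER_API_KEY env key disappear from runtime config and every embeddings call site), routes reranking through the AI gateway's /v1/rerank endpoint instead of OpenRouter, tightens the memory-delta schema (relation targets become explicit nullables and array fields lose their implicit [] defaults, with matching prompt-rule updates), and gives helper agents a default of 2 retries.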
package/package.json
CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@lota-sdk/core",
-  "version": "0.4.18",
+  "version": "0.4.20",
   "type": "module",
   "main": "./src/index.ts",
   "types": "./src/index.ts",
@@ -31,7 +31,7 @@
   "@ai-sdk/openai": "^3.0.53",
   "@chat-adapter/slack": "^4.26.0",
   "@chat-adapter/state-ioredis": "^4.26.0",
-  "@lota-sdk/shared": "0.4.18",
+  "@lota-sdk/shared": "0.4.20",
   "@mendable/firecrawl-js": "^4.18.3",
   "@surrealdb/node": "^3.0.3",
   "ai": "^6.0.168",
package/src/ai-gateway/ai-gateway.ts
CHANGED

@@ -2,12 +2,11 @@ import { devToolsMiddleware } from '@ai-sdk/devtools'
 import { createOpenAI } from '@ai-sdk/openai'
 import { wrapEmbeddingModel, wrapLanguageModel } from 'ai'
 import type { LanguageModelMiddleware } from 'ai'
-import { Cause, Clock, Context, Duration, Effect,
+import { Cause, Clock, Context, Duration, Effect, Fiber, Layer, Semaphore } from 'effect'
 
 import { DEFAULT_AI_GATEWAY_URL } from '../config/constants'
 import { ERROR_TAGS, AiGenerationError, ConfigurationError } from '../effect/errors'
 import { RuntimeConfigServiceTag } from '../effect/services'
-import { getDirectOpenRouterProvider, normalizeDirectOpenRouterModelId } from '../openrouter/direct-provider'
 import { isRecord, readString } from '../utils/string'
 import { buildAiGatewayCacheHeaders } from './cache-headers'
 
@@ -21,7 +20,6 @@ type AiGatewayGenerateResult = Awaited<ReturnType<WrapStreamOptions['doGenerate'
 type AiGatewayStreamResult = Awaited<ReturnType<WrapStreamOptions['doStream']>>
 type AiGatewayGeneratedContent = AiGatewayGenerateResult['content'][number]
 type AiGatewayStreamPart = AiGatewayStreamResult['stream'] extends ReadableStream<infer T> ? T : never
-type AiGatewayProviderOptions = NonNullable<AiGatewayCallOptions['providerOptions']>
 type AiGatewayAttemptResult<A> = { source: string; result: A }
 // eslint-disable-next-line @typescript-eslint/no-redundant-type-constituents
 type AiGatewayRunFork = <A, E>(effect: Effect.Effect<A, E, never>) => Fiber.Fiber<A, E | unknown>
@@ -38,7 +36,7 @@ class AiGatewayStreamAttemptTag extends Context.Service<
 
 const EXPECTED_GATEWAY_KEY_PREFIX = 'sk-bf-'
 const AI_GATEWAY_VIRTUAL_KEY_HEADER = 'x-bf-vk'
-const AI_GATEWAY_TIMEOUT_MS =
+const AI_GATEWAY_TIMEOUT_MS = 180_000
 const AI_GATEWAY_STREAM_IDLE_TIMEOUT_MS = 30_000
 const AI_GATEWAY_MAX_RETRIES = 4
 const AI_GATEWAY_MAX_RETRY_DELAY_MS = 15_000
@@ -727,22 +725,6 @@ function isOpenRouterModel(modelId: string): boolean {
   return modelId.trim().toLowerCase().startsWith('openrouter/')
 }
 
-function hasDirectOpenRouterFallback(config: AiGatewayRuntimeConfig, modelId: string): boolean {
-  return isOpenRouterModel(modelId) && Boolean(config.aiGateway.openRouterApiKey?.trim())
-}
-
-function getDirectOpenRouterChatModel(config: AiGatewayRuntimeConfig, modelId: string): AiGatewayLanguageModel {
-  return getDirectOpenRouterProvider(config.aiGateway.openRouterApiKey).chat(normalizeDirectOpenRouterModelId(modelId))
-}
-
-function shouldFallbackToDirectOpenRouter(
-  config: AiGatewayRuntimeConfig,
-  modelId: string,
-  error: AiGenerationError,
-): boolean {
-  return hasDirectOpenRouterFallback(config, modelId) && isRetryableAiGatewayError(error)
-}
-
 function attemptAiGatewayGenerate(
   source: string,
   evaluate: () => PromiseLike<AiGatewayGenerateResult>,
@@ -771,28 +753,8 @@ function attemptAiGatewayStream(
   )
 }
 
-function attemptDirectOpenRouterGenerate(
-  config: AiGatewayRuntimeConfig,
-  modelId: string,
-  params: AiGatewayCallOptions,
-): Effect.Effect<AiGatewayAttemptResult<AiGatewayGenerateResult>, AiGenerationError> {
-  const model = getDirectOpenRouterChatModel(config, modelId)
-  return attemptAiGatewayGenerate('openrouter.generate', () => model.doGenerate(params))
-}
-
-function attemptDirectOpenRouterStream(
-  config: AiGatewayRuntimeConfig,
-  modelId: string,
-  params: AiGatewayCallOptions,
-): Effect.Effect<AiGatewayAttemptResult<AiGatewayStreamResult>, AiGenerationError> {
-  const model = getDirectOpenRouterChatModel(config, modelId)
-  return attemptAiGatewayStream('openrouter.stream', () => model.doStream(params))
-}
-
 function executeGenerateAttemptPlan(
-  config: AiGatewayRuntimeConfig,
   modelId: string,
-  params: AiGatewayCallOptions,
   doGenerate: () => PromiseLike<AiGatewayGenerateResult>,
 ): Effect.Effect<AiGatewayAttemptResult<AiGatewayGenerateResult>, AiGenerationError> {
   const primary = Layer.succeed(AiGatewayGenerateAttemptTag, {
@@ -803,35 +765,15 @@ function executeGenerateAttemptPlan(
     return yield* attempt.execute
   })
 
-  if (!hasDirectOpenRouterFallback(config, modelId)) {
-    return effect.pipe(
-      Effect.provide(primary),
-      Effect.withSpan('AiGateway.executeGeneratePlan'),
-      Effect.annotateSpans({ modelId, directOpenRouterFallbackEnabled: false }),
-    )
-  }
-
   return effect.pipe(
-    Effect.
-      ExecutionPlan.make(
-        { provide: primary },
-        {
-          provide: Layer.succeed(AiGatewayGenerateAttemptTag, {
-            execute: attemptDirectOpenRouterGenerate(config, modelId, params),
-          }),
-          while: (error: AiGenerationError) => shouldFallbackToDirectOpenRouter(config, modelId, error),
-        },
-      ),
-    ),
+    Effect.provide(primary),
     Effect.withSpan('AiGateway.executeGeneratePlan'),
-    Effect.annotateSpans({ modelId
+    Effect.annotateSpans({ modelId }),
   )
 }
 
 function executeStreamAttemptPlan(
-  config: AiGatewayRuntimeConfig,
   modelId: string,
-  params: AiGatewayCallOptions,
   doStream: () => PromiseLike<AiGatewayStreamResult>,
 ): Effect.Effect<AiGatewayAttemptResult<AiGatewayStreamResult>, AiGenerationError> {
   const primary = Layer.succeed(AiGatewayStreamAttemptTag, {
@@ -842,35 +784,13 @@ function executeStreamAttemptPlan(
     return yield* attempt.execute
   })
 
-  if (!hasDirectOpenRouterFallback(config, modelId)) {
-    return effect.pipe(
-      Effect.provide(primary),
-      Effect.withSpan('AiGateway.executeStreamPlan'),
-      Effect.annotateSpans({ modelId, directOpenRouterFallbackEnabled: false }),
-    )
-  }
-
   return effect.pipe(
-    Effect.
-      ExecutionPlan.make(
-        { provide: primary },
-        {
-          provide: Layer.succeed(AiGatewayStreamAttemptTag, {
-            execute: attemptDirectOpenRouterStream(config, modelId, params),
-          }),
-          while: (error: AiGenerationError) => shouldFallbackToDirectOpenRouter(config, modelId, error),
-        },
-      ),
-    ),
+    Effect.provide(primary),
     Effect.withSpan('AiGateway.executeStreamPlan'),
-    Effect.annotateSpans({ modelId
+    Effect.annotateSpans({ modelId }),
   )
 }
 
-function isOpenRouterOpenAIReasoningModel(modelId: string): boolean {
-  return modelId.trim().toLowerCase().startsWith('openrouter/openai/gpt-5')
-}
-
 function shouldCloseInjectedReasoning(chunk: AiGatewayStreamPart): boolean {
   switch (chunk.type) {
     case 'stream-start':
@@ -1015,7 +935,7 @@ function createAiGatewayLanguageModelMiddleware(
     const model = resolveProviderModel(resolvedDeps.gateway.provider, modelId, providerId)
     return resolvedDeps.runPromise(
       withAiGatewayConcurrency(
-        executeGenerateAttemptPlan(
+        executeGenerateAttemptPlan(modelId, () => model.doGenerate(params)).pipe(
           Effect.map(({ result }) => ({
             ...result,
             content: injectAiGatewayChatReasoningContent(
@@ -1032,7 +952,7 @@ function createAiGatewayLanguageModelMiddleware(
     const model = resolveProviderModel(resolvedDeps.gateway.provider, modelId, providerId)
     return resolvedDeps.runPromise(
       withAiGatewayStreamConcurrency(
-        executeStreamAttemptPlan(
+        executeStreamAttemptPlan(modelId, () => model.doStream(params)).pipe(
          Effect.map((attempt) => ({
            ...attempt,
            result: isReasoningEnabled(params)
@@ -1052,27 +972,9 @@ function createAiGatewayLanguageModelMiddleware(
 
 export function normalizeAiGatewayChatProviderOptions(
   params: AiGatewayCallOptions,
-  modelId?: string,
+  _modelId?: string,
 ): AiGatewayCallOptions {
-  const providerOptions = isRecord(params.providerOptions)
-    ? ({ ...params.providerOptions } as AiGatewayProviderOptions)
-    : ({} as AiGatewayProviderOptions)
-  const openaiOptions = isRecord(providerOptions.openai)
-    ? { ...providerOptions.openai }
-    : ({} as Record<string, unknown>)
-
-  if (modelId && isOpenRouterOpenAIReasoningModel(modelId) && openaiOptions.forceReasoning === undefined) {
-    openaiOptions.forceReasoning = true
-  }
-
-  if (providerOptions.openai === openaiOptions || Object.keys(openaiOptions).length === 0) {
-    return params
-  }
-
-  return {
-    ...params,
-    providerOptions: { ...providerOptions, openai: openaiOptions as AiGatewayProviderOptions['openai'] },
-  }
+  return params
 }
 
 function withAiGatewayDevTools<TModel extends AiGatewayLanguageModel>(model: TModel): TModel {
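With the direct-OpenRouter branch gone, executeGenerateAttemptPlan and executeStreamAttemptPlan provide a single attempt layer unconditionally rather than building an ExecutionPlan with a fallback step. A minimal sketch of that single-attempt shape, using hypothetical names rather than the package's real tags:

import { Context, Effect, Layer } from 'effect'

// Hypothetical stand-ins for AiGatewayGenerateAttemptTag and its layer.
class AttemptTag extends Context.Tag('AttemptTag')<AttemptTag, { execute: Effect.Effect<string> }>() {}

const plan = Effect.gen(function* () {
  const attempt = yield* AttemptTag
  return yield* attempt.execute
}).pipe(
  // Single attempt, provided unconditionally: no ExecutionPlan, no fallback step.
  Effect.provide(Layer.succeed(AttemptTag, { execute: Effect.succeed('gateway result') })),
  Effect.withSpan('AiGateway.executeGeneratePlan'),
  Effect.annotateSpans({ modelId: 'openrouter/openai/gpt-5' }), // illustrative model id
)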
package/src/db/memory-store.ts
CHANGED

@@ -1264,20 +1264,12 @@ export class SurrealMemoryStore {
 
 export function createMemoryStore(
   db: SurrealDBService,
-  options: {
-    embeddingModel: string
-    openRouterApiKey?: string
-    runPromise: <A, E>(effect: Effect.Effect<A, E>) => Promise<A>
-  },
+  options: { embeddingModel: string; runPromise: <A, E>(effect: Effect.Effect<A, E>) => Promise<A> },
   background: BackgroundWorker,
 ): SurrealMemoryStore {
   return new SurrealMemoryStore(
     db,
-    new ProviderEmbeddings({
-      modelId: options.embeddingModel,
-      openRouterApiKey: options.openRouterApiKey,
-      runPromise: options.runPromise,
-    }),
+    new ProviderEmbeddings({ modelId: options.embeddingModel, runPromise: options.runPromise }),
     background,
   )
 }
package/src/db/memory-types.ts
CHANGED

@@ -151,13 +151,13 @@ const MemoryDeltaRelationSchema = z
     targetMemoryId: z
       .string()
       .min(1)
-      .optional()
+      .nullable()
      .describe('Target existing memory id when relation points to existing memory.'),
     targetFactIndex: z
       .number()
       .int()
       .min(0)
-      .optional()
+      .nullable()
      .describe('Target newFacts index when relation points to another newly provided fact.'),
   })
   .strict()
@@ -179,17 +179,12 @@ const MemoryDeltaItemSchema = z
     classification: MemoryDeltaClassificationSchema.describe(
       'How this fact relates to existing memories: new, supersedes, contradicts, enriches, duplicate.',
     ),
-    targetMemoryIds: z
-      .array(z.string().min(1))
-      .default([])
-      .describe('Existing memory IDs that are directly related to this fact.'),
+    targetMemoryIds: z.array(z.string().min(1)).describe('Existing memory IDs that are directly related to this fact.'),
     invalidateTargetIds: z
       .array(z.string().min(1))
-      .default([])
      .describe('Subset of targetMemoryIds that should be deleted as obsolete/invalidated.'),
     relations: z
       .array(MemoryDeltaRelationSchema)
-      .default([])
      .describe('Explicit semantic relations from this fact to existing memories and/or other new facts by index.'),
     rationale: z.string().min(1).describe('Short rationale for the classification decision.'),
   })
package/src/db/memory.ts
CHANGED

@@ -79,11 +79,7 @@ export class Memory {
   ) {
     this.store = createMemoryStore(
       deps.db,
-      {
-        embeddingModel: deps.runtimeConfig.aiGateway.embeddingModel,
-        openRouterApiKey: deps.runtimeConfig.aiGateway.openRouterApiKey,
-        runPromise: deps.runPromise,
-      },
+      { embeddingModel: deps.runtimeConfig.aiGateway.embeddingModel, runPromise: deps.runPromise },
       deps.background,
     )
     this.runtimeConfig = deps.runtimeConfig
package/src/embeddings/provider.ts
CHANGED

@@ -1,8 +1,8 @@
 import { embed, embedMany } from 'ai'
 import { Schema, Effect } from 'effect'
 
+import { aiGatewayEmbeddingModel } from '../ai-gateway/ai-gateway'
 import { ERROR_TAGS, ConfigurationError } from '../effect/errors'
-import { getDirectOpenRouterProvider, normalizeDirectOpenRouterModelId } from '../openrouter/direct-provider'
 
 const SUPPORTED_EMBEDDING_PREFIXES = ['openai/', 'openrouter/'] as const
 
@@ -16,7 +16,6 @@ type ProviderEmbeddingsOptions = {
   embedManyFn?: typeof embedMany
   getCache?: () => SharedEmbeddingCache | null
   modelId: string
-  openRouterApiKey?: string
   /**
    * `runPromise` is required: callers must yield `RuntimeBridgeTag` in their
    * `Layer.effect` (or accept a `RuntimeBridge` dep) and pass its `runPromise`
@@ -25,7 +24,7 @@ type ProviderEmbeddingsOptions = {
   runPromise: <A, E>(effect: Effect.Effect<A, E>) => Promise<A>
 }
 
-function resolveEmbeddingModel(modelId: string, openRouterApiKey?: string) {
+function resolveEmbeddingModel(modelId: string) {
   const normalized = modelId.trim()
   if (!normalized) {
     throw new ConfigurationError({ message: '[embeddings-provider] Model id is required.', key: 'embeddingModelId' })
@@ -38,7 +37,7 @@ function resolveEmbeddingModel(modelId: string, openRouterApiKey?: string) {
     })
   }
 
-  return
+  return aiGatewayEmbeddingModel(normalized)
 }
 
 function normalizeEmbedding(embedding: readonly number[]): number[] {
@@ -69,7 +68,6 @@ export class ProviderEmbeddings {
   /** In-flight dedup: concurrent embedQuery calls for the same text share one API round-trip. */
   private readonly inflightEmbeddings = new Map<string, Promise<number[]>>()
 
-  private readonly openRouterApiKey: string | undefined
   private readonly runPromise: <A, E>(effect: Effect.Effect<A, E>) => Promise<A>
 
   constructor(options: ProviderEmbeddingsOptions) {
@@ -77,7 +75,6 @@ export class ProviderEmbeddings {
     this.embedManyFn = options.embedManyFn ?? embedMany
     this.getCache = options.getCache ?? (() => null)
     this.resolvedModelId = options.modelId
-    this.openRouterApiKey = options.openRouterApiKey
     this.runPromise = options.runPromise
   }
 
@@ -87,7 +84,7 @@ export class ProviderEmbeddings {
 
   private getModel() {
     if (!this._model) {
-      this._model = resolveEmbeddingModel(this.getModelId(), this.openRouterApiKey)
+      this._model = resolveEmbeddingModel(this.getModelId())
    }
    return this._model
  }
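Every ProviderEmbeddings call site in this diff converges on the same two-field options object: openRouterApiKey is gone, and embeddings resolve through the AI gateway via aiGatewayEmbeddingModel. A usage sketch, assuming the class is exported from the package root (it lives in src/embeddings/provider.ts) and letting Effect.runPromise stand in for the runtime's RuntimeBridge:

import { Effect } from 'effect'
import { ProviderEmbeddings } from '@lota-sdk/core' // export path assumed

const embeddings = new ProviderEmbeddings({
  modelId: 'openai/text-embedding-3-small', // the runtime-config default
  // The runtime normally supplies this via RuntimeBridgeTag; Effect.runPromise
  // is a stand-in so the sketch is self-contained.
  runPromise: (effect) => Effect.runPromise(effect),
})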
package/src/runtime/memory/memory-pipeline.ts
CHANGED

@@ -72,8 +72,8 @@ interface MemoryDeltaOutputLike {
 
 interface MemoryDeltaRelationLike<TRelation extends string = string> {
   relation: TRelation
-  targetMemoryId?: string
-  targetFactIndex?: number
+  targetMemoryId?: string | null
+  targetFactIndex?: number | null
 }
 
 interface MemoryActionAdd {
package/src/runtime/memory/memory-prompts-update.ts
CHANGED

@@ -14,9 +14,10 @@ Decide one classification per fact:
 Rules:
 - Return exactly one delta item per new fact, preserving order.
 - fact must match the corresponding newFacts entry verbatim.
+- Always include targetMemoryIds, invalidateTargetIds, and relations. Use [] when there are no values.
 - targetMemoryIds and invalidateTargetIds may only contain ids from existingMemories.
 - invalidateTargetIds must be a subset of targetMemoryIds.
-- In each relation item,
+- In each relation item, return both target fields. Set exactly one target to a real value and set the other target field to null.
 - targetFactIndex must be valid, must not point to the same fact index, and is only for relations to other new facts.
 - For supersedes/contradicts, include target memories when evidence exists.
 - If uncertain, prefer conservative output: classify as new with no targets.
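The schema and prompt changes in the last three files work together: array fields are always emitted (no implicit [] defaults) and each relation carries both target fields, one real and one null. A hypothetical delta item in that shape (all values, including the relation name, are illustrative):

const exampleDeltaItem = {
  fact: 'User prefers dark mode.', // must match the newFacts entry verbatim
  classification: 'enriches',
  targetMemoryIds: ['memory:abc123'], // always present; [] when there are no targets
  invalidateTargetIds: [], // subset of targetMemoryIds
  relations: [
    // both target fields present; exactly one is non-null
    { relation: 'enriches', targetMemoryId: 'memory:abc123', targetFactIndex: null },
  ],
  rationale: 'Adds detail to an existing preference memory.',
}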
package/src/runtime/runtime-config.ts
CHANGED

@@ -215,7 +215,6 @@ export const LotaRuntimeConfigSchema = z.object({
     url: nonEmptyStringSchema,
     key: nonEmptyStringSchema,
     embeddingModel: nonEmptyStringSchema.default('openai/text-embedding-3-small'),
-    openRouterApiKey: nonEmptyStringSchema.optional(),
     maxConcurrency: z.coerce.number().int().positive().default(8),
   }),
   s3: z.object({
@@ -293,7 +292,6 @@ export const LOTA_RUNTIME_ENV_KEYS = Object.freeze([
   'AI_GATEWAY_URL',
   'AI_GATEWAY_KEY',
   'AI_EMBEDDING_MODEL',
-  'OPENROUTER_API_KEY',
   'AI_GATEWAY_MAX_CONCURRENCY',
   'S3_ENDPOINT',
   'S3_BUCKET',
@@ -330,7 +328,6 @@ export const lotaRuntimeEnvConfig = Config.all({
   aiGatewayUrl: Config.string('AI_GATEWAY_URL').pipe(Config.withDefault(DEFAULT_AI_GATEWAY_URL)),
   aiGatewayKey: Config.redacted('AI_GATEWAY_KEY'),
   aiEmbeddingModel: Config.string('AI_EMBEDDING_MODEL').pipe(Config.withDefault('openai/text-embedding-3-small')),
-  openRouterApiKey: Config.redacted('OPENROUTER_API_KEY').pipe(Config.option),
   aiGatewayMaxConcurrency: Config.number('AI_GATEWAY_MAX_CONCURRENCY').pipe(Config.withDefault(8)),
   s3Endpoint: Config.string('S3_ENDPOINT'),
   s3Bucket: Config.string('S3_BUCKET'),
@@ -379,9 +376,6 @@ export function loadLotaRuntimeConfigFromEnv(
       key: Redacted.value(env.aiGatewayKey),
       embeddingModel: env.aiEmbeddingModel,
       maxConcurrency: env.aiGatewayMaxConcurrency,
-      ...(Option.isSome(env.openRouterApiKey)
-        ? { openRouterApiKey: Redacted.value(env.openRouterApiKey.value) }
-        : {}),
     },
     s3: {
       endpoint: env.s3Endpoint,
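Upgrade note implied by these hunks: OPENROUTER_API_KEY is dropped from LOTA_RUNTIME_ENV_KEYS and lotaRuntimeEnvConfig no longer reads it, so setting that variable no longer feeds aiGateway config. AI_GATEWAY_URL, AI_GATEWAY_KEY, AI_EMBEDDING_MODEL, and AI_GATEWAY_MAX_CONCURRENCY are the gateway-related keys that remain.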
package/src/services/document-chunk.service.ts
CHANGED

@@ -17,9 +17,8 @@ type DocumentChunkEmbeddings = {
 function createDocumentChunkEmbeddings(
   embeddingModel: string,
   runPromise: RuntimeBridge['runPromise'],
-  openRouterApiKey?: string,
 ): DocumentChunkEmbeddings {
-  const embeddings = new ProviderEmbeddings({ modelId: embeddingModel,
+  const embeddings = new ProviderEmbeddings({ modelId: embeddingModel, runPromise })
 
   return {
     embedDocuments: (documents) => embeddings.embedDocuments(documents),
@@ -213,11 +212,7 @@ export const DocumentChunkServiceLive = Layer.effect(
     const runtimeConfig = yield* RuntimeConfigServiceTag
     const bridge = yield* RuntimeBridgeTag
     return makeDocumentChunkService(
-      createDocumentChunkEmbeddings(
-        runtimeConfig.aiGateway.embeddingModel,
-        bridge.runPromise,
-        runtimeConfig.aiGateway.openRouterApiKey,
-      ),
+      createDocumentChunkEmbeddings(runtimeConfig.aiGateway.embeddingModel, bridge.runPromise),
     )
   }),
 )
package/src/services/learned-skill.service.ts
CHANGED

@@ -111,15 +111,11 @@ interface RetrieveForTurnParams {
 
 export function makeLearnedSkillService(
   db: SurrealDBService,
-  options: { embeddingModel: string; openRouterApiKey?: string; runPromise: RuntimeBridge['runPromise'] },
+  options: { embeddingModel: string; runPromise: RuntimeBridge['runPromise'] },
   skillExistsCache: Cache.Cache<string, boolean, LearnedSkillServiceError>,
   background: Context.Service.Shape<typeof BackgroundWorkServiceTag>,
 ) {
-  const embeddings = new ProviderEmbeddings({
-    modelId: options.embeddingModel,
-    openRouterApiKey: options.openRouterApiKey,
-    runPromise: options.runPromise,
-  })
+  const embeddings = new ProviderEmbeddings({ modelId: options.embeddingModel, runPromise: options.runPromise })
 
   const hasSkillsForAgent = (orgId: string, agentId: string) => Cache.get(skillExistsCache, `${orgId}:${agentId}`)
 
@@ -517,11 +513,7 @@ export const LearnedSkillServiceLive = Layer.effect(
     })
     return makeLearnedSkillService(
       db,
-      {
-        embeddingModel: runtimeConfig.aiGateway.embeddingModel,
-        openRouterApiKey: runtimeConfig.aiGateway.openRouterApiKey,
-        runPromise: bridge.runPromise,
-      },
+      { embeddingModel: runtimeConfig.aiGateway.embeddingModel, runPromise: bridge.runPromise },
       skillExistsCache,
       background,
     )
package/src/services/memory/rerank.service.ts
CHANGED

@@ -3,18 +3,19 @@ import * as Schema from 'effect/Schema'
 import { z } from 'zod'
 
 import { OPENROUTER_FAST_RERANK_MODEL_ID } from '../../config/model-constants'
-import { ERROR_TAGS } from '../../effect/errors'
+import { ConfigurationError, ERROR_TAGS } from '../../effect/errors'
 import { RuntimeConfigServiceTag } from '../../effect/services'
 import { toValidationError } from '../../effect/zod'
-import { normalizeDirectOpenRouterModelId, resolveOpenRouterApiKey } from '../../openrouter/direct-provider'
 import type { ResolvedLotaRuntimeConfig } from '../../runtime/runtime-config'
 
-const
+const AI_GATEWAY_VIRTUAL_KEY_HEADER = 'x-bf-vk' as const
+const EXPECTED_GATEWAY_KEY_PREFIX = 'sk-bf-' as const
+const OPENROUTER_MODEL_PREFIX = 'openrouter/' as const
 
 const RerankRequestBodySchema = Schema.Struct({
   model: Schema.String,
   query: Schema.String,
-  documents: Schema.Array(Schema.String),
+  documents: Schema.Array(Schema.Struct({ text: Schema.String })),
   top_n: Schema.Number,
 })
 
@@ -30,6 +31,36 @@ function toRerankServiceError(operation: string, message: string, cause: unknown
   return new RerankServiceError({ operation, message, cause })
 }
 
+function resolveAiGatewayRerankUrl(config: ResolvedLotaRuntimeConfig): string {
+  const trimmed = config.aiGateway.url.trim()
+  if (!trimmed) {
+    throw new ConfigurationError({ message: '[ai-gateway] AI gateway URL is required.', key: 'aiGateway.url' })
+  }
+  const normalized = trimmed.replace(/\/+$/, '')
+  const v1BaseUrl = normalized.endsWith('/v1') ? normalized : `${normalized}/v1`
+  return `${v1BaseUrl}/rerank`
+}
+
+function resolveAiGatewayKey(config: ResolvedLotaRuntimeConfig): string {
+  const key = config.aiGateway.key.trim()
+  if (!key.startsWith(EXPECTED_GATEWAY_KEY_PREFIX)) {
+    throw new ConfigurationError({
+      message: `[ai-gateway] Gateway keys must use the ${EXPECTED_GATEWAY_KEY_PREFIX}* format.`,
+      key: 'aiGateway.key',
+    })
+  }
+  return key
+}
+
+function normalizeRerankModelId(modelId: string): string {
+  const normalized = modelId.trim()
+  if (!normalized) {
+    throw new ConfigurationError({ message: 'Rerank model id is required.', key: 'rerankModelId' })
+  }
+
+  return normalized.startsWith(OPENROUTER_MODEL_PREFIX) ? normalized.slice(OPENROUTER_MODEL_PREFIX.length) : normalized
+}
+
 const RerankResponseSchema = z
   .object({
     model: z.string().optional(),
@@ -101,10 +132,10 @@ export function makeRerankService(config: ResolvedLotaRuntimeConfig) {
 
   function resolveRerankModelId(modelId?: string): string {
     const explicit = modelId?.trim()
-    if (explicit) return
+    if (explicit) return normalizeRerankModelId(explicit)
 
     const configured = readConfiguredRerankModelId()
-    if (configured) return
+    if (configured) return normalizeRerankModelId(configured)
 
     return OPENROUTER_FAST_RERANK_MODEL_ID
   }
@@ -116,21 +147,26 @@ export function makeRerankService(config: ResolvedLotaRuntimeConfig) {
       return { modelId: resolveRerankModelId(params.modelId), results: [] as RerankResultItem[] }
     }
 
-    const
+    const gatewayKey = resolveAiGatewayKey(config)
+    const rerankUrl = resolveAiGatewayRerankUrl(config)
     const modelId = resolveRerankModelId(params.modelId)
     const topN = clampTopN(params.topN, params.documents.length)
     const requestBody = encodeRerankRequestBody({
       model: modelId,
      query: params.query,
-      documents: params.documents.map((document) => document.text),
+      documents: params.documents.map((document) => ({ text: document.text })),
      top_n: topN,
    })
 
    const response = yield* Effect.tryPromise({
      try: () =>
-        Bun.fetch(
+        Bun.fetch(rerankUrl, {
          method: 'POST',
-          headers: {
+          headers: {
+            Authorization: `Bearer ${gatewayKey}`,
+            [AI_GATEWAY_VIRTUAL_KEY_HEADER]: gatewayKey,
+            'Content-Type': 'application/json',
+          },
          body: requestBody,
        }),
      catch: (cause) => toRerankServiceError('fetch-rerank', 'Failed to fetch rerank results.', cause),
@@ -144,7 +180,7 @@ export function makeRerankService(config: ResolvedLotaRuntimeConfig) {
     if (!response.ok) {
       return yield* new RerankServiceError({
         operation: 'fetch-rerank',
-        message: `
+        message: `AI gateway rerank failed (${response.status}): ${responseText}`,
        cause: responseText,
      })
    }
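The reworked service now talks to the AI gateway's rerank endpoint rather than OpenRouter directly. Roughly the request it produces, sketched with plain fetch and assumed config values (the real code uses Bun.fetch and pulls these from ResolvedLotaRuntimeConfig):

// Assumed values; in the service these come from config.aiGateway.
const gatewayUrl = 'https://gateway.example.com'
const gatewayKey = 'sk-bf-example' // keys must use the sk-bf-* prefix

// URL normalization mirrors resolveAiGatewayRerankUrl: strip trailing
// slashes, append /v1 when missing, then /rerank.
const base = gatewayUrl.trim().replace(/\/+$/, '')
const rerankUrl = `${base.endsWith('/v1') ? base : `${base}/v1`}/rerank`

const response = await fetch(rerankUrl, {
  method: 'POST',
  headers: {
    Authorization: `Bearer ${gatewayKey}`,
    'x-bf-vk': gatewayKey, // AI_GATEWAY_VIRTUAL_KEY_HEADER
    'Content-Type': 'application/json',
  },
  body: JSON.stringify({
    model: 'rerank-model-id', // hypothetical; an 'openrouter/' prefix would be stripped
    query: 'how do I reset my password?',
    documents: [{ text: 'Reset it from the account page.' }, { text: 'Unrelated text.' }], // objects now, not bare strings
    top_n: 1,
  }),
})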
package/src/system-agents/helper-agent-options.ts
CHANGED

@@ -6,6 +6,8 @@ interface HelperAgentOptionOverrides {
   temperature?: number
 }
 
+const DEFAULT_HELPER_AGENT_MAX_RETRIES = 2
+
 export function resolveHelperAgentOptions(
   options: CreateHelperToolLoopAgentOptions,
   overrides?: HelperAgentOptionOverrides,
@@ -15,6 +17,6 @@ export function resolveHelperAgentOptions(
     maxOutputTokens: overrides?.maxOutputTokens ?? options.maxOutputTokens,
     temperature: overrides?.temperature ?? options.temperature,
     output: options.output,
-    maxRetries: options.maxRetries,
+    maxRetries: options.maxRetries ?? DEFAULT_HELPER_AGENT_MAX_RETRIES,
   }
 }
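Worth noting on the helper-agent change: `??` only substitutes the new default of 2 when options.maxRetries is undefined or null, so an explicit `maxRetries: 0` still disables retries rather than being coerced to the default.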
package/src/workers/bootstrap.ts
CHANGED

@@ -50,9 +50,8 @@ function buildSandboxedWorkerRuntimeConfigEffect() {
     aiGateway: {
       url: env.aiGatewayUrl,
       key: Redacted.value(env.aiGatewayKey),
-
-
-        : {}),
+      embeddingModel: env.aiEmbeddingModel,
+      maxConcurrency: env.aiGatewayMaxConcurrency,
     },
     s3: {
       endpoint: env.s3Endpoint,
package/src/workers/organization-learning.worker.ts
CHANGED

@@ -47,7 +47,6 @@ const skillExtractionServices: SkillExtractionServices = {
   socialChatHistoryService: await resolve(SocialChatHistoryServiceTag),
   runtimeAdapters: await resolve(RuntimeAdaptersServiceTag),
   embeddingModel: workerRuntimeConfig.aiGateway.embeddingModel,
-  openRouterApiKey: workerRuntimeConfig.aiGateway.openRouterApiKey,
   runPromise: (effect) => runtime.runPromise(effect),
 }
 const organizationLearningQueueJobService = await resolve(QueueJobServiceTag)
package/src/workers/skill-extraction.runner.ts
CHANGED

@@ -36,7 +36,6 @@ export interface SkillExtractionServices {
   socialChatHistoryService: Context.Service.Shape<typeof SocialChatHistoryServiceTag>
   runtimeAdapters: LotaRuntimeAdapters
   embeddingModel: string
-  openRouterApiKey?: string
   runPromise: <A, E>(effect: Effect.Effect<A, E>) => Promise<A>
 }
 
@@ -351,11 +350,7 @@ export function runSkillExtraction(
     return Promise.resolve({ skipped: true, processedMessages: 0, extractedSkills: 0 })
   }
 
-  const embeddings = new ProviderEmbeddings({
-    modelId: services.embeddingModel,
-    openRouterApiKey: services.openRouterApiKey,
-    runPromise: services.runPromise,
-  })
+  const embeddings = new ProviderEmbeddings({ modelId: services.embeddingModel, runPromise: services.runPromise })
   const withMemoryLock = runtimeAdapters.withWorkspaceMemoryLock
   const runExtraction = () =>
     services.runPromise(
package/src/openrouter/direct-provider.ts
DELETED

@@ -1,29 +0,0 @@
-import { createOpenAI } from '@ai-sdk/openai'
-
-import { ConfigurationError } from '../effect/errors'
-
-const DIRECT_OPENROUTER_BASE_URL = 'https://openrouter.ai/api/v1' as const
-const OPENROUTER_MODEL_PREFIX = 'openrouter/' as const
-
-export function resolveOpenRouterApiKey(openRouterApiKey: string | undefined): string {
-  const key = openRouterApiKey?.trim()
-  if (key) return key
-
-  throw new ConfigurationError({
-    message: 'Missing OpenRouter API key. Configure createLotaRuntime({ aiGateway: { openRouterApiKey } }).',
-    key: 'aiGateway.openRouterApiKey',
-  })
-}
-
-export function normalizeDirectOpenRouterModelId(modelId: string): string {
-  const normalized = modelId.trim()
-  if (!normalized) {
-    throw new ConfigurationError({ message: 'OpenRouter model id is required.', key: 'openRouterModelId' })
-  }
-
-  return normalized.startsWith(OPENROUTER_MODEL_PREFIX) ? normalized.slice(OPENROUTER_MODEL_PREFIX.length) : normalized
-}
-
-export function getDirectOpenRouterProvider(openRouterApiKey?: string) {
-  return createOpenAI({ baseURL: DIRECT_OPENROUTER_BASE_URL, apiKey: resolveOpenRouterApiKey(openRouterApiKey) })
-}