@lota-sdk/core 0.4.6 → 0.4.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/src/config/model-constants.ts +1 -0
- package/src/create-runtime.ts +4 -0
- package/src/embeddings/provider.ts +7 -2
- package/src/openrouter/direct-provider.ts +53 -0
- package/src/runtime/execution-plan.ts +2 -0
- package/src/runtime/runtime-config.ts +16 -1
- package/src/services/index.ts +1 -0
- package/src/services/memory.service.ts +111 -41
- package/src/services/rerank.service.ts +156 -0
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@lota-sdk/core",
|
|
3
|
-
"version": "0.4.6",
|
|
3
|
+
"version": "0.4.8",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"main": "./src/index.ts",
|
|
6
6
|
"types": "./src/index.ts",
|
|
@@ -32,7 +32,7 @@
|
|
|
32
32
|
"@chat-adapter/slack": "^4.23.0",
|
|
33
33
|
"@chat-adapter/state-ioredis": "^4.23.0",
|
|
34
34
|
"@logtape/logtape": "^2.0.5",
|
|
35
|
-
"@lota-sdk/shared": "0.4.6",
|
|
35
|
+
"@lota-sdk/shared": "0.4.8",
|
|
36
36
|
"@mendable/firecrawl-js": "^4.18.1",
|
|
37
37
|
"@surrealdb/node": "^3.0.3",
|
|
38
38
|
"ai": "^6.0.145",
|
package/src/config/model-constants.ts
CHANGED
|
@@ -2,6 +2,7 @@ export {
|
|
|
2
2
|
AI_GATEWAY_REASONING_SUMMARY_LEVEL,
|
|
3
3
|
OPENAI_HIGH_REASONING_PROVIDER_OPTIONS,
|
|
4
4
|
OPENAI_REASONING_MODEL_ID,
|
|
5
|
+
OPENROUTER_FAST_RERANK_MODEL_ID,
|
|
5
6
|
OPENROUTER_FAST_REASONING_MODEL_ID,
|
|
6
7
|
OPENROUTER_GEMINI_FLASH_MODEL_ID,
|
|
7
8
|
OPENROUTER_HIGH_REASONING_PROVIDER_OPTIONS,
|
package/src/create-runtime.ts
CHANGED
|
@@ -63,6 +63,8 @@ import type { recentActivityTitleService } from './services/recent-activity-titl
|
|
|
63
63
|
import { recentActivityTitleService as recentActivityTitleServiceSingleton } from './services/recent-activity-title.service'
|
|
64
64
|
import type { recentActivityService } from './services/recent-activity.service'
|
|
65
65
|
import { recentActivityService as recentActivityServiceSingleton } from './services/recent-activity.service'
|
|
66
|
+
import type { rerankService } from './services/rerank.service'
|
|
67
|
+
import { rerankService as rerankServiceSingleton } from './services/rerank.service'
|
|
66
68
|
import {
|
|
67
69
|
configureSocialChatHistory,
|
|
68
70
|
socialChatHistoryService as socialChatHistoryServiceSingleton,
|
|
@@ -134,6 +136,7 @@ export interface LotaRuntime {
|
|
|
134
136
|
documentChunkService: typeof documentChunkService
|
|
135
137
|
generatedDocumentStorageService: typeof generatedDocumentStorageService
|
|
136
138
|
memoryService: typeof memoryService
|
|
139
|
+
rerankService: typeof rerankService
|
|
137
140
|
verifyMutatingApproval: typeof verifyMutatingApproval
|
|
138
141
|
organizationService: typeof organizationService
|
|
139
142
|
organizationMemberService: typeof organizationMemberService
|
|
@@ -403,6 +406,7 @@ export async function createLotaRuntime(config: LotaRuntimeConfig): Promise<Lota
|
|
|
403
406
|
documentChunkService: documentChunkServiceSingleton,
|
|
404
407
|
generatedDocumentStorageService: generatedDocumentStorageServiceSingleton,
|
|
405
408
|
memoryService: memoryServiceSingleton,
|
|
409
|
+
rerankService: rerankServiceSingleton,
|
|
406
410
|
verifyMutatingApproval: verifyMutatingApprovalSingleton,
|
|
407
411
|
organizationService: organizationServiceSingleton,
|
|
408
412
|
organizationMemberService: organizationMemberServiceSingleton,
|
package/src/embeddings/provider.ts
CHANGED
|
@@ -1,7 +1,11 @@
|
|
|
1
1
|
import { embed, embedMany } from 'ai'
|
|
2
2
|
|
|
3
|
-
import { aiGatewayEmbeddingModel } from '../ai-gateway/ai-gateway'
|
|
4
3
|
import { getEmbeddingCache } from '../ai/embedding-cache'
|
|
4
|
+
import {
|
|
5
|
+
getDirectOpenRouterProvider,
|
|
6
|
+
normalizeDirectOpenRouterModelId,
|
|
7
|
+
resetDirectOpenRouterProviderForTests,
|
|
8
|
+
} from '../openrouter/direct-provider'
|
|
5
9
|
import { getRuntimeConfig } from '../runtime/runtime-config'
|
|
6
10
|
|
|
7
11
|
const SUPPORTED_EMBEDDING_PREFIXES = ['openai/', 'openrouter/'] as const
|
|
@@ -30,7 +34,7 @@ function resolveEmbeddingModel(modelId: string) {
|
|
|
30
34
|
)
|
|
31
35
|
}
|
|
32
36
|
|
|
33
|
-
return
|
|
37
|
+
return getDirectOpenRouterProvider().embeddingModel(normalizeDirectOpenRouterModelId(normalized))
|
|
34
38
|
}
|
|
35
39
|
|
|
36
40
|
function normalizeEmbedding(embedding: readonly number[]): number[] {
|
|
@@ -153,4 +157,5 @@ export function getDefaultEmbeddings(): ProviderEmbeddings {
|
|
|
153
157
|
|
|
154
158
|
export function resetDefaultEmbeddingsForTests(): void {
|
|
155
159
|
defaultEmbeddings = null
|
|
160
|
+
resetDirectOpenRouterProviderForTests()
|
|
156
161
|
}
|
package/src/openrouter/direct-provider.ts
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
import { createOpenAI } from '@ai-sdk/openai'
|
|
2
|
+
|
|
3
|
+
import { getRuntimeConfig } from '../runtime/runtime-config'
|
|
4
|
+
|
|
5
|
+
const DIRECT_OPENROUTER_BASE_URL = 'https://openrouter.ai/api/v1' as const
|
|
6
|
+
const OPENROUTER_MODEL_PREFIX = 'openrouter/' as const
|
|
7
|
+
|
|
8
|
+
let directOpenRouterProvider: ReturnType<typeof createOpenAI> | null = null
|
|
9
|
+
let directOpenRouterProviderKey: string | null = null
|
|
10
|
+
|
|
11
|
+
function readConfiguredOpenRouterApiKey(): string | null {
|
|
12
|
+
try {
|
|
13
|
+
return getRuntimeConfig().aiGateway.openRouterApiKey?.trim() || null
|
|
14
|
+
} catch {
|
|
15
|
+
return null
|
|
16
|
+
}
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
export function resolveOpenRouterApiKey(): string {
|
|
20
|
+
const configured = readConfiguredOpenRouterApiKey()
|
|
21
|
+
if (configured) return configured
|
|
22
|
+
|
|
23
|
+
const envKey = process.env.OPENROUTER_API_KEY?.trim()
|
|
24
|
+
if (envKey) return envKey
|
|
25
|
+
|
|
26
|
+
throw new Error('Missing OpenRouter API key. Set aiGateway.openRouterApiKey or OPENROUTER_API_KEY.')
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
export function normalizeDirectOpenRouterModelId(modelId: string): string {
|
|
30
|
+
const normalized = modelId.trim()
|
|
31
|
+
if (!normalized) {
|
|
32
|
+
throw new Error('OpenRouter model id is required.')
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
return normalized.startsWith(OPENROUTER_MODEL_PREFIX) ? normalized.slice(OPENROUTER_MODEL_PREFIX.length) : normalized
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
export function getDirectOpenRouterProvider() {
|
|
39
|
+
const apiKey = resolveOpenRouterApiKey()
|
|
40
|
+
if (directOpenRouterProvider && directOpenRouterProviderKey === apiKey) {
|
|
41
|
+
return directOpenRouterProvider
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
directOpenRouterProvider = createOpenAI({ baseURL: DIRECT_OPENROUTER_BASE_URL, apiKey })
|
|
45
|
+
directOpenRouterProviderKey = apiKey
|
|
46
|
+
|
|
47
|
+
return directOpenRouterProvider
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
export function resetDirectOpenRouterProviderForTests(): void {
|
|
51
|
+
directOpenRouterProvider = null
|
|
52
|
+
directOpenRouterProviderKey = null
|
|
53
|
+
}
|
package/src/runtime/execution-plan.ts
CHANGED
|
@@ -10,6 +10,8 @@ const EXECUTION_PLAN_AGENT_PROTOCOL_PROMPT = `<execution-plan-protocol>
|
|
|
10
10
|
- During plan-triggered turns, use the dedicated result-submission tool.
|
|
11
11
|
- Treat the active execution runs in <execution-plan-state> as authoritative for whether a plan already exists.
|
|
12
12
|
- If contracts or criteria materially change, replace the plan.
|
|
13
|
+
- Never append status labels in brackets to plan or node titles (e.g. "[blocked]", "[failed]", "[running]"). Use plain prose to describe status.
|
|
14
|
+
- Never expose raw run IDs (planRun:..., Run: UUID) in user-facing messages. Reference plans by their title only.
|
|
13
15
|
</execution-plan-protocol>`
|
|
14
16
|
|
|
15
17
|
function toExecutionPlanPromptSummaries(plans: SerializableExecutionPlan[]): ExecutionPlanPromptSummary[] {
|
package/src/runtime/runtime-config.ts
CHANGED
|
@@ -3,6 +3,7 @@ import { z } from 'zod'
|
|
|
3
3
|
|
|
4
4
|
import type { CoreThreadProfile } from '../config/agent-defaults'
|
|
5
5
|
import type { AgentFactory, AgentRuntimeConfigProvider, AgentToolBuilder } from '../config/agent-types'
|
|
6
|
+
import { OPENROUTER_FAST_RERANK_MODEL_ID } from '../config/model-constants'
|
|
6
7
|
import type { LotaThreadConfig, ThreadBootstrapWelcomeConfig } from '../config/thread-defaults'
|
|
7
8
|
import type { RecordIdRef } from '../db/record-id'
|
|
8
9
|
import type { NotificationService } from '../services/notification.service'
|
|
@@ -216,6 +217,9 @@ const agentsConfigSchema = z
|
|
|
216
217
|
}
|
|
217
218
|
})
|
|
218
219
|
|
|
220
|
+
export const MemoryRerankerStrategySchema = z.enum(['helper-model', 'rerank'])
|
|
221
|
+
export type MemoryRerankerStrategy = z.infer<typeof MemoryRerankerStrategySchema>
|
|
222
|
+
|
|
219
223
|
export const LotaRuntimeConfigSchema = z.object({
|
|
220
224
|
database: z.object({
|
|
221
225
|
url: z.string().trim().min(1),
|
|
@@ -228,6 +232,7 @@ export const LotaRuntimeConfigSchema = z.object({
|
|
|
228
232
|
url: z.string().trim().min(1),
|
|
229
233
|
key: z.string().trim().min(1),
|
|
230
234
|
embeddingModel: z.string().trim().min(1).default('openai/text-embedding-3-small'),
|
|
235
|
+
openRouterApiKey: z.string().trim().min(1).optional(),
|
|
231
236
|
}),
|
|
232
237
|
s3: z.object({
|
|
233
238
|
endpoint: z.string().trim().min(1),
|
|
@@ -250,8 +255,15 @@ export const LotaRuntimeConfigSchema = z.object({
|
|
|
250
255
|
.object({
|
|
251
256
|
searchK: z.coerce.number().int().positive().default(6),
|
|
252
257
|
embeddingCacheTtlSeconds: z.coerce.number().int().positive().default(7200),
|
|
258
|
+
rerankerStrategy: MemoryRerankerStrategySchema.default('rerank'),
|
|
259
|
+
rerankerModelId: z.string().trim().min(1).default(OPENROUTER_FAST_RERANK_MODEL_ID),
|
|
253
260
|
})
|
|
254
|
-
.default({
|
|
261
|
+
.default({
|
|
262
|
+
searchK: 6,
|
|
263
|
+
embeddingCacheTtlSeconds: 7200,
|
|
264
|
+
rerankerStrategy: 'rerank',
|
|
265
|
+
rerankerModelId: OPENROUTER_FAST_RERANK_MODEL_ID,
|
|
266
|
+
}),
|
|
255
267
|
threads: threadConfigSchema.default({}),
|
|
256
268
|
agents: agentsConfigSchema,
|
|
257
269
|
toolProviders: z.custom<Record<string, unknown>>(isToolProviderRecord).optional(),
|
|
@@ -288,6 +300,7 @@ export const LOTA_RUNTIME_ENV_KEYS = Object.freeze([
|
|
|
288
300
|
'AI_GATEWAY_URL',
|
|
289
301
|
'AI_GATEWAY_KEY',
|
|
290
302
|
'AI_EMBEDDING_MODEL',
|
|
303
|
+
'OPENROUTER_API_KEY',
|
|
291
304
|
'S3_ENDPOINT',
|
|
292
305
|
'S3_BUCKET',
|
|
293
306
|
'S3_REGION',
|
|
@@ -298,6 +311,8 @@ export const LOTA_RUNTIME_ENV_KEYS = Object.freeze([
|
|
|
298
311
|
'FIRECRAWL_API_BASE_URL',
|
|
299
312
|
'LOG_LEVEL',
|
|
300
313
|
'MEMORY_SEARCH_K',
|
|
314
|
+
'MEMORY_RERANKER_STRATEGY',
|
|
315
|
+
'MEMORY_RERANKER_MODEL_ID',
|
|
301
316
|
])
|
|
302
317
|
|
|
303
318
|
let runtimeConfig: ResolvedLotaRuntimeConfig | null = null
|
package/src/services/index.ts
CHANGED
|
@@ -16,6 +16,7 @@ export * from './notification.service'
|
|
|
16
16
|
export * from './ownership-dispatcher.service'
|
|
17
17
|
export * from './organization-member.service'
|
|
18
18
|
export * from './organization.service'
|
|
19
|
+
export * from './rerank.service'
|
|
19
20
|
export * from './plan-coordination.service'
|
|
20
21
|
export * from './plan-cycle.service'
|
|
21
22
|
export * from './plan-deadline.service'
|
package/src/services/memory.service.ts
CHANGED
|
@@ -23,11 +23,13 @@ import {
|
|
|
23
23
|
executeScopedRetrieval,
|
|
24
24
|
scopedRetrievalToMap,
|
|
25
25
|
} from '../runtime/retrieval-adapters'
|
|
26
|
+
import type { MemoryRerankerStrategy } from '../runtime/runtime-config'
|
|
26
27
|
import { getRuntimeConfig } from '../runtime/runtime-config'
|
|
27
28
|
import { createMemoryRerankerAgent, MEMORY_RERANKER_PROMPT } from '../system-agents/memory-reranker.agent'
|
|
28
29
|
import { createOrgMemoryAgent, ORG_MEMORY_PROMPT } from '../system-agents/memory.agent'
|
|
29
30
|
import { clampImportance, compactWhitespace, truncateText } from '../utils/string'
|
|
30
31
|
import { formatMemoryResults, formatRerankedResults, getCandidateLimit } from './memory-utils'
|
|
32
|
+
import { rerankService } from './rerank.service'
|
|
31
33
|
|
|
32
34
|
const ORG_MEMORY_TYPE = 'fact'
|
|
33
35
|
const RERANK_CANDIDATE_MAX_CHARS = 500
|
|
@@ -47,6 +49,7 @@ const ONBOARDING_MEMORY_EXTRACTION_PROMPT =
|
|
|
47
49
|
'Onboarding mode is active. Extract multiple concrete startup facts from user-provided context: company mission, product capabilities, customer segments, pricing, traction, go-to-market plans, roadmap, team composition, technical stack, risks, and referenced URLs. Prefer one fact per concrete claim.'
|
|
48
50
|
const DIRECT_MEMORY_ASSESSMENT_PROMPT =
|
|
49
51
|
'The user is submitting a direct memory candidate. Keep the wording faithful. Return one fact only when the statement is durable enough for memory; otherwise return no facts.'
|
|
52
|
+
const RERANK_SECTION_TITLE = 'Most relevant memories'
|
|
50
53
|
|
|
51
54
|
const helperModelRuntime = createHelperModelRuntime()
|
|
52
55
|
|
|
@@ -234,26 +237,9 @@ class MemoryService {
|
|
|
234
237
|
if (candidates.length === 0) return null
|
|
235
238
|
|
|
236
239
|
try {
|
|
237
|
-
return
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
defaultSystemPrompt: MEMORY_RERANKER_PROMPT,
|
|
241
|
-
messages: [
|
|
242
|
-
{
|
|
243
|
-
role: 'user',
|
|
244
|
-
content: JSON.stringify({
|
|
245
|
-
query,
|
|
246
|
-
maxItems,
|
|
247
|
-
candidates: candidates.map((candidate) => ({
|
|
248
|
-
id: candidate.id,
|
|
249
|
-
text: this.truncateCandidateText(this.buildRerankerCandidateText(candidate)),
|
|
250
|
-
score: candidate.score,
|
|
251
|
-
})),
|
|
252
|
-
}),
|
|
253
|
-
},
|
|
254
|
-
],
|
|
255
|
-
schema: MemoryRerankOutputSchema,
|
|
256
|
-
})
|
|
240
|
+
return this.getRerankerStrategy() === 'rerank'
|
|
241
|
+
? await this.rerankCandidatesWithRerankService(query, candidates, maxItems)
|
|
242
|
+
: await this.rerankCandidatesWithHelper(query, candidates, maxItems)
|
|
257
243
|
} catch (error) {
|
|
258
244
|
aiLogger.warn`Memory reranker failed: ${error}`
|
|
259
245
|
return null
|
|
@@ -271,33 +257,117 @@ class MemoryService {
|
|
|
271
257
|
if (flattened.length === 0 || flattened.length <= maxItems) return null
|
|
272
258
|
|
|
273
259
|
try {
|
|
274
|
-
return
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
defaultSystemPrompt: MEMORY_RERANKER_PROMPT,
|
|
278
|
-
messages: [
|
|
279
|
-
{
|
|
280
|
-
role: 'user',
|
|
281
|
-
content: JSON.stringify({
|
|
282
|
-
query,
|
|
283
|
-
maxItems,
|
|
284
|
-
candidates: flattened.map((candidate) => ({
|
|
285
|
-
id: candidate.id,
|
|
286
|
-
text: this.truncateCandidateText(this.buildRerankerCandidateText(candidate)),
|
|
287
|
-
score: candidate.score,
|
|
288
|
-
scope: candidate.scopeTag,
|
|
289
|
-
})),
|
|
290
|
-
}),
|
|
291
|
-
},
|
|
292
|
-
],
|
|
293
|
-
schema: MemoryRerankOutputSchema,
|
|
294
|
-
})
|
|
260
|
+
return this.getRerankerStrategy() === 'rerank'
|
|
261
|
+
? await this.rerankCandidatesMultiScopeWithRerankService(query, flattened, maxItems)
|
|
262
|
+
: await this.rerankCandidatesMultiScopeWithHelper(query, flattened, maxItems)
|
|
295
263
|
} catch (error) {
|
|
296
264
|
aiLogger.warn`Multi-scope memory reranker failed: ${error}`
|
|
297
265
|
return null
|
|
298
266
|
}
|
|
299
267
|
}
|
|
300
268
|
|
|
269
|
+
private getRerankerStrategy(): MemoryRerankerStrategy {
|
|
270
|
+
return getRuntimeConfig().memory.rerankerStrategy
|
|
271
|
+
}
|
|
272
|
+
|
|
273
|
+
private buildRerankOutput(ids: string[], title = RERANK_SECTION_TITLE): MemoryRerankOutput {
|
|
274
|
+
return { sections: [{ title, items: ids.map((id) => ({ id, relevance: '' })) }] }
|
|
275
|
+
}
|
|
276
|
+
|
|
277
|
+
private async rerankCandidatesWithHelper(
|
|
278
|
+
query: string,
|
|
279
|
+
candidates: MemorySearchResult[],
|
|
280
|
+
maxItems: number,
|
|
281
|
+
): Promise<MemoryRerankOutput> {
|
|
282
|
+
return await helperModelRuntime.generateHelperStructured({
|
|
283
|
+
tag: 'memory-reranker',
|
|
284
|
+
createAgent: createMemoryRerankerAgent,
|
|
285
|
+
defaultSystemPrompt: MEMORY_RERANKER_PROMPT,
|
|
286
|
+
messages: [
|
|
287
|
+
{
|
|
288
|
+
role: 'user',
|
|
289
|
+
content: JSON.stringify({
|
|
290
|
+
query,
|
|
291
|
+
maxItems,
|
|
292
|
+
candidates: candidates.map((candidate) => ({
|
|
293
|
+
id: candidate.id,
|
|
294
|
+
text: this.truncateCandidateText(this.buildRerankerCandidateText(candidate)),
|
|
295
|
+
score: candidate.score,
|
|
296
|
+
})),
|
|
297
|
+
}),
|
|
298
|
+
},
|
|
299
|
+
],
|
|
300
|
+
schema: MemoryRerankOutputSchema,
|
|
301
|
+
})
|
|
302
|
+
}
|
|
303
|
+
|
|
304
|
+
private async rerankCandidatesWithRerankService(
|
|
305
|
+
query: string,
|
|
306
|
+
candidates: MemorySearchResult[],
|
|
307
|
+
maxItems: number,
|
|
308
|
+
): Promise<MemoryRerankOutput> {
|
|
309
|
+
const reranked = await rerankService.rerankDocuments({
|
|
310
|
+
query,
|
|
311
|
+
topN: maxItems,
|
|
312
|
+
documents: candidates.map((candidate) => ({
|
|
313
|
+
id: candidate.id,
|
|
314
|
+
text: this.truncateCandidateText(this.buildRerankerCandidateText(candidate)),
|
|
315
|
+
})),
|
|
316
|
+
})
|
|
317
|
+
|
|
318
|
+
return this.buildRerankOutput(reranked.results.map((item) => item.id))
|
|
319
|
+
}
|
|
320
|
+
|
|
321
|
+
private async rerankCandidatesMultiScopeWithHelper(
|
|
322
|
+
query: string,
|
|
323
|
+
flattenedCandidates: Array<MemorySearchResult & { scopeTag: string }>,
|
|
324
|
+
maxItems: number,
|
|
325
|
+
): Promise<MemoryRerankOutput> {
|
|
326
|
+
return await helperModelRuntime.generateHelperStructured({
|
|
327
|
+
tag: 'memory-reranker-multi-scope',
|
|
328
|
+
createAgent: createMemoryRerankerAgent,
|
|
329
|
+
defaultSystemPrompt: MEMORY_RERANKER_PROMPT,
|
|
330
|
+
messages: [
|
|
331
|
+
{
|
|
332
|
+
role: 'user',
|
|
333
|
+
content: JSON.stringify({
|
|
334
|
+
query,
|
|
335
|
+
maxItems,
|
|
336
|
+
candidates: flattenedCandidates.map((candidate) => ({
|
|
337
|
+
id: candidate.id,
|
|
338
|
+
text: this.truncateCandidateText(this.buildRerankerCandidateText(candidate)),
|
|
339
|
+
score: candidate.score,
|
|
340
|
+
scope: candidate.scopeTag,
|
|
341
|
+
})),
|
|
342
|
+
}),
|
|
343
|
+
},
|
|
344
|
+
],
|
|
345
|
+
schema: MemoryRerankOutputSchema,
|
|
346
|
+
})
|
|
347
|
+
}
|
|
348
|
+
|
|
349
|
+
private async rerankCandidatesMultiScopeWithRerankService(
|
|
350
|
+
query: string,
|
|
351
|
+
flattenedCandidates: Array<MemorySearchResult & { scopeTag: string }>,
|
|
352
|
+
maxItems: number,
|
|
353
|
+
): Promise<MemoryRerankOutput> {
|
|
354
|
+
const reranked = await rerankService.rerankDocuments({
|
|
355
|
+
query,
|
|
356
|
+
topN: maxItems,
|
|
357
|
+
documents: flattenedCandidates.map((candidate) => ({
|
|
358
|
+
id: candidate.id,
|
|
359
|
+
text: this.truncateCandidateText(
|
|
360
|
+
`${this.buildRerankerCandidateText(candidate)}\n\nScope: ${candidate.scopeTag}`,
|
|
361
|
+
),
|
|
362
|
+
})),
|
|
363
|
+
})
|
|
364
|
+
|
|
365
|
+
return this.buildRerankOutput(
|
|
366
|
+
reranked.results.map((item) => item.id),
|
|
367
|
+
'Top matches across memory scopes',
|
|
368
|
+
)
|
|
369
|
+
}
|
|
370
|
+
|
|
301
371
|
private async searchMemories({
|
|
302
372
|
query,
|
|
303
373
|
memory,
|
package/src/services/rerank.service.ts
ADDED
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
import { z } from 'zod'
|
|
2
|
+
|
|
3
|
+
import { OPENROUTER_FAST_RERANK_MODEL_ID } from '../config/model-constants'
|
|
4
|
+
import { normalizeDirectOpenRouterModelId, resolveOpenRouterApiKey } from '../openrouter/direct-provider'
|
|
5
|
+
import { getRuntimeConfig } from '../runtime/runtime-config'
|
|
6
|
+
|
|
7
|
+
const OPENROUTER_RERANK_URL = 'https://openrouter.ai/api/v1/rerank' as const
|
|
8
|
+
|
|
9
|
+
const RerankResponseSchema = z
|
|
10
|
+
.object({
|
|
11
|
+
model: z.string().optional(),
|
|
12
|
+
results: z.array(
|
|
13
|
+
z
|
|
14
|
+
.object({
|
|
15
|
+
index: z.number().int().nonnegative().optional(),
|
|
16
|
+
relevance_score: z.number().optional(),
|
|
17
|
+
document: z.union([z.string(), z.object({ text: z.string().optional() }).passthrough()]).optional(),
|
|
18
|
+
})
|
|
19
|
+
.passthrough(),
|
|
20
|
+
),
|
|
21
|
+
usage: z.object({ search_units: z.number().optional(), cost: z.number().optional() }).passthrough().optional(),
|
|
22
|
+
})
|
|
23
|
+
.passthrough()
|
|
24
|
+
|
|
25
|
+
export interface RerankDocument {
|
|
26
|
+
id: string
|
|
27
|
+
text: string
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
export interface RerankResultItem extends RerankDocument {
|
|
31
|
+
index: number
|
|
32
|
+
relevanceScore: number | null
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
export interface RerankUsage {
|
|
36
|
+
searchUnits?: number
|
|
37
|
+
cost?: number
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
export interface RerankResult {
|
|
41
|
+
modelId: string
|
|
42
|
+
results: RerankResultItem[]
|
|
43
|
+
usage?: RerankUsage
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
function clampTopN(topN: number | undefined, total: number): number {
|
|
47
|
+
if (!Number.isFinite(topN)) return total
|
|
48
|
+
return Math.max(1, Math.min(total, Math.trunc(topN as number)))
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
function readConfiguredRerankModelId(): string | null {
|
|
52
|
+
try {
|
|
53
|
+
return getRuntimeConfig().memory.rerankerModelId.trim()
|
|
54
|
+
} catch {
|
|
55
|
+
return null
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
function resolveRerankModelId(modelId?: string): string {
|
|
60
|
+
const explicit = modelId?.trim()
|
|
61
|
+
if (explicit) return normalizeDirectOpenRouterModelId(explicit)
|
|
62
|
+
|
|
63
|
+
const configured = readConfiguredRerankModelId()
|
|
64
|
+
if (configured) return normalizeDirectOpenRouterModelId(configured)
|
|
65
|
+
|
|
66
|
+
return OPENROUTER_FAST_RERANK_MODEL_ID
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
function matchRerankedDocument(
|
|
70
|
+
candidate: z.infer<typeof RerankResponseSchema>['results'][number],
|
|
71
|
+
documents: RerankDocument[],
|
|
72
|
+
): RerankDocument | null {
|
|
73
|
+
if (typeof candidate.index === 'number' && candidate.index >= 0 && candidate.index < documents.length) {
|
|
74
|
+
return documents[candidate.index] ?? null
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
const text =
|
|
78
|
+
typeof candidate.document === 'string'
|
|
79
|
+
? candidate.document
|
|
80
|
+
: typeof candidate.document?.text === 'string'
|
|
81
|
+
? candidate.document.text
|
|
82
|
+
: null
|
|
83
|
+
if (!text) return null
|
|
84
|
+
|
|
85
|
+
return documents.find((document) => document.text === text) ?? null
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
class RerankService {
|
|
89
|
+
async rerankDocuments(params: {
|
|
90
|
+
query: string
|
|
91
|
+
documents: RerankDocument[]
|
|
92
|
+
topN?: number
|
|
93
|
+
modelId?: string
|
|
94
|
+
}): Promise<RerankResult> {
|
|
95
|
+
if (params.documents.length === 0) {
|
|
96
|
+
return { modelId: resolveRerankModelId(params.modelId), results: [] }
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
const apiKey = resolveOpenRouterApiKey()
|
|
100
|
+
const modelId = resolveRerankModelId(params.modelId)
|
|
101
|
+
const topN = clampTopN(params.topN, params.documents.length)
|
|
102
|
+
|
|
103
|
+
const response = await fetch(OPENROUTER_RERANK_URL, {
|
|
104
|
+
method: 'POST',
|
|
105
|
+
headers: { Authorization: `Bearer ${apiKey}`, 'Content-Type': 'application/json' },
|
|
106
|
+
body: JSON.stringify({
|
|
107
|
+
model: modelId,
|
|
108
|
+
query: params.query,
|
|
109
|
+
documents: params.documents.map((document) => document.text),
|
|
110
|
+
top_n: topN,
|
|
111
|
+
}),
|
|
112
|
+
})
|
|
113
|
+
|
|
114
|
+
const responseText = await response.text()
|
|
115
|
+
let payload: unknown = responseText
|
|
116
|
+
try {
|
|
117
|
+
payload = JSON.parse(responseText)
|
|
118
|
+
} catch {}
|
|
119
|
+
|
|
120
|
+
if (!response.ok) {
|
|
121
|
+
throw new Error(`OpenRouter rerank failed (${response.status}): ${responseText}`)
|
|
122
|
+
}
|
|
123
|
+
|
|
124
|
+
const parsed = RerankResponseSchema.parse(payload)
|
|
125
|
+
const seen = new Set<string>()
|
|
126
|
+
const results: RerankResultItem[] = []
|
|
127
|
+
|
|
128
|
+
for (const item of parsed.results) {
|
|
129
|
+
const matched = matchRerankedDocument(item, params.documents)
|
|
130
|
+
if (!matched || seen.has(matched.id)) continue
|
|
131
|
+
seen.add(matched.id)
|
|
132
|
+
|
|
133
|
+
results.push({
|
|
134
|
+
id: matched.id,
|
|
135
|
+
text: matched.text,
|
|
136
|
+
index: item.index ?? params.documents.findIndex((document) => document.id === matched.id),
|
|
137
|
+
relevanceScore: typeof item.relevance_score === 'number' ? item.relevance_score : null,
|
|
138
|
+
})
|
|
139
|
+
|
|
140
|
+
if (results.length >= topN) break
|
|
141
|
+
}
|
|
142
|
+
|
|
143
|
+
return {
|
|
144
|
+
modelId: parsed.model?.trim() || modelId,
|
|
145
|
+
results,
|
|
146
|
+
usage: parsed.usage
|
|
147
|
+
? {
|
|
148
|
+
...(parsed.usage.search_units !== undefined ? { searchUnits: parsed.usage.search_units } : {}),
|
|
149
|
+
...(parsed.usage.cost !== undefined ? { cost: parsed.usage.cost } : {}),
|
|
150
|
+
}
|
|
151
|
+
: undefined,
|
|
152
|
+
}
|
|
153
|
+
}
|
|
154
|
+
}
|
|
155
|
+
|
|
156
|
+
export const rerankService = new RerankService()
|