@lota-sdk/core 0.4.6 → 0.4.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lota-sdk/core",
3
- "version": "0.4.6",
3
+ "version": "0.4.8",
4
4
  "type": "module",
5
5
  "main": "./src/index.ts",
6
6
  "types": "./src/index.ts",
@@ -32,7 +32,7 @@
32
32
  "@chat-adapter/slack": "^4.23.0",
33
33
  "@chat-adapter/state-ioredis": "^4.23.0",
34
34
  "@logtape/logtape": "^2.0.5",
35
- "@lota-sdk/shared": "0.4.6",
35
+ "@lota-sdk/shared": "0.4.8",
36
36
  "@mendable/firecrawl-js": "^4.18.1",
37
37
  "@surrealdb/node": "^3.0.3",
38
38
  "ai": "^6.0.145",
@@ -2,6 +2,7 @@ export {
2
2
  AI_GATEWAY_REASONING_SUMMARY_LEVEL,
3
3
  OPENAI_HIGH_REASONING_PROVIDER_OPTIONS,
4
4
  OPENAI_REASONING_MODEL_ID,
5
+ OPENROUTER_FAST_RERANK_MODEL_ID,
5
6
  OPENROUTER_FAST_REASONING_MODEL_ID,
6
7
  OPENROUTER_GEMINI_FLASH_MODEL_ID,
7
8
  OPENROUTER_HIGH_REASONING_PROVIDER_OPTIONS,
@@ -63,6 +63,8 @@ import type { recentActivityTitleService } from './services/recent-activity-titl
63
63
  import { recentActivityTitleService as recentActivityTitleServiceSingleton } from './services/recent-activity-title.service'
64
64
  import type { recentActivityService } from './services/recent-activity.service'
65
65
  import { recentActivityService as recentActivityServiceSingleton } from './services/recent-activity.service'
66
+ import type { rerankService } from './services/rerank.service'
67
+ import { rerankService as rerankServiceSingleton } from './services/rerank.service'
66
68
  import {
67
69
  configureSocialChatHistory,
68
70
  socialChatHistoryService as socialChatHistoryServiceSingleton,
@@ -134,6 +136,7 @@ export interface LotaRuntime {
134
136
  documentChunkService: typeof documentChunkService
135
137
  generatedDocumentStorageService: typeof generatedDocumentStorageService
136
138
  memoryService: typeof memoryService
139
+ rerankService: typeof rerankService
137
140
  verifyMutatingApproval: typeof verifyMutatingApproval
138
141
  organizationService: typeof organizationService
139
142
  organizationMemberService: typeof organizationMemberService
@@ -403,6 +406,7 @@ export async function createLotaRuntime(config: LotaRuntimeConfig): Promise<Lota
403
406
  documentChunkService: documentChunkServiceSingleton,
404
407
  generatedDocumentStorageService: generatedDocumentStorageServiceSingleton,
405
408
  memoryService: memoryServiceSingleton,
409
+ rerankService: rerankServiceSingleton,
406
410
  verifyMutatingApproval: verifyMutatingApprovalSingleton,
407
411
  organizationService: organizationServiceSingleton,
408
412
  organizationMemberService: organizationMemberServiceSingleton,
@@ -1,7 +1,11 @@
1
1
  import { embed, embedMany } from 'ai'
2
2
 
3
- import { aiGatewayEmbeddingModel } from '../ai-gateway/ai-gateway'
4
3
  import { getEmbeddingCache } from '../ai/embedding-cache'
4
+ import {
5
+ getDirectOpenRouterProvider,
6
+ normalizeDirectOpenRouterModelId,
7
+ resetDirectOpenRouterProviderForTests,
8
+ } from '../openrouter/direct-provider'
5
9
  import { getRuntimeConfig } from '../runtime/runtime-config'
6
10
 
7
11
  const SUPPORTED_EMBEDDING_PREFIXES = ['openai/', 'openrouter/'] as const
@@ -30,7 +34,7 @@ function resolveEmbeddingModel(modelId: string) {
30
34
  )
31
35
  }
32
36
 
33
- return aiGatewayEmbeddingModel(normalized)
37
+ return getDirectOpenRouterProvider().embeddingModel(normalizeDirectOpenRouterModelId(normalized))
34
38
  }
35
39
 
36
40
  function normalizeEmbedding(embedding: readonly number[]): number[] {
@@ -153,4 +157,5 @@ export function getDefaultEmbeddings(): ProviderEmbeddings {
153
157
 
154
158
  export function resetDefaultEmbeddingsForTests(): void {
155
159
  defaultEmbeddings = null
160
+ resetDirectOpenRouterProviderForTests()
156
161
  }
@@ -0,0 +1,53 @@
1
+ import { createOpenAI } from '@ai-sdk/openai'
2
+
3
+ import { getRuntimeConfig } from '../runtime/runtime-config'
4
+
5
+ const DIRECT_OPENROUTER_BASE_URL = 'https://openrouter.ai/api/v1' as const
6
+ const OPENROUTER_MODEL_PREFIX = 'openrouter/' as const
7
+
8
+ let directOpenRouterProvider: ReturnType<typeof createOpenAI> | null = null
9
+ let directOpenRouterProviderKey: string | null = null
10
+
11
/**
 * Reads `aiGateway.openRouterApiKey` from the runtime config.
 *
 * @returns The trimmed key, or `null` when the key is absent or blank, or
 *   when reading the runtime config throws.
 */
function readConfiguredOpenRouterApiKey(): string | null {
  try {
    const trimmedKey = getRuntimeConfig().aiGateway.openRouterApiKey?.trim()
    return trimmedKey ? trimmedKey : null
  } catch {
    // Any failure while reading the config is treated as "no configured key".
    return null
  }
}
18
+
19
/**
 * Resolves the OpenRouter API key, preferring the runtime config value and
 * falling back to the `OPENROUTER_API_KEY` environment variable.
 *
 * @returns A non-empty, trimmed API key.
 * @throws Error when neither source provides a non-blank key.
 */
export function resolveOpenRouterApiKey(): string {
  // The environment is only consulted when the config yields nothing.
  const apiKey = readConfiguredOpenRouterApiKey() || process.env.OPENROUTER_API_KEY?.trim()
  if (!apiKey) {
    throw new Error('Missing OpenRouter API key. Set aiGateway.openRouterApiKey or OPENROUTER_API_KEY.')
  }

  return apiKey
}
28
+
29
/**
 * Converts a model id into the form sent directly to OpenRouter by trimming
 * it and removing one leading `openrouter/` prefix, if present.
 *
 * @param modelId - Model id, optionally prefixed with `openrouter/`.
 * @returns The trimmed id without the `openrouter/` prefix.
 * @throws Error when the id is blank, including when nothing but whitespace
 *   remains after the prefix is removed (e.g. `"openrouter/"`).
 */
export function normalizeDirectOpenRouterModelId(modelId: string): string {
  const trimmed = modelId.trim()
  const bareId = trimmed.startsWith(OPENROUTER_MODEL_PREFIX)
    ? trimmed.slice(OPENROUTER_MODEL_PREFIX.length).trim()
    : trimmed

  // Validate after stripping so a bare prefix cannot yield an empty model id.
  if (!bareId) {
    throw new Error('OpenRouter model id is required.')
  }

  return bareId
}
37
+
38
/**
 * Returns a provider created with `createOpenAI` that targets
 * `DIRECT_OPENROUTER_BASE_URL`.
 *
 * The provider is kept in module state together with the API key it was
 * built with; a new one is created whenever the resolved key differs.
 *
 * @throws Error when no OpenRouter API key can be resolved.
 */
export function getDirectOpenRouterProvider() {
  const apiKey = resolveOpenRouterApiKey()

  const cachedProvider = directOpenRouterProvider
  if (cachedProvider && directOpenRouterProviderKey === apiKey) {
    return cachedProvider
  }

  const freshProvider = createOpenAI({ baseURL: DIRECT_OPENROUTER_BASE_URL, apiKey })
  directOpenRouterProvider = freshProvider
  directOpenRouterProviderKey = apiKey

  return freshProvider
}
49
+
50
/**
 * Clears the module-level provider and the key it was created with, so the
 * next `getDirectOpenRouterProvider()` call builds a new provider.
 */
export function resetDirectOpenRouterProviderForTests(): void {
  directOpenRouterProviderKey = null
  directOpenRouterProvider = null
}
@@ -10,6 +10,8 @@ const EXECUTION_PLAN_AGENT_PROTOCOL_PROMPT = `<execution-plan-protocol>
10
10
  - During plan-triggered turns, use the dedicated result-submission tool.
11
11
  - Treat the active execution runs in <execution-plan-state> as authoritative for whether a plan already exists.
12
12
  - If contracts or criteria materially change, replace the plan.
13
+ - Never append status labels in brackets to plan or node titles (e.g. "[blocked]", "[failed]", "[running]"). Use plain prose to describe status.
14
+ - Never expose raw run IDs (planRun:..., Run: UUID) in user-facing messages. Reference plans by their title only.
13
15
  </execution-plan-protocol>`
14
16
 
15
17
  function toExecutionPlanPromptSummaries(plans: SerializableExecutionPlan[]): ExecutionPlanPromptSummary[] {
@@ -3,6 +3,7 @@ import { z } from 'zod'
3
3
 
4
4
  import type { CoreThreadProfile } from '../config/agent-defaults'
5
5
  import type { AgentFactory, AgentRuntimeConfigProvider, AgentToolBuilder } from '../config/agent-types'
6
+ import { OPENROUTER_FAST_RERANK_MODEL_ID } from '../config/model-constants'
6
7
  import type { LotaThreadConfig, ThreadBootstrapWelcomeConfig } from '../config/thread-defaults'
7
8
  import type { RecordIdRef } from '../db/record-id'
8
9
  import type { NotificationService } from '../services/notification.service'
@@ -216,6 +217,9 @@ const agentsConfigSchema = z
216
217
  }
217
218
  })
218
219
 
220
+ export const MemoryRerankerStrategySchema = z.enum(['helper-model', 'rerank'])
221
+ export type MemoryRerankerStrategy = z.infer<typeof MemoryRerankerStrategySchema>
222
+
219
223
  export const LotaRuntimeConfigSchema = z.object({
220
224
  database: z.object({
221
225
  url: z.string().trim().min(1),
@@ -228,6 +232,7 @@ export const LotaRuntimeConfigSchema = z.object({
228
232
  url: z.string().trim().min(1),
229
233
  key: z.string().trim().min(1),
230
234
  embeddingModel: z.string().trim().min(1).default('openai/text-embedding-3-small'),
235
+ openRouterApiKey: z.string().trim().min(1).optional(),
231
236
  }),
232
237
  s3: z.object({
233
238
  endpoint: z.string().trim().min(1),
@@ -250,8 +255,15 @@ export const LotaRuntimeConfigSchema = z.object({
250
255
  .object({
251
256
  searchK: z.coerce.number().int().positive().default(6),
252
257
  embeddingCacheTtlSeconds: z.coerce.number().int().positive().default(7200),
258
+ rerankerStrategy: MemoryRerankerStrategySchema.default('rerank'),
259
+ rerankerModelId: z.string().trim().min(1).default(OPENROUTER_FAST_RERANK_MODEL_ID),
253
260
  })
254
- .default({ searchK: 6, embeddingCacheTtlSeconds: 7200 }),
261
+ .default({
262
+ searchK: 6,
263
+ embeddingCacheTtlSeconds: 7200,
264
+ rerankerStrategy: 'rerank',
265
+ rerankerModelId: OPENROUTER_FAST_RERANK_MODEL_ID,
266
+ }),
255
267
  threads: threadConfigSchema.default({}),
256
268
  agents: agentsConfigSchema,
257
269
  toolProviders: z.custom<Record<string, unknown>>(isToolProviderRecord).optional(),
@@ -288,6 +300,7 @@ export const LOTA_RUNTIME_ENV_KEYS = Object.freeze([
288
300
  'AI_GATEWAY_URL',
289
301
  'AI_GATEWAY_KEY',
290
302
  'AI_EMBEDDING_MODEL',
303
+ 'OPENROUTER_API_KEY',
291
304
  'S3_ENDPOINT',
292
305
  'S3_BUCKET',
293
306
  'S3_REGION',
@@ -298,6 +311,8 @@ export const LOTA_RUNTIME_ENV_KEYS = Object.freeze([
298
311
  'FIRECRAWL_API_BASE_URL',
299
312
  'LOG_LEVEL',
300
313
  'MEMORY_SEARCH_K',
314
+ 'MEMORY_RERANKER_STRATEGY',
315
+ 'MEMORY_RERANKER_MODEL_ID',
301
316
  ])
302
317
 
303
318
  let runtimeConfig: ResolvedLotaRuntimeConfig | null = null
@@ -16,6 +16,7 @@ export * from './notification.service'
16
16
  export * from './ownership-dispatcher.service'
17
17
  export * from './organization-member.service'
18
18
  export * from './organization.service'
19
+ export * from './rerank.service'
19
20
  export * from './plan-coordination.service'
20
21
  export * from './plan-cycle.service'
21
22
  export * from './plan-deadline.service'
@@ -23,11 +23,13 @@ import {
23
23
  executeScopedRetrieval,
24
24
  scopedRetrievalToMap,
25
25
  } from '../runtime/retrieval-adapters'
26
+ import type { MemoryRerankerStrategy } from '../runtime/runtime-config'
26
27
  import { getRuntimeConfig } from '../runtime/runtime-config'
27
28
  import { createMemoryRerankerAgent, MEMORY_RERANKER_PROMPT } from '../system-agents/memory-reranker.agent'
28
29
  import { createOrgMemoryAgent, ORG_MEMORY_PROMPT } from '../system-agents/memory.agent'
29
30
  import { clampImportance, compactWhitespace, truncateText } from '../utils/string'
30
31
  import { formatMemoryResults, formatRerankedResults, getCandidateLimit } from './memory-utils'
32
+ import { rerankService } from './rerank.service'
31
33
 
32
34
  const ORG_MEMORY_TYPE = 'fact'
33
35
  const RERANK_CANDIDATE_MAX_CHARS = 500
@@ -47,6 +49,7 @@ const ONBOARDING_MEMORY_EXTRACTION_PROMPT =
47
49
  'Onboarding mode is active. Extract multiple concrete startup facts from user-provided context: company mission, product capabilities, customer segments, pricing, traction, go-to-market plans, roadmap, team composition, technical stack, risks, and referenced URLs. Prefer one fact per concrete claim.'
48
50
  const DIRECT_MEMORY_ASSESSMENT_PROMPT =
49
51
  'The user is submitting a direct memory candidate. Keep the wording faithful. Return one fact only when the statement is durable enough for memory; otherwise return no facts.'
52
+ const RERANK_SECTION_TITLE = 'Most relevant memories'
50
53
 
51
54
  const helperModelRuntime = createHelperModelRuntime()
52
55
 
@@ -234,26 +237,9 @@ class MemoryService {
234
237
  if (candidates.length === 0) return null
235
238
 
236
239
  try {
237
- return await helperModelRuntime.generateHelperStructured({
238
- tag: 'memory-reranker',
239
- createAgent: createMemoryRerankerAgent,
240
- defaultSystemPrompt: MEMORY_RERANKER_PROMPT,
241
- messages: [
242
- {
243
- role: 'user',
244
- content: JSON.stringify({
245
- query,
246
- maxItems,
247
- candidates: candidates.map((candidate) => ({
248
- id: candidate.id,
249
- text: this.truncateCandidateText(this.buildRerankerCandidateText(candidate)),
250
- score: candidate.score,
251
- })),
252
- }),
253
- },
254
- ],
255
- schema: MemoryRerankOutputSchema,
256
- })
240
+ return this.getRerankerStrategy() === 'rerank'
241
+ ? await this.rerankCandidatesWithRerankService(query, candidates, maxItems)
242
+ : await this.rerankCandidatesWithHelper(query, candidates, maxItems)
257
243
  } catch (error) {
258
244
  aiLogger.warn`Memory reranker failed: ${error}`
259
245
  return null
@@ -271,33 +257,117 @@ class MemoryService {
271
257
  if (flattened.length === 0 || flattened.length <= maxItems) return null
272
258
 
273
259
  try {
274
- return await helperModelRuntime.generateHelperStructured({
275
- tag: 'memory-reranker-multi-scope',
276
- createAgent: createMemoryRerankerAgent,
277
- defaultSystemPrompt: MEMORY_RERANKER_PROMPT,
278
- messages: [
279
- {
280
- role: 'user',
281
- content: JSON.stringify({
282
- query,
283
- maxItems,
284
- candidates: flattened.map((candidate) => ({
285
- id: candidate.id,
286
- text: this.truncateCandidateText(this.buildRerankerCandidateText(candidate)),
287
- score: candidate.score,
288
- scope: candidate.scopeTag,
289
- })),
290
- }),
291
- },
292
- ],
293
- schema: MemoryRerankOutputSchema,
294
- })
260
+ return this.getRerankerStrategy() === 'rerank'
261
+ ? await this.rerankCandidatesMultiScopeWithRerankService(query, flattened, maxItems)
262
+ : await this.rerankCandidatesMultiScopeWithHelper(query, flattened, maxItems)
295
263
  } catch (error) {
296
264
  aiLogger.warn`Multi-scope memory reranker failed: ${error}`
297
265
  return null
298
266
  }
299
267
  }
300
268
 
269
/** Returns `memory.rerankerStrategy` from the current runtime config. */
private getRerankerStrategy(): MemoryRerankerStrategy {
  const { memory } = getRuntimeConfig()
  return memory.rerankerStrategy
}
272
+
273
/**
 * Wraps an ordered list of ids into a single-section rerank output.
 *
 * @param ids - Memory ids, in the order they should appear.
 * @param title - Section title; defaults to `RERANK_SECTION_TITLE`.
 * @returns One section whose items carry an empty `relevance` string.
 */
private buildRerankOutput(ids: string[], title = RERANK_SECTION_TITLE): MemoryRerankOutput {
  const items = ids.map((id) => ({ id, relevance: '' }))
  return { sections: [{ title, items }] }
}
276
+
277
/**
 * Reranks candidates through the helper-model runtime.
 *
 * The query, `maxItems` and each candidate (id, truncated text, score) are
 * serialized as JSON into a single user message; `MemoryRerankOutputSchema`
 * is passed as the structured-output schema.
 */
private async rerankCandidatesWithHelper(
  query: string,
  candidates: MemorySearchResult[],
  maxItems: number,
): Promise<MemoryRerankOutput> {
  const serializedCandidates = candidates.map((entry) => {
    const text = this.truncateCandidateText(this.buildRerankerCandidateText(entry))
    return { id: entry.id, text, score: entry.score }
  })
  const content = JSON.stringify({ query, maxItems, candidates: serializedCandidates })

  return await helperModelRuntime.generateHelperStructured({
    tag: 'memory-reranker',
    createAgent: createMemoryRerankerAgent,
    defaultSystemPrompt: MEMORY_RERANKER_PROMPT,
    messages: [{ role: 'user', content }],
    schema: MemoryRerankOutputSchema,
  })
}
303
+
304
/**
 * Reranks candidates through `rerankService.rerankDocuments`, requesting at
 * most `maxItems` results, and wraps the returned ids (in returned order)
 * into a single default-titled section.
 */
private async rerankCandidatesWithRerankService(
  query: string,
  candidates: MemorySearchResult[],
  maxItems: number,
): Promise<MemoryRerankOutput> {
  const documents = candidates.map((entry) => ({
    id: entry.id,
    text: this.truncateCandidateText(this.buildRerankerCandidateText(entry)),
  }))

  const { results } = await rerankService.rerankDocuments({ query, topN: maxItems, documents })
  const orderedIds = results.map(({ id }) => id)

  return this.buildRerankOutput(orderedIds)
}
320
+
321
/**
 * Multi-scope variant of the helper-model reranker.
 *
 * Builds the same JSON user message as the single-scope path, with each
 * candidate additionally carrying its `scopeTag` under the `scope` key, and
 * uses the `memory-reranker-multi-scope` tag.
 */
private async rerankCandidatesMultiScopeWithHelper(
  query: string,
  flattenedCandidates: Array<MemorySearchResult & { scopeTag: string }>,
  maxItems: number,
): Promise<MemoryRerankOutput> {
  const serializedCandidates = flattenedCandidates.map((entry) => {
    const text = this.truncateCandidateText(this.buildRerankerCandidateText(entry))
    return { id: entry.id, text, score: entry.score, scope: entry.scopeTag }
  })
  const content = JSON.stringify({ query, maxItems, candidates: serializedCandidates })

  return await helperModelRuntime.generateHelperStructured({
    tag: 'memory-reranker-multi-scope',
    createAgent: createMemoryRerankerAgent,
    defaultSystemPrompt: MEMORY_RERANKER_PROMPT,
    messages: [{ role: 'user', content }],
    schema: MemoryRerankOutputSchema,
  })
}
348
+
349
/**
 * Multi-scope variant of the rerank-service path.
 *
 * Each document is the candidate text followed by a `Scope: <scopeTag>`
 * suffix. Returned ids are wrapped, in returned order, into one section
 * titled "Top matches across memory scopes".
 */
private async rerankCandidatesMultiScopeWithRerankService(
  query: string,
  flattenedCandidates: Array<MemorySearchResult & { scopeTag: string }>,
  maxItems: number,
): Promise<MemoryRerankOutput> {
  const documents = flattenedCandidates.map((candidate) => {
    // Truncate the candidate text BEFORE appending the scope suffix. The
    // suffix is the only scope signal the rerank endpoint receives, and
    // truncating the combined string could cut it off for long candidates.
    // NOTE(review): assumes truncateCandidateText trims from the end and that
    // exceeding its limit by the short suffix is acceptable — confirm.
    const body = this.truncateCandidateText(this.buildRerankerCandidateText(candidate))
    return { id: candidate.id, text: `${body}\n\nScope: ${candidate.scopeTag}` }
  })

  const reranked = await rerankService.rerankDocuments({ query, topN: maxItems, documents })

  return this.buildRerankOutput(
    reranked.results.map((item) => item.id),
    'Top matches across memory scopes',
  )
}
370
+
301
371
  private async searchMemories({
302
372
  query,
303
373
  memory,
@@ -0,0 +1,156 @@
1
+ import { z } from 'zod'
2
+
3
+ import { OPENROUTER_FAST_RERANK_MODEL_ID } from '../config/model-constants'
4
+ import { normalizeDirectOpenRouterModelId, resolveOpenRouterApiKey } from '../openrouter/direct-provider'
5
+ import { getRuntimeConfig } from '../runtime/runtime-config'
6
+
7
+ const OPENROUTER_RERANK_URL = 'https://openrouter.ai/api/v1/rerank' as const
8
+
9
+ const RerankResponseSchema = z
10
+ .object({
11
+ model: z.string().optional(),
12
+ results: z.array(
13
+ z
14
+ .object({
15
+ index: z.number().int().nonnegative().optional(),
16
+ relevance_score: z.number().optional(),
17
+ document: z.union([z.string(), z.object({ text: z.string().optional() }).passthrough()]).optional(),
18
+ })
19
+ .passthrough(),
20
+ ),
21
+ usage: z.object({ search_units: z.number().optional(), cost: z.number().optional() }).passthrough().optional(),
22
+ })
23
+ .passthrough()
24
+
25
+ export interface RerankDocument {
26
+ id: string
27
+ text: string
28
+ }
29
+
30
+ export interface RerankResultItem extends RerankDocument {
31
+ index: number
32
+ relevanceScore: number | null
33
+ }
34
+
35
+ export interface RerankUsage {
36
+ searchUnits?: number
37
+ cost?: number
38
+ }
39
+
40
+ export interface RerankResult {
41
+ modelId: string
42
+ results: RerankResultItem[]
43
+ usage?: RerankUsage
44
+ }
45
+
46
/**
 * Clamps a requested result count to the number of available documents.
 *
 * @param topN - Requested count; `undefined`, `NaN` and infinities mean "all".
 * @param total - Number of documents available.
 * @returns `0` when `total` is not positive; otherwise an integer in
 *   `[1, total]` (fractional requests are truncated toward zero).
 */
function clampTopN(topN: number | undefined, total: number): number {
  // With nothing to rank, never report a count larger than what exists.
  if (total <= 0) return 0
  if (topN === undefined || !Number.isFinite(topN)) return total
  return Math.max(1, Math.min(total, Math.trunc(topN)))
}
50
+
51
/**
 * Reads `memory.rerankerModelId` from the runtime config.
 *
 * @returns The trimmed model id (possibly an empty string), or `null` when
 *   reading the runtime config throws.
 */
function readConfiguredRerankModelId(): string | null {
  try {
    const { rerankerModelId } = getRuntimeConfig().memory
    return rerankerModelId.trim()
  } catch {
    // Any failure while reading the config is treated as "nothing configured".
    return null
  }
}
58
+
59
/**
 * Picks the rerank model id to send to OpenRouter.
 *
 * Precedence: the explicit `modelId` argument, then the runtime config's
 * `memory.rerankerModelId`, then `OPENROUTER_FAST_RERANK_MODEL_ID`. Blank
 * values are skipped.
 *
 * @param modelId - Optional per-call override.
 * @returns The chosen id passed through `normalizeDirectOpenRouterModelId`.
 */
function resolveRerankModelId(modelId?: string): string {
  const chosen = modelId?.trim() || readConfiguredRerankModelId() || OPENROUTER_FAST_RERANK_MODEL_ID

  // Normalize every source, including the built-in fallback, so the id has
  // the same form whether or not the runtime config is readable.
  return normalizeDirectOpenRouterModelId(chosen)
}
68
+
69
/**
 * Maps one rerank response entry back to the input document it refers to.
 *
 * An in-range `index` wins. Otherwise the entry's echoed document text
 * (either a plain string or an object with a `text` field) is compared
 * against the input texts and the first exact match is used.
 *
 * @returns The matching input document, or `null` when neither lookup works.
 */
function matchRerankedDocument(
  candidate: z.infer<typeof RerankResponseSchema>['results'][number],
  documents: RerankDocument[],
): RerankDocument | null {
  const { index, document: echoed } = candidate
  if (typeof index === 'number' && index >= 0 && index < documents.length) {
    return documents[index] ?? null
  }

  let echoedText: string | null = null
  if (typeof echoed === 'string') {
    echoedText = echoed
  } else if (typeof echoed?.text === 'string') {
    echoedText = echoed.text
  }
  if (!echoedText) return null

  for (const entry of documents) {
    if (entry.text === echoedText) return entry
  }
  return null
}
87
+
88
/** Client for the OpenRouter rerank endpoint (`OPENROUTER_RERANK_URL`). */
class RerankService {
  /**
   * Reranks `documents` against `query` and returns them in the order given
   * by the endpoint, deduplicated by document id and capped at the clamped
   * `topN`.
   *
   * @param params.query - Query text to rank against.
   * @param params.documents - Candidates; only their `text` is sent.
   * @param params.topN - Maximum number of results; defaults to all documents.
   * @param params.modelId - Optional model override.
   * @returns The model id (as reported by the response when present), the
   *   matched results, and usage figures when the response includes them.
   *   An empty `documents` array returns an empty result without a request.
   * @throws Error when no API key can be resolved or the response status is
   *   not OK; `RerankResponseSchema.parse` throws when the payload does not
   *   match the expected shape.
   */
  async rerankDocuments(params: {
    query: string
    documents: RerankDocument[]
    topN?: number
    modelId?: string
  }): Promise<RerankResult> {
    const { documents } = params
    if (documents.length === 0) {
      return { modelId: resolveRerankModelId(params.modelId), results: [] }
    }

    const apiKey = resolveOpenRouterApiKey()
    const modelId = resolveRerankModelId(params.modelId)
    const topN = clampTopN(params.topN, documents.length)

    const response = await fetch(OPENROUTER_RERANK_URL, {
      method: 'POST',
      headers: { Authorization: `Bearer ${apiKey}`, 'Content-Type': 'application/json' },
      body: JSON.stringify({
        model: modelId,
        query: params.query,
        documents: documents.map((document) => document.text),
        top_n: topN,
      }),
    })

    const responseText = await response.text()
    if (!response.ok) {
      throw new Error(`OpenRouter rerank failed (${response.status}): ${responseText}`)
    }

    let payload: unknown = responseText
    try {
      payload = JSON.parse(responseText)
    } catch {
      // Deliberately ignored: a non-JSON body stays a string and is rejected
      // by the schema parse below.
    }

    const parsed = RerankResponseSchema.parse(payload)
    const seen = new Set<string>()
    const results: RerankResultItem[] = []

    for (const item of parsed.results) {
      const matched = matchRerankedDocument(item, documents)
      if (!matched || seen.has(matched.id)) continue
      seen.add(matched.id)

      // Trust the response index only when it really points at the matched
      // document; an out-of-range index with a text-based match would
      // otherwise be reported as the document's position.
      const index =
        item.index !== undefined && documents[item.index] === matched ? item.index : documents.indexOf(matched)

      results.push({
        id: matched.id,
        text: matched.text,
        index,
        relevanceScore: typeof item.relevance_score === 'number' ? item.relevance_score : null,
      })

      if (results.length >= topN) break
    }

    return {
      modelId: parsed.model?.trim() || modelId,
      results,
      usage: parsed.usage
        ? {
            ...(parsed.usage.search_units !== undefined ? { searchUnits: parsed.usage.search_units } : {}),
            ...(parsed.usage.cost !== undefined ? { cost: parsed.usage.cost } : {}),
          }
        : undefined,
    }
  }
}

/** Shared `RerankService` instance. */
export const rerankService = new RerankService()