@lota-sdk/core 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (153) hide show
  1. package/infrastructure/schema/00_workstream.surql +55 -0
  2. package/infrastructure/schema/01_memory.surql +47 -0
  3. package/infrastructure/schema/02_execution_plan.surql +62 -0
  4. package/infrastructure/schema/03_learned_skill.surql +32 -0
  5. package/infrastructure/schema/04_runtime_bootstrap.surql +8 -0
  6. package/package.json +128 -0
  7. package/src/ai/definitions.ts +308 -0
  8. package/src/bifrost/bifrost.ts +256 -0
  9. package/src/config/agent-defaults.ts +99 -0
  10. package/src/config/constants.ts +33 -0
  11. package/src/config/env-shapes.ts +122 -0
  12. package/src/config/logger.ts +29 -0
  13. package/src/config/model-constants.ts +31 -0
  14. package/src/config/search.ts +17 -0
  15. package/src/config/workstream-defaults.ts +68 -0
  16. package/src/db/base.service.ts +55 -0
  17. package/src/db/cursor-pagination.ts +73 -0
  18. package/src/db/memory-query-builder.ts +207 -0
  19. package/src/db/memory-store.helpers.ts +118 -0
  20. package/src/db/memory-store.rows.ts +29 -0
  21. package/src/db/memory-store.ts +974 -0
  22. package/src/db/memory-types.ts +193 -0
  23. package/src/db/memory.ts +505 -0
  24. package/src/db/record-id.ts +78 -0
  25. package/src/db/service.ts +932 -0
  26. package/src/db/startup.ts +152 -0
  27. package/src/db/tables.ts +20 -0
  28. package/src/document/org-document-chunking.ts +224 -0
  29. package/src/document/parsing.ts +40 -0
  30. package/src/embeddings/provider.ts +76 -0
  31. package/src/index.ts +302 -0
  32. package/src/queues/context-compaction.queue.ts +82 -0
  33. package/src/queues/document-processor.queue.ts +118 -0
  34. package/src/queues/memory-consolidation.queue.ts +65 -0
  35. package/src/queues/post-chat-memory.queue.ts +128 -0
  36. package/src/queues/recent-activity-title-refinement.queue.ts +69 -0
  37. package/src/queues/regular-chat-memory-digest.config.ts +12 -0
  38. package/src/queues/regular-chat-memory-digest.queue.ts +73 -0
  39. package/src/queues/skill-extraction.config.ts +9 -0
  40. package/src/queues/skill-extraction.queue.ts +62 -0
  41. package/src/redis/connection.ts +176 -0
  42. package/src/redis/index.ts +30 -0
  43. package/src/redis/org-memory-lock.ts +43 -0
  44. package/src/redis/redis-lease-lock.ts +158 -0
  45. package/src/runtime/agent-contract.ts +1 -0
  46. package/src/runtime/agent-prompt-context.ts +119 -0
  47. package/src/runtime/agent-runtime-policy.ts +192 -0
  48. package/src/runtime/agent-stream-helpers.ts +117 -0
  49. package/src/runtime/agent-types.ts +22 -0
  50. package/src/runtime/approval-continuation.ts +16 -0
  51. package/src/runtime/chat-attachments.ts +46 -0
  52. package/src/runtime/chat-message.ts +10 -0
  53. package/src/runtime/chat-request-routing.ts +21 -0
  54. package/src/runtime/chat-run-orchestration.ts +25 -0
  55. package/src/runtime/chat-run-registry.ts +20 -0
  56. package/src/runtime/chat-types.ts +18 -0
  57. package/src/runtime/context-compaction-constants.ts +11 -0
  58. package/src/runtime/context-compaction-runtime.ts +86 -0
  59. package/src/runtime/context-compaction.ts +909 -0
  60. package/src/runtime/execution-plan.ts +59 -0
  61. package/src/runtime/helper-model.ts +405 -0
  62. package/src/runtime/indexed-repositories-policy.ts +28 -0
  63. package/src/runtime/instruction-sections.ts +8 -0
  64. package/src/runtime/llm-content.ts +71 -0
  65. package/src/runtime/memory-block.ts +264 -0
  66. package/src/runtime/memory-digest-policy.ts +14 -0
  67. package/src/runtime/memory-format.ts +8 -0
  68. package/src/runtime/memory-pipeline.ts +570 -0
  69. package/src/runtime/memory-prompts-fact.ts +47 -0
  70. package/src/runtime/memory-prompts-parse.ts +3 -0
  71. package/src/runtime/memory-prompts-update.ts +37 -0
  72. package/src/runtime/memory-scope.ts +43 -0
  73. package/src/runtime/plugin-types.ts +10 -0
  74. package/src/runtime/retrieval-adapters.ts +25 -0
  75. package/src/runtime/retrieval-pipeline.ts +3 -0
  76. package/src/runtime/runtime-extensions.ts +154 -0
  77. package/src/runtime/skill-extraction-policy.ts +3 -0
  78. package/src/runtime/team-consultation-orchestrator.ts +245 -0
  79. package/src/runtime/team-consultation-prompts.ts +32 -0
  80. package/src/runtime/title-helpers.ts +12 -0
  81. package/src/runtime/turn-lifecycle.ts +28 -0
  82. package/src/runtime/workstream-chat-helpers.ts +187 -0
  83. package/src/runtime/workstream-routing-policy.ts +301 -0
  84. package/src/runtime/workstream-state.ts +261 -0
  85. package/src/services/attachment.service.ts +159 -0
  86. package/src/services/chat-attachments.service.ts +17 -0
  87. package/src/services/chat-run-registry.service.ts +3 -0
  88. package/src/services/context-compaction-runtime.ts +13 -0
  89. package/src/services/context-compaction.service.ts +115 -0
  90. package/src/services/document-chunk.service.ts +141 -0
  91. package/src/services/execution-plan.service.ts +890 -0
  92. package/src/services/learned-skill.service.ts +328 -0
  93. package/src/services/memory-assessment.service.ts +43 -0
  94. package/src/services/memory.service.ts +807 -0
  95. package/src/services/memory.utils.ts +84 -0
  96. package/src/services/mutating-approval.service.ts +110 -0
  97. package/src/services/recent-activity-title.service.ts +74 -0
  98. package/src/services/recent-activity.service.ts +397 -0
  99. package/src/services/workstream-change-tracker.service.ts +313 -0
  100. package/src/services/workstream-message.service.ts +283 -0
  101. package/src/services/workstream-title.service.ts +58 -0
  102. package/src/services/workstream-turn-preparation.ts +1340 -0
  103. package/src/services/workstream-turn.ts +37 -0
  104. package/src/services/workstream.service.ts +854 -0
  105. package/src/services/workstream.types.ts +118 -0
  106. package/src/storage/attachment-parser.ts +101 -0
  107. package/src/storage/attachment-storage.service.ts +391 -0
  108. package/src/storage/attachments.types.ts +11 -0
  109. package/src/storage/attachments.utils.ts +58 -0
  110. package/src/storage/generated-document-storage.service.ts +55 -0
  111. package/src/system-agents/agent-result.ts +27 -0
  112. package/src/system-agents/context-compacter.agent.ts +46 -0
  113. package/src/system-agents/delegated-agent-factory.ts +177 -0
  114. package/src/system-agents/helper-agent-options.ts +20 -0
  115. package/src/system-agents/memory-reranker.agent.ts +38 -0
  116. package/src/system-agents/memory.agent.ts +58 -0
  117. package/src/system-agents/recent-activity-title-refiner.agent.ts +53 -0
  118. package/src/system-agents/regular-chat-memory-digest.agent.ts +75 -0
  119. package/src/system-agents/researcher.agent.ts +34 -0
  120. package/src/system-agents/skill-extractor.agent.ts +88 -0
  121. package/src/system-agents/skill-manager.agent.ts +80 -0
  122. package/src/system-agents/title-generator.agent.ts +42 -0
  123. package/src/system-agents/workstream-tracker.agent.ts +58 -0
  124. package/src/tools/execution-plan.tool.ts +163 -0
  125. package/src/tools/fetch-webpage.tool.ts +132 -0
  126. package/src/tools/firecrawl-client.ts +12 -0
  127. package/src/tools/memory-block.tool.ts +55 -0
  128. package/src/tools/read-file-parts.tool.ts +80 -0
  129. package/src/tools/remember-memory.tool.ts +85 -0
  130. package/src/tools/research-topic.tool.ts +15 -0
  131. package/src/tools/search-tools.ts +55 -0
  132. package/src/tools/search-web.tool.ts +175 -0
  133. package/src/tools/team-think.tool.ts +125 -0
  134. package/src/tools/tool-contract.ts +21 -0
  135. package/src/tools/user-questions.tool.ts +18 -0
  136. package/src/utils/async.ts +50 -0
  137. package/src/utils/date-time.ts +34 -0
  138. package/src/utils/error.ts +10 -0
  139. package/src/utils/errors.ts +28 -0
  140. package/src/utils/hono-error-handler.ts +71 -0
  141. package/src/utils/string.ts +51 -0
  142. package/src/workers/bootstrap.ts +44 -0
  143. package/src/workers/memory-consolidation.worker.ts +318 -0
  144. package/src/workers/regular-chat-memory-digest.helpers.ts +100 -0
  145. package/src/workers/regular-chat-memory-digest.runner.ts +363 -0
  146. package/src/workers/regular-chat-memory-digest.worker.ts +22 -0
  147. package/src/workers/skill-extraction.runner.ts +331 -0
  148. package/src/workers/skill-extraction.worker.ts +22 -0
  149. package/src/workers/utils/repo-indexer-chunker.ts +331 -0
  150. package/src/workers/utils/repo-structure-extractor.ts +645 -0
  151. package/src/workers/utils/repomix-process-concurrency.ts +65 -0
  152. package/src/workers/utils/sandbox-error.ts +5 -0
  153. package/src/workers/worker-utils.ts +182 -0
@@ -0,0 +1,152 @@
1
+ import { BoundQuery, RecordId } from 'surrealdb'
2
+ import { z } from 'zod'
3
+
4
+ import type { SurrealDBService, SurrealDatabaseLogger } from '../db/service'
5
+ import { TABLES } from '../db/tables'
6
+ import { getErrorMessage } from '../utils/error'
7
+
8
/** Value of the `key` field (and the record id) of the schema-readiness marker record. */
const DATABASE_BOOTSTRAP_KEY = 'database-schema-ready'
/** Default pause between retry attempts, in milliseconds. */
const DEFAULT_RETRY_DELAY_MS = 1_000
/** Default total wait budget for the startup helpers: three minutes, in milliseconds. */
const DEFAULT_MAX_WAIT_MS = 3 * 60 * 1_000
/** Retry progress is logged on the first attempt and then on every attempt that is a multiple of this. */
const RETRY_LOG_INTERVAL = 5
12
+
13
/** Validation schema for a row of the runtime bootstrap table. */
const RuntimeBootstrapRecordSchema = z.object({
  // The record id is accepted as-is; its concrete shape is not validated here.
  id: z.unknown(),
  key: z.string(),
  schemaFingerprint: z.string(),
  // Timestamps are accepted as Date objects, strings, or numbers.
  readyAt: z.union([z.date(), z.string(), z.number()]),
  updatedAt: z.union([z.date(), z.string(), z.number()]),
})
20
+
21
/** The logger methods used by the startup helpers in this module (`info`, `warn`, `error`). */
type StartupLogger = Pick<SurrealDatabaseLogger, 'info' | 'warn' | 'error'>
22
+
23
/**
 * Decides whether a given retry attempt should emit a progress log line.
 *
 * @param attempt 1-based attempt counter.
 * @returns `true` for the first attempt and for every attempt that is a multiple of
 * `RETRY_LOG_INTERVAL`; `false` otherwise.
 */
function shouldLogRetry(attempt: number): boolean {
  if (attempt === 1) return true
  return attempt % RETRY_LOG_INTERVAL === 0
}
26
+
27
/**
 * Calls `connect` repeatedly until it resolves or the wait budget is used up.
 *
 * After each failed attempt the function sleeps for `retryDelayMs` (default
 * `DEFAULT_RETRY_DELAY_MS`) before checking the budget again. Failures are reported through
 * `logger.warn` on the attempts selected by `shouldLogRetry`.
 *
 * @param params.connect Operation to attempt; a resolved promise counts as success.
 * @param params.label Human-readable name of the dependency, used in log and error messages.
 * @param params.logger Optional logger for retry warnings and the final timeout error.
 * @param params.retryDelayMs Pause between attempts, in milliseconds.
 * @param params.maxWaitMs Total time budget, in milliseconds (default `DEFAULT_MAX_WAIT_MS`).
 * @throws The last error raised by `connect` when it is an `Error`; otherwise a generic timeout `Error`.
 */
export async function connectWithStartupRetry(params: {
  connect: () => Promise<void>
  label: string
  logger?: StartupLogger
  retryDelayMs?: number
  maxWaitMs?: number
}): Promise<void> {
  const delayMs = params.retryDelayMs ?? DEFAULT_RETRY_DELAY_MS
  const budgetMs = params.maxWaitMs ?? DEFAULT_MAX_WAIT_MS
  const startedAt = Date.now()
  const elapsedMs = (): number => Date.now() - startedAt

  let lastError: unknown = null

  for (let attempt = 1; elapsedMs() <= budgetMs; attempt += 1) {
    try {
      await params.connect()
      return
    } catch (error) {
      lastError = error
      if (shouldLogRetry(attempt)) {
        params.logger?.warn?.(
          `Waiting for ${params.label} (${attempt}, elapsed=${elapsedMs()}ms): ${getErrorMessage(error)}`,
        )
      }
      await Bun.sleep(delayMs)
    }
  }

  // Budget exhausted: report once, then surface the most informative error available.
  params.logger?.error?.(`Timed out waiting for ${params.label}: ${getErrorMessage(lastError)}`)
  if (lastError instanceof Error) {
    throw lastError
  }
  throw new Error(`Timed out waiting for ${params.label}`)
}
61
+
62
/**
 * Reads the schema-readiness marker record from the runtime bootstrap table.
 *
 * @param databaseService Service used to run the query.
 * @returns The record whose `key` equals `DATABASE_BOOTSTRAP_KEY`, validated against
 * `RuntimeBootstrapRecordSchema`, or `null`.
 */
async function readDatabaseBootstrapRecord(
  databaseService: SurrealDBService,
): Promise<z.infer<typeof RuntimeBootstrapRecordSchema> | null> {
  // The key is passed as a bound parameter; only the table-name constant is interpolated.
  const lookup = new BoundQuery(
    `SELECT *
FROM ${TABLES.RUNTIME_BOOTSTRAP}
WHERE key = $key
LIMIT 1`,
    { key: DATABASE_BOOTSTRAP_KEY },
  )
  return await databaseService.queryOne(lookup, RuntimeBootstrapRecordSchema)
}
76
+
77
/**
 * Waits until the bootstrap marker record reports the expected schema fingerprint.
 *
 * Returns immediately when `expectedFingerprint` is missing or blank. Otherwise each attempt
 * optionally calls `connect`, reads the marker record, and compares its `schemaFingerprint`
 * with the trimmed expected value. Attempts are spaced by `retryDelayMs` (default
 * `DEFAULT_RETRY_DELAY_MS`) until `maxWaitMs` (default `DEFAULT_MAX_WAIT_MS`) has elapsed.
 *
 * @param params.databaseService Service used to read the marker record.
 * @param params.expectedFingerprint Fingerprint to wait for; blank or nullish disables the wait.
 * @param params.label Human-readable name used in log and error messages.
 * @param params.logger Optional logger for progress, retry warnings, and the timeout error.
 * @param params.connect Optional operation run at the start of every attempt.
 * @param params.retryDelayMs Pause between attempts, in milliseconds.
 * @param params.maxWaitMs Total time budget, in milliseconds.
 * @throws The error from the most recent attempt when that attempt failed with an `Error`;
 * otherwise a timeout `Error`.
 */
export async function waitForDatabaseBootstrap(params: {
  databaseService: SurrealDBService
  expectedFingerprint?: string | null
  label: string
  logger?: StartupLogger
  connect?: () => Promise<void>
  retryDelayMs?: number
  maxWaitMs?: number
}): Promise<void> {
  const expectedFingerprint = params.expectedFingerprint?.trim()
  if (!expectedFingerprint) {
    return
  }

  const retryDelayMs = params.retryDelayMs ?? DEFAULT_RETRY_DELAY_MS
  const maxWaitMs = params.maxWaitMs ?? DEFAULT_MAX_WAIT_MS
  const startedAt = Date.now()

  let attempt = 0
  let lastError: unknown = null

  while (Date.now() - startedAt <= maxWaitMs) {
    attempt += 1

    try {
      if (params.connect) {
        await params.connect()
      }

      const record = await readDatabaseBootstrapRecord(params.databaseService)
      if (record?.schemaFingerprint === expectedFingerprint) {
        return
      }

      // The database answered, so an earlier connection/query failure is no longer the reason
      // we are waiting. Forget it so a timeout does not rethrow a stale, misleading error.
      lastError = null

      if (shouldLogRetry(attempt)) {
        const currentFingerprint =
          typeof record?.schemaFingerprint === 'string' && record.schemaFingerprint.length > 0
            ? record.schemaFingerprint
            : 'missing'
        params.logger?.info?.(
          `Waiting for ${params.label} schema readiness (${attempt}, expected=${expectedFingerprint}, current=${currentFingerprint})`,
        )
      }
    } catch (error) {
      lastError = error
      if (shouldLogRetry(attempt)) {
        params.logger?.warn?.(`Waiting for ${params.label} schema readiness (${attempt}): ${getErrorMessage(error)}`)
      }
    }

    await Bun.sleep(retryDelayMs)
  }

  const timeoutMessage = `Timed out waiting for ${params.label} schema readiness`

  if (lastError instanceof Error) {
    // Mirror connectWithStartupRetry: log the timeout once before surfacing the failure.
    params.logger?.error?.(`${timeoutMessage}: ${getErrorMessage(lastError)}`)
    throw lastError
  }

  params.logger?.error?.(timeoutMessage)
  throw new Error(timeoutMessage)
}
136
+
137
/**
 * Writes (upserts) the schema-readiness marker record.
 *
 * The record lives in the runtime bootstrap table under the id `DATABASE_BOOTSTRAP_KEY` and
 * stores the given fingerprint together with `readyAt` / `updatedAt` set to the current time.
 *
 * @param params.databaseService Service used to perform the upsert.
 * @param params.schemaFingerprint Fingerprint to publish.
 */
export async function publishDatabaseBootstrap(params: {
  databaseService: SurrealDBService
  schemaFingerprint: string
}): Promise<void> {
  const { databaseService, schemaFingerprint } = params
  const table = TABLES.RUNTIME_BOOTSTRAP
  const recordId = new RecordId(table, DATABASE_BOOTSTRAP_KEY)

  const marker = {
    key: DATABASE_BOOTSTRAP_KEY,
    schemaFingerprint,
    readyAt: new Date(),
    updatedAt: new Date(),
  }

  await databaseService.upsert(table, recordId, marker, RuntimeBootstrapRecordSchema)
}
@@ -0,0 +1,20 @@
1
/**
 * Database table names, keyed by a constant-style identifier.
 * Declared `as const` so each value keeps its string-literal type.
 */
export const TABLES = {
  WORKSTREAM_MESSAGE: 'workstreamMessage',
  WORKSTREAM: 'workstream',
  RUNTIME_BOOTSTRAP: 'runtimeBootstrap',
  WORKSTREAM_ATTACHMENT: 'workstreamAttachment',
  MEMORY: 'memory',
  MEMORY_RELATION: 'memoryRelation',
  MEMORY_HISTORY: 'memoryHistory',
  LEARNED_SKILL: 'learnedSkill',
  PLAN: 'plan',
  PLAN_TASK: 'planTask',
  PLAN_EVENT: 'planEvent',
  ORGANIZATION: 'organization',
  USER: 'user',
  ORG_ACTION: 'orgAction',
  RECENT_ACTIVITY_EVENT: 'recentActivityEvent',
  RECENT_ACTIVITY: 'recentActivity',
} as const
19
+
20
/**
 * A table name: one of the literal values of `TABLES`, or any other string.
 * The `(string & {})` member keeps the union open to arbitrary strings without collapsing the
 * known literals into plain `string`.
 */
export type DatabaseTable = (typeof TABLES)[keyof typeof TABLES] | (string & {})
@@ -0,0 +1,224 @@
1
+ import { normalizeKey, normalizeTextBody, normalizeWhitespace } from './parsing'
2
+
3
/** One chunk of a parsed document, as returned by the `chunk*Document` functions. */
export type ParsedDocumentChunk = {
  // Key built from a prefix and a zero-padded ordinal (see toChunkEntries).
  chunkKey: string
  // Zero-based position of the chunk in the returned array (see withChunkIndexes).
  chunkIndex: number
  content: string
  // Section labels joined with ' > ' (see joinSectionPath), when available.
  sectionPath?: string
  // Page number the chunk came from; set by chunkPagedDocument only.
  pageStart?: number
  pageEnd?: number
}

/** A chunk before its `chunkIndex` has been assigned. */
type ChunkBase = Omit<ParsedDocumentChunk, 'chunkIndex'>

/** Text content of a single page together with its page number. */
type TextPage = { pageNumber: number; text: string }
15
+
16
/** Default maximum chunk length, in characters, used by chunkMarkdownDocument. */
const MARKDOWN_CHUNK_CHARS = 1_700
/** Default maximum chunk length, in characters, used by chunkPlainTextDocument. */
const TEXT_CHUNK_CHARS = 1_600
/** Default maximum chunk length, in characters, used by chunkPagedDocument. */
const PDF_CHUNK_CHARS = 1_400
19
+
20
/**
 * Builds a display path such as `Guide > Setup > Install` from section labels.
 *
 * Each label is whitespace-normalised; missing or blank labels are skipped.
 *
 * @returns The labels joined with ' > ', or `undefined` when no usable label remains.
 */
function joinSectionPath(parts: Array<string | undefined>): string | undefined {
  const segments: string[] = []
  for (const part of parts) {
    const label = normalizeWhitespace(part ?? '')
    if (label.length > 0) segments.push(label)
  }
  return segments.length === 0 ? undefined : segments.join(' > ')
}
24
+
25
/**
 * Splits text into paragraphs, using runs of two or more newlines as the separator.
 *
 * @param text Raw text; it is normalised with `normalizeTextBody` before splitting.
 * @returns Trimmed, non-empty paragraphs in source order (empty array for blank input).
 */
function splitParagraphUnits(text: string): string[] {
  const body = normalizeTextBody(text)
  if (body === '') return []

  const paragraphs: string[] = []
  for (const candidate of body.split(/\n{2,}/)) {
    const paragraph = candidate.trim()
    if (paragraph.length > 0) paragraphs.push(paragraph)
  }
  return paragraphs
}
34
+
35
/**
 * Breaks a single text unit into pieces of at most `maxChars` characters.
 *
 * Strategies are tried in order, recursing into every piece produced:
 *  1. split after sentence-ending punctuation (`.`, `!`, `?`) that is followed by whitespace;
 *  2. split on line breaks;
 *  3. cut fixed-size windows, moving the cut back to the last space when that space lies
 *     beyond 55% of the window.
 *
 * @param unit Text to split; it is normalised with `normalizeTextBody` first.
 * @param maxChars Maximum length of each returned piece. Must be at least 1.
 * @returns The pieces in source order; an empty array for blank input.
 * @throws RangeError when `maxChars` is below 1 or is not a number. Such a limit can never be
 * satisfied, and previously a non-positive value made the window loop spin forever.
 */
function splitOversizedUnit(unit: string, maxChars: number): string[] {
  const normalized = normalizeTextBody(unit)
  if (!normalized) return []

  // Written as a negated comparison so that NaN is rejected as well.
  if (!(maxChars >= 1)) {
    throw new RangeError(`maxChars must be at least 1 (received ${maxChars})`)
  }

  if (normalized.length <= maxChars) return [normalized]

  const sentenceParts = normalized
    .split(/(?<=[.!?])\s+/)
    .map((value) => value.trim())
    .filter((value) => value.length > 0)

  if (sentenceParts.length > 1) {
    return sentenceParts.flatMap((part) => splitOversizedUnit(part, maxChars))
  }

  const lineParts = normalized
    .split(/\n+/)
    .map((value) => value.trim())
    .filter((value) => value.length > 0)

  if (lineParts.length > 1) {
    return lineParts.flatMap((part) => splitOversizedUnit(part, maxChars))
  }

  // Last resort: a single sentence on a single line that is still too long.
  // Work with a whole number of characters so the cursor always lands on a string index.
  const windowSize = Math.floor(maxChars)
  const minBreakOffset = Math.floor(windowSize * 0.55)

  const chunks: string[] = []
  let cursor = 0
  while (cursor < normalized.length) {
    let end = Math.min(cursor + windowSize, normalized.length)
    if (end < normalized.length) {
      // Prefer a word boundary, but only if it does not leave the piece too short.
      const breakAt = normalized.lastIndexOf(' ', end)
      if (breakAt > cursor + minBreakOffset) {
        end = breakAt
      }
    }

    const slice = normalized.slice(cursor, end).trim()
    if (slice) chunks.push(slice)
    cursor = end
    // Skip the spaces at the cut so the next piece does not start with them.
    while (cursor < normalized.length && normalized[cursor] === ' ') {
      cursor += 1
    }
  }

  return chunks
}
79
+
80
/**
 * Packs text units into chunk bodies of at most `maxChars` characters.
 *
 * Consecutive units are joined with a blank line while the result still fits. A unit that is
 * longer than `maxChars` on its own first flushes whatever is pending and is then split with
 * `splitOversizedUnit`; each resulting piece becomes its own chunk and is not merged with
 * neighbouring units.
 *
 * @param units Text units in document order; blank units are ignored.
 * @param maxChars Maximum chunk length in characters.
 * @returns Non-empty chunk bodies in document order.
 */
function buildChunkBodies(units: string[], maxChars: number): string[] {
  const bodies: string[] = []
  let pending = ''

  const pushBody = (raw: string): void => {
    const cleaned = normalizeTextBody(raw)
    if (cleaned) bodies.push(cleaned)
  }

  const flushPending = (): void => {
    pushBody(pending)
    pending = ''
  }

  for (const unit of units) {
    const text = normalizeTextBody(unit)
    if (!text) continue

    if (text.length > maxChars) {
      if (pending) flushPending()
      for (const piece of splitOversizedUnit(text, maxChars)) {
        pushBody(piece)
      }
      continue
    }

    if (!pending) {
      pending = text
      continue
    }

    const merged = `${pending}\n\n${text}`
    if (merged.length > maxChars) {
      // Adding this unit would overflow: close the current chunk and start a new one.
      flushPending()
      pending = text
    } else {
      pending = merged
    }
  }

  flushPending()
  return bodies
}
116
+
117
/**
 * Wraps chunk bodies in `ChunkBase` records.
 *
 * Every entry receives the key `<prefix>:<ordinal>`, where the ordinal is the 1-based position
 * zero-padded to three digits, plus the shared section and page metadata.
 */
function toChunkEntries(params: {
  prefix: string
  contents: string[]
  sectionPath?: string
  pageStart?: number
  pageEnd?: number
}): ChunkBase[] {
  const { prefix, contents, sectionPath, pageStart, pageEnd } = params

  return contents.map((content, position) => {
    const ordinal = String(position + 1).padStart(3, '0')
    return { chunkKey: `${prefix}:${ordinal}`, content, sectionPath, pageStart, pageEnd }
  })
}
132
+
133
/** Returns copies of the chunks with `chunkIndex` set to each chunk's zero-based array position. */
function withChunkIndexes(chunks: ChunkBase[]): ParsedDocumentChunk[] {
  return chunks.map((entry, chunkIndex) => {
    const indexed: ParsedDocumentChunk = { ...entry, chunkIndex }
    return indexed
  })
}
136
+
137
/**
 * Chunks plain text by paragraph.
 *
 * @param params.text Text to chunk.
 * @param params.chunkChars Maximum chunk length in characters (default `TEXT_CHUNK_CHARS`).
 * @param params.chunkKeyPrefix Prefix for chunk keys (default `'text'`).
 * @param params.sectionPath Optional section path copied onto every chunk.
 * @returns Chunks in document order with sequential `chunkIndex` values.
 */
export function chunkPlainTextDocument(params: {
  text: string
  chunkChars?: number
  chunkKeyPrefix?: string
  sectionPath?: string
}): ParsedDocumentChunk[] {
  const limit = params.chunkChars ?? TEXT_CHUNK_CHARS
  const keyPrefix = params.chunkKeyPrefix ?? 'text'

  const paragraphs = splitParagraphUnits(params.text)
  const bodies = buildChunkBodies(paragraphs, limit)
  const entries = toChunkEntries({ prefix: keyPrefix, contents: bodies, sectionPath: params.sectionPath })

  return withChunkIndexes(entries)
}
149
+
150
/**
 * Chunks a Markdown document section by section.
 *
 * The text is walked line by line. Every ATX heading (`#` to `######`) closes the section
 * collected so far and starts a new one; the heading line itself is kept as the first line of
 * its section. Each section is chunked by paragraph and tagged with a section path built from
 * `baseSectionPath` and the current heading hierarchy.
 *
 * Lines inside fenced code blocks (opened by three or more backticks or tildes) are never
 * treated as headings, so `# comment` lines in code samples do not split a section or alter
 * the heading hierarchy.
 *
 * @param params.text Markdown source.
 * @param params.chunkChars Maximum chunk length in characters (default `MARKDOWN_CHUNK_CHARS`).
 * @param params.chunkKeyPrefix Prefix for chunk keys (default `'markdown'`).
 * @param params.baseSectionPath Optional label placed in front of the heading hierarchy.
 * @returns Chunks in document order with sequential `chunkIndex` values.
 */
export function chunkMarkdownDocument(params: {
  text: string
  chunkChars?: number
  chunkKeyPrefix?: string
  baseSectionPath?: string
}): ParsedDocumentChunk[] {
  const chunkChars = params.chunkChars ?? MARKDOWN_CHUNK_CHARS
  const prefix = params.chunkKeyPrefix ?? 'markdown'
  const lines = normalizeTextBody(params.text).split('\n')
  const chunks: ChunkBase[] = []
  const headingStack: string[] = []
  let currentLines: string[] = []
  let sectionCounter = 0
  // Marker run (for example "```" or "~~~~") of the fenced code block currently open, if any.
  let openFence: string | null = null

  const flushCurrent = () => {
    const content = normalizeTextBody(currentLines.join('\n'))
    if (!content) {
      currentLines = []
      return
    }

    sectionCounter += 1
    const sectionPath = joinSectionPath([params.baseSectionPath, ...headingStack])
    const sectionKeyBase = sectionPath ? normalizeKey(sectionPath) : 'section'
    // The running counter keeps keys unique when two sections share the same path.
    const sectionKey = `${sectionKeyBase}-${String(sectionCounter).padStart(3, '0')}`
    const contents = buildChunkBodies(splitParagraphUnits(content), chunkChars)
    chunks.push(...toChunkEntries({ prefix: `${prefix}:${sectionKey}`, contents, sectionPath }))
    currentLines = []
  }

  for (const line of lines) {
    const fenceMatch = line.match(/^ {0,3}(`{3,}|~{3,})(.*)$/)
    if (fenceMatch) {
      const marker = fenceMatch[1]
      if (openFence === null) {
        openFence = marker
      } else if (
        marker[0] === openFence[0] &&
        marker.length >= openFence.length &&
        fenceMatch[2].trim() === ''
      ) {
        // A closing fence uses the same character, is at least as long as the opener, and
        // carries no trailing text.
        openFence = null
      }
      currentLines.push(line)
      continue
    }

    const headingMatch = openFence === null ? line.match(/^(#{1,6})\s+(.+?)\s*$/) : null
    if (!headingMatch) {
      currentLines.push(line)
      continue
    }

    flushCurrent()
    const depth = headingMatch[1].length
    const headingText = normalizeWhitespace(headingMatch[2])
    // Drop this level and everything deeper, then record the new heading at its depth.
    // Skipped levels are left empty and are filtered out by joinSectionPath.
    headingStack.splice(depth - 1)
    headingStack[depth - 1] = headingText
    currentLines = [line]
  }

  flushCurrent()
  return withChunkIndexes(chunks)
}
198
+
199
/**
 * Chunks a paged document one page at a time, so no chunk spans two pages.
 *
 * Blank pages are skipped. Chunk keys have the form `<prefix>:<page>:<ordinal>`, with the page
 * number zero-padded to four digits, and `pageStart` / `pageEnd` are both set to that page.
 *
 * @param params.pages Pages to chunk, in the order they should appear in the result.
 * @param params.chunkChars Maximum chunk length in characters (default `PDF_CHUNK_CHARS`).
 * @param params.chunkKeyPrefix Prefix for chunk keys (default `'page'`).
 * @returns Chunks in page order with sequential `chunkIndex` values.
 */
export function chunkPagedDocument(params: {
  pages: TextPage[]
  chunkChars?: number
  chunkKeyPrefix?: string
}): ParsedDocumentChunk[] {
  const limit = params.chunkChars ?? PDF_CHUNK_CHARS
  const keyPrefix = params.chunkKeyPrefix ?? 'page'

  const entries = params.pages.flatMap((page): ChunkBase[] => {
    const body = normalizeTextBody(page.text)
    if (!body) return []

    const paddedPage = String(page.pageNumber).padStart(4, '0')
    return toChunkEntries({
      prefix: `${keyPrefix}:${paddedPage}`,
      contents: buildChunkBodies(splitParagraphUnits(body), limit),
      pageStart: page.pageNumber,
      pageEnd: page.pageNumber,
    })
  })

  return withChunkIndexes(entries)
}
@@ -0,0 +1,40 @@
1
/**
 * Collapses every run of whitespace (including line breaks) into a single space and removes
 * leading and trailing whitespace.
 */
export function normalizeWhitespace(value: string): string {
  const words = value.split(/\s+/).filter((word) => word.length > 0)
  return words.join(' ')
}
4
+
5
/**
 * Cleans a block of text while preserving its line structure: removes NUL characters and
 * carriage returns, then trims leading and trailing whitespace.
 */
export function normalizeTextBody(value: string): string {
  const withoutControlChars = value.replace(/[\u0000\r]/g, '')
  return withoutControlChars.trim()
}
8
+
9
/**
 * Turns free text into a lowercase, hyphen-separated key of at most 120 characters.
 *
 * Steps: normalise whitespace, lowercase, remove every character that is not a word character,
 * whitespace, `.` or `-`, replace whitespace with `-`, then truncate.
 *
 * Note: `\w` is used without the `u` flag, so it only matches ASCII letters, digits and `_`;
 * other letters (accented, non-Latin) are removed. The result can be an empty string.
 */
export function normalizeKey(value: string): string {
  const lowered = normalizeWhitespace(value).toLowerCase()
  const stripped = lowered.replace(/[^\w\s.-]/g, '')
  const hyphenated = stripped.replace(/\s+/g, '-')
  return hyphenated.slice(0, 120)
}
16
+
17
/**
 * Builds a key of the form `<kind>:<normalised key>`.
 *
 * When `rawKey` normalises to an empty string, the fallback suffix `item` is used instead.
 */
export function makeMemoryKey(kind: string, rawKey: string): string {
  const suffix = normalizeKey(rawKey) || 'item'
  return `${kind}:${suffix}`
}
21
+
22
/**
 * Limits a string to roughly `maxChars` characters, appending a truncation marker when text
 * was cut off.
 *
 * @param value Text to limit.
 * @param maxChars Maximum number of UTF-16 code units of `value` to keep. Negative values are
 * treated as 0.
 * @returns `value` unchanged when it already fits; otherwise the kept prefix followed by
 * `\n\n[...truncated due to size...]`. The marker is added on top of `maxChars`.
 */
export function truncateForModel(value: string, maxChars: number): string {
  if (value.length <= maxChars) return value

  // A negative end index would make slice() count from the end of the string instead of
  // keeping nothing, so clamp it.
  let end = Math.max(0, Math.floor(maxChars))

  // Do not cut between the two halves of a surrogate pair: a lone high surrogate is not valid
  // text and is typically rendered or encoded as a replacement character.
  if (end > 0) {
    const lastKept = value.charCodeAt(end - 1)
    const firstDropped = value.charCodeAt(end)
    const splitsPair =
      lastKept >= 0xd800 && lastKept <= 0xdbff && firstDropped >= 0xdc00 && firstDropped <= 0xdfff
    if (splitsPair) end -= 1
  }

  return `${value.slice(0, end)}\n\n[...truncated due to size...]`
}
26
+
27
/**
 * Returns the distinct, non-blank strings from `items`, in first-seen order, capped at `limit`.
 *
 * Strings are whitespace-normalised before comparison and comparison is case-insensitive; the
 * first spelling encountered is the one returned.
 *
 * @param items Candidate strings.
 * @param limit Maximum number of strings to return; zero or a negative number yields an empty array.
 * @returns At most `limit` normalised, de-duplicated strings.
 */
export function dedupeStrings(items: string[], limit: number): string[] {
  const out: string[] = []
  // The cap is otherwise only checked after a push, which used to let one item through for
  // limit <= 0.
  if (limit <= 0) return out

  const seen = new Set<string>()
  for (const raw of items) {
    const value = normalizeWhitespace(raw)
    if (!value) continue
    const key = value.toLowerCase()
    if (seen.has(key)) continue
    seen.add(key)
    out.push(value)
    if (out.length >= limit) break
  }
  return out
}
@@ -0,0 +1,76 @@
1
+ import { embed, embedMany } from 'ai'
2
+
3
+ import { bifrostEmbeddingModel } from '../bifrost/bifrost'
4
+ import { env } from '../config/env-shapes'
5
+
6
/** Model-id prefixes accepted by resolveEmbeddingModel; ids starting with anything else are rejected. */
const SUPPORTED_EMBEDDING_PREFIXES = ['openai/', 'openrouter/'] as const
7
+
8
/**
 * Validates an embedding model id and resolves it through `bifrostEmbeddingModel`.
 *
 * @param modelId Model id; surrounding whitespace is ignored.
 * @returns The value returned by `bifrostEmbeddingModel` for the trimmed id.
 * @throws Error when the id is blank or does not start with one of
 * `SUPPORTED_EMBEDDING_PREFIXES`.
 */
function resolveEmbeddingModel(modelId: string) {
  const normalized = modelId.trim()
  if (!normalized) {
    throw new Error('[embeddings-provider] Model id is required.')
  }

  const isSupported = SUPPORTED_EMBEDDING_PREFIXES.some((prefix) => normalized.startsWith(prefix))
  if (!isSupported) {
    // Append the wildcard to every prefix; joining first produced "openai/, openrouter/*".
    const patterns = SUPPORTED_EMBEDDING_PREFIXES.map((prefix) => `${prefix}*`).join(', ')
    throw new Error(`[embeddings-provider] Unsupported model id "${modelId}". Use one of: ${patterns}.`)
  }

  return bifrostEmbeddingModel(normalized)
}
22
+
23
/**
 * Embedding client backed by the model named in `env.AI_EMBEDDING_MODEL`.
 * The model is resolved on first use and reused afterwards.
 */
class ProviderEmbeddings {
  private _model: ReturnType<typeof resolveEmbeddingModel> | null = null

  /** Returns the cached model, resolving it on the first call. */
  private getModel() {
    const model = this._model ?? resolveEmbeddingModel(env.AI_EMBEDDING_MODEL)
    this._model = model
    return model
  }

  /**
   * Embeds a single piece of text.
   *
   * @returns The embedding vector, or an empty array when `text` is blank (no request is made).
   */
  async embedQuery(text: string): Promise<number[]> {
    const trimmed = text.trim()
    if (trimmed.length === 0) return []

    const { embedding } = await embed({ model: this.getModel(), value: trimmed, maxRetries: 2 })
    return embedding.map((component) => Number(component))
  }

  /**
   * Embeds several texts in one request.
   *
   * @returns One vector per input, in input order. Blank inputs are not sent to the model and
   * get an empty array, as does any input for which no embedding came back.
   */
  async embedDocuments(values: string[]): Promise<number[][]> {
    if (values.length === 0) return []

    const trimmed = values.map((value) => value.trim())

    // Remember where each non-blank input sits so results can be put back in place.
    const slots: number[] = []
    const inputs: string[] = []
    trimmed.forEach((value, index) => {
      if (value.length > 0) {
        slots.push(index)
        inputs.push(value)
      }
    })

    const vectors: number[][] = trimmed.map(() => [])
    if (inputs.length === 0) {
      return vectors
    }

    const { embeddings } = await embedMany({ model: this.getModel(), values: inputs, maxRetries: 2 })

    embeddings.forEach((embedding, position) => {
      const slot = slots[position]
      // Ignore any surplus embedding that has no matching input.
      if (slot === undefined) return
      vectors[slot] = embedding.map((component) => Number(component))
    })

    return vectors
  }
}
73
+
74
/** Creates a new `ProviderEmbeddings` instance; its model is resolved on first use. */
export function createDefaultEmbeddings(): ProviderEmbeddings {
  const embeddings = new ProviderEmbeddings()
  return embeddings
}